#!/home/98/46/2924698/bin/python
"""Huffman-compress short strings (e.g. e-mail addresses) and obscure them.

The pipeline is: Huffman-encode characters into bits, pack the bits into
bytes, optionally scramble the bytes with `crypt`, then emit unpadded base64.
`ascii_huffman_decode` reverses each step.

Byte data is carried in native `str` objects throughout (one character per
byte value), which is what the original Python 2 code did.  The module is
written so that it also parses and runs on Python 3, with the exception of
`crypt`, which needs the legacy `rotor` module.
"""

import binascii
import re
import sys

try:
    from hashlib import sha1 as _sha1
except ImportError:  # interpreters that predate hashlib only ship `sha`
    from sha import sha as _sha1

# `str` on Python 2, `bytes` on Python 3.
_BYTES = type("".encode("ascii"))

# Bytes fed to binascii per call.  57 is a multiple of 3, so only the final
# chunk can ever produce '=' padding.
_B64_CHUNK = 57


def _as_bytes(data):
    "Return data as a byte string; text is mapped 1:1 through latin-1"
    if isinstance(data, _BYTES):
        return data
    return data.encode("latin-1")


def _as_text(data):
    "Return data as a native str; bytes are mapped 1:1 through latin-1"
    if isinstance(data, str):
        return data
    return data.decode("latin-1")


def _b64encode(data):
    "Base64-encode data into a single str with no line breaks"
    raw = _as_bytes(data)
    pieces = []
    for start in range(0, len(raw), _B64_CHUNK):
        line = binascii.b2a_base64(raw[start:start + _B64_CHUNK])
        pieces.append(_as_text(line).rstrip("\n"))
    return "".join(pieces)


def _identity_crypt(k, s):
    "Default 'cipher' for the Huffman helpers: return s untouched"
    return s


#-- Let's define the en/decryption functions (crude for now)
def xor_key(key, nbytes=100):
    """Produce at least `nbytes` bytes of key-based pseudo-random data

    Each round appends SHA-1(stream_so_far + key) to the stream, so the
    result is always a multiple of 20 bytes long and a longer request only
    extends a shorter one.  With the default of 100 this is exactly the five
    rounds (100 bytes) the function has always produced.
    """
    stream = ""
    while len(stream) < nbytes:
        stream += _as_text(_sha1(_as_bytes(stream + key)).digest())
    return stream


def crypt(key, s, decrypt):
    """En/decrypt a string with a key

    The technique here is still weak.  We crypt with rotor to get a
    reasonable diffusion, then xor against an SHA-based key for stronger
    confusion, then rotor again to further diffuse the bytes.

    key     -- passphrase used for both the rotor and the xor pad
    s       -- string to transform
    decrypt -- true to decrypt, false to encrypt

    Returns the transformed string.  Raises ImportError when no `rotor`
    module is available (it is not part of current standard libraries).
    """
    import rotor  # imported lazily so the rest of the module works without it
    machine = rotor.newrotor(key, 12)
    if decrypt:
        rot = machine.decrypt
    else:
        rot = machine.encrypt
    s = rot(s)
    # Ask for a pad as long as the data; the old fixed 100-byte pad made any
    # longer input fail with IndexError.
    pad = xor_key(key, len(s))
    s = "".join([chr(ord(a) ^ ord(b)) for (a, b) in zip(s, pad)])
    return rot(s)


def encrypt(k, s, dec=0):
    "Encrypt s with key k (thin wrapper around crypt)"
    return crypt(k, s, dec)


def decrypt(k, s, dec=1):
    "Decrypt s with key k (thin wrapper around crypt)"
    return crypt(k, s, dec)


#-- Huffman encoding and decoding of strings
def read_encoding(fname):
    """Read in a huffman character map from a file

    Each line of the encoding should have the form:

        1213 E :1111

    That is, an occurrence count (generated from a corpus), whitespace, a
    character, whitespace, a colon followed by a string of 1s and 0s.
    Lines that do not have exactly three whitespace-separated fields, or
    whose third field does not start with a colon, are ignored.

    Returns (encmap, decmap): character -> bit tuple, and the inverse.
    """
    fh = open(fname)
    try:
        lines = fh.readlines()
    finally:
        fh.close()
    encmap = {}
    for line in lines:
        flds = line.split()
        if len(flds) == 3 and flds[2][0] == ":":
            encmap[flds[1]] = tuple(map(int, flds[2][1:]))
    decmap = dict([(bits, c) for (c, bits) in encmap.items()])
    return encmap, decmap


def ascii_huffman_encode(s, encmap, encrypt=_identity_crypt, key=None):
    """Compress a string as 7-bit ASCII

    s       -- text to compress; every character must be a key of encmap
    encmap  -- character -> bit tuple, as returned by read_encoding
    encrypt -- callable(key, data) applied to the packed bytes
    key     -- passed through to encrypt

    Returns base64 text with the '=' padding and all line breaks removed.
    Raises KeyError for a character that is missing from encmap.
    """
    bits = []
    for c in s:
        bits.extend(encmap[c])
    bits.extend([0] * (-len(bits) % 8))  # we need a whole number of bytes
    octets = []
    for start in range(0, len(bits), 8):
        value = 0
        for bit in bits[start:start + 8]:  # most significant bit first
            value = value * 2 + bit
        octets.append(chr(value))
    scrambled = encrypt(key, "".join(octets))
    return _b64encode(scrambled).replace("=", "")


hex2bits = {'0': (0, 0, 0, 0), '1': (0, 0, 0, 1),
            '2': (0, 0, 1, 0), '3': (0, 0, 1, 1),
            '4': (0, 1, 0, 0), '5': (0, 1, 0, 1),
            '6': (0, 1, 1, 0), '7': (0, 1, 1, 1),
            '8': (1, 0, 0, 0), '9': (1, 0, 0, 1),
            'a': (1, 0, 1, 0), 'b': (1, 0, 1, 1),
            'c': (1, 1, 0, 0), 'd': (1, 1, 0, 1),
            'e': (1, 1, 1, 0), 'f': (1, 1, 1, 1)}


def string2bits(s):
    "Convert a string to a bit list, based on the ord of each byte"
    bits = []
    for c in s:
        for digit in '%02x' % ord(c):
            bits.extend(hex2bits[digit])
    return bits


def ascii_huffman_decode(enc, decmap, decrypt=_identity_crypt, key=None):
    """Restore an huffman encoded 7-bit string

    enc     -- text produced by ascii_huffman_encode
    decmap  -- bit tuple -> character, as returned by read_encoding
    decrypt -- callable(key, data) applied to the decoded bytes
    key     -- passed through to decrypt

    NOTE: the encoder fills the last byte with 0 bits.  Left-over bits that
    match no code are dropped, but if decmap contains an all-zero code short
    enough to fit in that filler it will show up as extra trailing output.
    """
    # Drop whitespace first so the length reflects base64 characters only,
    # then restore the '=' padding the encoder stripped.
    compact = "".join(enc.split())
    padded = compact + "=" * (-len(compact) % 4)
    plain = decrypt(key, _as_text(binascii.a2b_base64(padded)))
    bits = string2bits(plain)
    decoded, head = [], 0
    for pos in range(len(bits) + 1):
        code = tuple(bits[head:pos])
        if code in decmap:
            decoded.append(decmap[code])
            head = pos
    return "".join(decoded)


#-- General utility functions
def address_like(s):
    """Return a match object if s starts with something address-shaped

    The pattern is only anchored at the start of s; None means no match.
    """
    return re.match(r'[\w_+./!-]+@([\w-]+\.)+[A-Za-z]{2,4}', s)


def main():
    """Round-trip every address on stdin and report the compression ratio

    Reads the passphrase from the first line of 'ANONYM_PASSPHRASE' (the
    line is used as-is, including its line ending) and the code table from
    'huffman'.  Each decoded address is printed; inputs that do not look
    like addresses are echoed first.
    """
    fh = open('ANONYM_PASSPHRASE')
    try:
        key = fh.readlines()[0]
    finally:
        fh.close()
    encmap, decmap = read_encoding('huffman')
    plain, coded = 0, 0
    for addr in sys.stdin:
        addr = addr.strip().upper()
        enc_addr = ascii_huffman_encode(addr, encmap, encrypt, key)
        if not address_like(addr):
            print(addr)
        print(ascii_huffman_decode(enc_addr, decmap, decrypt, key))
        plain += len(addr)
        coded += len(enc_addr)
    if plain:  # nothing to report (and nothing to divide by) on empty input
        sys.stderr.write('Encoded bits-per-char %.2f\n' % (8.*coded/plain))


if __name__ == '__main__':
    main()