-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhuffman.py
100 lines (81 loc) · 2.59 KB
/
huffman.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from bisect import bisect_left
from collections import namedtuple
from pprint import pp
import sys
CodeTree = namedtuple("CodeTree",
["symbols", "weight", "left", "right"],
defaults=[None, None])
def main():
if len(sys.argv) != 2:
print(f"usage: {sys.argv[0]} infile")
sys.exit(1)
pairs = []
with open(sys.argv[1], "r") as f:
for line in f.readlines():
sym, freq = line.split()
pairs.append((sym, int(freq)))
t = make_huffman_tree(pairs)
pp(t)
with open("hamlet-sanitized.txt", "r") as f:
m = f.read().strip().split()
do(m, t)
def do(msg, tree):
e = encode(msg, tree)
assert decode(e, tree) == msg
print(e, len(e))
def encode(msg, tree):
def encode_one(sym, tree):
assert sym in tree.symbols, "ERROR: symbol does not match tree"
if is_leaf(tree):
return []
else:
if sym in tree.left.symbols:
return [0] + encode_one(sym, tree.left)
else:
return [1] + encode_one(sym, tree.right)
enc = []
for sym in msg:
enc += encode_one(sym, tree)
return enc
def decode(bits, tree):
def choose_next(bit, curr):
assert bit in [0, 1], f"ERROR: bad bit {bit}"
if bit == 0:
return curr.left
return curr.right
def decode_one(bits, curr):
if not bits:
return []
branch = curr.left if bits[0] == 0 else curr.right
if is_leaf(branch):
return branch.symbols + decode_one(bits[1:], tree)
else:
return decode_one(bits[1:], branch)
return decode_one(bits, tree)
def make_huffman_tree(pairs):
return successive_merge(make_leaves(pairs))
def successive_merge(trees):
if not trees:
return []
elif not trees[1:]:
return trees[0]
else:
x, y, rest = trees[0], trees[1], trees[2:]
return successive_merge(adjoin_tree(make_code_tree(x, y), rest))
def adjoin_tree(t, trees):
trees.insert(bisect_left(trees, t.weight, key=lambda t: t.weight), t)
return trees
def make_code_tree(left, right):
return CodeTree(left.symbols + right.symbols,
left.weight + right.weight,
left,
right)
def in_tree(sym, tree):
return sym in tree.symbols
def make_leaves(pairs):
pairs = map(lambda p: ([p[0]], p[1]), pairs)
return [CodeTree(*p) for p in sorted(pairs, key = lambda p: p[1])]
def is_leaf(tree):
return not tree.left and not tree.right
if __name__ == "__main__":
main()