from itertools import count
from collections import Counter
from heapq import heapify, heappush, heappopdef huffman_tree(s):# 统计每个字符出现的次数s = Counter(s) # 计算可迭代序列中元素的数量,返回字典类型数据chs, freqs = s.keys(), s.values()nums = count() # 创建一个从start=0开始的无限迭代器,每次迭代加上step=1# 构造堆tree = list(zip(freqs, nums, chs)) # 创建列表,列表中每项为一个三元组(fregs:出现频率,nums:序号,chs:字符名)heapify(tree) # 基于tree列表,构建堆# 合并结点,构造哈夫曼树while len(tree)>1:fa, _, a = heappop(tree) # 从堆中弹出并返回最小的元素,保持堆属性fb, _, b = heappop(tree)heappush(tree, (fa + fb, next(nums), [a,b])) # 将一个元素压入堆中,保持堆属性print('='*30)print(tree)print('='*30)return tree[0][2] # tree为列表,tree[0][0]堆顶元素和,tree[0][1]:结点序号,tree[0][2]:字符化的树结构# tree = [(36, 20, [[['r', 'e'], [['d', 'f'], ['j', 'w']]], ['a', [[['s', 'g'], 'c'], 'b']]])]def get_table(tree, prefix=''): # tree:待编码子树,prefix:编码前已确定的树前哈夫曼编码# 遍历哈夫曼树,为每个字符编码if isinstance(tree, str):yield (tree, prefix) # yield:返回生成器returnfor bit, child in zip('01', tree):for pair in get_table(child, prefix+bit):yield pairdef main(s):print(s)tree = huffman_tree(s)print('='*50)gener_test = get_table(tree)for item in gener_test:print(item)print('='*50)table = dict(get_table(tree)) # 单个get_table返回的的是一个生成器。每个生成器的内容为('字符名称','字符对应的哈夫曼编码')print(table)# 根据哈夫曼编码表对字符串进行编码code = ''.join(map(lambda ch: table[ch],s)) # s字符串的更换为哈夫曼编码后,以空格分割print(code)# 根据哈夫曼编码表进行解码code = '0101101101001' # 此处可以输入待解码的哈夫曼编码ss = ''while len(code) > 0:for ch, c in table.items():if code.startswith(c):ss = ss + chcode = code[len(c):] # code右移,移出已匹配的编码print()print(ss)main('baaabbcbbbcdcdfsraejajgfweraawaearaa')
引用位置(不带注释):Python实现哈夫曼编码与解码