[Python] Huffman Encoding
# Huffman Encoding
import heapq


# Tree node
class Node(object):
    """A node of a Huffman tree.

    ``freq`` is the weight of the node, ``left`` / ``right`` are its
    children (``None`` for a leaf) and ``father`` is its parent (``None``
    for the root or for a node that has not been linked into a tree yet).
    """

    def __init__(self, freq):
        self.left = None
        self.right = None
        self.father = None
        self.freq = freq

    def isLeft(self):
        """Return True if this node is the left child of its father.

        A node without a father is nobody's left child, so False is
        returned for it.
        """
        # Identity, not equality: two distinct nodes may carry equal weights.
        return self.father is not None and self.father.left is self


# Create the leaf nodes
def createNodes(freqs):
    """Return one leaf ``Node`` per frequency, in the same order as *freqs*."""
    return [Node(freq) for freq in freqs]


# Create the Huffman tree
def createHuffmanTree(nodes):
    """Link *nodes* into a Huffman tree and return its root.

    The two lightest nodes are repeatedly merged under a new father whose
    weight is the sum of theirs; the lighter one becomes the left child.
    Among nodes of equal weight, the one that entered the queue first is
    taken first (leaves in the order given, then merged nodes in the order
    they were created).

    The ``left`` / ``right`` / ``father`` links of the given nodes are
    updated in place; the *nodes* list itself is not modified.

    Returns ``None`` when *nodes* is empty, and the single node itself when
    there is only one.
    """
    if not nodes:
        return None

    # Heap entries are (weight, arrival order, node). The arrival order is
    # unique, so ties on weight are resolved first-come-first-served and the
    # Node objects themselves are never compared.
    heap = [(node.freq, order, node) for order, node in enumerate(nodes)]
    heapq.heapify(heap)
    order = len(heap)

    while len(heap) > 1:
        node_left = heapq.heappop(heap)[2]
        node_right = heapq.heappop(heap)[2]
        node_father = Node(node_left.freq + node_right.freq)
        node_father.left = node_left
        node_father.right = node_right
        node_left.father = node_father
        node_right.father = node_father
        heapq.heappush(heap, (node_father.freq, order, node_father))
        order += 1

    root = heap[0][2]
    root.father = None
    return root


# Huffman encoding
def huffmanEncoding(nodes, root):
    """Return the code string of every node in *nodes*, in the same order.

    Each code is read off by walking from the node up to *root*: a step
    taken from a left child contributes ``'0'``, a step from a right child
    contributes ``'1'``. A node that is itself the root (single-symbol
    alphabet) gets the empty string.

    Raises ``ValueError`` if a node is not a descendant of *root*.
    """
    codes = []
    for leaf in nodes:
        bits = []
        node = leaf
        while node is not root:
            if node is None:
                raise ValueError('node is not part of the tree rooted at root')
            bits.append('0' if node.isLeft() else '1')
            node = node.father
        # Bits were collected leaf-to-root; the code reads root-to-leaf.
        bits.reverse()
        codes.append(''.join(bits))
    return codes


def main():
    """Encode the sample alphabet and print one line per character."""
    # The same sample data as two parallel lists:
    # chars = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N']
    # freqs = [10, 4, 2, 5, 3, 4, 2, 6, 4, 4, 3, 7, 9, 6]
    chars_freqs = [('C', 2), ('G', 2), ('E', 3), ('K', 3), ('B', 4),
                   ('F', 4), ('I', 4), ('J', 4), ('D', 5), ('H', 6),
                   ('N', 6), ('L', 7), ('M', 9), ('A', 10)]
    nodes = createNodes([item[1] for item in chars_freqs])
    root = createHuffmanTree(nodes)
    codes = huffmanEncoding(nodes, root)
    for item in zip(chars_freqs, codes):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Character:%s freq:%-2d encoding: %s'
              % (item[0][0], item[0][1], item[1]))


if __name__ == '__main__':
    main()
Output result
>>>
Character:C freq:2  encoding: 10100
Character:G freq:2  encoding: 10101
Character:E freq:3  encoding: 0000
Character:K freq:3  encoding: 0001
Character:B freq:4  encoding: 0100
Character:F freq:4  encoding: 0101
Character:I freq:4  encoding: 0110
Character:J freq:4  encoding: 0111
Character:D freq:5  encoding: 1011
Character:H freq:6  encoding: 1110
Character:N freq:6  encoding: 1111
Character:L freq:7  encoding: 001
Character:M freq:9  encoding: 100
Character:A freq:10 encoding: 110