| 1 |  |  | from pycompressor.priority_queue import MinPQ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | from pycompressor.queue import Queue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | class Node(object): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |     left = None | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |     right = None | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |     key = None | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |     freq = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |     def __init__(self, key=None, freq=None, left=None, right=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |         if key is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |             self.key = key | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |         if freq is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |             self.freq = freq | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |         if left is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  |             self.left = left | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |         if right is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  |             self.right = right | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  |     def is_leaf(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |         return self.left is None and self.right is None | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  | def char_at(text, i): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |     if len(text) - 1 < i: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |         return -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |     return ord(text[i]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  | def write_char(char, bit_stream): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |     for i in range(8): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |         bit = char % 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |         bit_stream.enqueue(bit) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |         char = char // 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  | def read_char(bit_stream): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |     a = [0] * 8 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |     for i in range(8): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  |         if bit_stream.is_empty(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  |             break | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  |         a[i] = bit_stream.dequeue() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |     char = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |     for i in range(7, -1, -1): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |         char = char * 2 + a[i] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  |     return char | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  | class HuffmanCompressor(object): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  |     def compress_to_binary(self, text): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |         trie = self.build_trie(text) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |         bit_stream = Queue() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  |         code = {} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |         self.build_code(trie, '', code) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |         self.write_trie(trie, bit_stream) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |         self.write_text(code, text, bit_stream) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 |  |  |         return bit_stream | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  |     def decompress_from_binary(self, bit_stream): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  |         trie = self.read_trie(bit_stream) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |         code = {} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |         self.build_code(trie, '', code) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  |         rcode = {} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 |  |  |         for key in code.keys(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |             rcode[code[key]] = key | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  |         return self.read_text(rcode, bit_stream) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 |  |  |     def compress_to_string(self, text): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 |  |  |         bit_stream = self.compress_to_binary(text) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  |         result = '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  |         while not bit_stream.is_empty(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 |  |  |             result = result + chr(read_char(bit_stream)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  |         return result | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 |  |  |     def decompress_from_string(self, text): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |         bit_stream = Queue() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |         for i in range(len(text)): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  |             write_char(char_at(text, i), bit_stream) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |         return self.decompress_from_binary(bit_stream) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |     def read_text(self, code, bit_stream): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |         text = '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |         cc = '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |         while not bit_stream.is_empty(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |             bit = bit_stream.dequeue() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |             if bit == 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |                 cc = cc + '0' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |                 cc = cc + '1' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |             if cc in code: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |                 text = text + chr(code[cc]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |                 cc = '' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |         return text | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |     def read_trie(self, bit_stream): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |         bit = bit_stream.dequeue() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |         if bit == 1: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |             return Node(key=read_char(bit_stream)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |         left = self.read_trie(bit_stream) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |         right = self.read_trie(bit_stream) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |         return Node(left=left, right=right) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |     def write_text(self, code, text, bit_stream): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |         for i in range(len(text)): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |             cc = code[char_at(text, i)] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  |             for j in range(len(cc)): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 |  |  |                 if cc[j] == '0': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 110 |  |  |                     bit_stream.enqueue(0) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 111 |  |  |                 elif cc[j] == '1': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 112 |  |  |                     bit_stream.enqueue(1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 114 |  |  |     def build_code(self, x, prefix, code): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 115 |  |  |         if x.is_leaf(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 116 |  |  |             code[x.key] = prefix | 
            
                                                                                                            
                            
            
                                    
            
            
                | 117 |  |  |             return | 
            
                                                                                                            
                            
            
                                    
            
            
                | 118 |  |  |         self.build_code(x.left, prefix + '0', code) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 119 |  |  |         self.build_code(x.right, prefix + '1', code) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 120 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 121 |  |  |     def build_trie(self, text): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 122 |  |  |         search_trie = dict() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 123 |  |  |         for i in range(len(text)): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 124 |  |  |             c = char_at(text, i) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 125 |  |  |             if c in search_trie: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 126 |  |  |                 search_trie[c].freq += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 127 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 128 |  |  |                 node = Node(key=c, freq=1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 129 |  |  |                 search_trie[c] = node | 
            
                                                                                                            
                            
            
                                    
            
            
                | 130 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 131 |  |  |         pq = MinPQ(lambda x, y: x.freq - y.freq) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 132 |  |  |         for node in search_trie.values(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 133 |  |  |             pq.enqueue(node) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 134 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 135 |  |  |         while pq.size() > 1: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 136 |  |  |             node1 = pq.del_min() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 137 |  |  |             node2 = pq.del_min() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 138 |  |  |             freq = node1.freq + node2.freq | 
            
                                                                                                            
                            
            
                                    
            
            
                | 139 |  |  |             node = Node(freq=freq, left=node1, right=node2) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 140 |  |  |             pq.enqueue(node) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 141 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 142 |  |  |         return pq.del_min() | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 143 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 144 |  |  |     def write_trie(self, x, bit_stream): | 
            
                                                        
            
                                    
            
            
                | 145 |  |  |         if x is None: | 
            
                                                        
            
                                    
            
            
                | 146 |  |  |             return | 
            
                                                        
            
                                    
            
            
                | 147 |  |  |         if x.is_leaf(): | 
            
                                                        
            
                                    
            
            
                | 148 |  |  |             bit_stream.enqueue(1) | 
            
                                                        
            
                                    
            
            
                | 149 |  |  |             write_char(x.key, bit_stream) | 
            
                                                        
            
                                    
            
            
                | 150 |  |  |             return | 
            
                                                        
            
                                    
            
            
                | 151 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 152 |  |  |         bit_stream.enqueue(0) | 
            
                                                        
            
                                    
            
            
                | 153 |  |  |         self.write_trie(x.left, bit_stream) | 
            
                                                        
            
                                    
            
            
                | 154 |  |  |         self.write_trie(x.right, bit_stream) | 
            
                                                        
            
                                    
            
            
                | 155 |  |  |  |