| 1 |  |  | from pycompressor.priority_queue import MinPQ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | from pycompressor.queue import Queue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | from pycompressor.trie import TernarySearchTrie | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  | class Node(object): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |     left = None | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |     right = None | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |     key = None | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  |     freq = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |     def __init__(self, key=None, freq=None, left=None, right=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |         if key is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |             self.key = key | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |         if freq is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |             self.freq = freq | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  |         if left is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |             self.left = left | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  |         if right is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |             self.right = right | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |     def is_leaf(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |         return self.left is None and self.right is None | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  | def char_at(text, i): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |     if len(text) - 1 < i: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |         return -1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |     return ord(text[i]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  | def write_char(char, bit_stream): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |     for i in range(8): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |         bit = char % 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |         bit_stream.enqueue(bit) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  |         char = char // 2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  | def read_char(bit_stream): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |     a = [0] * 8 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  |     for i in range(8): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  |         a[i] = bit_stream.dequeue() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |     char = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |     for i in range(7, -1, -1): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |         char = char * 2 + a[i] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |     return char | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  | class HuffmanCompressor(object): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |     def compress_to_binary(self, text): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  |         trie = self.build_trie(text) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |         bit_stream = Queue() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |         code = {} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  |         self.build_code(trie, '', code) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |         self.write_trie(trie, bit_stream) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |         self.write_text(code, text, bit_stream) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |         return bit_stream | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 59 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 60 |  |  |     def write_text(self, code, text, bit_stream): | 
            
                                                                        
                            
            
                                    
            
            
                | 61 |  |  |         for i in range(len(text)): | 
            
                                                                        
                            
            
                                    
            
            
                | 62 |  |  |             cc = code[char_at(text, i)] | 
            
                                                                        
                            
            
                                    
            
            
                | 63 |  |  |             for j in range(len(cc)): | 
            
                                                                        
                            
            
                                    
            
            
                | 64 |  |  |                 if cc[j] == '0': | 
            
                                                                        
                            
            
                                    
            
            
                | 65 |  |  |                     bit_stream.enqueue(0) | 
            
                                                                        
                            
            
                                    
            
            
                | 66 |  |  |                 elif cc[j] == '1': | 
            
                                                                        
                            
            
                                    
            
            
                | 67 |  |  |                     bit_stream.enqueue(1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 |  |  |     def build_code(self, x, prefix, code): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 |  |  |         if x.is_leaf(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  |             code[x.key] = prefix | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  |             return | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 |  |  |         self.build_code(x.left, prefix + '0', code) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  |         self.build_code(x.right, prefix + '1', code) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 |  |  |     def build_trie(self, text): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |         search_trie = dict() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |         for i in range(len(text)): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  |             c = char_at(text, i) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |             if c in search_trie: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  |                 search_trie[c].freq += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |                 node = Node(key=c, freq=1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |                 search_trie[c] = node | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |         pq = MinPQ(lambda x, y: x.freq - y.freq) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |         for node in search_trie.values(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |             pq.enqueue(node) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |         while pq.size() > 1: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |             node1 = pq.del_min() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |             node2 = pq.del_min() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |             freq = node1.freq + node2.freq | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |             node = Node(freq=freq, left=node1, right=node2) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |             pq.enqueue(node) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |         return pq.del_min() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |     def write_trie(self, x, bit_stream): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |         if x is None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |             return | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |         if x.is_leaf(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |             bit_stream.enqueue(1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |             write_char(x.key, bit_stream) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |             return | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  |         bit_stream.enqueue(0) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 |  |  |         self.write_trie(x.left, bit_stream) | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 110 |  |  |         self.write_trie(x.right, bit_stream) | 
            
                                                        
            
                                    
            
            
                | 111 |  |  |  |