# HG changeset patch
# User Nathann Cohen <nathann.cohen@gmail.com>
# Date 1299058062 3600
# Node ID 9874dc07c1c07cc425ee2b285c1567b93ec3e9c7
# Parent 8438b7c20d79c02a2ece3e1c3f7224a772ff8f07
trac 10864 -- Updating Huffman to remove unnecessary keywords
diff -r 8438b7c20d79 -r 9874dc07c1c0 sage/coding/source_coding/huffman.py
a

b


99  99  
100  100  INPUT: 
101  101  
102    ``string``  (default: ``None``) a string from which the Huffman 
103   encoding should be created. 
 102   ``source`` -- can be either:
104  103  
105    ``table``  (default: ``None``) a dictionary that associates to each 
106   symbol of an alphabet a numeric value. If we consider the frequency of 
107   each alphabetic symbol, then ``table`` is considered as the frequency 
108   table of the alphabet with each numeric (nonnegative integer) value 
109   being the number of occurrences of a symbol. The numeric values can also 
110   represent weights of the symbols. In that case, the numeric values are 
111   not necessarily integers, but can be real numbers. In general, we refer 
112   to ``table`` as a weight table. 
 104   A string from which the Huffman encoding should be created. 
113  105  
114   Exactly one of ``string`` and ``table`` cannot be ``None``. In order to 
115   construct a Huffman code for an alphabet, we use exactly one of the 
116   following methods: 
 106   A dictionary that associates to each symbol of an alphabet a numeric 
 107  value. If we consider the frequency of each alphabetic symbol, then 
 108  ``table`` is considered as the frequency table of the alphabet with 
 109  each numeric (nonnegative integer) value being the number of 
 110  occurrences of a symbol. The numeric values can also represent weights 
 111  of the symbols. In that case, the numeric values are not necessarily 
 112  integers, but can be real numbers. In general, we refer to ``table`` 
 113  as a weight table. 
117  114  
118   #. Let ``string`` be a string of symbols over an alphabet and feed 
119   ``string`` to the constructor of this class. Based on the input string, 
120   a frequency table is constructed that contains the frequency of each 
121   unique symbol in ``string``. The alphabet in question is then all the 
122   unique symbols in ``string``. A significant implication of this is that 
123   any subsequent string that we want to encode must contain only symbols 
124   that can be found in ``string``. 
 115  In order to construct a Huffman code for an alphabet, we use exactly one of 
 116  the following methods: 
125  117  
126   #. Let ``table`` be the frequency table of an alphabet. We can feed this 
127   table to the constructor of this class. The table ``table`` can be a 
128   table of frequency or a table of weights. 
 118  #. Let ``source`` be a string of symbols over an alphabet and feed 
 119  ``source`` to the constructor of this class. Based on the input string, a 
 120  frequency table is constructed that contains the frequency of each unique 
 121  symbol in ``source``. The alphabet in question is then all the unique 
 122  symbols in ``source``. A significant implication of this is that any 
 123  subsequent string that we want to encode must contain only symbols that 
 124  can be found in ``source``. 
 125  
 126  #. Let ``source`` be the frequency table of an alphabet. We can feed this 
 127  table to the constructor of this class. The table ``source`` can be a 
 128  table of frequencies or a table of weights. 
129  129  
130  130  Examples:: 
131  131  
… 
… 

152  152  
153  153  sage: ft = frequency_table("There once was a french fry"); ft 
154  154  {'a': 2, ' ': 5, 'c': 2, 'e': 4, 'f': 2, 'h': 2, 'o': 1, 'n': 2, 's': 1, 'r': 3, 'T': 1, 'w': 1, 'y': 1} 
155   sage: h2 = Huffman(table=ft) 
 155  sage: h2 = Huffman(ft) 
156  156  
157  157  Once ``h1`` has been trained, and hence possesses an encoding table, 
158  158  it is possible to obtain the Huffman encoding of any string 
… 
… 

181  181  
182  182  sage: from sage.coding.source_coding.huffman import Huffman 
183  183  sage: T = {"a":45, "b":13, "c":12, "d":16, "e":9, "f":5} 
184   sage: H = Huffman(table=T) 
 184  sage: H = Huffman(T) 
185  185  sage: L = ["deaf", "bead", "fab", "bee"] 
186  186  sage: E = [] 
187  187  sage: for e in L: 
… 
… 

205  205  True 
206  206  """ 
207  207  
208   def __init__(self, string=None, table=None): 
 208  def __init__(self, source): 
209  209  r""" 
210  210  Constructor for Huffman. 
211  211  
… 
… 

219  219  
220  220  TESTS: 
221  221  
222   If both arguments are supplied, an exception is raised:: 
 222  Feeding anything other than a string or a dictionary::
223  223  
224   sage: Huffman(string=str, table={'a':8}) 
 224  sage: Huffman(Graph()) 
225  225  Traceback (most recent call last): 
226  226  ... 
227   ValueError: Exactly one of 'string' and 'table' cannot be None. 
 227  ValueError: Input must be either a string or a dictionary. 
228  228  """ 
229   if (string is not None) and (table is not None): 
230   raise ValueError( 
231   "Exactly one of 'string' and 'table' cannot be None.") 
232  229  
233  230  # alphabetic symbol to Huffman encoding translation table 
234  231  self._character_to_code = [] 
… 
… 

236  233  self._tree = None 
237  234  # index of each alphabetic symbol 
238  235  self._index = None 
239   if string is not None: 
240   self._build_code(frequency_table(string)) 
241   elif table is not None: 
242   self._build_code(table) 
 236  
 237  if isinstance(source,basestring): 
 238  self._build_code(frequency_table(source)) 
 239  elif isinstance(source, dict): 
 240  self._build_code(source) 
 241  else: 
 242  raise ValueError("Input must be either a string or a dictionary.") 
243  243  
244  244  def _build_code_from_tree(self, tree, d, prefix): 
245  245  r""" 