# HG changeset patch
# User Andrew Hou <amhou@uw.edu>
# Date 1258418752 28800
# Node ID faf0b8837f76ed1e651646126bfc09ec42909904
# Parent 9ee1085d76f4763b648cda04791f4b5bc45f9b6a
Fixed documentation.
diff -r 9ee1085d76f4 -r faf0b8837f76 sage/stats/basic_stats.py
a

b


1  1  """ 
2  2  Basic Statistics 
3  3  
4   This file contains basic descriptive functions. Included as of 11/06/2009 
5   are the mean, median, mode, moving average, standard deviation, and the 
6   variance. When calling a function on data, there are checks for functions 
7   already defined for that data type. 
 4  This file contains basic descriptive functions. Included are the mean, 
 5  median, mode, moving average, standard deviation, and the variance. 
 6  When calling a function on data, there are checks for functions already 
 7  defined for that data type. 
8  8  
9   The 'mean' function returns the arithmetic mean (the sum of all the members 
 9  The ``mean`` function returns the arithmetic mean (the sum of all the members 
10  10  of a list, divided by the number of members). Further revisions may include 
11   the geometric and harmonic mean. The 'median' function returns the number 
12   separating the higher half of a sample from the lower half. The 'mode' 
13   returns the most common occuring member of a sample. The 'moving average' 
14   is a finite impulse response filter, creating a series of averages using a 
15   user-defined number of subsets of the full data set. The 'standard deviation' 
16   and the 'variance' return a measurement of how far data points tend to be 
17   from the arithmetic mean. 
 11  the geometric and harmonic mean. The ``median`` function returns the number 
 12  separating the higher half of a sample from the lower half. The ``mode`` 
 13  returns the most commonly occurring member of a sample, plus the number of times 
 14  it occurs. If several entries are equally common, a list of the most common entries 
 15  is returned. The ``moving average`` is a finite impulse response filter, 
 16  creating a series of averages using a user-defined number of subsets of the 
 17  full data set. The ``standard deviation`` and the ``variance`` return a 
 18  measurement of how far data points tend to be from the arithmetic mean. 
18  19  
19   Functions have also been imported under the namespace 'stats'. 
 20  Functions are available in the namespace ``stats``, i.e. you can use them by 
 21  typing ``stats.mean``, ``stats.median``, etc. 
20  22  
21  23  
22  24  AUTHOR: 
 25  
23  26   Andrew Hou (11/06/2009) 
24  27  
25  28  """ 
… 
… 

38  41  
39  42  def mean(v): 
40  43  """ 
41   Return the mean of the elements of `v`. 
 44  Return the mean of the elements of ``v``. 
42  45  
43  46  We define the mean of the empty list to be NaN, following the 
44  47  convention of MATLAB, Scipy, and R. 
45  48  
46  49  INPUT: 
47  50  
48    `v`  a list of numbers 
 51   ``v``  a list of numbers 
49  52  
50  53  OUTPUT: 
51  54  
… 
… 

81  84  
82  85  def mode(v): 
83  86  """ 
84   Return the mode (most common) of the elements of 'v' 
 87  Return the mode (most common) of the elements of ``v`` 
85  88  
86   If 'v' is empty, we define the mode to be null. 
 89  If ``v`` is empty, we define the mode to be null. 
87  90  If all elements occur only once, we define the mode to be null. 
88  91  If multiple elements occur at the same frequency, all will be 
89  92  displayed. 
… 
… 

91  94  
92  95  INPUT: 
93  96  
94    'v'  a list 
 97   ``v``  a list 
95  98  
96  99  OUTPUT: 
97  100  
… 
… 

107  110  [] 
108  111  sage: mode(['sage', 4, I, 3/5, 'sage', pi]) 
109  112  [('sage', 2)] 
 113  sage: class MyClass: 
 114  ... def mode(self): 
 115  ... return 1 
 116  sage: stats.mode(MyClass()) 
 117  1 
110  118  """ 
 119  if hasattr(v, 'mode'): return v.mode() 
111  120  from operator import itemgetter 
112   
 121  
113  122  freq = {} 
114  123  for i in v: 
115  124  try: 
… 
… 

128  137  
129  138  def std(v, bias=False): 
130  139  """ 
131   Returns the standard deviation of the elements of 'v'. 
 140  Returns the standard deviation of the elements of ``v`` 
132  141  
133  142  We define the standard deviation of the empty list to be NaN, 
134  143  following the convention of MATLAB, Scipy, and R. 
135  144  
136  145  INPUT: 
137  146  
138    'v'  a list of numbers 
 147   ``v``  a list of numbers 
139  148  
140    bias  bool (default: False); if False, divide by 
141   len(v) - 1 instead of len(v) 
142   to give a less biased estimator (sample) for the 
143   standard deviation. 
 149   ``bias``  bool (default: False); if False, divide by 
 150  len(v) - 1 instead of len(v) 
 151  to give a less biased estimator (sample) for the 
 152  standard deviation. 
144  153  
145  154  OUTPUT: 
146  155  
… 
… 

202  211  
203  212  def variance(v, bias=False): 
204  213  """ 
205   Returns the variance of the elements of 'v'. 
 214  Returns the variance of the elements of ``v`` 
206  215  
207  216  We define the variance of the empty list to be NaN, 
208  217  following the convention of MATLAB, Scipy, and R. 
209  218  
210  219  INPUT: 
211  220  
212    'v'  a list of numbers 
 221   ``v``  a list of numbers 
213  222  
214    bias  bool (default: False); if False, divide by 
215   len(v) - 1 instead of len(v) 
216   to give a less biased estimator (sample) for the 
217   standard deviation. 
 223   ``bias``  bool (default: False); if False, divide by 
 224  len(v) - 1 instead of len(v) 
 225  to give a less biased estimator (sample) for the 
 226  variance. 
218  227  
219  228  OUTPUT: 
220  229  
… 
… 

244  253  841.66666666666663 
245  254  sage: variance(x, bias=True) 
246  255  833.25 
 256  sage: class MyClass: 
 257  ... def variance(self, bias = False): 
 258  ... return 1 
 259  sage: stats.variance(MyClass()) 
 260  1 
247  261  
248  262  
249  263  
250  264  """ 
 265  if hasattr(v, 'variance'): return v.variance(bias=bias) 
251  266  import numpy 
252  267  
253  268  x = 0 
… 
… 

257  272  return v.var() 
258  273  elif bias == False: 
259  274  return v.var(ddof=1) 
260   if hasattr(v, 'variance'): return v.variance(bias = bias) 
261  275  if len(v) == 0: 
262  276  # variance of empty set defined as NaN 
263  277  return NaN 
… 
… 

278  292  
279  293  def median(v): 
280  294  """ 
281   Return the median (middle value) of the elements of 'v' 
 295  Return the median (middle value) of the elements of ``v`` 
282  296  
283   If 'v' is empty, we define the median to be null. 
284   If 'v' is comprised of strings, TypeError occurs. 
285   For elements other than numbers, the median is a result of 'sorted()' 
 297  If ``v`` is empty, we define the median to be null. 
 298  If ``v`` is comprised of strings, TypeError occurs. 
 299  For elements other than numbers, the median is a result of ``sorted()`` 
286  300  
287  301  INPUT: 
288  302  
289    'v'  a list 
 303   ``v``  a list 
290  304  
291  305  OUTPUT: 
292  306  
293    median element of 'v' 
 307   median element of ``v`` 
294  308  
295  309  EXAMPLES:: 
296  310  
… 
… 

324  338  cut up into that number of bins. Then, the mean of each bin is 
325  339  calculated, and appended into a new list. 
326  340  
327   If 'v' is empty, we define the entries of the moving average to be NaN. 
 341  If ``v`` is empty, we define the entries of the moving average to be NaN. 
328  342  
329  343  INPUT: 
330  344  
331    v  a list 
 345   ``v``  a list 
332  346  
333    bins  number of bins, default set to 1 
 347   ``bins``  number of bins, default set to 1 
334  348  
335  349  OUTPUT: 
336  350  