Ticket #7197: trac_7197_part_7.patch

File trac_7197_part_7.patch, 7.2 KB (added by amhou, 12 years ago)
  • sage/stats/basic_stats.py

    # HG changeset patch
    # User Andrew Hou <amhou@uw.edu>
    # Date 1258418752 28800
    # Node ID faf0b8837f76ed1e651646126bfc09ec42909904
    # Parent  9ee1085d76f4763b648cda04791f4b5bc45f9b6a
    Fixed documentation.
    
    diff -r 9ee1085d76f4 -r faf0b8837f76 sage/stats/basic_stats.py
    a b  
    11"""
    22Basic Statistics
    33
    4 This file contains basic descriptive functions. Included as of 11/06/2009
    5 are the mean, median, mode, moving average, standard deviation, and the
    6 variance. When calling a function on data, there are checks for functions
    7 already defined for that data type.
     4This file contains basic descriptive functions. Included are the mean,
     5median, mode, moving average, standard deviation, and the variance.
     6When calling a function on data, there are checks for functions already
     7defined for that data type.
    88
    9 The 'mean' function returns the arithmetic mean (the sum of all the members
     9The ``mean`` function returns the arithmetic mean (the sum of all the members
    1010of a list, divided by the number of members). Further revisions may include
    11 the geometric and harmonic mean. The 'median' function returns the number
    12 separating the higher half of a sample from the lower half. The 'mode'
    13 returns the most common occuring member of a sample. The 'moving average'
    14 is a finite impulse response filter, creating a series of averages using a
    15 user-defined number of subsets of the full data set. The 'standard deviation'
    16 and the 'variance' return a measurement of how far data points tend to be
    17 from the arithmetic mean.
     11the geometric and harmonic mean. The ``median`` function returns the number
     12separating the higher half of a sample from the lower half. The ``mode``
     13returns the most commonly occurring member of a sample, plus the number of times
     14it occurs. If several entries are equally common, a list of those entries
     15is returned. The ``moving average`` is a finite impulse response filter,
     16creating a series of averages using a user-defined number of subsets of the
     17full data set. The ``standard deviation`` and the ``variance`` return a
     18measurement of how far data points tend to be from the arithmetic mean.
    1819
    19 Functions have also been imported under the namespace 'stats'.
     20Functions are available in the namespace ``stats``, i.e. you can use them by
     21typing ``stats.mean``, ``stats.median``, etc.
    2022
    2123
    2224AUTHOR:
     25
    2326    - Andrew Hou (11/06/2009)
    2427 
    2528"""
     
    3841
    3942def mean(v):
    4043    """
    41     Return the mean of the elements of `v`.
     44    Return the mean of the elements of ``v``.
    4245
    4346    We define the mean of the empty list to be NaN, following the
    4447    convention of MATLAB, Scipy, and R.
    4548
    4649    INPUT:
    4750
    48         - `v` -- a list of numbers
     51        - ``v`` -- a list of numbers
    4952
    5053    OUTPUT:
    5154
     
    8184
    8285def mode(v):
    8386    """
    84     Return the mode (most common) of the elements of 'v'
     87    Return the mode (most common) of the elements of ``v``
    8588   
    86     If 'v' is empty, we define the mode to be null.
     89    If ``v`` is empty, we define the mode to be null.
    8790    If all elements occur only once, we define the mode to be null.
    8891    If multiple elements occur at the same frequency, all will be
    8992    displayed.
     
    9194   
    9295    INPUT:
    9396
    94         - 'v' -- a list
     97        - ``v`` -- a list
    9598
    9699    OUTPUT:
    97100
     
    107110        []
    108111        sage: mode(['sage', 4, I, 3/5, 'sage', pi])
    109112        [('sage', 2)]
     113        sage: class MyClass:
     114        ...     def mode(self):
     115        ...         return 1
     116        sage: stats.mode(MyClass())
     117        1
    110118    """
     119    if hasattr(v, 'mode'): return v.mode()
    111120    from operator import itemgetter
    112 
     121   
    113122    freq = {}
    114123    for i in v:
    115124        try:
     
    128137
    129138def std(v, bias=False):
    130139    """
    131     Returns the standard deviation of the elements of 'v'.
     140    Returns the standard deviation of the elements of ``v``.
    132141
    133142    We define the standard deviation of the empty list to be NaN,
    134143    following the convention of MATLAB, Scipy, and R.
    135144
    136145    INPUT:
    137146
    138         - 'v' -- a list of numbers
     147        - ``v`` -- a list of numbers
    139148       
    140         - bias -- bool (default: False); if False, divide by
    141                   len(v) - 1 instead of len(v)
    142                   to give a less biased estimator (sample) for the
    143                   standard deviation.
     149        - ``bias`` -- bool (default: False); if False, divide by
     150                      len(v) - 1 instead of len(v)
     151                      to give a less biased estimator (sample) for the
     152                      standard deviation.
    144153
    145154    OUTPUT:
    146155       
     
    202211
    203212def variance(v, bias=False):
    204213    """
    205     Returns the variance of the elements of 'v'.
     214    Returns the variance of the elements of ``v``.
    206215
    207216    We define the variance of the empty list to be NaN,
    208217    following the convention of MATLAB, Scipy, and R.
    209218
    210219    INPUT:
    211220
    212         - 'v' -- a list of numbers
     221        - ``v`` -- a list of numbers
    213222       
    214         - bias -- bool (default: False); if False, divide by
    215                   len(v) - 1 instead of len(v)
    216                   to give a less biased estimator (sample) for the
    217                   standard deviation.
     223        - ``bias`` -- bool (default: False); if False, divide by
     224                      len(v) - 1 instead of len(v)
     225                      to give a less biased estimator (sample) for the
     226                      variance.
    218227
    219228    OUTPUT:
    220229       
     
    244253        841.66666666666663
    245254        sage: variance(x, bias=True)
    246255        833.25
     256        sage: class MyClass:
     257        ...     def variance(self, bias = False):
     258        ...        return 1   
     259        sage: stats.variance(MyClass())
     260        1
    247261
    248262
    249263
    250264    """
     265    if hasattr(v, 'variance'): return v.variance(bias=bias)
    251266    import numpy
    252267
    253268    x = 0
     
    257272            return v.var()
    258273        elif bias == False:
    259274            return v.var(ddof=1)
    260     if hasattr(v, 'variance'): return v.variance(bias = bias)
    261275    if len(v) == 0:
    262276        # variance of empty set defined as NaN
    263277        return NaN
     
    278292
    279293def median(v):
    280294    """
    281     Return the median (middle value) of the elements of 'v'
     295    Return the median (middle value) of the elements of ``v``
    282296
    283     If 'v' is empty, we define the median to be null.
    284     If 'v' is comprised of strings, TypeError occurs.
    285     For elements other than numbers, the median is a result of 'sorted()'
     297    If ``v`` is empty, we define the median to be null.
     298    If ``v`` is comprised of strings, TypeError occurs.
     299    For elements other than numbers, the median is a result of ``sorted()``
    286300
    287301    INPUT:
    288302
    289         - 'v' -- a list
     303        - ``v`` -- a list
    290304
    291305    OUTPUT:
    292306
    293         - median element of 'v'
     307        - median element of ``v``
    294308
    295309    EXAMPLES::
    296310
     
    324338    cut up into that number of bins. Then, the mean of each bin is
    325339    calculated, and appended into a new list.
    326340
    327     If 'v' is empty, we define the entries of the moving average to be NaN.
     341    If ``v`` is empty, we define the entries of the moving average to be NaN.
    328342
    329343    INPUT:
    330344 
    331         - v -- a list
     345        - ``v`` -- a list
    332346
    333         - bins -- number of bins, default set to 1
     347        - ``bins`` -- number of bins, default set to 1
    334348
    335349    OUTPUT:
    336350