# Ticket #7197: trac_7197_basic_stats.patch

File trac_7197_basic_stats.patch, 6.9 KB (added by amhou, 12 years ago)
## New file: sage/stats/basic_stats.py

```
# HG changeset patch
# User Andrew Hou <hou.andrew@gmail.com>
# Date 1256536506 25200
# Node ID 90a5eeacff8d573ab6cf5c90639ce95946f9a41f
# Parent  dd0601e1d263ffc5040535f605a2b69fb8d26eab
basic_stats.py now works!

diff -r dd0601e1d263 -r 90a5eeacff8d sage/stats/basic_stats.py
```
 - from sage.rings.integer_ring import ZZ from sage.symbolic.constants import NaN from operator import itemgetter from sage.functions.other import sqrt import numpy def mean(v): """ Return the mean of the elements of `v`. We define the mean of the empty list to be NaN, following the convention of MATLAB, Scipy, and R. INPUT: - `v` -- a list of numbers OUTPUT: - a number EXAMPLES:: sage: mean([pi, e]) 1/2*pi + 1/2*e sage: mean([]) NaN sage: mean([I, sqrt(2), 3/5]) 1/3*sqrt(2) + 1/3*I + 1/5 sage: mean([RIF(1.0103,1.0103), RIF(2)]) 1.5051500000000000? sage: mean(range(4)) 3/2 """ if hasattr(v, 'mean'): return v.mean() if len(v) == 0: return NaN s = sum(v) if isinstance(s, (int,long)): # python integers are stupid. return s/ZZ(len(v)) return s/len(v) def mode(v): """ Return the mode (most common) of the elements of 'v' If 'v' is empty, we define the mode to be null. If all elements occur only once, we define the mode to be null. If multiple elements occur at the same frequency, all will be displayed. INPUT: - 'v' -- a list OUTPUT: - [(element, number_of_occurences)] EXAMPLES:: sage: mode([1,2,4,1,6,2,6,7,1]) [(1, 3)] sage: mode([]) [] sage: mode([1,2,3,4,5]) [] sage: mode(['sage', 4, I, 3/5, 'sage', pi]) [('sage', 2)] """ freq = {} if hasattr(v, 'mode'): return v.mode() for i in v: try: freq[i] += 1 except KeyError: freq[i] = 1 s = sorted(freq.items(), key=itemgetter(1), reverse=True) if not s or s[0][1]==1: # no mode if all occur equally often return [] else: return [i for i in s if i[1]==s[0][1]] def std(v, bias=False): """ Returns the standard deviation of the elements of 'v'. We define the standard deviation of the empty list to be NaN, following the convention of MATLAB, Scipy, and R. INPUT: - 'v' -- a list of numbers - bias -- bool (default: False); if False, divide by len(v) - 1 instead of len(v) to give a less biased estimator (sample) for the standard deviation. 
OUTPUT: - a number EXAMPLES:: sage: std([1..6], bias=True) 1/2*sqrt(35/3) sage: std([1..6], bias=False) sqrt(7/2) sage: std([e, pi]) sqrt(1/2)*sqrt((pi - e)^2) sage: std([]) NaN sage: std([I, sqrt(2), 3/5]) sqrt(1/450*(5*sqrt(2) + 5*I - 6)^2 + 1/450*(5*sqrt(2) - 10*I + 3)^2 + 1/450*(10*sqrt(2) - 5*I - 3)^2) sage: std([RIF(1.0103, 1.0103), RIF(2)]) 0.6998235813403261? """ x = 0 if type(v) is numpy.ndarray: # accounts for numpy arrays if bias == True: return v.std() elif bias == False: return v.std(ddof=1) if hasattr(v, 'std'): return v.std(bias=bias) if hasattr(v, 'standard_deviation'): return v.standard_deviation(bias=bias) if len(v) == 0: # standard deviation of empty set defined as NaN return NaN for i in range(len(v)): x += (v[i] - mean(v))**2 if bias == True: # population standard deviation if isinstance(x, (int,long)): return sqrt(x/ZZ(len(v))) return sqrt(x/len(v)) elif bias == False: # sample standard deviation if isinstance(x, (int,long)): return sqrt(x/ZZ(len(v))) return sqrt(x/(len(v)-1)) def variance(v, bias=False): """ Returns the variance of the elements of 'v'. We define the variance of the empty list to be NaN, following the convention of MATLAB, Scipy, and R. INPUT: - 'v' -- a list of numbers - bias -- bool (default: False); if False, divide by len(v) - 1 instead of len(v) to give a less biased estimator (sample) for the standard deviation. OUTPUT: - a number EXAMPLES:: sage: variance([1..6]) 7/2 sage: variance([1..6], bias=True) 35/12 sage: variance([e, pi]) 1/2*(pi - e)^2 sage: variance([]) NaN sage: variance([I, sqrt(2), 3/5]) 1/450*(5*sqrt(2) + 5*I - 6)^2 + 1/450*(5*sqrt(2) - 10*I + 3)^2 + 1/450*(10*sqrt(2) - 5*I - 3)^2 sage: variance([RIF(1.0103, 1.0103), RIF(2)]) 0.4897530450000000? 
""" x = 0 if type(v) == numpy.ndarray: # accounts for numpy arrays if bias == True: return v.var() elif bias == False: return v.var(ddof=1) if hasattr(v, 'variance'): return v.variance(bias = bias) if len(v) == 0: # variance of empty set defined as NaN return NaN for i in range(len(v)): x += (v[i] - mean(v))**2 if bias == True: # population variance if isinstance(x, (int,long)): return x/ZZ(len(v)) return x/len(v) elif bias == False: # sample variance if isinstance(x, (int,long)): return x/ZZ(len(v)) return x/(len(v)-1) def median(v): """ Return the median (middle value) of the elements of 'v' If 'v' is empty, we define the median to be null. If 'v' is comprised of strings, TypeError occurs. For elements other than numbers, the median is a result of 'sorted()' INPUT: - 'v' -- a list OUTPUT: - median element of 'v' EXAMPLES:: sage: median([1,2,3,4,5]) 3 sage: median([e, pi]) 1/2*pi + 1/2*e sage: median(['sage', 'linux', 'python']) 'python' sage: median([]) [] """ if hasattr(v, 'median'): return v.median() if len(v) == 0: #median of empty set defined as null return [] values = sorted(v) if len(values) % 2 == 1: return values[((len(values))+1)/2-1] else: lower = values[(len(values)+1)/2-1] upper = values[len(values)/2] return (lower + upper)/ZZ(2)