Search form

Python: Statistics Calculators

Some statistics calculators written for Python 2.x (the str.format() method requires Python >= 2.6). Of course you could just use NumPy, or the standard-library statistics module on Python 3.4+, but where's the fun in that?

All solutions verified where possible by Wolfram Alpha or else by EasyCalculation.com.

import math

# Sample data sets passed to the demo calls that follow each function.
# Both lists hold ten integers, so they can be paired element by element.
numList = [10, 12, 12, 34, 5, 25, 26, 8, 45, 55]
numList2 = [12, 45, 23, 9, 65, 67, 45, 34, 49, 22]


def mean(numList):
    """ Return the arithmetic mean of numList as a float.

    The values are added left to right onto a float accumulator and the
    total is divided by the number of values.

    Raises ZeroDivisionError when numList is empty.
    """

    count = len(numList)

    # Start from 0.0 so the running total is a float from the first addition.
    accumulated = 0.0
    for value in numList:
        accumulated = accumulated + value

    return accumulated / float(count)

# Demo call: mean() returns its result, which is discarded here.
mean(numList)


def weightedMean(numList):
    """ Print and return the position-weighted mean of a list of numbers.

    The value at index i is given weight i + 1, so later values count for
    more. Each weight is normalised by the sum of all weights before it is
    multiplied with its value.

    numList: a non-empty sequence of numbers.

    Prints the result formatted to four decimal places and returns it as
    a float.

    Raises ValueError when numList is empty.
    """

    if len(numList) == 0:
        raise ValueError("weightedMean() requires at least one value")

    # Weights 1, 2, ..., n; float() keeps the division below a true
    # division on Python 2 as well.
    weights = range(1, len(numList) + 1)
    totalWeight = float(sum(weights))

    # Add each value multiplied by its normalised weight.
    total = 0.0
    for weight, value in zip(weights, numList):
        total += value * (weight / totalWeight)

    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("{0:.4f}".format(total))
    return total
    
# Demo call: prints the weighted mean of the sample list.
weightedMean(numList)


def median(numList):
    """ Print and return the median (middle value) of a list of numbers.

    The input is sorted into a new list; numList itself is not modified.
    With an even number of entries the two middle values are averaged and
    printed to four decimal places; with an odd number the middle value is
    printed unchanged.

    numList: a non-empty sequence of numbers.

    Returns the median (a float for even-length input, otherwise the
    middle element itself).

    Raises ValueError when numList is empty.
    """

    if len(numList) == 0:
        raise ValueError("median() requires at least one value")

    ordered = sorted(numList)
    # Index of the middle element, or of the lower of the two middle
    # elements when the length is even.
    index = (len(ordered) - 1) // 2

    # print(...) with a single argument behaves the same on Python 2 and 3.
    if len(ordered) % 2 == 0:
        result = (ordered[index] + ordered[index + 1]) / 2.0
        print("{0:.4f}".format(result))
    else:
        result = ordered[index]
        print(result)

    return result

# Demo call: prints the median of the sample list.
median(numList)


def mode(numList):
    """ Print and return the mode (most common value) of a list.

    First prints the mode, then prints every distinct value with its count
    as "value: count", ordered from most to least frequent.

    numList: a non-empty sequence of hashable values.

    Returns the mode. When several values share the highest count, the one
    that comes first in the dictionary's iteration order is chosen.

    Raises ValueError when numList is empty.
    """

    # Tally occurrences; dict.get avoids a separate membership test.
    count = {}
    for num in numList:
        count[num] = count.get(num, 0) + 1

    if not count:
        raise ValueError("mode() requires at least one value")

    # The mode is the key with the largest count.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    mostCommon = max(count, key=count.get)
    print(mostCommon)

    # Sort the (value, count) pairs by count, highest first, and print them.
    # To list only repeated values, skip the pairs whose count is 1.
    sortedCount = sorted(count.items(), key=lambda pair: pair[1], reverse=True)
    for value, occurrences in sortedCount:
        print("{0}: {1}".format(value, occurrences))

    return mostCommon
 
# Demo call: prints the mode of the sample list, then each value's count.
mode(numList)


def standardDeviation(numList):
    """ Return the population standard deviation of a list of numbers.

    This is the square root of the mean of the squared differences between
    each value and the arithmetic mean of the list.

    Raises ZeroDivisionError (from mean()) when numList is empty.
    """

    # mean() is the helper defined earlier in this file.
    centre = float(mean(numList))

    # Squared distance of every value from the mean.
    squaredDeviations = [float((value - centre) ** 2) for value in numList]

    # Averaging the squared distances gives the variance; its square root
    # is the standard deviation.
    return math.sqrt(float(mean(squaredDeviations)))
    
# Demo call: standardDeviation() returns its result, which is discarded here.
standardDeviation(numList)
    

def distributions(loadedDie):
    """ Print and return the variance and standard deviation of a discrete
    probability distribution.

    Unlike standardDeviation(), which measures a data set, this works on
    the probabilities themselves.

    loadedDie: sequence of probabilities, where the entry at index i is the
    probability of rolling face i + 1.

    The variance is computed as E[X^2] - E[X]^2. Both figures are printed
    to four decimal places and returned as the tuple
    (variance, standardDev).

    Raises ValueError (from math.sqrt) if the computed variance is
    negative; the probabilities are not validated.
    """

    expectedValue = 0.0
    expectedSquare = 0.0

    # One pass accumulates both E[X] and E[X^2]; faces are numbered from 1.
    for face, probability in enumerate(loadedDie, 1):
        expectedValue += face * probability
        expectedSquare += (face ** 2) * probability

    variance = expectedSquare - expectedValue ** 2

    # print(...) with a single argument behaves the same on Python 2 and 3.
    # The variance is printed before the square root is taken, so it is
    # still shown if math.sqrt rejects a negative value.
    print("Variance: {0:.4f}".format(variance))
    standardDev = math.sqrt(variance)
    print("Standard Dev: {0:.4f}".format(standardDev))

    return variance, standardDev

# Simulate a loaded six-sided die where 5 and 6 are twice as likely as the
# other faces.
# The 1.0 numerators force float division on Python 2; the alternative is
# `from __future__ import division`.
# On Python 3 the / operator always performs true division.
loadedDie = [1.0/8, 1.0/8, 1.0/8, 1.0/8, 1.0/4, 1.0/4]
# Fair die, for comparison:
#loadedDie = [1.0/6, 1.0/6, 1.0/6, 1.0/6, 1.0/6, 1.0/6]
distributions(loadedDie)


def covariance(numList, numList2):
    """ Return the population covariance of two equal-length data sets.

    Computed as the mean of the element-wise products minus the product of
    the two means, which equals the mean of the products of each pair's
    deviations from its own mean.

    numList, numList2: sequences of numbers with the same length.

    Raises ValueError when the lengths differ, and ZeroDivisionError when
    both sequences are empty.
    """

    # Values are paired by position, so unequal lengths would either fail
    # part-way through or silently ignore the surplus values.
    if len(numList) != len(numList2):
        raise ValueError("covariance() requires lists of equal length")

    # Sum of the element-wise products.
    productSum = 0.0
    for first, second in zip(numList, numList2):
        productSum += first * second

    # Divide by the size of the data set, then subtract mean1 * mean2.
    meanOfProducts = productSum / len(numList)
    return meanOfProducts - float(mean(numList)) * float(mean(numList2))
    
# Demo call: covariance() returns its result, which is discarded here.
covariance(numList, numList2)
    

def correlation(numList, numList2):
    """ Print and return the correlation coefficient of two data sets.

    Uses r = n^2 * cov(x, y) /
             sqrt((n * sum(x^2) - sum(x)^2) * (n * sum(y^2) - sum(y)^2)),
    where n is the number of pairs and cov is covariance().

    numList, numList2: sequences of numbers with the same length.

    Prints the coefficient to four decimal places and returns it.

    Raises ValueError when the lengths differ, and ZeroDivisionError when
    the denominator is zero (for example when either list is constant).
    """

    # The formula uses one n for both lists, so they must be the same size.
    if len(numList) != len(numList2):
        raise ValueError("correlation() requires lists of equal length")

    size = len(numList)

    # Sum and sum of squares for each data set.
    sum1 = 0.0
    sum1Sq = 0.0
    for num in numList:
        sum1 += num
        sum1Sq += num**2

    sum2 = 0.0
    sum2Sq = 0.0
    for num2 in numList2:
        sum2 += num2
        sum2Sq += num2**2

    # n^2 * cov(x, y) equals n * sum(x*y) - sum(x) * sum(y).
    numerator = size**2 * covariance(numList, numList2)
    denominator = math.sqrt(((size * sum1Sq) - (sum1**2)) *
                            ((size * sum2Sq) - (sum2**2)))
    coeffFinal = numerator / denominator

    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("{0:.4f}".format(coeffFinal))
    return coeffFinal

# Demo call: prints the correlation coefficient of the two sample lists.
correlation(numList, numList2)

Categories: