#! /usr/bin/env python

"""
chi2.py
(C) 2004 by Damir Cavar <dcavar@indiana.edu>

License:

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

http://www.gnu.org/licenses/gpl.txt


chi2-test

chi2-value = sum over (Observation - Expectation)^2/Expectation

Observation = found frequency
Expectation = expected frequency
"""

from math import pow

class Chi2:
   """Table-driven chi-square test.

   The chi-square statistic is computed as the sum over all cells of
   (observation - expectation)^2 / expectation and is then compared with
   the critical values stored in ``table``.
   """

   # Probability levels; entry k corresponds to column k of each row in `table`.
   probs = (0.99, 0.95, 0.05, 0.01, 0.001)
   # Chi-square thresholds keyed by degrees of freedom (1..30).
   # Each row is ascending and aligned with `probs`.
   table = {
      1:(0, 0.004, 3.84, 6.64, 10.83),
      2:(0.02, 0.103, 5.99, 9.21, 13.82),
      3:(0.115, 0.352, 7.82, 11.35, 16.27),
      4:(0.297, 0.711, 9.49, 13.28, 18.47),
      5:(0.554, 1.145, 11.07, 15.09, 20.52),
      6:(0.872, 1.635, 12.59, 16.81, 22.46),
      7:(1.239, 2.167, 14.07, 18.48, 24.32),
      8:(1.646, 2.733, 15.51, 20.09, 26.13),
      9:(2.088, 3.325, 16.92, 21.67, 27.88),
      10:(2.558, 3.94, 18.31, 23.21, 29.59),
      11:(3.05, 4.58, 19.68, 24.73, 31.26),
      12:(3.57, 5.23, 21.03, 26.22, 32.91),
      13:(4.11, 5.89, 22.36, 27.69, 34.53),
      14:(4.66, 6.57, 23.69, 29.14, 36.12),
      15:(5.23, 7.26, 25, 30.58, 37.7),
      16:(5.81, 7.96, 26.3, 32, 39.25),
      17:(6.41, 8.67, 27.59, 33.41, 40.79),
      18:(7.02, 9.39, 28.87, 34.81, 42.31),
      19:(7.63, 10.12, 30.14, 36.19, 43.82),
      20:(8.26, 10.85, 31.41, 37.57, 45.32),
      21:(8.9, 11.59, 32.67, 38.93, 46.8),
      22:(9.54, 12.34, 33.92, 40.29, 48.27),
      23:(10.2, 13.09, 35.17, 41.64, 49.73),
      24:(10.86, 13.85, 36.42, 42.98, 51.18),
      25:(11.52, 14.61, 37.65, 44.31, 52.62),
      26:(12.2, 15.38, 38.89, 45.64, 54.05),
      27:(12.88, 16.15, 40.11, 46.96, 55.48),
      28:(13.57, 16.93, 41.34, 48.28, 56.89),
      29:(14.26, 17.71, 42.56, 49.59, 58.3),
      30:(14.95, 18.49, 43.77, 50.89, 59.7) }


   def getChi2Value(self, sample, expectation):
      """Calculate the chi-square value of a sample against its expectation.

      sample      -- sequence of rows of observed frequencies
      expectation -- sequence of rows of expected frequencies; it is indexed
                     with the same (row, column) positions as `sample`

      The number of columns is taken from the first row of `sample`.
      Returns the sum of (observed - expected)^2 / expected as a float;
      an empty `sample` yields 0.0.

      Raises ZeroDivisionError if an expected frequency is 0 and IndexError
      if `expectation` (or a later row of `sample`) is shorter than the
      first row of `sample` requires.
      """
      total = 0.0
      # The column count is fixed by the first row, as the table is
      # expected to be rectangular.
      columns = len(sample[0]) if len(sample) else 0
      for row in range(len(sample)):
         observed_row = sample[row]
         expected_row = expectation[row]
         for col in range(columns):
            deviation = float(observed_row[col] - expected_row[col])
            total += pow(deviation, 2) / float(expected_row[col])
      return total


   def getDF(self, sample):
      """Return the degrees of freedom: (rows - 1) * (columns - 1).

      The column count is read from the first row of `sample`, so `sample`
      must contain at least one row.  A table with a single row or a single
      column yields 0.
      """
      rows = len(sample)
      columns = len(sample[0])
      return (rows - 1) * (columns - 1)


   def getSignificance(self, chi2value, df):
      """Return the tabulated probability level reached by chi2value.

      chi2value -- the chi-square statistic
      df        -- degrees of freedom (a whole number >= 1); values above
                   the largest tabulated df use the row of the largest one

      Returns 1.0 if chi2value is below the first threshold of the row,
      otherwise the entry of `probs` belonging to the largest threshold
      that chi2value reaches.

      Raises ValueError if df is smaller than 1 (for example the 0 that
      getDF returns for a single-row table).  A df that is not a key of the
      table, such as a fractional number, raises KeyError.
      """
      if df < 1:
         raise ValueError("df must be at least 1, got %r" % (df,))
      # Clamp against the largest tabulated key, not the number of keys.
      largest_df = max(self.table)
      if df > largest_df:
         df = largest_df
      critical_values = self.table[df]
      significance = 1.0
      # Thresholds ascend, so stop at the first one chi2value does not reach.
      for probability, threshold in zip(self.probs, critical_values):
         if threshold > chi2value:
            break
         significance = probability
      return significance


   def isSignificant(self, sample, expectation, level):
      """Tell whether sample differs significantly from expectation.

      Computes the chi-square value and the degrees of freedom of `sample`,
      looks up the probability level reached and returns True if that
      level is less than or equal to `level`, False otherwise.
      """
      chi2value = self.getChi2Value(sample, expectation)
      reached = self.getSignificance(chi2value, self.getDF(sample))
      return reached <= level


if __name__ == "__main__":
   # Small demonstration: a 2x3 table of observed frequencies tested
   # against a table of expected frequencies.
   sample = [(161, 59, 58), (23, 21, 29)]
   expectation = [(60, 60, 60), (20, 20, 20)] # H0 = equal proportions

   myChi2 = Chi2()
   df = myChi2.getDF(sample)
   chi2 = myChi2.getChi2Value(sample, expectation)
   significance = myChi2.getSignificance(chi2, df)
   # Each print receives a single pre-formatted string so that the output
   # is the same whether print is a statement (Python 2) or a function
   # (Python 3).
   print("Sample %s" % (sample,))
   print("Expectation = H0: %s" % (expectation,))
   print("chi2-test:\nchi2 = %s" % (chi2,))
   print("df = %s" % (df,))
   print("Is significant: (0.05) %s" % (myChi2.isSignificant(sample, expectation, 0.05),))
   # print("Alpha level exceeded %s" % (significance,))