#! /usr/bin/env python

"""
EM2.py
(C) 2005 by Damir Cavar
GNU General Public License

Expectation Maximization clustering
"""

import math, operator

income = {}
income["Josh"] = 1550.0
income["Tossi"] = 1460.0
income["Amber"] = 1278.0
income["Debbie"] = 3400.0
income["Paul"] = 5430.0
income["Giancarlo"] = 5540.0

group = {}
probability = {}


# Gaussians: mean, deviation
Gaussians = [(1200.0, 200.0), (5000.0, 300.0)]


def GaussianPDF(value, G):
   return (1/float(math.sqrt(2 * math.pi * G[1]))) * math.e**(-1 * (value - G[0])**2 / (2 * G[1]**2))
   # operator.div(1.0, val)


def expectation():
   global group, probability, Gaussians
   for x in income.keys():
      p_g1 = GaussianPDF(income[x], Gaussians[0])
      p_g2 = GaussianPDF(income[x], Gaussians[1])
      print x,
      if p_g1 > p_g2:
         print "G1", p_g1
         group[x] = 0
         probability[x] = p_g1
      else:
         print "G2", p_g2
         group[x] = 1
         probability[x] = p_g2


def maximization():
   global group, Gaussians, probability
   for x in range(len(Gaussians)):
      probs = 0.0
      posprobs = 0.0
      deviations = 0.0
      for i in group.keys():
         if group[i] == x:
            probs += probability[i]
            posprobs += (probability[i] * income[i])
      mean = math.floor(posprobs/probs)
      for i in group.keys():
         if group[i] == x:
            deviations += probability[i] * (mean - income[i])**2
      deviations = math.floor(math.sqrt(deviations / probs))
      Gaussians[x] = (mean, deviations)

print "Data:"
print income

print "Initialization:"
print Gaussians

for x in range(5):
   expectation()
   maximization()
   print "Iteration:", x
   print Gaussians
   print probability, "\n"