#! /usr/bin/env python
# -*- coding: utf8 -*-

"""
freq2.py
(C) 2005 by Damir Cavar <dcavar@indiana.edu>
GNU General Public License

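Functionality: Counts the whitespace-separated, lower-cased words in the
UTF-8 text files matched by the command-line arguments and writes a
word/frequency table to log.txt. Progress is announced through the Windows
speech API (SAPI).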
"""

import sys, os.path, glob, string, codecs

from win32com.client import constants
import win32com.client


def countWords(words, filename):
   """Add the word counts of one UTF-8 text file to a frequency dictionary.

   The file content is split on whitespace and every token is lower-cased
   before it is counted, so "The" and "the" share a single entry.

   Args:
      words: Dictionary mapping token -> count. It is updated in place,
         which lets the caller accumulate counts over several files.
      filename: Path of the UTF-8 encoded text file to read.

   Returns:
      The same ``words`` dictionary that was passed in. If the file cannot
      be opened, read or decoded, a message is printed and the dictionary
      is returned unchanged.
   """
   try:
      # The context manager closes the file even when read() fails.
      with codecs.open(filename, "r", "utf8") as infile:
         text = infile.read()
   except (IOError, UnicodeDecodeError):
      # UnicodeDecodeError: the file exists but is not valid UTF-8.
      # Parenthesised single argument prints identically on Python 2 and 3.
      print("Cannot read from file: %s" % (filename,))
      return words

   # split() without arguments already discards all surrounding whitespace,
   # so the tokens need no additional strip().
   for token in text.split():
      token = token.lower()
      words[token] = words.get(token, 0) + 1
   return words


def main():
   """Count the words of all files named on the command line.

   Every command-line argument is normalised with os.path.normcase() and
   expanded as a glob pattern; each matching file is announced through the
   SAPI voice and fed to countWords(). The accumulated counts are written
   to "log.txt" (relative to the current working directory) as
   tab-separated "word<TAB>frequency" lines, most frequent word first.
   """
   words = {}
   speaker = win32com.client.Dispatch('SAPI.SpVoice')
   speaker.Speak('Computational linguistics is very cool')
   speaker.Speak('Loading file')
   for pattern in sys.argv[1:]:
      for path in glob.glob(os.path.normcase(pattern)):
         speaker.Speak(path)
         words = countWords(words, path)

   # Sort on frequency, highest first; words with equal frequency are
   # ordered by descending word, exactly as a reversed (count, word) sort.
   ranked = sorted(words.items(),
                   key=lambda item: (item[1], item[0]),
                   reverse=True)

   try:
      # The context manager closes log.txt even if a write fails part-way.
      with codecs.open("log.txt", "w", "utf8") as logfile:
         logfile.write("word\tfrequency\n")
         speaker.Speak('Counting words finished')
         speaker.Speak('Saving frequency profile')
         for word, count in ranked:
            logfile.write(word + "\t" + str(count) + "\n")
   except IOError:
      # Parenthesised single argument prints identically on Python 2 and 3.
      print("Output error.")


if __name__ == "__main__":
   main()