"""
readbrown1.py
(C) 2011 by Damir Cavar <dcavar@me.com>
See: Python 3 for (Computational and Corpus) Linguists
URL: http://www.cavar.me/damir/py4cl/
Process Brown corpus files.
Split single tokens with slash-annotated part-of-speech into tuples
of token and part-of-speech, count the tokens only, and generate
a relative frequency profile.
"""
import sys
from collections import Counter
def readBrownFile(filename, mycounters):
    """Count the word forms of one slash-annotated corpus file.

    The file is read as UTF-8 and split on whitespace. Each resulting item
    is expected to look like ``word/tag``; only the ``word`` part is counted.

    Args:
        filename: Path of the file to read.
        mycounters: A ``collections.Counter`` (or any object with a
            compatible ``update`` method) that is updated in place.

    Returns:
        None. Results are accumulated in ``mycounters``.

    If the file cannot be opened or read, a message is printed and
    ``mycounters`` is left unchanged.
    """
    try:
        # The context manager closes the file even when read() fails.
        with open(filename, mode='r', encoding='utf8') as ifp:
            text = ifp.read()
    except IOError:
        print("Could not read file:", filename)
        return
    # Split on the LAST slash only: the tag is the final field, while the
    # word itself may contain slashes (e.g. "1/2/cd" -> "1/2"). An item
    # without any slash is counted unchanged.
    mycounters.update(token.rsplit("/", 1)[0] for token in text.split())
if __name__ == '__main__':
    # Accumulate token counts over every file named on the command line.
    mycounters = Counter()
    for path in sys.argv[1:]:
        readBrownFile(path, mycounters)

    # Print each token with its relative frequency, tab-separated.
    total = sum(mycounters.values())
    for token, count in mycounters.items():
        print(token, count / total, sep="\t")