"""
readbrown2.py
(C) 2011 by Damir Cavar <dcavar@me.com>
See: Python 3 for (Computational and Corpus) Linguists
URL: http://www.cavar.me/damir/py4cl/
Process Brown corpus files.
Split single tokens with slash-annotated part-of-speech into tuples
of token and part-of-speech, count the PoS-tags only, and generate
a relative frequency profile.
"""
import sys
from collections import Counter
def readBrownFile(filename, mycounters):
    """Count the part-of-speech tags found in one slash-annotated file.

    The file is read as UTF-8 and split on whitespace. Each resulting
    token is expected to look like ``word/tag``; the tag is whatever
    follows the *last* slash, so a token whose word part itself contains
    a slash (``and/or/cc``) is still counted under its tag (``cc``).
    Tokens that contain no slash carry no tag and are skipped.

    Args:
        filename: Path of the file to read.
        mycounters: Counter-like object; its ``update`` method is called
            with the tags found, so it is modified in place.

    Returns:
        None. If the file cannot be opened or read, a message is printed
        and ``mycounters`` is left unchanged.
    """
    try:
        # The context manager closes the file even if reading fails.
        with open(filename, mode='r', encoding='utf8') as ifp:
            text = ifp.read()
    except IOError:
        print("Could not read file:", filename)
        return

    # rpartition splits at the last slash only; an empty separator means
    # the token had no slash at all and therefore no tag to count.
    pieces = (token.rpartition("/") for token in text.split())
    mycounters.update(tag for _, sep, tag in pieces if sep)
if __name__ == '__main__':
    # Accumulate tag counts over every file named on the command line.
    tag_counts = Counter()
    for path in sys.argv[1:]:
        readBrownFile(path, tag_counts)

    # Print each tag with its share of all counted tags, tab-separated.
    grand_total = sum(tag_counts.values())
    for tag, count in tag_counts.items():
        print(tag, count / grand_total, sep="\t")