"""
parsetei1.py
(C) 2011 by Damir Cavar <dcavar@me.com>
See: Python 3 for (Computational and Corpus) Linguists
URL: http://www.cavar.me/damir/py4cl/
The general idea is taken from Dive into Python 3, Chapter on XML
http://diveintopython3.ep.io/xml.html
Opens XML files that are encoded in TEI and prints some nodes and their content.
"""
import sys
import xml.etree.ElementTree as etree
def parseXML(file):
    """Print an overview of one TEI-encoded XML file.

    Paths whose last four characters are not ".xml" are skipped without
    any output. For every other path the function prints, in order: the
    path, the root element, the root tag, the root attributes and the
    number of TEI ``p`` elements found anywhere in the tree. Each of
    those paragraphs then has its leading text printed and is handed to
    ``parseS``.
    """
    # Guard clause: ignore anything that does not carry the .xml suffix.
    if file[-4:] != ".xml":
        return

    print(file)
    document_root = etree.parse(file).getroot()
    for detail in (document_root, document_root.tag, document_root.attrib):
        print(detail)

    # Collected into a list because the count is printed before the
    # paragraphs themselves are walked.
    tei_paragraphs = [
        node for node in document_root.iter("{http://www.tei-c.org/ns/1.0}p")
    ]
    print(len(tei_paragraphs))
    for paragraph in tei_paragraphs:
        print(paragraph.text)
        parseS(paragraph)
def parseS(root):
    """Print the direct children of *root* on a single line.

    Each child is written as its text, a backslash, and the value of its
    ``type`` attribute (empty when the attribute is absent), followed by
    one space. A newline closes the line once all children are printed.
    """
    for token in root:
        annotation = token.attrib.get("type", "")
        print("{}\\{}".format(token.text, annotation), end=" ")
    # Terminate the line of tokens.
    print()
if __name__ == "__main__":
    # Treat every command-line argument as a file path and process the
    # paths in the order they were given.
    for path in sys.argv[1:]:
        parseXML(path)