#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
# encoding: utf-8


"""
parsetei1.py
(C) 2011 by Damir Cavar <dcavar@me.com>

See: Python 3 for (Computational and Corpus) Linguists
URL: http://www.cavar.me/damir/py4cl/


The general idea is taken from Dive into Python 3, Chapter on XML
http://diveintopython3.ep.io/xml.html

Opens XML files that are encoded in TEI and prints some nodes and their content.
"""


import sys
import xml.etree.ElementTree as etree


def parseXML(file):
   """Print a short summary of one TEI-encoded XML file.

   A name that does not end in ".xml" (compared without regard to case)
   is skipped silently and nothing is printed.  For a matching file the
   following is written to standard output, one item per line: the file
   name, the root element, the root tag, the root attributes and the
   number of TEI <p> elements found anywhere in the document.  After
   that, each paragraph's leading text is printed and the paragraph is
   handed to parseS().

   Parameters:
      file -- path of the file to inspect, as a string.

   Raises:
      Whatever xml.etree.ElementTree.parse() raises for an unreadable
      or malformed document; no error is caught here.
   """
   # Compare the suffix case-insensitively so "corpus.XML" is accepted too.
   if not file.lower().endswith(".xml"):
      return

   print(file)
   tree = etree.parse(file)
   root = tree.getroot()
   print(root)
   print(root.tag)
   print(root.attrib)

   # TEI elements live in the TEI namespace; ElementTree spells qualified
   # names as "{namespace-uri}localname".
   paragraphs = list(root.iter("{http://www.tei-c.org/ns/1.0}p"))
   print(len(paragraphs))
   for p in paragraphs:
      # p.text is only the text that precedes the first child element
      # (None when the paragraph starts directly with a child).
      print(p.text)
      parseS(p)


def parseS(root):
   """Print the direct children of an element on a single line.

   Every child contributes its text, a backslash, and the value of its
   "type" attribute (an empty string when the attribute is absent),
   followed by one space.  The line is terminated with a newline, so an
   element without children produces an empty line.

   Parameters:
      root -- the element whose direct children are listed.
   """
   pieces = []
   for child in root:
      kind = child.attrib.get("type", "")
      # A child without text has text None, which is rendered as "None".
      pieces.append("{}\\{} ".format(child.text, kind))
   print("".join(pieces))


if __name__ == "__main__":
   # Every command-line argument is treated as a candidate file name;
   # parseXML() decides which of them are actually processed.
   for filename in sys.argv[1:]:
      parseXML(filename)