#!/usr/bin/env python """ Filename: grammar.py Author: Damir Cavar Date: 19. Sept. 2005 (C) 2005 by Damir Cavar This code is free; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This is a simple implementation of a context free grammar parser that reads in files of the format: ----------- begin file example ----------- # my small example grammar S -> NP VP NP -> N NP -> Art N NP -> Art Adj N VP -> V VP -> V NP # lexical rules Art -> the Art -> a Adj -> green Adj -> big N -> dog N -> cat N -> mouse V -> chase V -> ignore ------------ end file example ------------ """ import sys class PSG: """ Grammar class: Internal data structures: LHS: dictionary with left-hand-side symbols as keys and a list of possible right-hand-sides as values. RHS: dictionary with right-hand-side symbol tuples as keys and a list of possible left-hand-sides. """ def __init__(self, filename): """Constructor.""" self.LHS = {} self.RHS = {} self.__read__(filename) def __str__(self): """Generates a string representation of the grammar such that the grammar is dumped in a phrase structure rule format.""" text = "" for i in self.LHS.keys(): # self.rules: if len(text) > 0: text += "\n" for x in self.LHS[i]: text += i + " -> " + " ".join(x) + "\n" return text def __read__(self, filename): """Read in a CFG and return a grammar representation. This is a hidden method.""" try: file = open(filename) for i in file.readlines(): i = i.split("#")[0].strip() # cut off comment string and strip if len(i) > 0: # rule line, expected -> somewhere tokens = i.split("->") if len(tokens) == 2: # we need exactly two tokens lhs = tokens[0].split() if len(lhs) == 1: # we need exactly one token on LHS rhs = tuple(tokens[1].split()) value = self.LHS.get(lhs[0], [ ]) if rhs not in value: value.append(rhs) self.LHS[lhs[0]] = value value = self.RHS.get(rhs, [ ]) if lhs[0] not in value: value.append(lhs[0]) self.RHS[rhs] = value file.close() except IOError: pass def getRHS(self, left): """Return the RHS for a LHS.""" return self.LHS.get(left, []) def getLHS(self, right): """Return LHS for a RHS.""" return self.RHS.get(right, []) if __name__ == "__main__": if len(sys.argv) > 1: myGrammar = PSG(sys.argv[1]) print myGrammar