"""
Wlist2RE.py
(C) 2007-2011 by Damir Cavar <dcavar@me.com>
This code is published under the GNU General Public License Version 3:
http://www.gnu.org/licenses/gpl-3.0.html
Make sure you understand the license terms before doing anything with
this code.
The code generates an FSA from a list of words and generates a DOT file
(for visualization in, for example, Graphviz).
The file FSA.py is required. Make sure it is in the path or next to
Wlist2RE.py.
Run with Python, having a list of words in a text-file "wordlist.txt":
python Wlist2RE.py wordlist.txt
The output will be written in the file out.dot. This file can be opened in
Graphviz for visualization.
Alternatively, make Wlist2RE.py executable and run it directly.
The word list is assumed to be encoded in UTF-8. This can be changed in the
code below.
Links:
For the DOT language:
http://www.graphviz.org/doc/info/lang.html
For Graphviz:
http://www.graphviz.org/
"""
# Module metadata.
# NOTE(review): the e-mail address below (me.hr) differs from the one given in
# the module docstring (me.com) -- confirm which one is current.
__author__ = "Damir Cavar <dcavar@me.hr>"
__credits__ = ""
# Revision/date strings use RCS/CVS-style "$Keyword: ... $" markers.
__version__ = "$Revision: 1.0 $"
__date__ = "$Date: 2008/01/13 13:20:01 $"
__copyright__ = "Copyright (c) 2007-2011 Damir Ćavar"
__license__ = "GPLv3"
import sys, re, codecs
from FSA import FSA
# Module-level sentinel string.
# NOTE(review): presumably a reserved marker for final states, chosen so that
# it cannot collide with a real word or symbol -- confirm against makeFSA.
finals = u"#__0__FS"
def makeFSA(wlist):
+--136 lines: """Returns a non-deterministic minimal automaton incrementally generated from a word list."""
def makeDOT(myFSA):
+-- 10 lines: """Return DOT representation for graphviz."""-----------------------------------------
def makePrefixList(wlist):
+-- 15 lines: prefdict = {}-------------------------------------------------------------------------
def loadWlist(fname):
+-- 14 lines: """Return a list of words from a text file (in UTF-8 encoding).-----------------------
if __name__ == "__main__":
+-- 7 lines: for i in sys.argv[1:]:----------------------------------------------------------------