#!/usr/bin/env python
# -*- coding: UTF-8 -*-

(C) 2007-2011 by Damir Cavar <dcavar@me.com>

This code is published under the GNU General Public License, Version 3.
Make sure you understand the license terms before doing anything with
this code.

The code generates an FSA from a list of words and generates a DOT file
(for visualization in, for example, Graphviz).

The file FSA.py is required. Make sure it is in the Python path or next to
this script.

Run it with Python, given a list of words in a text file such as "wordlist.txt":

python Wlist2RE.py wordlist.txt

The output will be written to the file out.dot. This file can be opened in
Graphviz for visualization.

Alternatively, make Wlist2RE.py executable and run it directly.

The word list is assumed to be encoded in UTF-8. This can be changed in the
code below.


For the DOT language, see: https://graphviz.org/doc/info/lang.html

For Graphviz, see: https://graphviz.org/


__author__    = "Damir Cavar <dcavar@me.hr>"
__credits__ = ""
__version__   = "$Revision: 1.0 $"
__date__      = "$Date: 2008/01/13 13:20:01 $"
__copyright__ = "Copyright (c) 2007-2011 Damir ─ćavar"
__license__   = "GPLv3"

import sys, re, codecs
from FSA import FSA

# NOTE(review): module-level marker string; presumably used by makeFSA to tag
# final (accepting) states -- its use is not visible in this view, confirm.
finals = u"#__0__FS"

def makeFSA(wlist):
+--136 lines: """Returns a non-deterministic minimal automaton incrementally generated from a word list."""

def makeDOT(myFSA):
+-- 10 lines: """Return DOT representation for graphviz."""-----------------------------------------

def makePrefixList(wlist):
    # print wlist
+-- 15 lines: prefdict = {}-------------------------------------------------------------------------

def loadWlist(fname):
+-- 14 lines: """Return a list of words from a text file (in UTF-8 encoding).-----------------------

if __name__ == "__main__":
+--  7 lines: for i in sys.argv[1:]:----------------------------------------------------------------
        # test some word in the FSA (accepting)
        # print myFSA.accept("pospavaju")