":"; exec mzscheme -r $0 "$@"
;;; ----------------------------------------------------
;;; Filename: readgrammar1.ss
;;; Author: Damir Cavar <dcavar@me.com>
;;;
;;; (C) 2006 by Damir Cavar
;;;
;;; This code is published under the restrictive GPL!
;;; Please find the text of the GPL here:
;;; http://www.gnu.org/licenses/gpl.txt
;;;
;;; It is free for use, change, etc. as long as the copyright
;;; note above is included in any modified version of the code.
;;;
;;; This script assumes that the text is raw and encoded in UTF8.
;;;
;;; Functions:
;;; 1. Reads a grammar file from files.
;;; 2. Tokenizes each line and reads in left- and right-hand-side
;;; into hash-tables.
;;;
;;; If the command line parameters contain more than one text/grammar file,
;;; the above results are accumulated over all the input files into one hash-table.
;;;
;;; The grammar files have one CFG-rule per line:
;;; S -> NP VP
;;;
;;; Comment lines start with a hash-mark #.
;;;
;;; Empty lines are allowed.
;;;
;;; Usage:
;;; mzscheme -r readgrammar1.ss grammar1.txt grammar2.txt ...
;;; ----------------------------------------------------
(require (lib "string.ss" "srfi" "13"))
(define lhs (make-hash-table 'equal))
(define rhs (make-hash-table 'equal))
;;; reading from file line by line
(define fold-lines-in-file
(lambda (filename proc init . mode)
(with-input-from-file filename
(lambda ()
(apply fold-lines proc init (current-input-port) mode)))))
(define fold-lines
(lambda (proc init . port+mode)
(let while ((accum init))
(let ([line (apply read-line port+mode)])
(if (eof-object? line) accum
(while (proc line)))))))
;;; parsing the grammar string
(define parse-rule!
(lambda (line)
(if (< 0 (string-length line))
(unless (eq? (string-ref line 0) #\#)
(let ([tokens (string-tokenize line)])
(if (string= (cadr tokens) "->")
(let* ([lhst (string->symbol (car tokens))]
[rhst (map (lambda (x)
(string->symbol x)) (cddr tokens))]
[lhstbl (hash-table-get lhs lhst (make-hash-table 'equal))]
[lhsval (hash-table-get lhstbl rhst 0.0)]
[rhstbl (hash-table-get rhs rhst (make-hash-table 'equal))]
[rhsval (hash-table-get rhstbl lhst 0.0)])
;
(if (= lhsval 0.0)
(hash-table-put! lhstbl rhst 1.0)
(hash-table-put! lhstbl rhst (+ rhsval 1)))
;
(if (= rhsval 0.0)
(hash-table-put! rhstbl lhst 1.0)
(hash-table-put! rhstbl lhst (+ lhsval 1)))
(hash-table-put! lhs lhst lhstbl)
(hash-table-put! rhs rhst rhstbl))))))))
;(let loop ([i 0])
; (unless (>= i (vector-length argv))
; (fold-lines-in-file (vector-ref argv i)
; (lambda (line)
; (parse-rule! (string-trim-both line)))
; 1)
; (hash-table-for-each lhs
; (lambda (key value)
; (hash-table-for-each value (lambda (vkey vval)
; (printf "~a\t->\t~a\n" key vkey)))))
; (loop (+ i 1))))