#!/usr/bin/env racket
#lang racket/base
(require srfi/1)
(require racket/file)
(require racket/cmdline)
(define rule-re (pregexp
(string-append
"([^-=>#,\\s]+(?:[-]+[^-=>#,\\s]+)*)"
"(?:\\s+(?:-+|=+)>\\s+)"
"([^-=>#,\\s]+(?:[-]+[^-=>#,\\s]+)*(?:\\s+[^-=>#,\\s]+(?:[-]+[^-=>#,\\s]+)*)*)"
"(?:\\s+#.*)?")))
(struct grammar (lhs lprhs terminals symbols) #:mutable #:transparent)
(define parse-grammar
(lambda (grammar-file grammar)
(for-each
(lambda (line)
(let ([lmatch (regexp-match rule-re line)])
(when (and lmatch
(= (length lmatch) 3))
(let ([lhs (string->symbol (list-ref lmatch 1))]
[rhs (map (lambda (token)
(string->symbol token))
(regexp-split #rx" +" (list-ref lmatch 2)))])
(hash-set! (grammar-symbols grammar) lhs #t)
(for-each
(lambda (token)
(hash-set! (grammar-terminals grammar) token #t))
rhs)
(let ([val (hash-ref (grammar-lhs grammar) lhs '())]
[rval (hash-ref (grammar-lprhs grammar) (list-ref rhs 0) '())])
(unless (member (list lhs rhs) rval)
(hash-set! (grammar-lprhs grammar) (list-ref rhs 0) (append rval (list (list lhs (cdr rhs))))))
(unless (member rhs val)
(hash-set! (grammar-lhs grammar) lhs (append val (list rhs)))))))))
(file->lines grammar-file))
(for-each
(lambda (key)
(hash-remove! (grammar-terminals grammar) key))
(hash-keys (grammar-symbols grammar)))
grammar))
(define new-grammar
(lambda (grammar-file)
(parse-grammar grammar-file (grammar (make-hash) (make-hash) (make-hash) (make-hash)))))
(define add2grammar
(lambda (grammar-file ogrammar)
(parse-grammar grammar-file ogrammar)))
(define verbose #f)
(define latex #f)
(define inactive-edge
(lambda (edge)
(>= (caddr edge) (length (list-ref edge 4)))))
(define-syntax-rule (active-edge edge)
(not (inactive-edge edge)))
(define terminal?
(lambda (mgrammar token)
(hash-ref (grammar-terminals mgrammar) token #f)))
(define symbol?
(lambda (mgrammar token)
(hash-ref (grammar-symbols mgrammar) token #f)))
(define rule-invocation
(lambda (mgrammar chart start)
(let ([res '()])
(for-each
(lambda (edge)
(cond [(inactive-edge edge)
(let ([rhss (hash-ref (grammar-lprhs mgrammar) (list-ref edge 3) '())])
(for-each
(lambda (x)
(let* ([newedge (list (car edge) (cadr edge) 1 (list-ref x 0)
(append (list (list-ref edge 3)) (list-ref x 1))
(append (list (list-index
(lambda (el)
(eq? el edge))
chart))))])
(unless (or (member newedge chart)
(member newedge res))
(when verbose
(printf "RI new edge: ~a\n" newedge))
(set! res (append res (list newedge))))))
rhss))]))
(drop chart start))
res)))
(define fundamental-rule
(lambda (chart)
(let ([res '()])
(for-each
(lambda (edge)
(cond [(active-edge edge)
(let ([expectation (list-ref (list-ref edge 4) (caddr edge))])
(for-each
(lambda (oe)
(cond [(inactive-edge oe)
(cond [(and (eq? expectation (list-ref oe 3))
(= (cadr edge) (car oe)))
(let ([newedge (list (car edge) (cadr oe)
(+ (caddr edge) 1)
(list-ref edge 3)
(list-ref edge 4)
(append (list-ref edge 5)
(list (list-index
(lambda (el)
(eq? el oe))
chart))))])
(unless (or (member newedge chart)
(member newedge res))
(when verbose
(printf "FR new edge: ~a\n" newedge))
(set! res (append res (list newedge)))))])]))
chart))]))
chart)
res)))
(define parse
(lambda (mgrammar tokens)
(let ([chart '()])
(let ([counter 0])
(for-each
(lambda (token)
(let ([vals (hash-ref (grammar-lprhs mgrammar) token '())])
(for-each (lambda (y)
(let ([edge (list counter (+ counter 1) 1 (list-ref y 0) (append (list token ) (list-ref y 1)) '())])
(unless (member edge chart)
(when verbose
(printf "Init new edge: ~a\n" edge))
(set! chart (append chart (list edge))))))
vals)
(set! counter (+ counter 1))))
tokens))
(let ([start 0]
[oldlen (length chart)])
(let loop ()
(let ([res (rule-invocation mgrammar chart start)])
(when (> (length res) 0)
(set! chart (append chart res))))
(let ([res (fundamental-rule chart)])
(when (> (length res) 0)
(set! chart (append chart res))))
(let ([nlen (length chart)])
(cond [(> nlen oldlen)
(begin
(set! oldlen nlen)
(loop))]))))
chart)))
(define serialize-chart
(lambda (mgrammar chart input (with-span #f))
(let ([end (length input)])
(for/list ((edge (in-list (reverse chart)))
#:when (and (inactive-edge edge)
(= (car edge) 0)
(= (cadr edge) end)))
(edge->list mgrammar chart edge with-span)))))
(define edge->list
(lambda (mgrammar chart edge (with-span #f))
(let ([analysis (list (list-ref edge 3))])
(let ([rule-counter 0])
(for-each
(lambda (token)
(if (terminal? mgrammar token)
(set! analysis (append analysis (list (list-ref (list-ref edge 4) 0))))
(begin
(set! analysis
(append analysis
(list (edge->list mgrammar chart
(list-ref chart
(list-ref (list-ref edge 5) rule-counter)) with-span))))
(set! rule-counter (add1 rule-counter)))))
(list-ref edge 4)))
analysis)))
(define (sublist l offset n)
(take (drop l offset) n))
(define main
(lambda ()
(let ([grammar-file ""]
[input-sentence ""])
(define sentence-to-parse
(command-line
#:program "charty"
#:once-each
[("-v" "--verbose") "Verbose mode" (set! verbose #t)]
[("-l" "--latex") "LaTeX Qtree output mode" (set! latex #t)]
[("-g" "--grammar") gf "Grammar file name" (set! grammar-file gf)]
[("-s" "--sentence") is "Input sentence" (set! input-sentence is)]))
(when (and (> (string-length grammar-file) 0)
(> (string-length input-sentence) 0))
(let ([mygrammar (new-grammar grammar-file)]
[input (map (lambda (t) (string->symbol t)) (regexp-split #rx" +" input-sentence))])
(let ([chart (parse mygrammar input)])
(let ([counter 0])
(for-each (lambda (x)
(set! counter (+ counter 1))
(printf "Parse ~a: ~a\n" counter x))
(serialize-chart mygrammar chart input #f)))))))))
(main)