":"; exec mzscheme -r $0 "$@"
;;; ----------------------------------------------------
;;; Filename: mkstpwdlst.ss
;;; Author: Damir Cavar <dcavar@me.com>
;;;
;;; (C) 2006 by Damir Cavar
;;;
;;; This code is published under the restrictive GPL!
;;; Please find the text of the GPL here:
;;; http://www.gnu.org/licenses/gpl.txt
;;;
;;; It is free for use, change, etc. as long as the copyright
;;; note above is included in any modified version of the code.
;;;
;;; This script assumes that the text is raw and encoded in UTF8.
;;;
;;; Functions:
;;; 1. The text with stop-words is loaded into memory.
;;; 2. The text is tokenized, i.e. converted into a list of tokens.
;;; 3. A list of tokens is displayed, and can be redirected into
;;; a Scheme source file, see Usage.
;;;
;;; Usage:
;;; mzscheme -r mkstpwdlst.ss english.txt > english.ss
;;; ----------------------------------------------------
(require (lib "vector-lib.ss" "srfi" "43")) ; for vector-for-each
(require (lib "string.ss" "srfi" "13")) ; for string-tokenize
;;; load-file
;;; <- string filename
;;; -> string file content
;;; ----------------------------------------------------
;;; Read the whole file called `name` and hand its text back as a
;;; single string.  The length given to read-string is the size of the
;;; file in bytes; for UTF-8 text that is never smaller than the number
;;; of characters, so the read simply stops at end-of-file.
(define (load-file name)
  ;; The size is taken before the port is opened, then everything is
  ;; read in one call.
  (let ((byte-count (file-size name)))
    (call-with-input-file name
      (lambda (in)
        (read-string byte-count in)))))
;;; Main
;;; ----------------------------------------------------
;;; Tokenize every file named on the command line (the `argv` vector)
;;; and print ONE `(define stopwords '(...))` form on standard output.
;;; Tokens from several files are concatenated in command-line order,
;;; so the generated source never contains competing definitions of
;;; `stopwords` that would overwrite each other when loaded.
;;; With a single file the output is the same as one definition built
;;; from that file alone.
(let ((files (vector->list argv)))
  (if (null? files)
      ;; Nothing to process: explain the usage on stderr so that a
      ;; redirected stdout stays empty.
      (let ((err (current-error-port)))
        (display "Usage: mzscheme -r mkstpwdlst.ss FILE ... > stopwords.ss" err)
        (newline err))
      ;; One token list per file, flattened into a single list.
      (let ((tokens (apply append
                           (map (lambda (fname)
                                  (string-tokenize (load-file fname)))
                                files))))
        (display "(define stopwords '")
        (write tokens)
        (display ")")
        (newline))))