diff options
Diffstat (limited to 'ps01_grep/regexp.scm')
-rw-r--r-- | ps01_grep/regexp.scm | 230 |
1 files changed, 230 insertions, 0 deletions
diff --git a/ps01_grep/regexp.scm b/ps01_grep/regexp.scm new file mode 100644 index 0000000..a7f2305 --- /dev/null +++ b/ps01_grep/regexp.scm @@ -0,0 +1,230 @@ +;;;; Scheme Regular Expression Language Implementation -- regexp.scm + +(define (r:dot) ".") + +(define (r:bol) "^") + +(define (r:eol) "$") + +(define (r:quote string) + (r:seq + (call-with-output-string ; see RefMan section 14.3 + (lambda (port) + (let ((end (string-length string))) + (do ((i 0 (+ i 1))) ((not (< i end))) ; see RefMan 2.9 + (let ((c (string-ref string i))) + (if (or (char=? c #\.) + (char=? c #\[) + (char=? c #\\) + (char=? c #\^) + (char=? c #\$) + (char=? c #\*)) + (write-char #\\ port)) + (write-char c port)))))))) + +(define (r:char-from char-set) ; see RefMan section 5.6 + (let ((members (char-set-members char-set))) + (cond ((not (pair? members)) + (r:seq)) + ((not (pair? (cdr members))) + (r:quote (string (car members)))) + (else + (%char-from #f members))))) + +(define (r:char-not-from char-set) + (%char-from #t (char-set-members char-set))) + +(define (%char-from negate? members) + (let ((right? (memv #\] members)) + (caret? (memv #\^ members)) + (hyphen? (memv #\- members)) + (others + (delete-matching-items members + (lambda (c) + (or (char=? c #\]) + (char=? c #\^) + (char=? c #\-)))))) + (if (and caret? + hyphen? + (not right?) + (not negate?) + (null? others)) + "[-^]" + (string-append "[" + (if negate? "^" "") + (if right? "]" "") + (list->string others) + (if caret? "^" "") + (if hyphen? "-" "") + "]")))) + +;;; Means of combination for patterns + +(define (r:seq . exprs) + (string-append "\\(" (apply string-append exprs) "\\)")) + +(define (r:alt . exprs) + (if (pair? exprs) + (apply r:seq + (cons (car exprs) + (append-map (lambda (expr) + (list "\\|" expr)) + (cdr exprs)))) + (r:seq))) + +(define (r:repeat min max expr) + (if (not (exact-nonnegative-integer? min)) + (error "Min must be non-negative integer:" min)) + (if max + (begin + (if (not (exact-nonnegative-integer? max)) + (error "Max must be non-negative integer:" max)) + (if (not (<= min max)) + (error "Min not less than max:" min max)))) + (cond ((not max) + (apply r:seq + (append (make-list min expr) + (list expr "*")))) + ((= max min) + (apply r:seq (make-list min expr))) + (else + (apply r:seq + (append (make-list min expr) + (make-list (- max min) + (r:seq expr "\\|"))))))) + +;;; The following magic allows a program in MIT/GNU Scheme to call the +;;; grep system utility, returning the list of grep output lines to +;;; the caller. You can make similar mechanisms to call other system +;;; utilities. + +(load-option 'synchronous-subprocess) + + +(define (r:grep expr filename) + (r:grep-like "grep" '() expr filename)) + +(define (r:egrep expr filename) + (if (eq? microcode-id/operating-system 'nt) + (r:grep-like "grep" '("-E") expr filename) + (r:grep-like "egrep" '() expr filename))) + +(define (r:grep-like program options expr filename) + (let ((port (open-output-string))) + (and (= (run-synchronous-subprocess program + (append options + (list "-e" expr (->namestring filename))) + 'output port) + 0) + (r:split-lines (get-output-string port))))) + +(define (r:split-lines string) + (reverse + (let ((end (string-length string))) + (let loop ((i 0) (lines '())) + (if (< i end) + (let ((j + (substring-find-next-char string i end #\newline))) + (if j + (loop (+ j 1) + (cons (substring string i j) lines)) + (cons (substring string i end) lines))) + lines))))) + +#| +;;; An alternate implementation using MIT/GNU Scheme's internal +;;; regular-expression interpreter. + +(define (r:grep expr filename) + (call-with-input-file filename + (lambda (port) + (let loop ((lines '())) + (let ((line (read-line port))) + (if (eof-object? line) + (reverse lines) + (loop (if (re-string-search-forward expr line #f) + (cons line lines) + lines)))))))) +|# + +#| +;;; For example... + +;;; Note, the result of the next two requests were not in this file +;;; when the requests were made! + +(pp (r:grep (r:quote "r:sex") "regexp.scm")) +("(pp (r:grep (r:quote \"r:sex\") \"regexp.scm\"))") +;Unspecified return value + +(pp (r:grep (r:quote "r:seq") "regexp.scm")) +(" (r:seq" + "\t (r:seq))" + "(define (r:seq . exprs)" + " (apply r:seq" + " (r:seq)))" + "\t (apply r:seq" + "\t (apply r:seq (make-list min expr)))" + "\t (apply r:seq" + "\t\t\t\t (r:seq expr \"\\\\|\")))))))" + "(pp (r:grep (r:quote \"r:seq\") \"regexp.scm\"))" + "(pp (r:grep (r:seq (r:quote \"a\") (r:dot) (r:quote \"c\")) \"tests.txt\"))" + " (r:grep (r:seq \" \"" + " (r:seq (r:bol)") +;Unspecified return value + +(pp (r:grep (r:seq (r:quote "a") (r:dot) (r:quote "c")) "tests.txt")) +("[00]. abc" + "[01]. aac" + "[02]. acc" + "[03]. zzzaxcqqq" + "[10]. catcatdogdog" + "[12]. catcatcatdogdogdog") +;Unspecified return value + +;;; And... + +(pp (r:grep (r:alt (r:quote "foo") (r:quote "bar") (r:quote "baz")) + "tests.txt")) +("[05]. foo" "[06]. bar" "[07]. foo bar baz quux") +;Unspecified return value + + +(pp (r:grep (r:repeat 3 5 (r:alt (r:quote "cat") (r:quote "dog"))) + "tests.txt")) +("[09]. catdogcat" + "[10]. catcatdogdog" + "[11]. dogdogcatdogdog" + "[12]. catcatcatdogdogdog" + "[13]. acatdogdogcats" + "[14]. ifacatdogdogs" + "[15]. acatdogdogsme") +;Unspecified return value + +(pp + (r:grep (r:seq " " + (r:repeat 3 5 (r:alt (r:quote "cat") (r:quote "dog"))) + (r:eol)) + "tests.txt")) +("[09]. catdogcat" "[10]. catcatdogdog" "[11]. dogdogcatdogdog") +;Unspecified return value + +(pp + (r:grep + (let ((digit + (r:char-from (string->char-set "0123456789")))) + (r:seq (r:bol) + (r:quote "[") + digit + digit + (r:quote "]") + (r:quote ".") + (r:quote " ") + (r:char-from (char-set #\a #\b)) + (r:repeat 3 5 (r:alt "cat" "dog")) + (r:char-not-from (char-set #\d #\e #\f)) + (r:eol))) + "tests.txt")) +("[13]. acatdogdogcats") +;Unspecified return value +|#
\ No newline at end of file |