;;;; Scheme Regular Expression Language Implementation -- regexp.scm (define (r:dot) ".") (define (r:bol) "^") (define (r:eol) "$") (define (r:quote string) (r:seq (call-with-output-string ; see RefMan section 14.3 (lambda (port) (let ((end (string-length string))) (do ((i 0 (+ i 1))) ((not (< i end))) ; see RefMan 2.9 (let ((c (string-ref string i))) (if (or (char=? c #\.) (char=? c #\[) (char=? c #\\) (char=? c #\^) (char=? c #\$) (char=? c #\*)) (write-char #\\ port)) (write-char c port)))))))) (define (r:char-from char-set) ; see RefMan section 5.6 (let ((members (char-set-members char-set))) (cond ((not (pair? members)) (r:seq)) ((not (pair? (cdr members))) (r:quote (string (car members)))) (else (%char-from #f members))))) (define (r:char-not-from char-set) (%char-from #t (char-set-members char-set))) (define (%char-from negate? members) (let ((right? (memv #\] members)) (caret? (memv #\^ members)) (hyphen? (memv #\- members)) (others (delete-matching-items members (lambda (c) (or (char=? c #\]) (char=? c #\^) (char=? c #\-)))))) (if (and caret? hyphen? (not right?) (not negate?) (null? others)) "[-^]" (string-append "[" (if negate? "^" "") (if right? "]" "") (list->string others) (if caret? "^" "") (if hyphen? "-" "") "]")))) ;;; Means of combination for patterns (define (r:seq . exprs) (string-append "\\(" (apply string-append exprs) "\\)")) (define (r:alt . exprs) (if (pair? exprs) (apply r:seq (cons (car exprs) (append-map (lambda (expr) (list "\\|" expr)) (cdr exprs)))) (r:seq))) (define (r:repeat min max expr) (if (not (exact-nonnegative-integer? min)) (error "Min must be non-negative integer:" min)) (if max (begin (if (not (exact-nonnegative-integer? max)) (error "Max must be non-negative integer:" max)) (if (not (<= min max)) (error "Min not less than max:" min max)))) (cond ((not max) (apply r:seq (append (make-list min expr) (list expr "*")))) ((= max min) (apply r:seq (make-list min expr))) (else (apply r:seq (append (make-list min expr) (make-list (- max min) (r:seq expr "\\|"))))))) ;;; The following magic allows a program in MIT/GNU Scheme to call the ;;; grep system utility, returning the list of grep output lines to ;;; the caller. You can make similar mechanisms to call other system ;;; utilities. (load-option 'synchronous-subprocess) (define (r:grep expr filename) (r:grep-like "grep" '() expr filename)) (define (r:egrep expr filename) (if (eq? microcode-id/operating-system 'nt) (r:grep-like "grep" '("-E") expr filename) (r:grep-like "egrep" '() expr filename))) (define (r:grep-like program options expr filename) (let ((port (open-output-string))) (and (= (run-synchronous-subprocess program (append options (list "-e" expr (->namestring filename))) 'output port) 0) (r:split-lines (get-output-string port))))) (define (r:split-lines string) (reverse (let ((end (string-length string))) (let loop ((i 0) (lines '())) (if (< i end) (let ((j (substring-find-next-char string i end #\newline))) (if j (loop (+ j 1) (cons (substring string i j) lines)) (cons (substring string i end) lines))) lines))))) #| ;;; An alternate implementation using MIT/GNU Scheme's internal ;;; regular-expression interpreter. (define (r:grep expr filename) (call-with-input-file filename (lambda (port) (let loop ((lines '())) (let ((line (read-line port))) (if (eof-object? line) (reverse lines) (loop (if (re-string-search-forward expr line #f) (cons line lines) lines)))))))) |# #| ;;; For example... ;;; Note, the result of the next two requests were not in this file ;;; when the requests were made! (pp (r:grep (r:quote "r:sex") "regexp.scm")) ("(pp (r:grep (r:quote \"r:sex\") \"regexp.scm\"))") ;Unspecified return value (pp (r:grep (r:quote "r:seq") "regexp.scm")) (" (r:seq" "\t (r:seq))" "(define (r:seq . exprs)" " (apply r:seq" " (r:seq)))" "\t (apply r:seq" "\t (apply r:seq (make-list min expr)))" "\t (apply r:seq" "\t\t\t\t (r:seq expr \"\\\\|\")))))))" "(pp (r:grep (r:quote \"r:seq\") \"regexp.scm\"))" "(pp (r:grep (r:seq (r:quote \"a\") (r:dot) (r:quote \"c\")) \"tests.txt\"))" " (r:grep (r:seq \" \"" " (r:seq (r:bol)") ;Unspecified return value (pp (r:grep (r:seq (r:quote "a") (r:dot) (r:quote "c")) "tests.txt")) ("[00]. abc" "[01]. aac" "[02]. acc" "[03]. zzzaxcqqq" "[10]. catcatdogdog" "[12]. catcatcatdogdogdog") ;Unspecified return value ;;; And... (pp (r:grep (r:alt (r:quote "foo") (r:quote "bar") (r:quote "baz")) "tests.txt")) ("[05]. foo" "[06]. bar" "[07]. foo bar baz quux") ;Unspecified return value (pp (r:grep (r:repeat 3 5 (r:alt (r:quote "cat") (r:quote "dog"))) "tests.txt")) ("[09]. catdogcat" "[10]. catcatdogdog" "[11]. dogdogcatdogdog" "[12]. catcatcatdogdogdog" "[13]. acatdogdogcats" "[14]. ifacatdogdogs" "[15]. acatdogdogsme") ;Unspecified return value (pp (r:grep (r:seq " " (r:repeat 3 5 (r:alt (r:quote "cat") (r:quote "dog"))) (r:eol)) "tests.txt")) ("[09]. catdogcat" "[10]. catcatdogdog" "[11]. dogdogcatdogdog") ;Unspecified return value (pp (r:grep (let ((digit (r:char-from (string->char-set "0123456789")))) (r:seq (r:bol) (r:quote "[") digit digit (r:quote "]") (r:quote ".") (r:quote " ") (r:char-from (char-set #\a #\b)) (r:repeat 3 5 (r:alt "cat" "dog")) (r:char-not-from (char-set #\d #\e #\f)) (r:eol))) "tests.txt")) ("[13]. acatdogdogcats") ;Unspecified return value |#