summaryrefslogtreecommitdiffstats
path: root/ps01_grep/regexp.scm
diff options
context:
space:
mode:
Diffstat (limited to 'ps01_grep/regexp.scm')
-rw-r--r--ps01_grep/regexp.scm230
1 files changed, 230 insertions, 0 deletions
diff --git a/ps01_grep/regexp.scm b/ps01_grep/regexp.scm
new file mode 100644
index 0000000..a7f2305
--- /dev/null
+++ b/ps01_grep/regexp.scm
@@ -0,0 +1,230 @@
+;;;; Scheme Regular Expression Language Implementation -- regexp.scm
+
+(define (r:dot) ".")
+
+(define (r:bol) "^")
+
+(define (r:eol) "$")
+
+(define (r:quote string)
+ (r:seq
+ (call-with-output-string ; see RefMan section 14.3
+ (lambda (port)
+ (let ((end (string-length string)))
+ (do ((i 0 (+ i 1))) ((not (< i end))) ; see RefMan 2.9
+ (let ((c (string-ref string i)))
+ (if (or (char=? c #\.)
+ (char=? c #\[)
+ (char=? c #\\)
+ (char=? c #\^)
+ (char=? c #\$)
+ (char=? c #\*))
+ (write-char #\\ port))
+ (write-char c port))))))))
+
+(define (r:char-from char-set) ; see RefMan section 5.6
+ (let ((members (char-set-members char-set)))
+ (cond ((not (pair? members))
+ (r:seq))
+ ((not (pair? (cdr members)))
+ (r:quote (string (car members))))
+ (else
+ (%char-from #f members)))))
+
+(define (r:char-not-from char-set)
+ (%char-from #t (char-set-members char-set)))
+
+(define (%char-from negate? members)
+ (let ((right? (memv #\] members))
+ (caret? (memv #\^ members))
+ (hyphen? (memv #\- members))
+ (others
+ (delete-matching-items members
+ (lambda (c)
+ (or (char=? c #\])
+ (char=? c #\^)
+ (char=? c #\-))))))
+ (if (and caret?
+ hyphen?
+ (not right?)
+ (not negate?)
+ (null? others))
+ "[-^]"
+ (string-append "["
+ (if negate? "^" "")
+ (if right? "]" "")
+ (list->string others)
+ (if caret? "^" "")
+ (if hyphen? "-" "")
+ "]"))))
+
+;;; Means of combination for patterns
+
+(define (r:seq . exprs)
+ (string-append "\\(" (apply string-append exprs) "\\)"))
+
+(define (r:alt . exprs)
+ (if (pair? exprs)
+ (apply r:seq
+ (cons (car exprs)
+ (append-map (lambda (expr)
+ (list "\\|" expr))
+ (cdr exprs))))
+ (r:seq)))
+
+(define (r:repeat min max expr)
+ (if (not (exact-nonnegative-integer? min))
+ (error "Min must be non-negative integer:" min))
+ (if max
+ (begin
+ (if (not (exact-nonnegative-integer? max))
+ (error "Max must be non-negative integer:" max))
+ (if (not (<= min max))
+ (error "Min not less than max:" min max))))
+ (cond ((not max)
+ (apply r:seq
+ (append (make-list min expr)
+ (list expr "*"))))
+ ((= max min)
+ (apply r:seq (make-list min expr)))
+ (else
+ (apply r:seq
+ (append (make-list min expr)
+ (make-list (- max min)
+ (r:seq expr "\\|")))))))
+
+;;; The following magic allows a program in MIT/GNU Scheme to call the
+;;; grep system utility, returning the list of grep output lines to
+;;; the caller. You can make similar mechanisms to call other system
+;;; utilities.
+
+(load-option 'synchronous-subprocess)
+
+
+(define (r:grep expr filename)
+ (r:grep-like "grep" '() expr filename))
+
+(define (r:egrep expr filename)
+ (if (eq? microcode-id/operating-system 'nt)
+ (r:grep-like "grep" '("-E") expr filename)
+ (r:grep-like "egrep" '() expr filename)))
+
+(define (r:grep-like program options expr filename)
+ (let ((port (open-output-string)))
+ (and (= (run-synchronous-subprocess program
+ (append options
+ (list "-e" expr (->namestring filename)))
+ 'output port)
+ 0)
+ (r:split-lines (get-output-string port)))))
+
+(define (r:split-lines string)
+ (reverse
+ (let ((end (string-length string)))
+ (let loop ((i 0) (lines '()))
+ (if (< i end)
+ (let ((j
+ (substring-find-next-char string i end #\newline)))
+ (if j
+ (loop (+ j 1)
+ (cons (substring string i j) lines))
+ (cons (substring string i end) lines)))
+ lines)))))
+
+#|
+;;; An alternate implementation using MIT/GNU Scheme's internal
+;;; regular-expression interpreter.
+
+(define (r:grep expr filename)
+ (call-with-input-file filename
+ (lambda (port)
+ (let loop ((lines '()))
+ (let ((line (read-line port)))
+ (if (eof-object? line)
+ (reverse lines)
+ (loop (if (re-string-search-forward expr line #f)
+ (cons line lines)
+ lines))))))))
+|#
+
+#|
+;;; For example...
+
+;;; Note, the result of the next two requests were not in this file
+;;; when the requests were made!
+
+(pp (r:grep (r:quote "r:sex") "regexp.scm"))
+("(pp (r:grep (r:quote \"r:sex\") \"regexp.scm\"))")
+;Unspecified return value
+
+(pp (r:grep (r:quote "r:seq") "regexp.scm"))
+(" (r:seq"
+ "\t (r:seq))"
+ "(define (r:seq . exprs)"
+ " (apply r:seq"
+ " (r:seq)))"
+ "\t (apply r:seq"
+ "\t (apply r:seq (make-list min expr)))"
+ "\t (apply r:seq"
+ "\t\t\t\t (r:seq expr \"\\\\|\")))))))"
+ "(pp (r:grep (r:quote \"r:seq\") \"regexp.scm\"))"
+ "(pp (r:grep (r:seq (r:quote \"a\") (r:dot) (r:quote \"c\")) \"tests.txt\"))"
+ " (r:grep (r:seq \" \""
+ " (r:seq (r:bol)")
+;Unspecified return value
+
+(pp (r:grep (r:seq (r:quote "a") (r:dot) (r:quote "c")) "tests.txt"))
+("[00]. abc"
+ "[01]. aac"
+ "[02]. acc"
+ "[03]. zzzaxcqqq"
+ "[10]. catcatdogdog"
+ "[12]. catcatcatdogdogdog")
+;Unspecified return value
+
+;;; And...
+
+(pp (r:grep (r:alt (r:quote "foo") (r:quote "bar") (r:quote "baz"))
+ "tests.txt"))
+("[05]. foo" "[06]. bar" "[07]. foo bar baz quux")
+;Unspecified return value
+
+
+(pp (r:grep (r:repeat 3 5 (r:alt (r:quote "cat") (r:quote "dog")))
+ "tests.txt"))
+("[09]. catdogcat"
+ "[10]. catcatdogdog"
+ "[11]. dogdogcatdogdog"
+ "[12]. catcatcatdogdogdog"
+ "[13]. acatdogdogcats"
+ "[14]. ifacatdogdogs"
+ "[15]. acatdogdogsme")
+;Unspecified return value
+
+(pp
+ (r:grep (r:seq " "
+ (r:repeat 3 5 (r:alt (r:quote "cat") (r:quote "dog")))
+ (r:eol))
+ "tests.txt"))
+("[09]. catdogcat" "[10]. catcatdogdog" "[11]. dogdogcatdogdog")
+;Unspecified return value
+
+(pp
+ (r:grep
+ (let ((digit
+ (r:char-from (string->char-set "0123456789"))))
+ (r:seq (r:bol)
+ (r:quote "[")
+ digit
+ digit
+ (r:quote "]")
+ (r:quote ".")
+ (r:quote " ")
+ (r:char-from (char-set #\a #\b))
+ (r:repeat 3 5 (r:alt "cat" "dog"))
+ (r:char-not-from (char-set #\d #\e #\f))
+ (r:eol)))
+ "tests.txt"))
+("[13]. acatdogdogcats")
+;Unspecified return value
+|# \ No newline at end of file