summaryrefslogtreecommitdiffstats
path: root/ps01_grep/ps01_bnewbold.txt
diff options
context:
space:
mode:
Diffstat (limited to 'ps01_grep/ps01_bnewbold.txt')
-rw-r--r--ps01_grep/ps01_bnewbold.txt345
1 files changed, 345 insertions, 0 deletions
diff --git a/ps01_grep/ps01_bnewbold.txt b/ps01_grep/ps01_bnewbold.txt
new file mode 100644
index 0000000..52979e7
--- /dev/null
+++ b/ps01_grep/ps01_bnewbold.txt
@@ -0,0 +1,345 @@
+;;; 6.945 Problem Set #1
+;;; 02/10/2009
+;;; Bryan Newbold <bnewbold@mit.edu>
+
+
+Problem 1.1: Warmup
+---------------
+
+(define (r:+ pattern)
+ (r:repeat 1 #f pattern))
+
+(define (r:* pattern)
+ (r:repeat 0 #f pattern))
+
+(pp (r:grep (r:seq (r:quote "dog")
+ (r:+ (r:quote "cat")))
+ "tests.txt"))
+
+;("[09]. catdogcat" "[11]. dogdogcatdogdog" "[13]. acatdogdogcats")
+
+(pp (r:grep (r:seq (r:quote "cat")
+ (r:+ (r:char-from (string->char-set "ogd")))
+ (r:quote "cat"))
+ "tests.txt"))
+
+;("[09]. catdogcat" "[13]. acatdogdogcats")
+
+(pp (r:grep (r:seq " "
+ (r:+ (r:quote "cat"))
+ (r:eol))
+ "tests.txt"))
+; #f
+
+(pp (r:grep (r:* (r:quote "something that isn't there"))
+ "tests.txt"))
+; matches all
+
+(pp (r:grep (r:repeat 2 3 (r:* (r:char-from (string->char-set "tca"))))
+ "tests.txt"))
+; matches all: nothing repeated several times
+
+(pp (r:grep (r:seq (r:quote "dog")
+ (r:+ (r:quote "cat"))
+ (r:quote "dog"))
+ "tests.txt"))
+
+Problem 1.2: The Proposals
+---------------
+
+a) This would be the same as asking Louis Reasoner to explain why his
+suggestion is wrong; the problem lies within r:repeat, so re-calling r:repeat
+does not sidestep the problem.
+
+b) Bonnie's method is computationally more efficient because checking up to the
+minimum number of repetitions is only executed once; after that successive
+checks are made for additional repetitions. From a straight-forward
+perspective, Bonnie's approach also uses significantly fewer characters of
+final regex code: this could potentially (but unlikely) become important if
+large expressions were searched for with a large range of possible repetitions.
+Most importantly, Bonnie's method is more readable and brings the
+implementation more inline with the underlying philosophy of regular
+expressions (in my opinion).
+
+c) Ben's solution is the one that an experienced regex-er would think of first
+and is the closest conceptual analog to r:repeat in the actual specification.
+The code is compact and should be the most readable and explicit, though we'll
+see what kind of syntaxual issues crop up...
+
+d)
+
+(define (r:repeat min max expr)
+ (if (not (exact-nonnegative-integer? min))
+ (error "Min must be non-negative integer:" min))
+ (if max
+ (begin
+ (if (not (exact-nonnegative-integer? max))
+ (error "Max must be non-negative integer (or null):" max))
+ (if (not (<= min max))
+ (error "Min not less than max:" min max))))
+ (cond
+ ((not max) (r:seq expr (string-append "\\{" (number->string min) ",\\}")))
+ ((= max min) (r:seq expr (string-append "\\{" (number->string min) "\\}")))
+ (else (r:seq expr (string-append "\\{"
+ (number->string min)
+ ","
+ (number->string max)
+ "\\}")))))
+#| tests:
+(pp (r:grep (r:repeat 2 3 (r:quote "cat"))
+ "tests.txt"))
+(pp (r:grep (r:repeat 2 #f (r:quote "cat"))
+ "tests.txt"))
+(pp (r:grep (r:repeat 1 1 (r:quote "cat"))
+ "tests.txt"))
+(pp (r:grep (r:repeat 4 3 (r:quote "cat"))
+ "tests.txt"))
+(r:repeat 2 3 (r:quote "cat"))
+|#
+
+
+Problem 1.3: Optimization
+--------------
+I simply removed the default parentheses around all expressions and implemented
+a new r:subexp wrapper which puts parentheses around expressions. Then I had
+r:alt and r:repeat wrap their expressions in r:subexp. This results in /some/
+redundant nesting but is a huge reduction.
+
+(define (r:subexp . exprs)
+ (string-append "\\(" (apply string-append exprs) "\\)"))
+
+(define (r:seq . exprs)
+ (apply string-append exprs))
+
+(r:seq " " (r:subexp "sdf") "dddd")
+
+(define (r:repeat min max expr)
+ (if (not (exact-nonnegative-integer? min))
+ (error "Min must be non-negative integer:" min))
+ (if max
+ (begin
+ (if (not (exact-nonnegative-integer? max))
+ (error "Max must be non-negative integer (or null):" max))
+ (if (not (<= min max))
+ (error "Min not less than max:" min max))))
+ (cond
+ ((not max) (r:seq (r:subexp expr)
+ (string-append "\\{" (number->string min) ",\\}")))
+ ((= max min) (r:seq (r:subexp expr)
+ (string-append "\\{" (number->string min) "\\}")))
+ (else (r:seq (r:subexp expr)
+ (string-append "\\{"
+ (number->string min)
+ ","
+ (number->string max)
+ "\\}")))))
+
+
+Problem 1.4: Back-references
+--------------
+I only implemented the most crude form of back-expressions. A better
+implementation would include a method of naming subexpressions and refering
+back to them by name: the implementation would do the appropriate counting to
+find the correct backreference number for a given named subexpression.
+
+(define (r:backref n)
+ (string-append "\\" (number->string n)))
+
+Problem 1.5: Ugh!
+-------------
+a) Differences between BREs and EREs (I referenced the table at
+http://www.greenend.org.uk/rjk/2002/06/regexp.html):
+
+* Some characters do not need escaping backslashes: mutiple matching brackets
+ ('\{' becomes '{'), subexpressions ('\(' becomes '(')
+* The alternation operator '|' is added
+* The '+' operator is added meaning one or more matches
+* The '?' operator is added meaning one or zero matches
+
+b) The basic method I chose is to have each r: procedure return a list
+containing two lambda procedures: the first will return the regular grep
+version of the expression, while the second will return the extended grep
+version. The r:grep procedure selects the first lambda and r:egrep selects the
+second, and this decision cascades down recursively to the atomic expressions.
+
+My implementation is crude; the r-grep: and r-egrep: procedures should be
+internal to the primary r: procedures (using let), and the whole process could
+be implemented as a funtion.
+
+c) (not, uh, fully tested...)
+
+; the following are only the expressions which had to be rewritten...
+
+(define (r-grep:seq . exprs)
+ (let ((choose-grep (lambda (x)
+ (cond
+ ((string? x) x)
+ ((list? x) (car x))))))
+ (apply string-append (map choose-grep exprs))))
+
+(r-grep:seq "a" "b" '("grep" "egrep"))
+; abgrep
+
+(define (r-egrep:seq . exprs)
+ (let ((choose-egrep (lambda (x)
+ (cond
+ ((string? x) x)
+ ((list? x) (cadr x))))))
+ (apply string-append (map choose-egrep exprs))))
+
+#| test:
+(r-grep:seq "a" "b" '("grep" "egrep"))
+; "abgrep"
+(r-egrep:seq "a" "b" '("grep" "egrep"))
+; "abegrep"
+|#
+
+(define (r:seq . exprs)
+ (cons (lambda () (apply r-grep:seq exprs))
+ (cons (lambda () (apply r-egrep:seq exprs)) '())))
+
+#| test:
+(r:seq "a" "b" '("grep" "egrep"))
+;Value: (#[compound-procedure 14] #[compound-procedure 15])
+((car (r:seq "a" "b" '("grep" "egrep"))))
+; "abgrep"
+((cadr (r:seq "a" "b" '("grep" "egrep"))))
+; "abegrep"
+|#
+
+(define (r-egrep:alt . exprs)
+ (let ((choose-egrep (lambda (x)
+ (cond
+ ((string? x) x)
+ ((list? x) ((cadr x)))))))
+ (if (pair? exprs)
+ (apply r-egrep:seq
+ (cons (choose-egrep (car exprs))
+ (append-map (lambda (expr)
+ (list "|" (choose-egrep expr)))
+ (cdr exprs))))
+ (r-egrep:seq))))
+
+(define (r-grep:alt . exprs)
+ (let ((choose-grep (lambda (x)
+ (cond
+ ((string? x) x)
+ ((list? x) ((car x)))))))
+ (if (pair? exprs)
+ (apply r-grep:seq
+ (cons (choose-grep (car exprs))
+ (append-map (lambda (expr)
+ (list "\\|" (choose-grep expr)))
+ (cdr exprs))))
+ (r-grep:seq))))
+
+(define (r:alt . exprs)
+ (cons (lambda () (apply r-grep:alt exprs))
+ (cons (lambda () (apply r-egrep:alt exprs)) '())))
+
+#| test:
+(r-egrep:alt "a" (r:dot) "c")
+;Value: "a|.|c"
+(r:alt "asdf" (r:dot))
+;Value: (#[compound-procedure 16] #[compound-procedure 17])
+((car (r:alt "asdf" (r:dot))))
+;Value: "asdf\\|."
+((cadr (r:alt "asdf" (r:dot))))
+;Value: "asdf|."
+|#
+
+(define (r-grep:repeat min max expr)
+ (let ((choose-grep (lambda (x)
+ (cond
+ ((string? x) x)
+ ((list? x) ((car x)))))))
+ (if (not (exact-nonnegative-integer? min))
+ (error "Min must be non-negative integer:" min))
+ (if max
+ (begin
+ (if (not (exact-nonnegative-integer? max))
+ (error "Max must be non-negative integer (or null):" max))
+ (if (not (<= min max))
+ (error "Min not less than max:" min max))))
+ (cond
+ ((not max) (r-grep:seq (r-grep:subexp (choose-grep expr))
+ (string-append "\\{" (number->string min) ",\\}")))
+ ((= max min) (r-grep:seq (r-grep:subexp (choose-grep expr))
+ (string-append "\\{" (number->string min) "\\}")))
+ (else (r-grep:seq (r-grep:subexp (choose-grep expr))
+ (string-append "\\{"
+ (number->string min)
+ ","
+ (number->string max)
+ "\\}"))))))
+
+(define (r-egrep:repeat min max expr)
+ (let ((choose-egrep (lambda (x)
+ (cond
+ ((string? x) x)
+ ((list? x) ((cadr x)))))))
+ (if (not (exact-nonnegative-integer? min))
+ (error "Min must be non-negative integer:" min))
+ (if max
+ (begin
+ (if (not (exact-nonnegative-integer? max))
+ (error "Max must be non-negative integer (or null):" max))
+ (if (not (<= min max))
+ (error "Min not less than max:" min max))))
+ (cond
+ ((not max) (r-egrep:seq (r-egrep:subexp (choose-egrep expr))
+ (string-append "{" (number->string min) ",}")))
+ ((= max min) (r-egrep:seq (r-egrep:subexp (choose-egrep expr))
+ (string-append "{" (number->string min) "}")))
+ (else (r-egrep:seq (r-egrep:subexp (choose-egrep expr))
+ (string-append "{"
+ (number->string min)
+ ","
+ (number->string max)
+ "}"))))))
+
+(define (r:repeat min max expr)
+ (cons (lambda () (r-grep:repeat min max expr))
+ (cons (lambda () (r-egrep:repeat min max expr)) '())))
+
+(define (r-grep:subexp . exprs)
+ (string-append "\\(" (apply string-append
+ (map (lambda (x)
+ (cond
+ ((string? x) x)
+ ((pair? x) ((car x)))))
+ exprs)) "\\)"))
+
+(define (r-egrep:subexp . exprs)
+ (string-append "(" (apply string-append
+ (map (lambda (x)
+ (cond
+ ((string? x) x)
+ ((pair? x) ((cadr x)))))
+ exprs)) ")"))
+
+(define (r:subexp . exprs)
+ (cons (lambda () (apply r-grep:subexp exprs))
+ (cons (lambda () (apply r-egrep:subexp exprs)) '())))
+
+
+#| test:
+(r:subexp "a" "b" (r:dot))
+;Value: (#[compound-procedure 25] #[compound-procedure 26])
+((cadr (r:subexp "a" "b" (r:dot))))
+;Value: "(ab.)"
+((car (r:subexp "a" "b" (r:dot))))
+;Value: "\\(ab.\\)"
+|#
+
+(define (r:grep expr filename)
+ (r:grep-like "grep" '() ((car expr)) filename))
+
+(define (r:egrep expr filename)
+ (if (eq? microcode-id/operating-system 'nt)
+ (r:grep-like "grep" '("-E") ((cadr expr)) filename)
+ (r:grep-like "egrep" '() ((cadr expr)) filename)))
+
+(pp (r:grep (r:seq (r:repeat 2 3 (r:quote "cat")) (r:+ (r:quote "dog")))
+ "tests.txt"))
+