From 2b7db5b23df55e3a1ac0639494bea750d0797c9d Mon Sep 17 00:00:00 2001 From: bnewbold Date: Wed, 11 Feb 2009 14:24:16 -0500 Subject: first pset work --- ps01_grep/ps01_bnewbold.txt | 345 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 345 insertions(+) create mode 100644 ps01_grep/ps01_bnewbold.txt (limited to 'ps01_grep/ps01_bnewbold.txt') diff --git a/ps01_grep/ps01_bnewbold.txt b/ps01_grep/ps01_bnewbold.txt new file mode 100644 index 0000000..52979e7 --- /dev/null +++ b/ps01_grep/ps01_bnewbold.txt @@ -0,0 +1,345 @@ +;;; 6.945 Problem Set #1 +;;; 02/10/2009 +;;; Bryan Newbold + + +Problem 1.1: Warmup +--------------- + +(define (r:+ pattern) + (r:repeat 1 #f pattern)) + +(define (r:* pattern) + (r:repeat 0 #f pattern)) + +(pp (r:grep (r:seq (r:quote "dog") + (r:+ (r:quote "cat"))) + "tests.txt")) + +;("[09]. catdogcat" "[11]. dogdogcatdogdog" "[13]. acatdogdogcats") + +(pp (r:grep (r:seq (r:quote "cat") + (r:+ (r:char-from (string->char-set "ogd"))) + (r:quote "cat")) + "tests.txt")) + +;("[09]. catdogcat" "[13]. acatdogdogcats") + +(pp (r:grep (r:seq " " + (r:+ (r:quote "cat")) + (r:eol)) + "tests.txt")) +; #f + +(pp (r:grep (r:* (r:quote "something that isn't there")) + "tests.txt")) +; matches all + +(pp (r:grep (r:repeat 2 3 (r:* (r:char-from (string->char-set "tca")))) + "tests.txt")) +; matches all: nothing repeated several times + +(pp (r:grep (r:seq (r:quote "dog") + (r:+ (r:quote "cat")) + (r:quote "dog")) + "tests.txt")) + +Problem 1.2: The Proposals +--------------- + +a) This would be the same as asking Louis Reasoner to explain why his +suggestion is wrong; the problem lies within r:repeat, so re-calling r:repeat +does not sidestep the problem. + +b) Bonnie's method is computationally more efficient because checking up to the +minimum number of repetitions is only executed once; after that successive +checks are made for additional repetitions. From a straight-forward +perspective, Bonnie's approach also uses significantly fewer characters of +final regex code: this could potentially (but unlikely) become important if +large expressions were searched for with a large range of possible repetitions. +Most importantly, Bonnie's method is more readable and brings the +implementation more inline with the underlying philosophy of regular +expressions (in my opinion). + +c) Ben's solution is the one that an experienced regex-er would think of first +and is the closest conceptual analog to r:repeat in the actual specification. +The code is compact and should be the most readable and explicit, though we'll +see what kind of syntaxual issues crop up... + +d) + +(define (r:repeat min max expr) + (if (not (exact-nonnegative-integer? min)) + (error "Min must be non-negative integer:" min)) + (if max + (begin + (if (not (exact-nonnegative-integer? max)) + (error "Max must be non-negative integer (or null):" max)) + (if (not (<= min max)) + (error "Min not less than max:" min max)))) + (cond + ((not max) (r:seq expr (string-append "\\{" (number->string min) ",\\}"))) + ((= max min) (r:seq expr (string-append "\\{" (number->string min) "\\}"))) + (else (r:seq expr (string-append "\\{" + (number->string min) + "," + (number->string max) + "\\}"))))) +#| tests: +(pp (r:grep (r:repeat 2 3 (r:quote "cat")) + "tests.txt")) +(pp (r:grep (r:repeat 2 #f (r:quote "cat")) + "tests.txt")) +(pp (r:grep (r:repeat 1 1 (r:quote "cat")) + "tests.txt")) +(pp (r:grep (r:repeat 4 3 (r:quote "cat")) + "tests.txt")) +(r:repeat 2 3 (r:quote "cat")) +|# + + +Problem 1.3: Optimization +-------------- +I simply removed the default parentheses around all expressions and implemented +a new r:subexp wrapper which puts parentheses around expressions. Then I had +r:alt and r:repeat wrap their expressions in r:subexp. This results in /some/ +redundant nesting but is a huge reduction. + +(define (r:subexp . exprs) + (string-append "\\(" (apply string-append exprs) "\\)")) + +(define (r:seq . exprs) + (apply string-append exprs)) + +(r:seq " " (r:subexp "sdf") "dddd") + +(define (r:repeat min max expr) + (if (not (exact-nonnegative-integer? min)) + (error "Min must be non-negative integer:" min)) + (if max + (begin + (if (not (exact-nonnegative-integer? max)) + (error "Max must be non-negative integer (or null):" max)) + (if (not (<= min max)) + (error "Min not less than max:" min max)))) + (cond + ((not max) (r:seq (r:subexp expr) + (string-append "\\{" (number->string min) ",\\}"))) + ((= max min) (r:seq (r:subexp expr) + (string-append "\\{" (number->string min) "\\}"))) + (else (r:seq (r:subexp expr) + (string-append "\\{" + (number->string min) + "," + (number->string max) + "\\}"))))) + + +Problem 1.4: Back-references +-------------- +I only implemented the most crude form of back-expressions. A better +implementation would include a method of naming subexpressions and refering +back to them by name: the implementation would do the appropriate counting to +find the correct backreference number for a given named subexpression. + +(define (r:backref n) + (string-append "\\" (number->string n))) + +Problem 1.5: Ugh! +------------- +a) Differences between BREs and EREs (I referenced the table at +http://www.greenend.org.uk/rjk/2002/06/regexp.html): + +* Some characters do not need escaping backslashes: mutiple matching brackets + ('\{' becomes '{'), subexpressions ('\(' becomes '(') +* The alternation operator '|' is added +* The '+' operator is added meaning one or more matches +* The '?' operator is added meaning one or zero matches + +b) The basic method I chose is to have each r: procedure return a list +containing two lambda procedures: the first will return the regular grep +version of the expression, while the second will return the extended grep +version. The r:grep procedure selects the first lambda and r:egrep selects the +second, and this decision cascades down recursively to the atomic expressions. + +My implementation is crude; the r-grep: and r-egrep: procedures should be +internal to the primary r: procedures (using let), and the whole process could +be implemented as a funtion. + +c) (not, uh, fully tested...) + +; the following are only the expressions which had to be rewritten... + +(define (r-grep:seq . exprs) + (let ((choose-grep (lambda (x) + (cond + ((string? x) x) + ((list? x) (car x)))))) + (apply string-append (map choose-grep exprs)))) + +(r-grep:seq "a" "b" '("grep" "egrep")) +; abgrep + +(define (r-egrep:seq . exprs) + (let ((choose-egrep (lambda (x) + (cond + ((string? x) x) + ((list? x) (cadr x)))))) + (apply string-append (map choose-egrep exprs)))) + +#| test: +(r-grep:seq "a" "b" '("grep" "egrep")) +; "abgrep" +(r-egrep:seq "a" "b" '("grep" "egrep")) +; "abegrep" +|# + +(define (r:seq . exprs) + (cons (lambda () (apply r-grep:seq exprs)) + (cons (lambda () (apply r-egrep:seq exprs)) '()))) + +#| test: +(r:seq "a" "b" '("grep" "egrep")) +;Value: (#[compound-procedure 14] #[compound-procedure 15]) +((car (r:seq "a" "b" '("grep" "egrep")))) +; "abgrep" +((cadr (r:seq "a" "b" '("grep" "egrep")))) +; "abegrep" +|# + +(define (r-egrep:alt . exprs) + (let ((choose-egrep (lambda (x) + (cond + ((string? x) x) + ((list? x) ((cadr x))))))) + (if (pair? exprs) + (apply r-egrep:seq + (cons (choose-egrep (car exprs)) + (append-map (lambda (expr) + (list "|" (choose-egrep expr))) + (cdr exprs)))) + (r-egrep:seq)))) + +(define (r-grep:alt . exprs) + (let ((choose-grep (lambda (x) + (cond + ((string? x) x) + ((list? x) ((car x))))))) + (if (pair? exprs) + (apply r-grep:seq + (cons (choose-grep (car exprs)) + (append-map (lambda (expr) + (list "\\|" (choose-grep expr))) + (cdr exprs)))) + (r-grep:seq)))) + +(define (r:alt . exprs) + (cons (lambda () (apply r-grep:alt exprs)) + (cons (lambda () (apply r-egrep:alt exprs)) '()))) + +#| test: +(r-egrep:alt "a" (r:dot) "c") +;Value: "a|.|c" +(r:alt "asdf" (r:dot)) +;Value: (#[compound-procedure 16] #[compound-procedure 17]) +((car (r:alt "asdf" (r:dot)))) +;Value: "asdf\\|." +((cadr (r:alt "asdf" (r:dot)))) +;Value: "asdf|." +|# + +(define (r-grep:repeat min max expr) + (let ((choose-grep (lambda (x) + (cond + ((string? x) x) + ((list? x) ((car x))))))) + (if (not (exact-nonnegative-integer? min)) + (error "Min must be non-negative integer:" min)) + (if max + (begin + (if (not (exact-nonnegative-integer? max)) + (error "Max must be non-negative integer (or null):" max)) + (if (not (<= min max)) + (error "Min not less than max:" min max)))) + (cond + ((not max) (r-grep:seq (r-grep:subexp (choose-grep expr)) + (string-append "\\{" (number->string min) ",\\}"))) + ((= max min) (r-grep:seq (r-grep:subexp (choose-grep expr)) + (string-append "\\{" (number->string min) "\\}"))) + (else (r-grep:seq (r-grep:subexp (choose-grep expr)) + (string-append "\\{" + (number->string min) + "," + (number->string max) + "\\}")))))) + +(define (r-egrep:repeat min max expr) + (let ((choose-egrep (lambda (x) + (cond + ((string? x) x) + ((list? x) ((cadr x))))))) + (if (not (exact-nonnegative-integer? min)) + (error "Min must be non-negative integer:" min)) + (if max + (begin + (if (not (exact-nonnegative-integer? max)) + (error "Max must be non-negative integer (or null):" max)) + (if (not (<= min max)) + (error "Min not less than max:" min max)))) + (cond + ((not max) (r-egrep:seq (r-egrep:subexp (choose-egrep expr)) + (string-append "{" (number->string min) ",}"))) + ((= max min) (r-egrep:seq (r-egrep:subexp (choose-egrep expr)) + (string-append "{" (number->string min) "}"))) + (else (r-egrep:seq (r-egrep:subexp (choose-egrep expr)) + (string-append "{" + (number->string min) + "," + (number->string max) + "}")))))) + +(define (r:repeat min max expr) + (cons (lambda () (r-grep:repeat min max expr)) + (cons (lambda () (r-egrep:repeat min max expr)) '()))) + +(define (r-grep:subexp . exprs) + (string-append "\\(" (apply string-append + (map (lambda (x) + (cond + ((string? x) x) + ((pair? x) ((car x))))) + exprs)) "\\)")) + +(define (r-egrep:subexp . exprs) + (string-append "(" (apply string-append + (map (lambda (x) + (cond + ((string? x) x) + ((pair? x) ((cadr x))))) + exprs)) ")")) + +(define (r:subexp . exprs) + (cons (lambda () (apply r-grep:subexp exprs)) + (cons (lambda () (apply r-egrep:subexp exprs)) '()))) + + +#| test: +(r:subexp "a" "b" (r:dot)) +;Value: (#[compound-procedure 25] #[compound-procedure 26]) +((cadr (r:subexp "a" "b" (r:dot)))) +;Value: "(ab.)" +((car (r:subexp "a" "b" (r:dot)))) +;Value: "\\(ab.\\)" +|# + +(define (r:grep expr filename) + (r:grep-like "grep" '() ((car expr)) filename)) + +(define (r:egrep expr filename) + (if (eq? microcode-id/operating-system 'nt) + (r:grep-like "grep" '("-E") ((cadr expr)) filename) + (r:grep-like "egrep" '() ((cadr expr)) filename))) + +(pp (r:grep (r:seq (r:repeat 2 3 (r:quote "cat")) (r:+ (r:quote "dog"))) + "tests.txt")) + -- cgit v1.2.3