diff options
Diffstat (limited to 'strsrch.scm')
-rw-r--r-- | strsrch.scm | 164 |
1 files changed, 105 insertions, 59 deletions
diff --git a/strsrch.scm b/strsrch.scm index 71c69df..13edb65 100644 --- a/strsrch.scm +++ b/strsrch.scm @@ -1,67 +1,109 @@ ;;; "MISCIO" Search for string from port. -; Written 1995, 1996 by Oleg Kiselyov (oleg@ponder.csci.unt.edu) -; Modified 1996, 1997, 1998 by A. Jaffer (jaffer@ai.mit.edu) +; Written 1995, 1996 by Oleg Kiselyov (oleg@acm.org) +; Modified 1996, 1997, 1998, 2001 by A. Jaffer (agj@alum.mit.edu) +; Modified 2003 by Steve VanDevender (stevev@hexadecimal.uoregon.edu) ; ; This code is in the public domain. -;;; Return the index of the first occurence of a-char in str, or #f -(define (string-index str a-char) - (let loop ((pos 0)) - (cond - ;; whole string has been searched, in vain - ((>= pos (string-length str)) #f) - ((char=? a-char (string-ref str pos)) pos) - (else (loop (+ 1 pos)))))) - -(define (string-index-ci str a-char) - (let loop ((pos 0)) - (cond - ;; whole string has been searched, in vain - ((>= pos (string-length str)) #f) - ((char-ci=? a-char (string-ref str pos)) pos) - (else (loop (+ 1 pos)))))) - -(define (string-reverse-index str a-char) - (let loop ((pos (- (string-length str) 1))) - (cond ((< pos 0) #f) - ((char=? (string-ref str pos) a-char) pos) - (else (loop (- pos 1)))))) +;;;@ Return the index of the first occurence of chr in str, or #f +(define (string-index str chr) + (define len (string-length str)) + (do ((pos 0 (+ 1 pos))) + ((or (>= pos len) (char=? chr (string-ref str pos))) + (and (< pos len) pos)))) +;@ +(define (string-index-ci str chr) + (define len (string-length str)) + (do ((pos 0 (+ 1 pos))) + ((or (>= pos len) (char-ci=? chr (string-ref str pos))) + (and (< pos len) pos)))) +;@ +(define (string-reverse-index str chr) + (do ((pos (+ -1 (string-length str)) (+ -1 pos))) + ((or (negative? pos) (char=? (string-ref str pos) chr)) + (and (not (negative? pos)) pos)))) +;@ +(define (string-reverse-index-ci str chr) + (do ((pos (+ -1 (string-length str)) (+ -1 pos))) + ((or (negative? pos) (char-ci=? (string-ref str pos) chr)) + (and (not (negative? pos)) pos)))) +;@ +(define (substring? pat str) + (define patlen (string-length pat)) + (define strlen (string-length str)) + (cond ((zero? patlen) 0) ; trivial match + ((>= patlen strlen) (and (= patlen strlen) (string=? pat str) 0)) + ;; use faster string-index to match a single-character pattern + ((= 1 patlen) (string-index str (string-ref pat 0))) + ((or (<= strlen (+ patlen patlen (quotient char-code-limit 2))) + (<= patlen 4)) + (subloop pat patlen str strlen char=?)) + (else + ;; compute skip values for search pattern characters + ;; for all c not in pat, skip[c] = patlen + 1 + ;; for c in pat, skip[c] is distance of rightmost occurrence + ;; of c from end of str + (let ((skip (make-vector char-code-limit (+ patlen 1)))) + (do ((i 0 (+ i 1))) + ((= i patlen)) + (vector-set! skip (char->integer (string-ref pat i)) + (- patlen i))) + (subskip skip pat patlen str strlen char=?))))) +;@ +(define (substring-ci? pat str) + (define patlen (string-length pat)) + (define strlen (string-length str)) + (cond ((zero? patlen) 0) ; trivial match + ((>= patlen strlen) (and (= patlen strlen) (string-ci=? pat str) 0)) + ((= 1 patlen) (string-index-ci str (string-ref pat 0))) + ((or (<= strlen (+ patlen patlen (quotient char-code-limit 2))) + (<= patlen 4)) + (subloop pat patlen str strlen char-ci=?)) + (else + (let ((skip (make-vector char-code-limit (+ patlen 1)))) + (do ((i 0 (+ i 1))) + ((= i patlen)) + (let ((c (string-ref pat i)) + (d (- patlen i))) + ;; use same skip value for both upper- and lowercase characters + (vector-set! skip (char->integer (char-upcase c)) d) + (vector-set! skip (char->integer (char-downcase c)) d))) + (subskip skip pat patlen str strlen char-ci=?))))) -(define (string-reverse-index-ci str a-char) - (let loop ((pos (- (string-length str) 1))) - (cond ((< pos 0) #f) - ((char-ci=? (string-ref str pos) a-char) pos) - (else (loop (- pos 1)))))) +(define (subskip skip pat patlen str strlen char=) + (do ((k patlen (if (< k strlen) + (+ k (vector-ref skip (char->integer (string-ref str k)))) + (+ strlen 1)))) + ((or (> k strlen) + (do ((i 0 (+ i 1)) + (j (- k patlen) (+ j 1))) + ((or (= i patlen) + (not (char= (string-ref pat i) (string-ref str j)))) + (= i patlen)))) + (and (<= k strlen) (- k patlen))))) -(define (miscio:substring? pattern str char=?) - (let* ((pat-len (string-length pattern)) - (search-span (- (string-length str) pat-len)) - (c1 (if (zero? pat-len) #f (string-ref pattern 0))) - (c2 (if (<= pat-len 1) #f (string-ref pattern 1)))) +;;; Assumes that PATLEN > 1 +(define (subloop pat patlen str strlen char=) + (define span (- strlen patlen)) + (define c1 (string-ref pat 0)) + (define c2 (string-ref pat 1)) + (let outer ((pos 0)) (cond - ((not c1) 0) ; empty pattern, matches upfront - ((not c2) (string-index str c1)) ; one-char pattern - (else ; matching pattern of > two chars - (let outer ((pos 0)) - (cond - ((> pos search-span) #f) ; nothing was found thru the whole str - ((not (char=? c1 (string-ref str pos))) - (outer (+ 1 pos))) ; keep looking for the right beginning - ((not (char=? c2 (string-ref str (+ 1 pos)))) - (outer (+ 1 pos))) ; could've done pos+2 if c1 == c2.... - (else ; two char matched: high probability + ((> pos span) #f) ; nothing was found thru the whole str + ((not (char= c1 (string-ref str pos))) + (outer (+ 1 pos))) ; keep looking for the right beginning + ((not (char= c2 (string-ref str (+ 1 pos)))) + (outer (+ 1 pos))) ; could've done pos+2 if c1 == c2.... + (else ; two char matched: high probability ; the rest will match too - (let inner ((i-pat 2) (i-str (+ 2 pos))) - (if (>= i-pat pat-len) pos ; the whole pattern matched - (if (char=? (string-ref pattern i-pat) - (string-ref str i-str)) - (inner (+ 1 i-pat) (+ 1 i-str)) - ;; mismatch after partial match - (outer (+ 1 pos)))))))))))) - -(define (substring? pattern str) (miscio:substring? pattern str char=?)) -(define (substring-ci? pattern str) (miscio:substring? pattern str char-ci=?)) - + (let inner ((pdx 2) (sdx (+ 2 pos))) + (if (>= pdx patlen) pos ; the whole pat matched + (if (char= (string-ref pat pdx) + (string-ref str sdx)) + (inner (+ 1 pdx) (+ 1 sdx)) + ;; mismatch after partial match + (outer (+ 1 pos))))))))) +;@ (define (find-string-from-port? str <input-port> . max-no-char) (set! max-no-char (if (null? max-no-char) #f (car max-no-char))) (letrec @@ -94,7 +136,7 @@ (match-other-chars (lambda (pos-to-match) (if (>= pos-to-match (string-length str)) - no-chars-read ; the entire string has matched + no-chars-read ; the entire string has matched (let ((c (my-peek-char))) (and c (if (not (char=? c (string-ref str pos-to-match))) @@ -124,7 +166,7 @@ (backtrack (+ 1 i) matched-substr-len)))))))) ) (match-1st-char))) - +;@ (define (string-subst text old new . rest) (define sub (lambda (text) @@ -143,4 +185,8 @@ text (apply string-subst text rest)))) (sub text)) - +;@ +(define (count-newlines str) + (do ((idx (+ -1 (string-length str)) (+ -1 idx)) + (cnt 0 (+ (if (eqv? #\newline (string-ref str idx)) 1 0) cnt))) + ((<= idx 0) cnt))) |