summaryrefslogtreecommitdiffstats
path: root/strsrch.scm
diff options
context:
space:
mode:
Diffstat (limited to 'strsrch.scm')
-rw-r--r--strsrch.scm164
1 files changed, 105 insertions, 59 deletions
diff --git a/strsrch.scm b/strsrch.scm
index 71c69df..13edb65 100644
--- a/strsrch.scm
+++ b/strsrch.scm
@@ -1,67 +1,109 @@
;;; "MISCIO" Search for string from port.
-; Written 1995, 1996 by Oleg Kiselyov (oleg@ponder.csci.unt.edu)
-; Modified 1996, 1997, 1998 by A. Jaffer (jaffer@ai.mit.edu)
+; Written 1995, 1996 by Oleg Kiselyov (oleg@acm.org)
+; Modified 1996, 1997, 1998, 2001 by A. Jaffer (agj@alum.mit.edu)
+; Modified 2003 by Steve VanDevender (stevev@hexadecimal.uoregon.edu)
;
; This code is in the public domain.
-;;; Return the index of the first occurence of a-char in str, or #f
-(define (string-index str a-char)
- (let loop ((pos 0))
- (cond
- ;; whole string has been searched, in vain
- ((>= pos (string-length str)) #f)
- ((char=? a-char (string-ref str pos)) pos)
- (else (loop (+ 1 pos))))))
-
-(define (string-index-ci str a-char)
- (let loop ((pos 0))
- (cond
- ;; whole string has been searched, in vain
- ((>= pos (string-length str)) #f)
- ((char-ci=? a-char (string-ref str pos)) pos)
- (else (loop (+ 1 pos))))))
-
-(define (string-reverse-index str a-char)
- (let loop ((pos (- (string-length str) 1)))
- (cond ((< pos 0) #f)
- ((char=? (string-ref str pos) a-char) pos)
- (else (loop (- pos 1))))))
+;;;@ Return the index of the first occurence of chr in str, or #f
+(define (string-index str chr)
+ (define len (string-length str))
+ (do ((pos 0 (+ 1 pos)))
+ ((or (>= pos len) (char=? chr (string-ref str pos)))
+ (and (< pos len) pos))))
+;@
+(define (string-index-ci str chr)
+ (define len (string-length str))
+ (do ((pos 0 (+ 1 pos)))
+ ((or (>= pos len) (char-ci=? chr (string-ref str pos)))
+ (and (< pos len) pos))))
+;@
+(define (string-reverse-index str chr)
+ (do ((pos (+ -1 (string-length str)) (+ -1 pos)))
+ ((or (negative? pos) (char=? (string-ref str pos) chr))
+ (and (not (negative? pos)) pos))))
+;@
+(define (string-reverse-index-ci str chr)
+ (do ((pos (+ -1 (string-length str)) (+ -1 pos)))
+ ((or (negative? pos) (char-ci=? (string-ref str pos) chr))
+ (and (not (negative? pos)) pos))))
+;@
+(define (substring? pat str)
+ (define patlen (string-length pat))
+ (define strlen (string-length str))
+ (cond ((zero? patlen) 0) ; trivial match
+ ((>= patlen strlen) (and (= patlen strlen) (string=? pat str) 0))
+ ;; use faster string-index to match a single-character pattern
+ ((= 1 patlen) (string-index str (string-ref pat 0)))
+ ((or (<= strlen (+ patlen patlen (quotient char-code-limit 2)))
+ (<= patlen 4))
+ (subloop pat patlen str strlen char=?))
+ (else
+ ;; compute skip values for search pattern characters
+ ;; for all c not in pat, skip[c] = patlen + 1
+ ;; for c in pat, skip[c] is distance of rightmost occurrence
+ ;; of c from end of str
+ (let ((skip (make-vector char-code-limit (+ patlen 1))))
+ (do ((i 0 (+ i 1)))
+ ((= i patlen))
+ (vector-set! skip (char->integer (string-ref pat i))
+ (- patlen i)))
+ (subskip skip pat patlen str strlen char=?)))))
+;@
+(define (substring-ci? pat str)
+ (define patlen (string-length pat))
+ (define strlen (string-length str))
+ (cond ((zero? patlen) 0) ; trivial match
+ ((>= patlen strlen) (and (= patlen strlen) (string-ci=? pat str) 0))
+ ((= 1 patlen) (string-index-ci str (string-ref pat 0)))
+ ((or (<= strlen (+ patlen patlen (quotient char-code-limit 2)))
+ (<= patlen 4))
+ (subloop pat patlen str strlen char-ci=?))
+ (else
+ (let ((skip (make-vector char-code-limit (+ patlen 1))))
+ (do ((i 0 (+ i 1)))
+ ((= i patlen))
+ (let ((c (string-ref pat i))
+ (d (- patlen i)))
+ ;; use same skip value for both upper- and lowercase characters
+ (vector-set! skip (char->integer (char-upcase c)) d)
+ (vector-set! skip (char->integer (char-downcase c)) d)))
+ (subskip skip pat patlen str strlen char-ci=?)))))
-(define (string-reverse-index-ci str a-char)
- (let loop ((pos (- (string-length str) 1)))
- (cond ((< pos 0) #f)
- ((char-ci=? (string-ref str pos) a-char) pos)
- (else (loop (- pos 1))))))
+(define (subskip skip pat patlen str strlen char=)
+ (do ((k patlen (if (< k strlen)
+ (+ k (vector-ref skip (char->integer (string-ref str k))))
+ (+ strlen 1))))
+ ((or (> k strlen)
+ (do ((i 0 (+ i 1))
+ (j (- k patlen) (+ j 1)))
+ ((or (= i patlen)
+ (not (char= (string-ref pat i) (string-ref str j))))
+ (= i patlen))))
+ (and (<= k strlen) (- k patlen)))))
-(define (miscio:substring? pattern str char=?)
- (let* ((pat-len (string-length pattern))
- (search-span (- (string-length str) pat-len))
- (c1 (if (zero? pat-len) #f (string-ref pattern 0)))
- (c2 (if (<= pat-len 1) #f (string-ref pattern 1))))
+;;; Assumes that PATLEN > 1
+(define (subloop pat patlen str strlen char=)
+ (define span (- strlen patlen))
+ (define c1 (string-ref pat 0))
+ (define c2 (string-ref pat 1))
+ (let outer ((pos 0))
(cond
- ((not c1) 0) ; empty pattern, matches upfront
- ((not c2) (string-index str c1)) ; one-char pattern
- (else ; matching pattern of > two chars
- (let outer ((pos 0))
- (cond
- ((> pos search-span) #f) ; nothing was found thru the whole str
- ((not (char=? c1 (string-ref str pos)))
- (outer (+ 1 pos))) ; keep looking for the right beginning
- ((not (char=? c2 (string-ref str (+ 1 pos))))
- (outer (+ 1 pos))) ; could've done pos+2 if c1 == c2....
- (else ; two char matched: high probability
+ ((> pos span) #f) ; nothing was found thru the whole str
+ ((not (char= c1 (string-ref str pos)))
+ (outer (+ 1 pos))) ; keep looking for the right beginning
+ ((not (char= c2 (string-ref str (+ 1 pos))))
+ (outer (+ 1 pos))) ; could've done pos+2 if c1 == c2....
+ (else ; two char matched: high probability
; the rest will match too
- (let inner ((i-pat 2) (i-str (+ 2 pos)))
- (if (>= i-pat pat-len) pos ; the whole pattern matched
- (if (char=? (string-ref pattern i-pat)
- (string-ref str i-str))
- (inner (+ 1 i-pat) (+ 1 i-str))
- ;; mismatch after partial match
- (outer (+ 1 pos))))))))))))
-
-(define (substring? pattern str) (miscio:substring? pattern str char=?))
-(define (substring-ci? pattern str) (miscio:substring? pattern str char-ci=?))
-
+ (let inner ((pdx 2) (sdx (+ 2 pos)))
+ (if (>= pdx patlen) pos ; the whole pat matched
+ (if (char= (string-ref pat pdx)
+ (string-ref str sdx))
+ (inner (+ 1 pdx) (+ 1 sdx))
+ ;; mismatch after partial match
+ (outer (+ 1 pos)))))))))
+;@
(define (find-string-from-port? str <input-port> . max-no-char)
(set! max-no-char (if (null? max-no-char) #f (car max-no-char)))
(letrec
@@ -94,7 +136,7 @@
(match-other-chars
(lambda (pos-to-match)
(if (>= pos-to-match (string-length str))
- no-chars-read ; the entire string has matched
+ no-chars-read ; the entire string has matched
(let ((c (my-peek-char)))
(and c
(if (not (char=? c (string-ref str pos-to-match)))
@@ -124,7 +166,7 @@
(backtrack (+ 1 i) matched-substr-len))))))))
)
(match-1st-char)))
-
+;@
(define (string-subst text old new . rest)
(define sub
(lambda (text)
@@ -143,4 +185,8 @@
text
(apply string-subst text rest))))
(sub text))
-
+;@
+(define (count-newlines str)
+ (do ((idx (+ -1 (string-length str)) (+ -1 idx))
+ (cnt 0 (+ (if (eqv? #\newline (string-ref str idx)) 1 0) cnt)))
+ ((<= idx 0) cnt)))