;;; "strsrch.scm" Search for string from port. ; Written 1995, 1996 by Oleg Kiselyov (oleg@acm.org) ; Modified 1996, 1997, 1998, 2001 by A. Jaffer (agj@alum.mit.edu) ; Modified 2003 by Steve VanDevender (stevev@hexadecimal.uoregon.edu) ; ; This code is in the public domain. ;;;@ Return the index of the first occurence of chr in str, or #f (define (string-index str chr) (define len (string-length str)) (do ((pos 0 (+ 1 pos))) ((or (>= pos len) (char=? chr (string-ref str pos))) (and (< pos len) pos)))) ;@ (define (string-index-ci str chr) (define len (string-length str)) (do ((pos 0 (+ 1 pos))) ((or (>= pos len) (char-ci=? chr (string-ref str pos))) (and (< pos len) pos)))) ;@ (define (string-reverse-index str chr) (do ((pos (+ -1 (string-length str)) (+ -1 pos))) ((or (negative? pos) (char=? (string-ref str pos) chr)) (and (not (negative? pos)) pos)))) ;@ (define (string-reverse-index-ci str chr) (do ((pos (+ -1 (string-length str)) (+ -1 pos))) ((or (negative? pos) (char-ci=? (string-ref str pos) chr)) (and (not (negative? pos)) pos)))) ;@ (define (substring? pat str) (define patlen (string-length pat)) (define strlen (string-length str)) (cond ((zero? patlen) 0) ; trivial match ((>= patlen strlen) (and (= patlen strlen) (string=? pat str) 0)) ;; use faster string-index to match a single-character pattern ((= 1 patlen) (string-index str (string-ref pat 0))) ((or (<= strlen (+ patlen patlen (quotient char-code-limit 2))) (<= patlen 4)) (subloop pat patlen str strlen char=?)) (else ;; compute skip values for search pattern characters ;; for all c not in pat, skip[c] = patlen + 1 ;; for c in pat, skip[c] is distance of rightmost occurrence ;; of c from end of str (let ((skip (make-vector char-code-limit (+ patlen 1)))) (do ((i 0 (+ i 1))) ((= i patlen)) (vector-set! skip (char->integer (string-ref pat i)) (- patlen i))) (subskip skip pat patlen str strlen char=?))))) ;@ (define (substring-ci? pat str) (define patlen (string-length pat)) (define strlen (string-length str)) (cond ((zero? patlen) 0) ; trivial match ((>= patlen strlen) (and (= patlen strlen) (string-ci=? pat str) 0)) ((= 1 patlen) (string-index-ci str (string-ref pat 0))) ((or (<= strlen (+ patlen patlen (quotient char-code-limit 2))) (<= patlen 4)) (subloop pat patlen str strlen char-ci=?)) (else (let ((skip (make-vector char-code-limit (+ patlen 1)))) (do ((i 0 (+ i 1))) ((= i patlen)) (let ((c (string-ref pat i)) (d (- patlen i))) ;; use same skip value for both upper- and lowercase characters (vector-set! skip (char->integer (char-upcase c)) d) (vector-set! skip (char->integer (char-downcase c)) d))) (subskip skip pat patlen str strlen char-ci=?))))) (define (subskip skip pat patlen str strlen char=) (do ((k patlen (if (< k strlen) (+ k (vector-ref skip (char->integer (string-ref str k)))) (+ strlen 1)))) ((or (> k strlen) (do ((i 0 (+ i 1)) (j (- k patlen) (+ j 1))) ((or (= i patlen) (not (char= (string-ref pat i) (string-ref str j)))) (= i patlen)))) (and (<= k strlen) (- k patlen))))) ;;; Assumes that PATLEN > 1 (define (subloop pat patlen str strlen char=) (define span (- strlen patlen)) (define c1 (string-ref pat 0)) (define c2 (string-ref pat 1)) (let outer ((pos 0)) (cond ((> pos span) #f) ; nothing was found thru the whole str ((not (char= c1 (string-ref str pos))) (outer (+ 1 pos))) ; keep looking for the right beginning ((not (char= c2 (string-ref str (+ 1 pos)))) (outer (+ 1 pos))) ; could've done pos+2 if c1 == c2.... (else ; two char matched: high probability ; the rest will match too (let inner ((pdx 2) (sdx (+ 2 pos))) (if (>= pdx patlen) pos ; the whole pat matched (if (char= (string-ref pat pdx) (string-ref str sdx)) (inner (+ 1 pdx) (+ 1 sdx)) ;; mismatch after partial match (outer (+ 1 pos))))))))) ;@ (define (find-string-from-port? str . max-no-char) (set! max-no-char (if (null? max-no-char) #f (car max-no-char))) (letrec ((no-chars-read 0) (peeked? #f) (my-peek-char ; Return a peeked char or #f (lambda () (and (or (not (number? max-no-char)) (< no-chars-read max-no-char)) (let ((c (peek-char ))) (cond (peeked? c) ((eof-object? c) #f) ((procedure? max-no-char) (set! peeked? #t) (if (max-no-char c) #f c)) ((eqv? max-no-char c) #f) (else c)))))) (next-char (lambda () (set! peeked? #f) (read-char ) (set! no-chars-read (+ 1 no-chars-read)))) (match-1st-char ; of the string str (lambda () (let ((c (my-peek-char))) (and c (begin (next-char) (if (char=? c (string-ref str 0)) (match-other-chars 1) (match-1st-char))))))) ;; There has been a partial match, up to the point pos-to-match ;; (for example, str[0] has been found in the stream) ;; Now look to see if str[pos-to-match] for would be found, too (match-other-chars (lambda (pos-to-match) (if (>= pos-to-match (string-length str)) no-chars-read ; the entire string has matched (let ((c (my-peek-char))) (and c (if (not (char=? c (string-ref str pos-to-match))) (backtrack 1 pos-to-match) (begin (next-char) (match-other-chars (+ 1 pos-to-match))))))))) ;; There had been a partial match, but then a wrong char showed up. ;; Before discarding previously read (and matched) characters, we check ;; to see if there was some smaller partial match. Note, characters read ;; so far (which matter) are those of str[0..matched-substr-len - 1] ;; In other words, we will check to see if there is such i>0 that ;; substr(str,0,j) = substr(str,i,matched-substr-len) ;; where j=matched-substr-len - i (backtrack (lambda (i matched-substr-len) (let ((j (- matched-substr-len i))) (if (<= j 0) ;; backed off completely to the begining of str (match-1st-char) (let loop ((k 0)) (if (>= k j) (match-other-chars j) ; there was indeed a shorter match (if (char=? (string-ref str k) (string-ref str (+ i k))) (loop (+ 1 k)) (backtrack (+ 1 i) matched-substr-len)))))))) ) (match-1st-char))) ;@ (define (string-subst text old new . rest) (define sub (lambda (text) (set! text (cond ((equal? "" text) text) ((substring? old text) => (lambda (idx) (string-append (substring text 0 idx) new (sub (substring text (+ idx (string-length old)) (string-length text)))))) (else text))) (if (null? rest) text (apply string-subst text rest)))) (sub text)) ;@ (define (count-newlines str) (do ((idx (+ -1 (string-length str)) (+ -1 idx)) (cnt 0 (+ (if (eqv? #\newline (string-ref str idx)) 1 0) cnt))) ((<= idx 0) cnt)))