From 421dc8c7141ecb6297996f7370d96e7e99894683 Mon Sep 17 00:00:00 2001 From: bnewbold Date: Fri, 24 Jun 2011 17:21:45 -0400 Subject: arc3.1.tar --- strings.arc | 226 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 226 insertions(+) create mode 100644 strings.arc (limited to 'strings.arc') diff --git a/strings.arc b/strings.arc new file mode 100644 index 0000000..b80bc59 --- /dev/null +++ b/strings.arc @@ -0,0 +1,226 @@ +; Matching. Spun off 29 Jul 06. + +; arc> (tostring (writec (coerce 133 'char))) +; +;> (define ss (open-output-string)) +;> (write-char (integer->char 133) ss) +;> (get-output-string ss) +;"\u0085" + +(def tokens (s (o sep whitec)) + (let test (testify sep) + (let rec (afn (cs toks tok) + (if (no cs) (consif tok toks) + (test (car cs)) (self (cdr cs) (consif tok toks) nil) + (self (cdr cs) toks (cons (car cs) tok)))) + (rev (map [coerce _ 'string] + (map rev (rec (coerce s 'cons) nil nil))))))) + +; names of cut, split, halve not optimal + +(def halve (s (o sep whitec)) + (let test (testify sep) + (let rec (afn (cs tok) + (if (no cs) (list (rev tok)) + (test (car cs)) (list cs (rev tok)) + (self (cdr cs) (cons (car cs) tok)))) + (rev (map [coerce _ 'string] + (rec (coerce s 'cons) nil)))))) + +; maybe promote to arc.arc, but if so include a list clause + +(def positions (test seq) + (accum a + (let f (testify test) + (forlen i seq + (if (f (seq i)) (a i)))))) + +(def lines (s) + (accum a + ((afn ((p . ps)) + (if ps + (do (a (rem #\return (cut s (+ p 1) (car ps)))) + (self ps)) + (a (cut s (+ p 1))))) + (cons -1 (positions #\newline s))))) + +(def slices (s test) + (accum a + ((afn ((p . ps)) + (if ps + (do (a (cut s (+ p 1) (car ps))) + (self ps)) + (a (cut s (+ p 1))))) + (cons -1 (positions test s))))) + +; > (require (lib "uri-codec.ss" "net")) +;> (form-urlencoded-decode "x%ce%bbx") +;"xλx" + +; first byte: 0-7F, 1 char; c2-df 2; e0-ef 3, f0-f4 4. + +; Fixed for utf8 by pc. + +(def urldecode (s) + (tostring + (forlen i s + (caselet c (s i) + #\+ (writec #\space) + #\% (do (when (> (- (len s) i) 2) + (writeb (int (cut s (+ i 1) (+ i 3)) 16))) + (++ i 2)) + (writec c))))) + +(def urlencode (s) + (tostring + (each c s + (writec #\%) + (let i (int c) + (if (< i 16) (writec #\0)) + (pr (coerce i 'string 16)))))) + +(mac litmatch (pat string (o start 0)) + (w/uniq (gstring gstart) + `(with (,gstring ,string ,gstart ,start) + (unless (> (+ ,gstart ,(len pat)) (len ,gstring)) + (and ,@(let acc nil + (forlen i pat + (push `(is ,(pat i) (,gstring (+ ,gstart ,i))) + acc)) + (rev acc))))))) + +; litmatch would be cleaner if map worked for string and integer args: + +; ,@(map (fn (n c) +; `(is ,c (,gstring (+ ,gstart ,n)))) +; (len pat) +; pat) + +(mac endmatch (pat string) + (w/uniq (gstring glen) + `(withs (,gstring ,string ,glen (len ,gstring)) + (unless (> ,(len pat) (len ,gstring)) + (and ,@(let acc nil + (forlen i pat + (push `(is ,(pat (- (len pat) 1 i)) + (,gstring (- ,glen 1 ,i))) + acc)) + (rev acc))))))) + +(def posmatch (pat seq (o start 0)) + (catch + (if (isa pat 'fn) + (for i start (- (len seq) 1) + (when (pat (seq i)) (throw i))) + (for i start (- (len seq) (len pat)) + (when (headmatch pat seq i) (throw i)))) + nil)) + +(def headmatch (pat seq (o start 0)) + (let p (len pat) + ((afn (i) + (or (is i p) + (and (is (pat i) (seq (+ i start))) + (self (+ i 1))))) + 0))) + +(def begins (seq pat (o start 0)) + (unless (len> pat (- (len seq) start)) + (headmatch pat seq start))) + +(def subst (new old seq) + (let boundary (+ (- (len seq) (len old)) 1) + (tostring + (forlen i seq + (if (and (< i boundary) (headmatch old seq i)) + (do (++ i (- (len old) 1)) + (pr new)) + (pr (seq i))))))) + +(def multisubst (pairs seq) + (tostring + (forlen i seq + (iflet (old new) (find [begins seq (car _) i] pairs) + (do (++ i (- (len old) 1)) + (pr new)) + (pr (seq i)))))) + +; not a good name + +(def findsubseq (pat seq (o start 0)) + (if (< (- (len seq) start) (len pat)) + nil + (if (headmatch pat seq start) + start + (findsubseq pat seq (+ start 1))))) + +(def blank (s) (~find ~whitec s)) + +(def nonblank (s) (unless (blank s) s)) + +(def trim (s (o where 'both) (o test whitec)) + (withs (f (testify test) + p1 (pos ~f s)) + (if p1 + (cut s + (if (in where 'front 'both) p1 0) + (when (in where 'end 'both) + (let i (- (len s) 1) + (while (and (> i p1) (f (s i))) + (-- i)) + (+ i 1)))) + ""))) + +(def num (n (o digits 2) (o trail-zeros nil) (o init-zero nil)) + (withs (comma + (fn (i) + (tostring + (map [apply pr (rev _)] + (rev (intersperse '(#\,) + (tuples (rev (coerce (string i) 'cons)) + 3)))))) + abrep + (let a (abs n) + (if (< digits 1) + (comma (roundup a)) + (exact a) + (string (comma a) + (when (and trail-zeros (> digits 0)) + (string "." (newstring digits #\0)))) + (withs (d (expt 10 digits) + m (/ (roundup (* a d)) d) + i (trunc m) + r (abs (trunc (- (* m d) (* i d))))) + (+ (if (is i 0) + (if (or init-zero (is r 0)) "0" "") + (comma i)) + (withs (rest (string r) + padded (+ (newstring (- digits (len rest)) #\0) + rest) + final (if trail-zeros + padded + (trim padded 'end [is _ #\0]))) + (string (unless (empty final) ".") + final))))))) + (if (and (< n 0) (find [and (digit _) (isnt _ #\0)] abrep)) + (+ "-" abrep) + abrep))) + + +; English + +(def pluralize (n str) + (if (or (is n 1) (single n)) + str + (string str "s"))) + +(def plural (n x) + (string n #\ (pluralize n x))) + + +; http://www.eki.ee/letter/chardata.cgi?HTML4=1 +; http://jrgraphix.net/research/unicode_blocks.php?block=1 +; http://home.tiscali.nl/t876506/utf8tbl.html +; http://www.fileformat.info/info/unicode/block/latin_supplement/utf8test.htm +; http://en.wikipedia.org/wiki/Utf-8 +; http://unicode.org/charts/charindex2.html -- cgit v1.2.3