diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2017-02-20 00:05:25 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2017-02-20 00:05:25 -0800 |
commit | 8ffbc2df0fde83082610149d24e594c1cd879f4a (patch) | |
tree | a2be9aad5101c5e450ad141d15c514bc9c2a2963 /soundex.scm | |
download | slib-8ffbc2df0fde83082610149d24e594c1cd879f4a.tar.gz slib-8ffbc2df0fde83082610149d24e594c1cd879f4a.zip |
Import Upstream version 2a6upstream/2a6
Diffstat (limited to 'soundex.scm')
-rw-r--r-- | soundex.scm | 82 |
1 files changed, 82 insertions, 0 deletions
diff --git a/soundex.scm b/soundex.scm new file mode 100644 index 0000000..eb3a542 --- /dev/null +++ b/soundex.scm @@ -0,0 +1,82 @@ +;"soundex.scm" Original SOUNDEX algorithm. +;From jjb@isye.gatech.edu Mon May 2 22:29:43 1994 +; +; This code is in the public domain. + +;Date: Mon, 2 May 94 13:45:39 -0500 + +; Taken from Knuth, Vol. 3 "Sorting and searching", pp 391--2 + +(require 'common-list-functions) + +(define SOUNDEX + (let* ((letters-to-omit + (list #\A #\E #\H #\I #\O #\U #\W #\Y)) + (codes + (list (list #\B #\1) + (list #\F #\1) + (list #\P #\1) + (list #\V #\1) + ; + (list #\C #\2) + (list #\G #\2) + (list #\J #\2) + (list #\K #\2) + (list #\Q #\2) + (list #\S #\2) + (list #\X #\2) + (list #\Z #\2) + ; + (list #\D #\3) + (list #\T #\3) + ; + (list #\L #\4) + ; + (list #\M #\5) + (list #\N #\5) + ; + (list #\R #\6))) + (xform + (lambda (c) + (let ((code (assq c codes))) + (if code + (cadr code) + c))))) + (lambda (name) + (let ((char-list + (map char-upcase + (remove-if (lambda (c) + (not (char-alphabetic? c))) + (string->list name))))) + (if (null? char-list) + name + (let* (; Replace letters except first with codes: + (n1 (cons (car char-list) (map xform char-list))) + ; If 2 or more letter with same code are adjacent + ; in the original name, omit all but the first: + (n2 (let loop ((chars n1)) + (cond ((null? (cdr chars)) + chars) + (else + (if (char=? (xform (car chars)) + (cadr chars)) + (loop (cdr chars)) + (cons (car chars) (loop (cdr chars)))))))) + ; Omit vowels and similar letters, except first: + (n3 (cons (car char-list) + (remove-if + (lambda (c) + (memq c letters-to-omit)) + (cdr n2))))) + ; + ; pad with 0's or drop rightmost digits until of form "annn": + (let loop ((rev-chars (reverse n3))) + (let ((len (length rev-chars))) + (cond ((= 4 len) + (list->string (reverse rev-chars))) + ((> 4 len) + (loop (cons #\0 rev-chars))) + ((< 4 len) + (loop (cdr rev-chars)))))))))))) + + |