summaryrefslogtreecommitdiffstats
path: root/soundex.scm
diff options
context:
space:
mode:
Diffstat (limited to 'soundex.scm')
-rw-r--r--soundex.scm82
1 files changed, 82 insertions, 0 deletions
diff --git a/soundex.scm b/soundex.scm
new file mode 100644
index 0000000..eb3a542
--- /dev/null
+++ b/soundex.scm
@@ -0,0 +1,82 @@
+;"soundex.scm" Original SOUNDEX algorithm.
+;From jjb@isye.gatech.edu Mon May 2 22:29:43 1994
+;
+; This code is in the public domain.
+
+;Date: Mon, 2 May 94 13:45:39 -0500
+
+; Taken from Knuth, Vol. 3 "Sorting and searching", pp 391--2
+
+(require 'common-list-functions)
+
+(define SOUNDEX
+ (let* ((letters-to-omit
+ (list #\A #\E #\H #\I #\O #\U #\W #\Y))
+ (codes
+ (list (list #\B #\1)
+ (list #\F #\1)
+ (list #\P #\1)
+ (list #\V #\1)
+ ;
+ (list #\C #\2)
+ (list #\G #\2)
+ (list #\J #\2)
+ (list #\K #\2)
+ (list #\Q #\2)
+ (list #\S #\2)
+ (list #\X #\2)
+ (list #\Z #\2)
+ ;
+ (list #\D #\3)
+ (list #\T #\3)
+ ;
+ (list #\L #\4)
+ ;
+ (list #\M #\5)
+ (list #\N #\5)
+ ;
+ (list #\R #\6)))
+ (xform
+ (lambda (c)
+ (let ((code (assq c codes)))
+ (if code
+ (cadr code)
+ c)))))
+ (lambda (name)
+ (let ((char-list
+ (map char-upcase
+ (remove-if (lambda (c)
+ (not (char-alphabetic? c)))
+ (string->list name)))))
+ (if (null? char-list)
+ name
+ (let* (; Replace letters except first with codes:
+ (n1 (cons (car char-list) (map xform char-list)))
+ ; If 2 or more letter with same code are adjacent
+ ; in the original name, omit all but the first:
+ (n2 (let loop ((chars n1))
+ (cond ((null? (cdr chars))
+ chars)
+ (else
+ (if (char=? (xform (car chars))
+ (cadr chars))
+ (loop (cdr chars))
+ (cons (car chars) (loop (cdr chars))))))))
+ ; Omit vowels and similar letters, except first:
+ (n3 (cons (car char-list)
+ (remove-if
+ (lambda (c)
+ (memq c letters-to-omit))
+ (cdr n2)))))
+ ;
+ ; pad with 0's or drop rightmost digits until of form "annn":
+ (let loop ((rev-chars (reverse n3)))
+ (let ((len (length rev-chars)))
+ (cond ((= 4 len)
+ (list->string (reverse rev-chars)))
+ ((> 4 len)
+ (loop (cons #\0 rev-chars)))
+ ((< 4 len)
+ (loop (cdr rev-chars))))))))))))
+
+