summaryrefslogtreecommitdiffstats
path: root/soundex.scm
blob: 9853401e0fb94812be5da9de59af54fa3c9bd6f0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
;"soundex.scm" Original SOUNDEX algorithm.
;From jjb@isye.gatech.edu Mon May  2 22:29:43 1994
;
; This code is in the public domain.

; Taken from Knuth, Vol. 3 "Sorting and searching", pp 391--2

;;; 2003-01-26  L.J. Buitinck converted to use dotted pairs for codes.

(require 'common-list-functions)
;@
(define SOUNDEX
  (let* ((letters-to-omit
          '(#\A #\E #\H #\I #\O #\U #\W #\Y))
         (codes
          '((#\B . #\1)
            (#\F . #\1)
            (#\P . #\1)
            (#\V . #\1)
            ;;
            (#\C . #\2)
            (#\G . #\2)
            (#\J . #\2)
            (#\K . #\2)
            (#\Q . #\2)
            (#\S . #\2)
            (#\X . #\2)
            (#\Z . #\2)
            ;;
            (#\D . #\3)
            (#\T . #\3)
            ;;
            (#\L . #\4)
            ;;
            (#\M . #\5)
            (#\N . #\5)
            ;;
            (#\R . #\6)))
         (xform
          (lambda (c)
            (let ((code (assv c codes)))
              (if code
                  (cdr code)
                  c)))))
    (lambda (name)
      (let ((char-list
             (map char-upcase
                  (remove-if (lambda (c)
                               (not (char-alphabetic? c)))
                             (string->list name)))))
        (if (null? char-list)
            name
            (let* ( ;; Replace letters except first with codes:
                   (n1 (cons (car char-list) (map xform char-list)))
                   ;; If 2 or more letter with same code are adjacent
                   ;; in the original name, omit all but the first:
                   (n2 (let loop ((chars n1))
                         (cond ((null? (cdr chars))
                                chars)
                               (else
                                (if (char=? (xform (car chars))
                                            (cadr chars))
                                    (loop (cdr chars))
                                    (cons (car chars) (loop (cdr chars))))))))
                   ;; Omit vowels and similar letters, except first:
                   (n3 (cons (car char-list)
                             (remove-if
                              (lambda (c)
                                (memv c letters-to-omit))
                              (cdr n2)))))
              ;;
              ;; pad with 0's or drop rightmost digits until of form "annn":
              (let loop ((rev-chars (reverse n3)))
                (let ((len (length rev-chars)))
                  (cond ((= 4 len)
                         (list->string (reverse rev-chars)))
                        ((> 4 len)
                         (loop (cons #\0 rev-chars)))
                        ((< 4 len)
                         (loop (cdr rev-chars))))))))))))