summaryrefslogtreecommitdiffstats
path: root/differ.c
diff options
context:
space:
mode:
Diffstat (limited to 'differ.c')
-rw-r--r--differ.c594
1 files changed, 594 insertions, 0 deletions
diff --git a/differ.c b/differ.c
new file mode 100644
index 0000000..43b2c3f
--- /dev/null
+++ b/differ.c
@@ -0,0 +1,594 @@
+/* Copyright (C) 2003, 2004 Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this software; see the file COPYING. If not, write to
+ * the Free Software Foundation, 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
+ * As a special exception, the Free Software Foundation gives permission
+ * for additional uses of the text contained in its release of SCM.
+ *
+ * The exception is that, if you link the SCM library with other files
+ * to produce an executable, this does not by itself cause the
+ * resulting executable to be covered by the GNU General Public License.
+ * Your use of that executable is in no way restricted on account of
+ * linking the SCM library code into it.
+ *
+ * This exception does not however invalidate any other reasons why
+ * the executable file might be covered by the GNU General Public License.
+ *
+ * This exception applies only to the code released by the
+ * Free Software Foundation under the name SCM. If you copy
+ * code from other Free Software Foundation releases into a copy of
+ * SCM, as the General Public License permits, the exception does
+ * not apply to the code that you add in this way. To avoid misleading
+ * anyone as to the status of such modified files, you must delete
+ * this exception notice from them.
+ *
+ * If you write modifications of your own for SCM, it is your choice
+ * whether to permit this exception to apply to your modifications.
+ * If you do not wish that, delete this exception notice.
+ */
+
+/* "differ.c" Linear-space O(NP) sequence comparison. */
+/* Author: Aubrey Jaffer */
+
+#include <stdlib.h>
+/* #include <stdio.h> */
+
+#include "scm.h"
+
+SCM_EXPORT SCM array_dims P((SCM ra));
+
+typedef int (*int_function) ();
+
+typedef struct {
+ void* (*subarray) ();
+ int_function array_refsEql_P;
+ int_function array_refs_revEql_P;
+} fp_procs;
+
+int fp_compare(int *fp,int fpoff,int *cc,void *a,int m,void *b,int n,int_function array_refsEql_P,int p_lim);
+
+int fp_run(int *fp,int fpoff,int k,void *a,int m,void *b,int n,int_function array_refsEql_P,int *cc,int p);
+
+int diff_mid_split(int m,int n,int *rr,int *cc,int cost);
+
+void fp_init(int *fp,int fpoff,int fill,int mindx,int maxdx);
+
+int diff_divide_and_conquer(int *fp,int fpoff,int *ccrr,void *a,int start_a,int end_a,void *b,int start_b,int end_b,int *edits,int edx,int epo,fp_procs *procs,int p_lim);
+
+int diff2et(int *fp,int fpoff,int *ccrr,void *a,int start_a,int end_a,void *b,int start_b,int end_b,int *edits,int edx,int epo,fp_procs *procs,int p_lim);
+
+int diff2ez(int *fp,int fpoff,int *ccrr,void *a,int start_a,int end_a,void *b,int start_b,int end_b,int *edits,int edx,int epo,fp_procs *procs,int p_lim);
+
+void check_cost(unsigned char *name,int est,int cost);
+
+SCM_EXPORT SCM diff2edits P((SCM Edits, SCM Fp, SCM Args));
+
+SCM_EXPORT SCM diff2editlen P((SCM Fp, SCM A, SCM Args));
+
+# define MAX(a,b) (a<b ? b : a)
+# define MIN(a,b) (a>b ? b : a)
+
+long *long_subarray(ra, start, end)
+ long *ra; int start, end;
+{
+ return &(ra[start]);
+}
+short *short_subarray(ra, start, end)
+ short *ra; int start, end;
+{
+ return &(ra[start]);
+}
+char *char_subarray(ra, start, end)
+ char *ra; int start, end;
+{
+ return &(ra[start]);
+}
+
+long long_array_refsEql_P(a, x, m, b, y, n)
+ long *a; int x, m; long *b; int y, n;
+{
+ return (a[x])==(b[y]);
+}
+long long_array_refs_revEql_P(a, x, m, b, y, n)
+ long *a; int x, m; long *b; int y, n;
+{
+/* if (x > m) printf("long x(%d) > m(%d)\n", x, m); */
+/* if (y > n) printf("long y(%d) > n(%d)\n", y, n); */
+ return a[(m)-(x)-1]==b[(n)-(y)-1];
+}
+short short_array_refsEql_P(a, x, m, b, y, n)
+ short *a; int x, m; short *b; int y, n;
+{
+ return (a[x])==(b[y]);
+}
+short short_array_refs_revEql_P(a, x, m, b, y, n)
+ short *a; int x, m; short *b; int y, n;
+{
+/* if (x > m) printf("short x(%d) > m(%d)\n", x, m); */
+/* if (y > n) printf("short y(%d) > n(%d)\n", y, n); */
+ return a[(m)-(x)-1]==b[(n)-(y)-1];
+}
+char char_array_refsEql_P(a, x, m, b, y, n)
+ char *a; int x, m; char *b; int y, n;
+{
+ return (a[x])==(b[y]);
+}
+char char_array_refs_revEql_P(a, x, m, b, y, n)
+ char *a; int x, m; char *b; int y, n;
+{
+/* if (x > m) printf("char x(%d) > m(%d)\n", x, m); */
+/* if (y > n) printf("char y(%d) > n(%d)\n", y, n); */
+ return a[(m)-(x)-1]==b[(n)-(y)-1];
+}
+
+fp_procs long_procs =
+ {long_subarray, long_array_refsEql_P, long_array_refs_revEql_P};
+fp_procs short_procs =
+ {short_subarray, short_array_refsEql_P, short_array_refs_revEql_P};
+fp_procs char_procs =
+ {char_subarray, char_array_refsEql_P, char_array_refs_revEql_P};
+
+int fp_compare(fp, fpoff, cc, a, m, b, n, array_refsEql_P, p_lim)
+ int *fp;
+ int fpoff;
+ int *cc;
+ void *a;
+ int m;
+ void *b;
+ int n;
+ int_function array_refsEql_P;
+ int p_lim;
+{
+ int delta = (n)-(m);
+ {
+ int p = 0;
+L_loop:
+ {
+ int k = -(p);
+ while (!((k)>=(delta))) {
+ fp_run(fp, fpoff, k, a, m, b, n, array_refsEql_P, cc, p);
+ {
+ k = 1+(k);
+ }
+ }
+ }
+ {
+ int k = (delta)+(p);
+ while (!((k)<=(delta))) {
+ fp_run(fp, fpoff, k, a, m, b, n, array_refsEql_P, cc, p);
+ {
+ k = -1+(k);
+ }
+ }
+ }
+ {
+ int fpval = fp_run(fp, fpoff, delta, a, m, b, n, array_refsEql_P, cc, p);
+ if ((!(cc))
+ && ((n)<=(fpval)))
+ return (delta)+(2*(p));
+ else if ((!(0 > (p_lim)))
+ && ((p)>=(p_lim)))
+ return -1;
+ else {
+ p = 1+(p);
+ goto L_loop;
+ }
+ }
+ }
+}
+
+/* Traces runs of matches until they end; then set fp[k]=y. */
+/* If CC is supplied, set each CC[y] = MIN(CC[y], cost) for run. */
+/* Returns furthest y reached. */
+
+int fp_run(fp, fpoff, k, a, m, b, n, array_refsEql_P, cc, p)
+ int *fp;
+ int fpoff;
+ int k;
+ void *a;
+ int m;
+ void *b;
+ int n;
+ int_function array_refsEql_P;
+ int *cc;
+ int p;
+{
+ int cost = (k)+(p)+(p);
+ {
+ int y = MAX((fp[ -1+(k)+(fpoff)])+1, fp[1+(k)+(fpoff)]);
+L_snloop:
+ {
+ int x = (y)-(k);
+ if ((cc)
+ && ((y)<=(n)))
+ {
+ int xcst = (m)-(x);
+ if (0 > (xcst))
+ ;
+ else cc[y] = MIN((xcst)+(cost), cc[y]);
+ }
+ if (((x)<(m))
+ && ((y)<(n))
+ && (array_refsEql_P(a, x, m, b, y, n)))
+ {
+ y = 1+(y);
+ goto L_snloop;
+ }
+ else {
+ fp[(fpoff)+(k)] = y;
+ return y;
+ }
+ }
+ }
+}
+
+int diff_mid_split(m, n, rr, cc, cost)
+ int m;
+ int n;
+ int *rr;
+ int *cc;
+ int cost;
+{
+ {
+ int cdx = 1+((n)/2);
+ int rdx = (n)/2;
+L_loop:
+ if ((cost)==((cc[rdx])+(rr[(n)-(rdx)])))
+ return rdx;
+ else if ((cost)==((cc[cdx])+(rr[(n)-(cdx)])))
+ return cdx;
+ else {
+ cdx = 1+(cdx);
+ rdx = -1+(rdx);
+ goto L_loop;
+ }
+ }
+}
+
+
+void fp_init(fp, fpoff, fill, mindx, maxdx)
+ int *fp;
+ int fpoff;
+ int fill;
+ int mindx;
+ int maxdx;
+{
+ int mlim = (fpoff)+(mindx);
+ {
+ int idx = (fpoff)+(maxdx);
+ while (!((idx)<(mlim))) {
+ fp[idx] = fill;
+ {
+ idx = -1+(idx);
+ }
+ }
+ }
+}
+
+/* Split A[start-a..end-a] (shorter array) into smaller and smaller chunks. */
+/* EDX is index into EDITS. */
+/* EPO is insert/delete polarity (+1 or -1) */
+
+int diff_divide_and_conquer(fp, fpoff, ccrr, a, start_a, end_a, b, start_b, end_b, edits, edx, epo, procs, p_lim)
+ int *fp;
+ int fpoff;
+ int *ccrr;
+ void *a;
+ int start_a;
+ int end_a;
+ void *b;
+ int start_b;
+ int end_b;
+ int *edits;
+ int edx;
+ int epo;
+ fp_procs *procs;
+ int p_lim;
+{
+ int mid_a = ((start_a)+(end_a))/2;
+ int len_b = (end_b)-(start_b);
+ int len_a = (end_a)-(start_a);
+ {
+ int tcst = (p_lim)+(p_lim)+((len_b)-(len_a));
+ int *cc = &(ccrr[0]);
+ int *rr = &(ccrr[(len_b)+1]);
+ int m2 = (end_a)-(mid_a);
+ int m1 = (mid_a)-(start_a);
+ fp_init(cc, 0, (len_a)+(len_b), 0, len_b);
+ fp_init(fp, fpoff, -1, -(1+(p_lim)), 1+(p_lim)+((len_b)-(m1)));
+ fp_compare(fp, fpoff, cc, procs->subarray(a, start_a, mid_a), m1, procs->subarray(b, start_b, end_b), len_b, procs->array_refsEql_P, MIN(p_lim, len_a));
+ fp_init(rr, 0, (len_a)+(len_b), 0, len_b);
+ fp_init(fp, fpoff, -1, -(1+(p_lim)), 1+(p_lim)+((len_b)-(m2)));
+ fp_compare(fp, fpoff, rr, procs->subarray(a, mid_a, end_a), m2, procs->subarray(b, start_b, end_b), len_b, procs->array_refs_revEql_P, MIN(p_lim, len_a));
+ {
+ int b_splt = diff_mid_split(len_a, len_b, rr, cc, tcst);
+ int est_c = cc[b_splt];
+ int est_r = rr[(len_b)-(b_splt)];
+ check_cost("cc", est_c, diff2et(fp, fpoff, ccrr, a, start_a, mid_a, b, start_b, (start_b)+(b_splt), edits, edx, epo, procs, ((est_c)-((b_splt)-((mid_a)-(start_a))))/2));
+ check_cost("rr", est_r, diff2et(fp, fpoff, ccrr, a, mid_a, end_a, b, (start_b)+(b_splt), end_b, edits, (est_c)+(edx), epo, procs, ((est_r)-(((len_b)-(b_splt))-((end_a)-(mid_a))))/2));
+ return (est_c)+(est_r);
+ }
+ }
+}
+
+/* Trim; then diff sub-arrays; either one longer. Returns edit-length */
+
+int diff2et(fp, fpoff, ccrr, a, start_a, end_a, b, start_b, end_b, edits, edx, epo, procs, p_lim)
+ int *fp;
+ int fpoff;
+ int *ccrr;
+ void *a;
+ int start_a;
+ int end_a;
+ void *b;
+ int start_b;
+ int end_b;
+ int *edits;
+ int edx;
+ int epo;
+ fp_procs *procs;
+ int p_lim;
+{
+ {
+ int bdx = -1+(end_b);
+ int adx = -1+(end_a);
+ while (((start_b)<=(bdx))
+ && ((start_a)<=(adx))
+ && (procs->array_refsEql_P(a, adx, 0, b, bdx, 0))) {
+ {
+ bdx = -1+(bdx);
+ adx = -1+(adx);
+ }
+ }
+ {
+ int bsx = start_b;
+ int asx = start_a;
+ while (((bsx)<(bdx))
+ && ((asx)<(adx))
+ && (procs->array_refsEql_P(a, asx, 0, b, bsx, 0))) {
+ {
+ bsx = 1+(bsx);
+ asx = 1+(asx);
+ }
+ }
+ {
+ int delta = ((bdx)-(bsx))-((adx)-(asx));
+ if (0 > (delta))
+ return diff2ez(fp, fpoff, ccrr, b, bsx, 1+(bdx), a, asx, 1+(adx), edits, edx, -(epo), procs, (delta)+(p_lim));
+ else return diff2ez(fp, fpoff, ccrr, a, asx, 1+(adx), b, bsx, 1+(bdx), edits, edx, epo, procs, p_lim);
+ }
+ }
+ }
+}
+
+/* Diff sub-arrays, A not longer than B. Returns edit-length */
+
+int diff2ez(fp, fpoff, ccrr, a, start_a, end_a, b, start_b, end_b, edits, edx, epo, procs, p_lim)
+ int *fp;
+ int fpoff;
+ int *ccrr;
+ void *a;
+ int start_a;
+ int end_a;
+ void *b;
+ int start_b;
+ int end_b;
+ int *edits;
+ int edx;
+ int epo;
+ fp_procs *procs;
+ int p_lim;
+{
+ int len_a = (end_a)-(start_a);
+ int len_b = (end_b)-(start_b);
+ if (!(p_lim))
+ if ((len_b)==(len_a))
+ return 0;
+ else {
+ int T_edx = edx;
+ int adx = start_a;
+ int bdx = start_b;
+ int edx = T_edx;
+L_loop:
+ if ((bdx)>=(end_b))
+ return (len_b)-(len_a);
+ else if ((adx)>=(end_a))
+ {
+ int T_edx = edx;
+ int idx = bdx;
+ int edx = T_edx;
+ while (!((idx)>=(end_b))) {
+ edits[edx] = (epo)*(1+(idx));
+ {
+ idx = 1+(idx);
+ edx = 1+(edx);
+ }
+ }
+ return (len_b)-(len_a);
+ }
+ else if (procs->array_refsEql_P(a, adx, 0, b, bdx, 0))
+ {
+ adx = 1+(adx);
+ bdx = 1+(bdx);
+ goto L_loop;
+ }
+ else {
+ edits[edx] = (epo)*(1+(bdx));
+ {
+ bdx = 1+(bdx);
+ edx = 1+(edx);
+ goto L_loop;
+ }
+ }
+ }
+ else if ((len_a)<=(p_lim))
+ {
+ int idx = start_a;
+ int jdx = start_b;
+ while (!(((idx)>=(end_a))
+ && ((jdx)>=(end_b)))) {
+ if ((jdx)<(end_b))
+ {
+ edits[edx] = (epo)*(1+(jdx));
+ edx = 1+(edx);
+ }
+ if ((idx)<(end_a))
+ {
+ edits[edx] = (epo)*( -1-(idx));
+ edx = 1+(edx);
+ }
+ {
+ idx = 1+(idx);
+ jdx = 1+(jdx);
+ }
+ }
+ return (len_a)+(len_b);
+ }
+ else return diff_divide_and_conquer(fp, fpoff, ccrr, a, start_a, end_a, b, start_b, end_b, edits, edx, epo, procs, p_lim);
+}
+
+void check_cost(name, est, cost)
+ unsigned char *name;
+ int est;
+ int cost;
+{
+ if ((est)!=(cost)) {
+/* fprintf(stderr, "%s: cost check failed %d != %d\\n", name, est, cost); */
+ wta(MAKINUM(cost), "cost check failed", name);
+ }
+}
+
+/* Routines interfacing API layer to algorithms. */
+
+/* Return the fp_procs appropriate for SCM array prototype */
+fp_procs *raprot2procs(prot, s_name)
+ SCM prot;
+ char *s_name;
+{
+ fp_procs *procs;
+ if (ICHRP(prot)) procs = &char_procs;
+ else if (MAKINUM(16L)==prot) procs = &short_procs;
+ else if (MAKINUM(-16L)==prot) procs = &short_procs;
+ else if (MAKINUM(32L)==prot) procs = &long_procs;
+ else if (MAKINUM(-32L)==prot) procs = &long_procs;
+ else if (EOL==prot) procs = &long_procs;
+ else wta(prot, (char *)ARG3, s_name);
+ return procs;
+}
+
+static SCM list_of_0;
+
+void* array2addr(RA, prot, pos, s_name)
+ SCM RA, prot;
+ char *pos;
+ char s_name[];
+{
+ ASRTER(BOOL_T==arrayp(RA, UNDEFINED) && array_prot(RA)==prot, RA,
+ pos, s_name);
+ return (void*)scm_addr(cons(RA, list_of_0), s_name);
+}
+
+/* A not longer than B (M <= N) */
+static char s_d2es[] = "diff2edits!";
+static char s_incomp[] = "incompatible array types";
+SCM diff2edits(Edits, Fp, Args)
+ SCM Edits, Fp, Args; /* Ccrr, A, B; */
+{
+ SCM aprot, bprot;
+ int *edits;
+ int est;
+ int *fp;
+ int *ccrr;
+ void *a, *b;
+ int m, n;
+ fp_procs *procs;
+ ASRTER(3==ilength(Args), Args, WNA, s_d2es);
+ edits = array2addr(Edits, MAKINUM(-32), ARG1, s_d2es);
+ fp = array2addr(Fp, MAKINUM(-32), ARG2, s_d2es);
+ ccrr = array2addr(CAR(Args), MAKINUM(-32), ARG3, s_d2es);
+ Args = CDR(Args);
+ aprot = array_prot(CAR(Args));
+ a = array2addr(CAR(Args), aprot, ARG4, s_d2es);
+ ASRTER(NFALSEP(aprot), aprot, ARG4, s_d2es);
+ m = INUM(CAR(array_dims(CAR(Args))));
+ Args = CDR(Args);
+ bprot = array_prot(CAR(Args));
+ b = array2addr(CAR(Args), bprot, ARG5, s_d2es);
+ ASRTER(NFALSEP(bprot), bprot, ARG5, s_d2es);
+ n = INUM(CAR(array_dims(CAR(Args))));
+ ASRTER(aprot==bprot, bprot, s_incomp, s_d2es);
+ procs = raprot2procs(aprot, s_d2es);
+ est = INUM(CAR(array_dims(Edits)));
+ {
+ int p_lim = ((est)-((n)-(m)))/2;
+ check_cost(s_d2es, est,
+ diff2et(fp, 1+(p_lim),
+ ccrr, a, 0, m, b, 0, n, edits, 0, 1, procs, p_lim));
+ return UNSPECIFIED;
+ }
+}
+
+/* A not longer than B (M <= N) */
+
+static char s_d2el[] = "diff2editlen";
+SCM diff2editlen(Fp, A, Args)
+ SCM Fp, A, Args; /* B, P_lim */
+{
+ SCM aprot, bprot;
+ fp_procs *procs;
+ int p_lim;
+ int m, n;
+ int *fp;
+ void *a, *b;
+ ASRTER(2==ilength(Args), Args, WNA, s_d2el);
+ fp = array2addr(Fp, MAKINUM(-32), ARG1, s_d2el);
+ aprot = array_prot(A);
+ a = array2addr(A, aprot, ARG2, s_d2el);
+ ASRTER(NFALSEP(aprot), aprot, ARG2, s_d2el);
+ m = INUM(CAR(array_dims(A)));
+ bprot = array_prot(CAR(Args));
+ b = array2addr(CAR(Args), bprot, ARG3, s_d2el);
+ ASRTER(NFALSEP(bprot), bprot, ARG3, s_d2el);
+ n = INUM(CAR(array_dims(CAR(Args))));
+ Args = CDR(Args);
+ ASRTER(INUMP(CAR(Args)), CAR(Args), ARG4, s_d2el);
+ p_lim = INUM(CAR(Args));
+ ASRTER(aprot==bprot, bprot, s_incomp, s_d2el);
+ procs = raprot2procs(aprot, s_d2el);
+ {
+ int maxdx = 0 > (p_lim)
+ ?1+(n)
+ :1+(p_lim)+((n)-(m));
+ int mindx = 0 > (p_lim)
+ ?-(1+(m))
+ :-(1+(p_lim));
+ int res;
+ fp_init(fp, -(mindx), -1, mindx, maxdx);
+ res = fp_compare(fp, -(mindx), 0, a, m, b, n,
+ procs->array_refsEql_P, p_lim);
+ return (-1==res) ? BOOL_F : MAKINUM(res);
+ }
+}
+
+static char s_Idiffer[] = "Idiffer.scm";
+void init_differ()
+{
+ list_of_0 = cons(INUM0, EOL);
+ scm_gc_protect(list_of_0);
+ make_subr(s_d2es, tc7_lsubr_2, diff2edits);
+ make_subr(s_d2el, tc7_lsubr_2, diff2editlen);
+ if (scm_ldprog(s_Idiffer))
+ wta(*loc_errobj, "couldn't init", s_Idiffer);
+}