diff options
Diffstat (limited to 'differ.c')
-rw-r--r-- | differ.c | 594 |
1 files changed, 594 insertions, 0 deletions
diff --git a/differ.c b/differ.c new file mode 100644 index 0000000..43b2c3f --- /dev/null +++ b/differ.c @@ -0,0 +1,594 @@ +/* Copyright (C) 2003, 2004 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this software; see the file COPYING. If not, write to + * the Free Software Foundation, 59 Temple Place, Suite 330, Boston, MA 02111, USA. + * + * As a special exception, the Free Software Foundation gives permission + * for additional uses of the text contained in its release of SCM. + * + * The exception is that, if you link the SCM library with other files + * to produce an executable, this does not by itself cause the + * resulting executable to be covered by the GNU General Public License. + * Your use of that executable is in no way restricted on account of + * linking the SCM library code into it. + * + * This exception does not however invalidate any other reasons why + * the executable file might be covered by the GNU General Public License. + * + * This exception applies only to the code released by the + * Free Software Foundation under the name SCM. If you copy + * code from other Free Software Foundation releases into a copy of + * SCM, as the General Public License permits, the exception does + * not apply to the code that you add in this way. To avoid misleading + * anyone as to the status of such modified files, you must delete + * this exception notice from them. + * + * If you write modifications of your own for SCM, it is your choice + * whether to permit this exception to apply to your modifications. + * If you do not wish that, delete this exception notice. + */ + +/* "differ.c" Linear-space O(NP) sequence comparison. */ +/* Author: Aubrey Jaffer */ + +#include <stdlib.h> +/* #include <stdio.h> */ + +#include "scm.h" + +SCM_EXPORT SCM array_dims P((SCM ra)); + +typedef int (*int_function) (); + +typedef struct { + void* (*subarray) (); + int_function array_refsEql_P; + int_function array_refs_revEql_P; +} fp_procs; + +int fp_compare(int *fp,int fpoff,int *cc,void *a,int m,void *b,int n,int_function array_refsEql_P,int p_lim); + +int fp_run(int *fp,int fpoff,int k,void *a,int m,void *b,int n,int_function array_refsEql_P,int *cc,int p); + +int diff_mid_split(int m,int n,int *rr,int *cc,int cost); + +void fp_init(int *fp,int fpoff,int fill,int mindx,int maxdx); + +int diff_divide_and_conquer(int *fp,int fpoff,int *ccrr,void *a,int start_a,int end_a,void *b,int start_b,int end_b,int *edits,int edx,int epo,fp_procs *procs,int p_lim); + +int diff2et(int *fp,int fpoff,int *ccrr,void *a,int start_a,int end_a,void *b,int start_b,int end_b,int *edits,int edx,int epo,fp_procs *procs,int p_lim); + +int diff2ez(int *fp,int fpoff,int *ccrr,void *a,int start_a,int end_a,void *b,int start_b,int end_b,int *edits,int edx,int epo,fp_procs *procs,int p_lim); + +void check_cost(unsigned char *name,int est,int cost); + +SCM_EXPORT SCM diff2edits P((SCM Edits, SCM Fp, SCM Args)); + +SCM_EXPORT SCM diff2editlen P((SCM Fp, SCM A, SCM Args)); + +# define MAX(a,b) (a<b ? b : a) +# define MIN(a,b) (a>b ? b : a) + +long *long_subarray(ra, start, end) + long *ra; int start, end; +{ + return &(ra[start]); +} +short *short_subarray(ra, start, end) + short *ra; int start, end; +{ + return &(ra[start]); +} +char *char_subarray(ra, start, end) + char *ra; int start, end; +{ + return &(ra[start]); +} + +long long_array_refsEql_P(a, x, m, b, y, n) + long *a; int x, m; long *b; int y, n; +{ + return (a[x])==(b[y]); +} +long long_array_refs_revEql_P(a, x, m, b, y, n) + long *a; int x, m; long *b; int y, n; +{ +/* if (x > m) printf("long x(%d) > m(%d)\n", x, m); */ +/* if (y > n) printf("long y(%d) > n(%d)\n", y, n); */ + return a[(m)-(x)-1]==b[(n)-(y)-1]; +} +short short_array_refsEql_P(a, x, m, b, y, n) + short *a; int x, m; short *b; int y, n; +{ + return (a[x])==(b[y]); +} +short short_array_refs_revEql_P(a, x, m, b, y, n) + short *a; int x, m; short *b; int y, n; +{ +/* if (x > m) printf("short x(%d) > m(%d)\n", x, m); */ +/* if (y > n) printf("short y(%d) > n(%d)\n", y, n); */ + return a[(m)-(x)-1]==b[(n)-(y)-1]; +} +char char_array_refsEql_P(a, x, m, b, y, n) + char *a; int x, m; char *b; int y, n; +{ + return (a[x])==(b[y]); +} +char char_array_refs_revEql_P(a, x, m, b, y, n) + char *a; int x, m; char *b; int y, n; +{ +/* if (x > m) printf("char x(%d) > m(%d)\n", x, m); */ +/* if (y > n) printf("char y(%d) > n(%d)\n", y, n); */ + return a[(m)-(x)-1]==b[(n)-(y)-1]; +} + +fp_procs long_procs = + {long_subarray, long_array_refsEql_P, long_array_refs_revEql_P}; +fp_procs short_procs = + {short_subarray, short_array_refsEql_P, short_array_refs_revEql_P}; +fp_procs char_procs = + {char_subarray, char_array_refsEql_P, char_array_refs_revEql_P}; + +int fp_compare(fp, fpoff, cc, a, m, b, n, array_refsEql_P, p_lim) + int *fp; + int fpoff; + int *cc; + void *a; + int m; + void *b; + int n; + int_function array_refsEql_P; + int p_lim; +{ + int delta = (n)-(m); + { + int p = 0; +L_loop: + { + int k = -(p); + while (!((k)>=(delta))) { + fp_run(fp, fpoff, k, a, m, b, n, array_refsEql_P, cc, p); + { + k = 1+(k); + } + } + } + { + int k = (delta)+(p); + while (!((k)<=(delta))) { + fp_run(fp, fpoff, k, a, m, b, n, array_refsEql_P, cc, p); + { + k = -1+(k); + } + } + } + { + int fpval = fp_run(fp, fpoff, delta, a, m, b, n, array_refsEql_P, cc, p); + if ((!(cc)) + && ((n)<=(fpval))) + return (delta)+(2*(p)); + else if ((!(0 > (p_lim))) + && ((p)>=(p_lim))) + return -1; + else { + p = 1+(p); + goto L_loop; + } + } + } +} + +/* Traces runs of matches until they end; then set fp[k]=y. */ +/* If CC is supplied, set each CC[y] = MIN(CC[y], cost) for run. */ +/* Returns furthest y reached. */ + +int fp_run(fp, fpoff, k, a, m, b, n, array_refsEql_P, cc, p) + int *fp; + int fpoff; + int k; + void *a; + int m; + void *b; + int n; + int_function array_refsEql_P; + int *cc; + int p; +{ + int cost = (k)+(p)+(p); + { + int y = MAX((fp[ -1+(k)+(fpoff)])+1, fp[1+(k)+(fpoff)]); +L_snloop: + { + int x = (y)-(k); + if ((cc) + && ((y)<=(n))) + { + int xcst = (m)-(x); + if (0 > (xcst)) + ; + else cc[y] = MIN((xcst)+(cost), cc[y]); + } + if (((x)<(m)) + && ((y)<(n)) + && (array_refsEql_P(a, x, m, b, y, n))) + { + y = 1+(y); + goto L_snloop; + } + else { + fp[(fpoff)+(k)] = y; + return y; + } + } + } +} + +int diff_mid_split(m, n, rr, cc, cost) + int m; + int n; + int *rr; + int *cc; + int cost; +{ + { + int cdx = 1+((n)/2); + int rdx = (n)/2; +L_loop: + if ((cost)==((cc[rdx])+(rr[(n)-(rdx)]))) + return rdx; + else if ((cost)==((cc[cdx])+(rr[(n)-(cdx)]))) + return cdx; + else { + cdx = 1+(cdx); + rdx = -1+(rdx); + goto L_loop; + } + } +} + + +void fp_init(fp, fpoff, fill, mindx, maxdx) + int *fp; + int fpoff; + int fill; + int mindx; + int maxdx; +{ + int mlim = (fpoff)+(mindx); + { + int idx = (fpoff)+(maxdx); + while (!((idx)<(mlim))) { + fp[idx] = fill; + { + idx = -1+(idx); + } + } + } +} + +/* Split A[start-a..end-a] (shorter array) into smaller and smaller chunks. */ +/* EDX is index into EDITS. */ +/* EPO is insert/delete polarity (+1 or -1) */ + +int diff_divide_and_conquer(fp, fpoff, ccrr, a, start_a, end_a, b, start_b, end_b, edits, edx, epo, procs, p_lim) + int *fp; + int fpoff; + int *ccrr; + void *a; + int start_a; + int end_a; + void *b; + int start_b; + int end_b; + int *edits; + int edx; + int epo; + fp_procs *procs; + int p_lim; +{ + int mid_a = ((start_a)+(end_a))/2; + int len_b = (end_b)-(start_b); + int len_a = (end_a)-(start_a); + { + int tcst = (p_lim)+(p_lim)+((len_b)-(len_a)); + int *cc = &(ccrr[0]); + int *rr = &(ccrr[(len_b)+1]); + int m2 = (end_a)-(mid_a); + int m1 = (mid_a)-(start_a); + fp_init(cc, 0, (len_a)+(len_b), 0, len_b); + fp_init(fp, fpoff, -1, -(1+(p_lim)), 1+(p_lim)+((len_b)-(m1))); + fp_compare(fp, fpoff, cc, procs->subarray(a, start_a, mid_a), m1, procs->subarray(b, start_b, end_b), len_b, procs->array_refsEql_P, MIN(p_lim, len_a)); + fp_init(rr, 0, (len_a)+(len_b), 0, len_b); + fp_init(fp, fpoff, -1, -(1+(p_lim)), 1+(p_lim)+((len_b)-(m2))); + fp_compare(fp, fpoff, rr, procs->subarray(a, mid_a, end_a), m2, procs->subarray(b, start_b, end_b), len_b, procs->array_refs_revEql_P, MIN(p_lim, len_a)); + { + int b_splt = diff_mid_split(len_a, len_b, rr, cc, tcst); + int est_c = cc[b_splt]; + int est_r = rr[(len_b)-(b_splt)]; + check_cost("cc", est_c, diff2et(fp, fpoff, ccrr, a, start_a, mid_a, b, start_b, (start_b)+(b_splt), edits, edx, epo, procs, ((est_c)-((b_splt)-((mid_a)-(start_a))))/2)); + check_cost("rr", est_r, diff2et(fp, fpoff, ccrr, a, mid_a, end_a, b, (start_b)+(b_splt), end_b, edits, (est_c)+(edx), epo, procs, ((est_r)-(((len_b)-(b_splt))-((end_a)-(mid_a))))/2)); + return (est_c)+(est_r); + } + } +} + +/* Trim; then diff sub-arrays; either one longer. Returns edit-length */ + +int diff2et(fp, fpoff, ccrr, a, start_a, end_a, b, start_b, end_b, edits, edx, epo, procs, p_lim) + int *fp; + int fpoff; + int *ccrr; + void *a; + int start_a; + int end_a; + void *b; + int start_b; + int end_b; + int *edits; + int edx; + int epo; + fp_procs *procs; + int p_lim; +{ + { + int bdx = -1+(end_b); + int adx = -1+(end_a); + while (((start_b)<=(bdx)) + && ((start_a)<=(adx)) + && (procs->array_refsEql_P(a, adx, 0, b, bdx, 0))) { + { + bdx = -1+(bdx); + adx = -1+(adx); + } + } + { + int bsx = start_b; + int asx = start_a; + while (((bsx)<(bdx)) + && ((asx)<(adx)) + && (procs->array_refsEql_P(a, asx, 0, b, bsx, 0))) { + { + bsx = 1+(bsx); + asx = 1+(asx); + } + } + { + int delta = ((bdx)-(bsx))-((adx)-(asx)); + if (0 > (delta)) + return diff2ez(fp, fpoff, ccrr, b, bsx, 1+(bdx), a, asx, 1+(adx), edits, edx, -(epo), procs, (delta)+(p_lim)); + else return diff2ez(fp, fpoff, ccrr, a, asx, 1+(adx), b, bsx, 1+(bdx), edits, edx, epo, procs, p_lim); + } + } + } +} + +/* Diff sub-arrays, A not longer than B. Returns edit-length */ + +int diff2ez(fp, fpoff, ccrr, a, start_a, end_a, b, start_b, end_b, edits, edx, epo, procs, p_lim) + int *fp; + int fpoff; + int *ccrr; + void *a; + int start_a; + int end_a; + void *b; + int start_b; + int end_b; + int *edits; + int edx; + int epo; + fp_procs *procs; + int p_lim; +{ + int len_a = (end_a)-(start_a); + int len_b = (end_b)-(start_b); + if (!(p_lim)) + if ((len_b)==(len_a)) + return 0; + else { + int T_edx = edx; + int adx = start_a; + int bdx = start_b; + int edx = T_edx; +L_loop: + if ((bdx)>=(end_b)) + return (len_b)-(len_a); + else if ((adx)>=(end_a)) + { + int T_edx = edx; + int idx = bdx; + int edx = T_edx; + while (!((idx)>=(end_b))) { + edits[edx] = (epo)*(1+(idx)); + { + idx = 1+(idx); + edx = 1+(edx); + } + } + return (len_b)-(len_a); + } + else if (procs->array_refsEql_P(a, adx, 0, b, bdx, 0)) + { + adx = 1+(adx); + bdx = 1+(bdx); + goto L_loop; + } + else { + edits[edx] = (epo)*(1+(bdx)); + { + bdx = 1+(bdx); + edx = 1+(edx); + goto L_loop; + } + } + } + else if ((len_a)<=(p_lim)) + { + int idx = start_a; + int jdx = start_b; + while (!(((idx)>=(end_a)) + && ((jdx)>=(end_b)))) { + if ((jdx)<(end_b)) + { + edits[edx] = (epo)*(1+(jdx)); + edx = 1+(edx); + } + if ((idx)<(end_a)) + { + edits[edx] = (epo)*( -1-(idx)); + edx = 1+(edx); + } + { + idx = 1+(idx); + jdx = 1+(jdx); + } + } + return (len_a)+(len_b); + } + else return diff_divide_and_conquer(fp, fpoff, ccrr, a, start_a, end_a, b, start_b, end_b, edits, edx, epo, procs, p_lim); +} + +void check_cost(name, est, cost) + unsigned char *name; + int est; + int cost; +{ + if ((est)!=(cost)) { +/* fprintf(stderr, "%s: cost check failed %d != %d\\n", name, est, cost); */ + wta(MAKINUM(cost), "cost check failed", name); + } +} + +/* Routines interfacing API layer to algorithms. */ + +/* Return the fp_procs appropriate for SCM array prototype */ +fp_procs *raprot2procs(prot, s_name) + SCM prot; + char *s_name; +{ + fp_procs *procs; + if (ICHRP(prot)) procs = &char_procs; + else if (MAKINUM(16L)==prot) procs = &short_procs; + else if (MAKINUM(-16L)==prot) procs = &short_procs; + else if (MAKINUM(32L)==prot) procs = &long_procs; + else if (MAKINUM(-32L)==prot) procs = &long_procs; + else if (EOL==prot) procs = &long_procs; + else wta(prot, (char *)ARG3, s_name); + return procs; +} + +static SCM list_of_0; + +void* array2addr(RA, prot, pos, s_name) + SCM RA, prot; + char *pos; + char s_name[]; +{ + ASRTER(BOOL_T==arrayp(RA, UNDEFINED) && array_prot(RA)==prot, RA, + pos, s_name); + return (void*)scm_addr(cons(RA, list_of_0), s_name); +} + +/* A not longer than B (M <= N) */ +static char s_d2es[] = "diff2edits!"; +static char s_incomp[] = "incompatible array types"; +SCM diff2edits(Edits, Fp, Args) + SCM Edits, Fp, Args; /* Ccrr, A, B; */ +{ + SCM aprot, bprot; + int *edits; + int est; + int *fp; + int *ccrr; + void *a, *b; + int m, n; + fp_procs *procs; + ASRTER(3==ilength(Args), Args, WNA, s_d2es); + edits = array2addr(Edits, MAKINUM(-32), ARG1, s_d2es); + fp = array2addr(Fp, MAKINUM(-32), ARG2, s_d2es); + ccrr = array2addr(CAR(Args), MAKINUM(-32), ARG3, s_d2es); + Args = CDR(Args); + aprot = array_prot(CAR(Args)); + a = array2addr(CAR(Args), aprot, ARG4, s_d2es); + ASRTER(NFALSEP(aprot), aprot, ARG4, s_d2es); + m = INUM(CAR(array_dims(CAR(Args)))); + Args = CDR(Args); + bprot = array_prot(CAR(Args)); + b = array2addr(CAR(Args), bprot, ARG5, s_d2es); + ASRTER(NFALSEP(bprot), bprot, ARG5, s_d2es); + n = INUM(CAR(array_dims(CAR(Args)))); + ASRTER(aprot==bprot, bprot, s_incomp, s_d2es); + procs = raprot2procs(aprot, s_d2es); + est = INUM(CAR(array_dims(Edits))); + { + int p_lim = ((est)-((n)-(m)))/2; + check_cost(s_d2es, est, + diff2et(fp, 1+(p_lim), + ccrr, a, 0, m, b, 0, n, edits, 0, 1, procs, p_lim)); + return UNSPECIFIED; + } +} + +/* A not longer than B (M <= N) */ + +static char s_d2el[] = "diff2editlen"; +SCM diff2editlen(Fp, A, Args) + SCM Fp, A, Args; /* B, P_lim */ +{ + SCM aprot, bprot; + fp_procs *procs; + int p_lim; + int m, n; + int *fp; + void *a, *b; + ASRTER(2==ilength(Args), Args, WNA, s_d2el); + fp = array2addr(Fp, MAKINUM(-32), ARG1, s_d2el); + aprot = array_prot(A); + a = array2addr(A, aprot, ARG2, s_d2el); + ASRTER(NFALSEP(aprot), aprot, ARG2, s_d2el); + m = INUM(CAR(array_dims(A))); + bprot = array_prot(CAR(Args)); + b = array2addr(CAR(Args), bprot, ARG3, s_d2el); + ASRTER(NFALSEP(bprot), bprot, ARG3, s_d2el); + n = INUM(CAR(array_dims(CAR(Args)))); + Args = CDR(Args); + ASRTER(INUMP(CAR(Args)), CAR(Args), ARG4, s_d2el); + p_lim = INUM(CAR(Args)); + ASRTER(aprot==bprot, bprot, s_incomp, s_d2el); + procs = raprot2procs(aprot, s_d2el); + { + int maxdx = 0 > (p_lim) + ?1+(n) + :1+(p_lim)+((n)-(m)); + int mindx = 0 > (p_lim) + ?-(1+(m)) + :-(1+(p_lim)); + int res; + fp_init(fp, -(mindx), -1, mindx, maxdx); + res = fp_compare(fp, -(mindx), 0, a, m, b, n, + procs->array_refsEql_P, p_lim); + return (-1==res) ? BOOL_F : MAKINUM(res); + } +} + +static char s_Idiffer[] = "Idiffer.scm"; +void init_differ() +{ + list_of_0 = cons(INUM0, EOL); + scm_gc_protect(list_of_0); + make_subr(s_d2es, tc7_lsubr_2, diff2edits); + make_subr(s_d2el, tc7_lsubr_2, diff2editlen); + if (scm_ldprog(s_Idiffer)) + wta(*loc_errobj, "couldn't init", s_Idiffer); +} |