diff options
Diffstat (limited to 'skate/reduce_test.go')
-rw-r--r-- | skate/reduce_test.go | 451 |
1 files changed, 451 insertions, 0 deletions
diff --git a/skate/reduce_test.go b/skate/reduce_test.go new file mode 100644 index 0000000..501d8cd --- /dev/null +++ b/skate/reduce_test.go @@ -0,0 +1,451 @@ +package skate + +import ( + "bytes" + "io/ioutil" + "reflect" + "testing" + + "git.archive.org/martin/cgraph/skate/atomic" + "git.archive.org/martin/cgraph/skate/xio" + "github.com/kr/pretty" +) + +func TestLineColumn(t *testing.T) { + var cases = []struct { + line string + sep string + column int + result string + }{ + {"", "", 2, ""}, + {"1 2 3", " ", 1, "1"}, + {"1 2 3", " ", 2, "2"}, + {"1 2 3", " ", 3, "3"}, + {"1 2 3", " ", 4, ""}, + {"1 2 3", "\t", 1, "1 2 3"}, + } + for _, c := range cases { + result := CutSep(c.line, c.sep, c.column) + if result != c.result { + t.Fatalf("got %v, want %v", result, c.result) + } + } +} + +func TestCutBatch(t *testing.T) { + var cases = []struct { + lines []string + column int + result []string + }{ + { + []string{}, + 1, + nil, + }, + { + []string{}, + 9, + nil, + }, + { + []string{"1\t2\n", "3\t4\n"}, + 2, + []string{"2", "4"}, + }, + } + for _, c := range cases { + result := CutBatch(c.lines, c.column) + if !reflect.DeepEqual(result, c.result) { + t.Fatalf("got %v (%d), want %v (%d)", + result, len(result), c.result, len(c.result)) + } + } +} + +func TestUniqueMatches(t *testing.T) { + var cases = []struct { + about string + docs []string + result []*BiblioRef + err error + }{ + { + about: "missing fields are ignored", + docs: []string{`{}`}, + result: []*BiblioRef{&BiblioRef{}}, + err: nil, + }, + { + about: "a single doc is passed on", + docs: []string{`{ + "_id": "s1_0", + "source_release_ident": "s1", + "target_release_ident": "t1"}`}, + result: []*BiblioRef{&BiblioRef{ + Key: "s1_0", + SourceReleaseIdent: "s1", + TargetReleaseIdent: "t1", + }}, + err: nil, + }, + { + about: "we want to keep the exact match, if available", + docs: []string{` + {"_id": "s1_0", + "source_release_ident": "s1", + "target_release_ident": "t1", + "match_status": "fuzzy"}`, + `{"_id": "s1_1", + "source_release_ident": "s1", + "target_release_ident": "t1", + "match_status": "exact"}`, + }, + result: []*BiblioRef{&BiblioRef{ + Key: "s1_1", + SourceReleaseIdent: "s1", + TargetReleaseIdent: "t1", + MatchStatus: "exact", + }}, + err: nil, + }, + { + about: "if both are exact, we just take (any) one", + docs: []string{` + {"_id": "s1_0", + "source_release_ident": "s1", + "target_release_ident": "t1", + "match_status": "exact", + "match_reason": "a"}`, + `{"_id": "s1_1", + "source_release_ident": "s1", + "target_release_ident": "t1", + "match_status": "exact", + "match_reason": "b"}`, + }, + result: []*BiblioRef{&BiblioRef{ + Key: "s1_1", + SourceReleaseIdent: "s1", + TargetReleaseIdent: "t1", + MatchStatus: "exact", + MatchReason: "b", + }}, + err: nil, + }, + { + about: "regression; a buggy sort?", + docs: []string{` + {"_id": "s1_0", + "source_release_ident": "s1", + "target_release_ident": "t1", + "match_status": "exact", + "match_reason": "a"}`, + `{"_id": "s1_1", + "source_release_ident": "s1", + "target_release_ident": "t1", + "match_status": "fuzzy", + "match_reason": "b"}`, + }, + result: []*BiblioRef{&BiblioRef{ + Key: "s1_0", + SourceReleaseIdent: "s1", + TargetReleaseIdent: "t1", + MatchStatus: "exact", + MatchReason: "a", + }}, + err: nil, + }, + } + for _, c := range cases { + result, err := uniqueMatches(c.docs, &statsAugment{}) + if err != c.err { + t.Fatalf("got %v, want %v (%s)", err, c.err, c.about) + } + if !reflect.DeepEqual(result, c.result) { + t.Fatalf("got %#v, want %#v (%s)", + pretty.Sprint(result), + pretty.Sprint(c.result), c.about) + } + } +} + +func TestMatchedRefsExtend(t *testing.T) { + var cases = []struct { + matched []*BiblioRef + refs []*Ref + result []*BiblioRef + }{ + { + matched: []*BiblioRef{}, + refs: []*Ref{}, + result: []*BiblioRef{}, + }, + { + matched: []*BiblioRef{ + &BiblioRef{ + RefIndex: 2, + RefKey: "K2", + }, + }, + refs: []*Ref{}, + result: []*BiblioRef{ + &BiblioRef{ + RefIndex: 2, + RefKey: "K2", + }, + }, + }, + { + matched: []*BiblioRef{ + &BiblioRef{ + SourceReleaseIdent: "pud5shsflfgrth77lmlernavjm", + RefIndex: 2, + RefKey: "K2", + }, + }, + refs: []*Ref{ + &Ref{ + ReleaseIdent: "0000", + Biblio: Biblio{ + Title: "Title", + }, + Index: 3, + Key: "K3", + }, + }, + result: []*BiblioRef{ + &BiblioRef{ + SourceReleaseIdent: "pud5shsflfgrth77lmlernavjm", + RefIndex: 2, + RefKey: "K2", + }, + &BiblioRef{ + Key: "0000_3", + SourceReleaseIdent: "0000", + RefIndex: 3, + RefKey: "K3", + MatchStatus: StatusUnmatched.Short(), + MatchReason: ReasonUnknown.Short(), + SourceYear: "0", + }, + }, + }, + { + matched: []*BiblioRef{ + &BiblioRef{ + SourceReleaseIdent: "pud5shsflfgrth77lmlernavjm", + RefIndex: 2, + RefKey: "K2", + }, + }, + refs: []*Ref{ + &Ref{ + ReleaseIdent: "0000", + Biblio: Biblio{ + Title: "Title", + }, + Index: 2, + Key: "K2", + }, + }, + result: []*BiblioRef{ + &BiblioRef{ + SourceReleaseIdent: "pud5shsflfgrth77lmlernavjm", + RefIndex: 2, + RefKey: "K2", + }, + }, + }, + } + for i, c := range cases { + result := matchedRefsExtend(c.matched, c.refs, &statsAugment{}) + for _, v := range result { + v.IndexedTs = "" // we do not want to mock out time, now + } + if !reflect.DeepEqual(result, c.result) { + t.Fatalf("[%d]: got %v, want %v (%v)", + i+1, result, c.result, pretty.Diff(result, c.result)) + } + } +} + +func TestRemoveSelfLinks(t *testing.T) { + var cases = []struct { + brefs []*BiblioRef + result []*BiblioRef + }{ + { + brefs: nil, + result: nil, + }, + { + brefs: []*BiblioRef{}, + result: []*BiblioRef{}, + }, + { + brefs: []*BiblioRef{ + &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "a"}, + &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "b"}, + }, + result: []*BiblioRef{ + &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "b"}, + }, + }, + { + brefs: []*BiblioRef{ + &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "a"}, + &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "a"}, + &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "a"}, + &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "b"}, + }, + result: []*BiblioRef{ + &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "b"}, + }, + }, + } + for i, c := range cases { + result := removeSelfLinks(c.brefs) + if !reflect.DeepEqual(result, c.result) { + t.Fatalf("[%d]: got %v, want %v (%v)", + i, result, c.result, pretty.Diff(result, c.result)) + } + } +} + +func TestDeduplicateBrefs(t *testing.T) { + var cases = []struct { + brefs []*BiblioRef + result []*BiblioRef + }{ + { + brefs: nil, + result: nil, + }, + { + brefs: []*BiblioRef{}, + result: []*BiblioRef{}, + }, + { + brefs: []*BiblioRef{ + &BiblioRef{Key: "123", MatchStatus: StatusStrong.Short()}, + &BiblioRef{Key: "123", MatchStatus: StatusExact.Short()}, + }, + result: []*BiblioRef{ + &BiblioRef{Key: "123", MatchStatus: StatusExact.Short()}, + }, + }, + { + brefs: []*BiblioRef{ + &BiblioRef{Key: "123", MatchStatus: StatusStrong.Short()}, + &BiblioRef{Key: "123", MatchStatus: StatusUnmatched.Short()}, + }, + result: []*BiblioRef{ + &BiblioRef{Key: "123", MatchStatus: StatusStrong.Short()}, + }, + }, + { + brefs: []*BiblioRef{ + &BiblioRef{Key: "123", MatchStatus: StatusStrong.Short()}, + &BiblioRef{Key: "123", MatchStatus: StatusWeak.Short()}, + }, + result: []*BiblioRef{ + &BiblioRef{Key: "123", MatchStatus: StatusStrong.Short()}, + }, + }, + { + brefs: []*BiblioRef{ + &BiblioRef{Key: "123", MatchStatus: StatusStrong.Short()}, + &BiblioRef{Key: "123", MatchStatus: StatusAmbiguous.Short()}, + }, + result: []*BiblioRef{ + &BiblioRef{Key: "123", MatchStatus: StatusStrong.Short()}, + }, + }, + } + for i, c := range cases { + result := deduplicateBrefs(c.brefs) + if !reflect.DeepEqual(result, c.result) { + t.Fatalf("[%d]: got %v, want %v (%v)", + i, result, c.result, pretty.Diff(result, c.result)) + } + } +} + +func TestZippyExact(t *testing.T) { + var cases = []struct { + a, b, out string + err error + }{ + { + a: "testdata/zippy/cE00a.json", + b: "testdata/zippy/cE00b.json", + out: "testdata/zippy/cE00r.json", + err: nil, + }, + { + a: "testdata/zippy/cE01a.json", + b: "testdata/zippy/cE01b.json", + out: "testdata/zippy/cE01r.json", + err: nil, + }, + { + a: "testdata/zippy/cE02a.json", + b: "testdata/zippy/cE02b.json", + out: "testdata/zippy/cE02r.json", + err: nil, + }, + } + for i, c := range cases { + a, b, err := xio.OpenTwo(c.a, c.b) + if err != nil { + t.Errorf("failed to open test files: %v, %v", c.a, c.b) + } + var ( + buf bytes.Buffer + matchResult = MatchResult{Status: StatusExact, Reason: ReasonDOI} + ) + err = ZippyExact(a, b, matchResult, &buf) + if err != c.err { + t.Errorf("[%d] got %v, want %v", i, err, c.err) + } + ok, err := equalsFilename(&buf, c.out) + if err != nil { + t.Errorf("failed to open test file: %v", c.out) + } + if !ok { + filename, err := tempWriteFile(&buf) + if err != nil { + t.Logf("could not write temp file") + } + t.Errorf("[%d] output mismatch (buffer length=%d, content=%v), want %v", i, buf.Len(), filename, c.out) + } + } +} + +// equalsFilename returns true, if the contents of a given buffer matches the +// contents of a file given by filename. +func equalsFilename(buf *bytes.Buffer, filename string) (bool, error) { + b, err := ioutil.ReadFile(filename) + if err != nil { + return false, err + } + bb := buf.Bytes() + if len(bb) == 0 && len(b) == 0 { + return true, nil + } + return reflect.DeepEqual(b, bb), nil +} + +// tempWriteFile writes the content of a buffer to a temporary file and returns +// its path. +func tempWriteFile(buf *bytes.Buffer) (string, error) { + f, err := ioutil.TempFile("", "skate-test-*") + if err != nil { + return "", err + } + if err = atomic.WriteFile(f.Name(), buf.Bytes(), 0755); err != nil { + return "", err + } + return f.Name(), nil +} |