aboutsummaryrefslogtreecommitdiffstats
path: root/skate/reduce_test.go
diff options
context:
space:
mode:
Diffstat (limited to 'skate/reduce_test.go')
-rw-r--r--skate/reduce_test.go451
1 files changed, 451 insertions, 0 deletions
diff --git a/skate/reduce_test.go b/skate/reduce_test.go
new file mode 100644
index 0000000..501d8cd
--- /dev/null
+++ b/skate/reduce_test.go
@@ -0,0 +1,451 @@
+package skate
+
+import (
+ "bytes"
+ "io/ioutil"
+ "reflect"
+ "testing"
+
+ "git.archive.org/martin/cgraph/skate/atomic"
+ "git.archive.org/martin/cgraph/skate/xio"
+ "github.com/kr/pretty"
+)
+
+// TestLineColumn runs CutSep over a table of (line, separator, column)
+// inputs. The table expects columns to be counted from one, an empty result
+// for a column past the last field, and the whole line when the separator
+// does not occur in it.
+func TestLineColumn(t *testing.T) {
+	type testCase struct {
+		line   string
+		sep    string
+		column int
+		result string
+	}
+	table := []testCase{
+		{line: "", sep: "", column: 2, result: ""},
+		{line: "1 2 3", sep: " ", column: 1, result: "1"},
+		{line: "1 2 3", sep: " ", column: 2, result: "2"},
+		{line: "1 2 3", sep: " ", column: 3, result: "3"},
+		{line: "1 2 3", sep: " ", column: 4, result: ""},
+		{line: "1 2 3", sep: "\t", column: 1, result: "1 2 3"},
+	}
+	for i := range table {
+		tc := table[i]
+		if got := CutSep(tc.line, tc.sep, tc.column); got != tc.result {
+			t.Fatalf("got %v, want %v", got, tc.result)
+		}
+	}
+}
+
+// TestCutBatch runs CutBatch over a table of line batches. The table expects
+// a nil result for an empty batch, and the requested tab-separated column,
+// without the trailing newline, for each line otherwise.
+func TestCutBatch(t *testing.T) {
+	type testCase struct {
+		lines  []string
+		column int
+		result []string
+	}
+	table := []testCase{
+		{lines: []string{}, column: 1, result: nil},
+		{lines: []string{}, column: 9, result: nil},
+		{
+			lines:  []string{"1\t2\n", "3\t4\n"},
+			column: 2,
+			result: []string{"2", "4"},
+		},
+	}
+	for _, tc := range table {
+		got := CutBatch(tc.lines, tc.column)
+		// DeepEqual distinguishes nil from an empty slice, which is what
+		// the first two cases rely on.
+		if reflect.DeepEqual(got, tc.result) {
+			continue
+		}
+		t.Fatalf("got %v (%d), want %v (%d)",
+			got, len(got), tc.result, len(tc.result))
+	}
+}
+
+// TestUniqueMatches feeds batches of JSON encoded documents to uniqueMatches
+// and compares the returned BiblioRef values against the expected ones. The
+// table covers empty documents, a single document, and pairs of documents for
+// the same source and target where the exact match is expected to be kept.
+func TestUniqueMatches(t *testing.T) {
+	var cases = []struct {
+		about  string
+		docs   []string
+		result []*BiblioRef
+		err    error
+	}{
+		{
+			about:  "missing fields are ignored",
+			docs:   []string{`{}`},
+			result: []*BiblioRef{{}},
+			err:    nil,
+		},
+		{
+			about: "a single doc is passed on",
+			docs: []string{`{
+ "_id": "s1_0",
+ "source_release_ident": "s1",
+ "target_release_ident": "t1"}`},
+			result: []*BiblioRef{{
+				Key:                "s1_0",
+				SourceReleaseIdent: "s1",
+				TargetReleaseIdent: "t1",
+			}},
+			err: nil,
+		},
+		{
+			about: "we want to keep the exact match, if available",
+			docs: []string{`
+ {"_id": "s1_0",
+ "source_release_ident": "s1",
+ "target_release_ident": "t1",
+ "match_status": "fuzzy"}`,
+				`{"_id": "s1_1",
+ "source_release_ident": "s1",
+ "target_release_ident": "t1",
+ "match_status": "exact"}`,
+			},
+			result: []*BiblioRef{{
+				Key:                "s1_1",
+				SourceReleaseIdent: "s1",
+				TargetReleaseIdent: "t1",
+				MatchStatus:        "exact",
+			}},
+			err: nil,
+		},
+		{
+			about: "if both are exact, we just take (any) one",
+			docs: []string{`
+ {"_id": "s1_0",
+ "source_release_ident": "s1",
+ "target_release_ident": "t1",
+ "match_status": "exact",
+ "match_reason": "a"}`,
+				`{"_id": "s1_1",
+ "source_release_ident": "s1",
+ "target_release_ident": "t1",
+ "match_status": "exact",
+ "match_reason": "b"}`,
+			},
+			result: []*BiblioRef{{
+				Key:                "s1_1",
+				SourceReleaseIdent: "s1",
+				TargetReleaseIdent: "t1",
+				MatchStatus:        "exact",
+				MatchReason:        "b",
+			}},
+			err: nil,
+		},
+		{
+			about: "regression; a buggy sort?",
+			docs: []string{`
+ {"_id": "s1_0",
+ "source_release_ident": "s1",
+ "target_release_ident": "t1",
+ "match_status": "exact",
+ "match_reason": "a"}`,
+				`{"_id": "s1_1",
+ "source_release_ident": "s1",
+ "target_release_ident": "t1",
+ "match_status": "fuzzy",
+ "match_reason": "b"}`,
+			},
+			result: []*BiblioRef{{
+				Key:                "s1_0",
+				SourceReleaseIdent: "s1",
+				TargetReleaseIdent: "t1",
+				MatchStatus:        "exact",
+				MatchReason:        "a",
+			}},
+			err: nil,
+		},
+	}
+	for _, c := range cases {
+		result, err := uniqueMatches(c.docs, &statsAugment{})
+		if err != c.err {
+			t.Fatalf("got %v, want %v (%s)", err, c.err, c.about)
+		}
+		if !reflect.DeepEqual(result, c.result) {
+			// pretty.Sprint already returns a formatted string; print it
+			// with %s so that it is not re-quoted with escaped newlines.
+			t.Fatalf("got %s, want %s (%s)",
+				pretty.Sprint(result),
+				pretty.Sprint(c.result), c.about)
+		}
+	}
+}
+
+// TestMatchedRefsExtend runs matchedRefsExtend over a table of matched
+// entries and raw refs. Per the table, a ref whose index and key are not among
+// the matched entries shows up as an additional, unmatched entry, while a ref
+// with the same index and key as a matched entry adds nothing.
+func TestMatchedRefsExtend(t *testing.T) {
+	type testCase struct {
+		matched []*BiblioRef
+		refs    []*Ref
+		result  []*BiblioRef
+	}
+	table := []testCase{
+		{
+			matched: []*BiblioRef{},
+			refs:    []*Ref{},
+			result:  []*BiblioRef{},
+		},
+		{
+			matched: []*BiblioRef{{RefIndex: 2, RefKey: "K2"}},
+			refs:    []*Ref{},
+			result:  []*BiblioRef{{RefIndex: 2, RefKey: "K2"}},
+		},
+		{
+			matched: []*BiblioRef{{
+				SourceReleaseIdent: "pud5shsflfgrth77lmlernavjm",
+				RefIndex:           2,
+				RefKey:             "K2",
+			}},
+			refs: []*Ref{{
+				ReleaseIdent: "0000",
+				Biblio:       Biblio{Title: "Title"},
+				Index:        3,
+				Key:          "K3",
+			}},
+			result: []*BiblioRef{
+				{
+					SourceReleaseIdent: "pud5shsflfgrth77lmlernavjm",
+					RefIndex:           2,
+					RefKey:             "K2",
+				},
+				{
+					Key:                "0000_3",
+					SourceReleaseIdent: "0000",
+					RefIndex:           3,
+					RefKey:             "K3",
+					MatchStatus:        StatusUnmatched.Short(),
+					MatchReason:        ReasonUnknown.Short(),
+					SourceYear:         "0",
+				},
+			},
+		},
+		{
+			matched: []*BiblioRef{{
+				SourceReleaseIdent: "pud5shsflfgrth77lmlernavjm",
+				RefIndex:           2,
+				RefKey:             "K2",
+			}},
+			refs: []*Ref{{
+				ReleaseIdent: "0000",
+				Biblio:       Biblio{Title: "Title"},
+				Index:        2,
+				Key:          "K2",
+			}},
+			result: []*BiblioRef{{
+				SourceReleaseIdent: "pud5shsflfgrth77lmlernavjm",
+				RefIndex:           2,
+				RefKey:             "K2",
+			}},
+		},
+	}
+	for n, tc := range table {
+		got := matchedRefsExtend(tc.matched, tc.refs, &statsAugment{})
+		// Blank out the timestamp field so the comparison does not depend
+		// on the time of the test run.
+		for _, bref := range got {
+			bref.IndexedTs = ""
+		}
+		if reflect.DeepEqual(got, tc.result) {
+			continue
+		}
+		t.Fatalf("[%d]: got %v, want %v (%v)",
+			n+1, got, tc.result, pretty.Diff(got, tc.result))
+	}
+}
+
+// TestRemoveSelfLinks runs removeSelfLinks over a table of BiblioRef slices.
+// The table expects entries whose source and target release ident are equal
+// to be dropped, and nil or empty input to be returned as is.
+func TestRemoveSelfLinks(t *testing.T) {
+	type testCase struct {
+		brefs  []*BiblioRef
+		result []*BiblioRef
+	}
+	table := []testCase{
+		{brefs: nil, result: nil},
+		{brefs: []*BiblioRef{}, result: []*BiblioRef{}},
+		{
+			brefs: []*BiblioRef{
+				{SourceReleaseIdent: "a", TargetReleaseIdent: "a"},
+				{SourceReleaseIdent: "a", TargetReleaseIdent: "b"},
+			},
+			result: []*BiblioRef{
+				{SourceReleaseIdent: "a", TargetReleaseIdent: "b"},
+			},
+		},
+		{
+			brefs: []*BiblioRef{
+				{SourceReleaseIdent: "a", TargetReleaseIdent: "a"},
+				{SourceReleaseIdent: "a", TargetReleaseIdent: "a"},
+				{SourceReleaseIdent: "a", TargetReleaseIdent: "a"},
+				{SourceReleaseIdent: "a", TargetReleaseIdent: "b"},
+			},
+			result: []*BiblioRef{
+				{SourceReleaseIdent: "a", TargetReleaseIdent: "b"},
+			},
+		},
+	}
+	for n, tc := range table {
+		got := removeSelfLinks(tc.brefs)
+		if reflect.DeepEqual(got, tc.result) {
+			continue
+		}
+		t.Fatalf("[%d]: got %v, want %v (%v)",
+			n, got, tc.result, pretty.Diff(got, tc.result))
+	}
+}
+
+// TestDeduplicateBrefs runs deduplicateBrefs over a table of BiblioRef slices
+// that share a key. The table expects a single entry per key: the exact match
+// is kept over a strong one, and the strong match is kept over unmatched, weak
+// and ambiguous ones.
+func TestDeduplicateBrefs(t *testing.T) {
+	type testCase struct {
+		brefs  []*BiblioRef
+		result []*BiblioRef
+	}
+	table := []testCase{
+		{brefs: nil, result: nil},
+		{brefs: []*BiblioRef{}, result: []*BiblioRef{}},
+		{
+			brefs: []*BiblioRef{
+				{Key: "123", MatchStatus: StatusStrong.Short()},
+				{Key: "123", MatchStatus: StatusExact.Short()},
+			},
+			result: []*BiblioRef{
+				{Key: "123", MatchStatus: StatusExact.Short()},
+			},
+		},
+		{
+			brefs: []*BiblioRef{
+				{Key: "123", MatchStatus: StatusStrong.Short()},
+				{Key: "123", MatchStatus: StatusUnmatched.Short()},
+			},
+			result: []*BiblioRef{
+				{Key: "123", MatchStatus: StatusStrong.Short()},
+			},
+		},
+		{
+			brefs: []*BiblioRef{
+				{Key: "123", MatchStatus: StatusStrong.Short()},
+				{Key: "123", MatchStatus: StatusWeak.Short()},
+			},
+			result: []*BiblioRef{
+				{Key: "123", MatchStatus: StatusStrong.Short()},
+			},
+		},
+		{
+			brefs: []*BiblioRef{
+				{Key: "123", MatchStatus: StatusStrong.Short()},
+				{Key: "123", MatchStatus: StatusAmbiguous.Short()},
+			},
+			result: []*BiblioRef{
+				{Key: "123", MatchStatus: StatusStrong.Short()},
+			},
+		},
+	}
+	for n, tc := range table {
+		got := deduplicateBrefs(tc.brefs)
+		if reflect.DeepEqual(got, tc.result) {
+			continue
+		}
+		t.Fatalf("[%d]: got %v, want %v (%v)",
+			n, got, tc.result, pretty.Diff(got, tc.result))
+	}
+}
+
+// TestZippyExact runs ZippyExact over pairs of fixture files and compares the
+// bytes written to the output buffer with an expected result file. On a
+// mismatch, the actual output is written to a temporary file whose path is
+// included in the failure message.
+func TestZippyExact(t *testing.T) {
+	var cases = []struct {
+		a, b, out string
+		err       error
+	}{
+		{
+			a:   "testdata/zippy/cE00a.json",
+			b:   "testdata/zippy/cE00b.json",
+			out: "testdata/zippy/cE00r.json",
+			err: nil,
+		},
+		{
+			a:   "testdata/zippy/cE01a.json",
+			b:   "testdata/zippy/cE01b.json",
+			out: "testdata/zippy/cE01r.json",
+			err: nil,
+		},
+		{
+			a:   "testdata/zippy/cE02a.json",
+			b:   "testdata/zippy/cE02b.json",
+			out: "testdata/zippy/cE02r.json",
+			err: nil,
+		},
+	}
+	for i, c := range cases {
+		// NOTE(review): the handles returned by OpenTwo are not closed
+		// here; confirm whether they need closing.
+		a, b, err := xio.OpenTwo(c.a, c.b)
+		if err != nil {
+			// Without both inputs there is nothing to run; skip the case
+			// instead of handing unusable handles to ZippyExact.
+			t.Errorf("[%d] failed to open test files: %v, %v: %v", i, c.a, c.b, err)
+			continue
+		}
+		var (
+			buf         bytes.Buffer
+			matchResult = MatchResult{Status: StatusExact, Reason: ReasonDOI}
+		)
+		if err := ZippyExact(a, b, matchResult, &buf); err != c.err {
+			t.Errorf("[%d] got %v, want %v", i, err, c.err)
+		}
+		ok, err := equalsFilename(&buf, c.out)
+		if err != nil {
+			// The expected file could not be read, so a mismatch report
+			// would only be noise.
+			t.Errorf("[%d] failed to open test file: %v: %v", i, c.out, err)
+			continue
+		}
+		if ok {
+			continue
+		}
+		filename, err := tempWriteFile(&buf)
+		if err != nil {
+			t.Logf("could not write temp file: %v", err)
+		}
+		t.Errorf("[%d] output mismatch (buffer length=%d, content=%v), want %v", i, buf.Len(), filename, c.out)
+	}
+}
+
+// equalsFilename reports whether the contents of buf are byte-for-byte equal
+// to the contents of the file at filename. An error is returned only if the
+// file cannot be read; an empty buffer and an empty file compare as equal.
+func equalsFilename(buf *bytes.Buffer, filename string) (bool, error) {
+	want, err := ioutil.ReadFile(filename)
+	if err != nil {
+		return false, err
+	}
+	// bytes.Equal treats nil and empty slices as equal, so no special case
+	// for empty content is required.
+	return bytes.Equal(want, buf.Bytes()), nil
+}
+
+// tempWriteFile writes the content of buf to a newly created temporary file
+// and returns its path. The file is not removed; it is meant to be inspected
+// after a failed test.
+func tempWriteFile(buf *bytes.Buffer) (string, error) {
+	f, err := ioutil.TempFile("", "skate-test-*")
+	if err != nil {
+		return "", err
+	}
+	// Only the reserved name is needed, as the content is written by path
+	// below; close the handle right away so it does not leak.
+	name := f.Name()
+	if err := f.Close(); err != nil {
+		return "", err
+	}
+	if err := atomic.WriteFile(name, buf.Bytes(), 0755); err != nil {
+		return "", err
+	}
+	return name, nil
+}