package skate import ( "bytes" "io/ioutil" "reflect" "testing" "time" "git.archive.org/martin/cgraph/skate/atomic" "git.archive.org/martin/cgraph/skate/xio" "github.com/kr/pretty" ) func TestLineColumn(t *testing.T) { var cases = []struct { line string sep string column int result string }{ {"", "", 2, ""}, {"1 2 3", " ", 1, "1"}, {"1 2 3", " ", 2, "2"}, {"1 2 3", " ", 3, "3"}, {"1 2 3", " ", 4, ""}, {"1 2 3", "\t", 1, "1 2 3"}, } for _, c := range cases { result := CutSep(c.line, c.sep, c.column) if result != c.result { t.Fatalf("got %v, want %v", result, c.result) } } } func TestUniqueMatches(t *testing.T) { var cases = []struct { about string docs []string result []*BiblioRef err error }{ { about: "missing fields are ignored", docs: []string{`{}`}, result: []*BiblioRef{&BiblioRef{}}, err: nil, }, { about: "a single doc is passed on", docs: []string{`{ "_id": "s1_0", "source_release_ident": "s1", "target_release_ident": "t1"}`}, result: []*BiblioRef{&BiblioRef{ Key: "s1_0", SourceReleaseIdent: "s1", TargetReleaseIdent: "t1", }}, err: nil, }, { about: "we want to keep the exact match, if available", docs: []string{` {"_id": "s1_0", "source_release_ident": "s1", "target_release_ident": "t1", "match_status": "fuzzy"}`, `{"_id": "s1_1", "source_release_ident": "s1", "target_release_ident": "t1", "match_status": "exact"}`, }, result: []*BiblioRef{&BiblioRef{ Key: "s1_1", SourceReleaseIdent: "s1", TargetReleaseIdent: "t1", MatchStatus: "exact", }}, err: nil, }, { about: "if both are exact, we just take (any) one", docs: []string{` {"_id": "s1_0", "source_release_ident": "s1", "target_release_ident": "t1", "match_status": "exact", "match_reason": "a"}`, `{"_id": "s1_1", "source_release_ident": "s1", "target_release_ident": "t1", "match_status": "exact", "match_reason": "b"}`, }, result: []*BiblioRef{&BiblioRef{ Key: "s1_1", SourceReleaseIdent: "s1", TargetReleaseIdent: "t1", MatchStatus: "exact", MatchReason: "b", }}, err: nil, }, { about: "regression; a buggy sort?", docs: []string{` {"_id": "s1_0", "source_release_ident": "s1", "target_release_ident": "t1", "match_status": "exact", "match_reason": "a"}`, `{"_id": "s1_1", "source_release_ident": "s1", "target_release_ident": "t1", "match_status": "fuzzy", "match_reason": "b"}`, }, result: []*BiblioRef{&BiblioRef{ Key: "s1_0", SourceReleaseIdent: "s1", TargetReleaseIdent: "t1", MatchStatus: "exact", MatchReason: "a", }}, err: nil, }, } for _, c := range cases { result, err := uniqueMatches(c.docs, &statsAugment{}) if err != c.err { t.Fatalf("got %v, want %v (%s)", err, c.err, c.about) } if !reflect.DeepEqual(result, c.result) { t.Fatalf("got %#v, want %#v (%s)", pretty.Sprint(result), pretty.Sprint(c.result), c.about) } } } func TestMatchedRefsExtend(t *testing.T) { var cases = []struct { matched []*BiblioRef refs []*Ref result []*BiblioRef }{ { matched: []*BiblioRef{}, refs: []*Ref{}, result: []*BiblioRef{}, }, { matched: []*BiblioRef{ &BiblioRef{ RefIndex: 2, RefKey: "K2", }, }, refs: []*Ref{}, result: []*BiblioRef{ &BiblioRef{ RefIndex: 2, RefKey: "K2", }, }, }, { matched: []*BiblioRef{ &BiblioRef{ SourceReleaseIdent: "pud5shsflfgrth77lmlernavjm", RefIndex: 2, RefKey: "K2", }, }, refs: []*Ref{ &Ref{ ReleaseIdent: "0000", Biblio: Biblio{ Title: "Title", }, Index: 3, Key: "K3", }, }, result: []*BiblioRef{ &BiblioRef{ SourceReleaseIdent: "pud5shsflfgrth77lmlernavjm", RefIndex: 2, RefKey: "K2", }, &BiblioRef{ Key: "0000_3", SourceReleaseIdent: "0000", RefIndex: 3, RefKey: "K3", MatchStatus: StatusUnmatched.Short(), MatchReason: ReasonUnknown.Short(), SourceYear: "0", }, }, }, { matched: []*BiblioRef{ &BiblioRef{ SourceReleaseIdent: "pud5shsflfgrth77lmlernavjm", RefIndex: 2, RefKey: "K2", }, }, refs: []*Ref{ &Ref{ ReleaseIdent: "0000", Biblio: Biblio{ Title: "Title", }, Index: 2, Key: "K2", }, }, result: []*BiblioRef{ &BiblioRef{ SourceReleaseIdent: "pud5shsflfgrth77lmlernavjm", RefIndex: 2, RefKey: "K2", }, }, }, } for i, c := range cases { result := matchedRefsExtend(c.matched, c.refs, &statsAugment{}) for _, v := range result { v.IndexedTs = "" // we do not want to mock out time, now } if !reflect.DeepEqual(result, c.result) { t.Fatalf("[%d]: got %v, want %v (%v)", i+1, result, c.result, pretty.Diff(result, c.result)) } } } func TestRemoveSelfLinks(t *testing.T) { var cases = []struct { brefs []*BiblioRef result []*BiblioRef }{ { brefs: nil, result: nil, }, { brefs: []*BiblioRef{}, result: []*BiblioRef{}, }, { brefs: []*BiblioRef{ &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "a"}, &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "b"}, }, result: []*BiblioRef{ &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "b"}, }, }, { brefs: []*BiblioRef{ &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "a"}, &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "a"}, &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "a"}, &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "b"}, }, result: []*BiblioRef{ &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "b"}, }, }, } for i, c := range cases { result := removeSelfLinks(c.brefs) if !reflect.DeepEqual(result, c.result) { t.Fatalf("[%d]: got %v, want %v (%v)", i, result, c.result, pretty.Diff(result, c.result)) } } } func TestDeduplicateBrefs(t *testing.T) { var cases = []struct { brefs []*BiblioRef result []*BiblioRef }{ { brefs: nil, result: nil, }, { brefs: []*BiblioRef{}, result: []*BiblioRef{}, }, { brefs: []*BiblioRef{ &BiblioRef{Key: "123", MatchStatus: StatusStrong.Short()}, &BiblioRef{Key: "123", MatchStatus: StatusExact.Short()}, }, result: []*BiblioRef{ &BiblioRef{Key: "123", MatchStatus: StatusExact.Short()}, }, }, { brefs: []*BiblioRef{ &BiblioRef{Key: "123", MatchStatus: StatusStrong.Short()}, &BiblioRef{Key: "123", MatchStatus: StatusUnmatched.Short()}, }, result: []*BiblioRef{ &BiblioRef{Key: "123", MatchStatus: StatusStrong.Short()}, }, }, { brefs: []*BiblioRef{ &BiblioRef{Key: "123", MatchStatus: StatusStrong.Short()}, &BiblioRef{Key: "123", MatchStatus: StatusWeak.Short()}, }, result: []*BiblioRef{ &BiblioRef{Key: "123", MatchStatus: StatusStrong.Short()}, }, }, { brefs: []*BiblioRef{ &BiblioRef{Key: "123", MatchStatus: StatusStrong.Short()}, &BiblioRef{Key: "123", MatchStatus: StatusAmbiguous.Short()}, }, result: []*BiblioRef{ &BiblioRef{Key: "123", MatchStatus: StatusStrong.Short()}, }, }, } for i, c := range cases { result := deduplicateBrefs(c.brefs) if !reflect.DeepEqual(result, c.result) { t.Fatalf("[%d]: got %v, want %v (%v)", i, result, c.result, pretty.Diff(result, c.result)) } } } func TestZippyExact(t *testing.T) { var cases = []struct { a, b, out string err error }{ { a: "testdata/zippy/cE00a.json", b: "testdata/zippy/cE00b.json", out: "testdata/zippy/cE00r.json", err: nil, }, { a: "testdata/zippy/cE01a.json", b: "testdata/zippy/cE01b.json", out: "testdata/zippy/cE01r.json", err: nil, }, { a: "testdata/zippy/cE02a.json", b: "testdata/zippy/cE02b.json", out: "testdata/zippy/cE02r.json", err: nil, }, } timeNow = func() time.Time { return time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC) } for i, c := range cases { a, b, err := xio.OpenTwo(c.a, c.b) if err != nil { t.Errorf("failed to open test files: %v, %v", c.a, c.b) } var ( buf bytes.Buffer matchResult = MatchResult{Status: StatusExact, Reason: ReasonDOI} ) err = ZippyExact(a, b, matchResult, &buf) if err != c.err { t.Errorf("[%d] got %v, want %v", i, err, c.err) } ok, err := equalsFilename(&buf, c.out) if err != nil { t.Errorf("failed to open test file: %v", c.out) } if !ok { filename, err := tempWriteFile(&buf) if err != nil { t.Logf("could not write temp file") } t.Errorf("[%d] output mismatch (buffer length=%d, content=%v), want %v", i, buf.Len(), filename, c.out) } } } // equalsFilename returns true, if the contents of a given buffer matches the // contents of a file given by filename. func equalsFilename(buf *bytes.Buffer, filename string) (bool, error) { b, err := ioutil.ReadFile(filename) if err != nil { return false, err } bb := buf.Bytes() if len(bb) == 0 && len(b) == 0 { return true, nil } return reflect.DeepEqual(b, bb), nil } // tempWriteFile writes the content of a buffer to a temporary file and returns // its path. func tempWriteFile(buf *bytes.Buffer) (string, error) { f, err := ioutil.TempFile("", "skate-test-*") if err != nil { return "", err } if err = atomic.WriteFile(f.Name(), buf.Bytes(), 0755); err != nil { return "", err } return f.Name(), nil } func TestCutBatch(t *testing.T) { var cases = []struct { lines []string column int result []string }{ { []string{}, 1, nil, }, { []string{}, 9, nil, }, { []string{"1\t2\n", "3\t4\n"}, 2, []string{"2", "4"}, }, } for _, c := range cases { result := CutBatch(c.lines, c.column) if !reflect.DeepEqual(result, c.result) { t.Fatalf("got %v (%d), want %v (%d)", result, len(result), c.result, len(c.result)) } } } func TestCutSep(t *testing.T) { var cases = []struct { line string sep string column int result string }{ {"", "\t", 1, ""}, {"", "\t", 2, ""}, {"a\tb", "\t", 1, "a"}, {"a\tb", "\t", 2, "b"}, {"a\tb", "\t", 3, ""}, {"a\t\tb", "\t", 1, "a"}, {"a\t\tb", "\t", 2, ""}, {"a\t\tb", "\t", 3, "b"}, {"\tb", "\t", 1, ""}, } for _, c := range cases { result := CutSep(c.line, c.sep, c.column) if !reflect.DeepEqual(result, c.result) { t.Fatalf("got %v, want %v", result, c.result) } } }