package skate import ( "bytes" "reflect" "testing" "github.com/benbjohnson/clock" "github.com/kr/pretty" "gitlab.com/internetarchive/refcat/skate/testutil" "gitlab.com/internetarchive/refcat/skate/xio" ) func TestLineColumn(t *testing.T) { var cases = []struct { line string sep string column int result string }{ {"", "", 2, ""}, {"1 2 3", " ", 1, "1"}, {"1 2 3", " ", 2, "2"}, {"1 2 3", " ", 3, "3"}, {"1 2 3", " ", 4, ""}, {"1 2 3", "\t", 1, "1 2 3"}, } for _, c := range cases { result := CutSep(c.line, c.sep, c.column) if result != c.result { t.Fatalf("got %v, want %v", result, c.result) } } } func TestUniqueMatches(t *testing.T) { var cases = []struct { about string docs []string result []*BiblioRef err error }{ { about: "missing fields are ignored, but a biblioref document is created nonetheless", docs: []string{`{}`}, result: []*BiblioRef{ &BiblioRef{}, }, err: nil, }, { about: "a single doc is passed on", docs: []string{`{ "_id": "s1_0", "source_release_ident": "s1", "target_release_ident": "t1"}`}, result: []*BiblioRef{&BiblioRef{ Key: "s1_0", SourceReleaseIdent: "s1", TargetReleaseIdent: "t1", }}, err: nil, }, { about: "we want to keep the exact match, if available", docs: []string{` {"_id": "s1_0", "source_release_ident": "s1", "target_release_ident": "t1", "match_status": "fuzzy"}`, `{"_id": "s1_1", "source_release_ident": "s1", "target_release_ident": "t1", "match_status": "exact"}`, }, result: []*BiblioRef{&BiblioRef{ Key: "s1_1", SourceReleaseIdent: "s1", TargetReleaseIdent: "t1", MatchStatus: "exact", }}, err: nil, }, { about: "if both are exact, we just take (any) one", docs: []string{` {"_id": "s1_0", "source_release_ident": "s1", "target_release_ident": "t1", "match_status": "exact", "match_reason": "a"}`, `{"_id": "s1_1", "source_release_ident": "s1", "target_release_ident": "t1", "match_status": "exact", "match_reason": "b"}`, }, result: []*BiblioRef{&BiblioRef{ Key: "s1_1", SourceReleaseIdent: "s1", TargetReleaseIdent: "t1", MatchStatus: "exact", MatchReason: "b", }}, err: nil, }, { about: "regression; probably caused by a buggy match reason comparison", docs: []string{` {"_id": "s1_0", "source_release_ident": "s1", "target_release_ident": "t1", "match_status": "exact", "match_reason": "a"}`, `{"_id": "s1_1", "source_release_ident": "s1", "target_release_ident": "t1", "match_status": "fuzzy", "match_reason": "b"}`, }, result: []*BiblioRef{&BiblioRef{ Key: "s1_0", SourceReleaseIdent: "s1", TargetReleaseIdent: "t1", MatchStatus: "exact", MatchReason: "a", }}, err: nil, }, } for _, c := range cases { result, err := uniqueMatches(c.docs, &statsAugment{}) if err != c.err { t.Fatalf("got %v, want %v (%s)", err, c.err, c.about) } if !reflect.DeepEqual(result, c.result) { t.Fatalf("got %#v, want %#v (%s)", pretty.Sprint(result), pretty.Sprint(c.result), c.about) } } } func TestMatchedRefsExtend(t *testing.T) { var cases = []struct { about string matched []*BiblioRef refs []*Ref result []*BiblioRef }{ { about: "nothing to extend", matched: []*BiblioRef{}, refs: []*Ref{}, result: []*BiblioRef{}, }, { about: "matched docs are carried over", matched: []*BiblioRef{ &BiblioRef{ RefIndex: 2, RefKey: "K2", }, }, refs: []*Ref{}, result: []*BiblioRef{ &BiblioRef{ RefIndex: 2, RefKey: "K2", }, }, }, { about: "two unrelated docs get merged", matched: []*BiblioRef{ &BiblioRef{ SourceReleaseIdent: "pud5shsflfgrth77lmlernavjm", RefIndex: 2, RefKey: "K2", }, }, refs: []*Ref{ &Ref{ ReleaseIdent: "0000", Biblio: Biblio{ Title: "Title", }, Index: 3, Key: "K3", }, }, result: []*BiblioRef{ &BiblioRef{ SourceReleaseIdent: "pud5shsflfgrth77lmlernavjm", RefIndex: 2, RefKey: "K2", }, &BiblioRef{ Key: "0000_3", SourceReleaseIdent: "0000", RefIndex: 3, RefKey: "K3", MatchStatus: StatusUnmatched.Short(), MatchReason: ReasonUnknown.Short(), SourceYear: "0", TargetCSL: &CSL{ Accessed: nil, Author: nil, CollectionTitle: "", ContainerTitle: "", ContainerTitleShort: "", DOI: "", ID: "", ISBN: "", ISSN: "", Issue: "", Issued: &CSLDate{}, JournalAbbreviation: "", Language: "", NumberOfPages: "", OriginalTitle: "", PMCID: "", PMID: "", Page: "", PageFirst: "", Publisher: "", Source: "", Status: "", Title: "Title", TitleShort: "", Type: "", URL: "", Volume: "", VolumeTitle: "", VolumeTitleShort: "", YearSuffix: "", }, }, }, }, { about: "here, the bref will match the key and index of ref, hence we keep the bref", matched: []*BiblioRef{ &BiblioRef{ SourceReleaseIdent: "pud5shsflfgrth77lmlernavjm", RefIndex: 2, RefKey: "K2", }, }, refs: []*Ref{ &Ref{ ReleaseIdent: "0000", Biblio: Biblio{ Title: "Title", }, Index: 2, Key: "K2", }, }, result: []*BiblioRef{ &BiblioRef{ SourceReleaseIdent: "pud5shsflfgrth77lmlernavjm", RefIndex: 2, RefKey: "K2", }, }, }, } for i, c := range cases { result := matchedRefsExtend(c.matched, c.refs, &statsAugment{}) for _, v := range result { v.IndexedTs = "" // we do not want to mock out time, now } if !reflect.DeepEqual(result, c.result) { t.Fatalf("[%d][%s]: got %v, want %v (%v)", i+1, c.about, result, c.result, pretty.Diff(result, c.result)) } } } func TestRemoveSelfLinks(t *testing.T) { var cases = []struct { brefs []*BiblioRef result []*BiblioRef }{ { brefs: nil, result: nil, }, { brefs: []*BiblioRef{}, result: []*BiblioRef{}, }, { brefs: []*BiblioRef{ &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "a"}, &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "b"}, }, result: []*BiblioRef{ &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "b"}, }, }, { brefs: []*BiblioRef{ &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "a"}, &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "a"}, &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "a"}, &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "b"}, }, result: []*BiblioRef{ &BiblioRef{SourceReleaseIdent: "a", TargetReleaseIdent: "b"}, }, }, } for i, c := range cases { result := removeSelfLinks(c.brefs) if !reflect.DeepEqual(result, c.result) { t.Fatalf("[%d]: got %v, want %v (%v)", i, result, c.result, pretty.Diff(result, c.result)) } } } func TestDeduplicateBrefs(t *testing.T) { var cases = []struct { brefs []*BiblioRef result []*BiblioRef }{ { brefs: nil, result: nil, }, { brefs: []*BiblioRef{}, result: []*BiblioRef{}, }, { brefs: []*BiblioRef{ &BiblioRef{Key: "123", MatchStatus: StatusStrong.Short()}, &BiblioRef{Key: "123", MatchStatus: StatusExact.Short()}, }, result: []*BiblioRef{ &BiblioRef{Key: "123", MatchStatus: StatusExact.Short()}, }, }, { brefs: []*BiblioRef{ &BiblioRef{Key: "123", MatchStatus: StatusStrong.Short()}, &BiblioRef{Key: "123", MatchStatus: StatusUnmatched.Short()}, }, result: []*BiblioRef{ &BiblioRef{Key: "123", MatchStatus: StatusStrong.Short()}, }, }, { brefs: []*BiblioRef{ &BiblioRef{Key: "123", MatchStatus: StatusStrong.Short()}, &BiblioRef{Key: "123", MatchStatus: StatusWeak.Short()}, }, result: []*BiblioRef{ &BiblioRef{Key: "123", MatchStatus: StatusStrong.Short()}, }, }, { brefs: []*BiblioRef{ &BiblioRef{Key: "123", MatchStatus: StatusStrong.Short()}, &BiblioRef{Key: "123", MatchStatus: StatusAmbiguous.Short()}, }, result: []*BiblioRef{ &BiblioRef{Key: "123", MatchStatus: StatusStrong.Short()}, }, }, } for i, c := range cases { result := deduplicateBrefs(c.brefs) if !reflect.DeepEqual(result, c.result) { t.Fatalf("[%d]: got %v, want %v (%v)", i, result, c.result, pretty.Diff(result, c.result)) } } } func TestZippyExact(t *testing.T) { var cases = []struct { a, b, out string err error }{ { a: "testdata/zippy/cE00a.json", b: "testdata/zippy/cE00b.json", out: "testdata/zippy/cE00r.json", err: nil, }, { a: "testdata/zippy/cE01a.json", b: "testdata/zippy/cE01b.json", out: "testdata/zippy/cE01r.json", err: nil, }, { a: "testdata/zippy/cE02a.json", b: "testdata/zippy/cE02b.json", out: "testdata/zippy/cE02r.json", err: nil, }, } T = clock.NewMock() for i, c := range cases { a, b, err := xio.OpenTwo(c.a, c.b) if err != nil { t.Errorf("failed to open test files: %v, %v", c.a, c.b) } var ( buf bytes.Buffer matchResult = MatchResult{Status: StatusExact, Reason: ReasonDOI} ) err = ZippyExact(a, b, matchResult, &buf) if err != c.err { t.Errorf("[%d] got %v, want %v", i, err, c.err) } ok, err := testutil.BufferFileEquals(&buf, c.out) if err != nil { t.Errorf("failed to open test file: %v", c.out) } if !ok { filename, err := testutil.BufferToTemp(&buf) if err != nil { t.Logf("could not write temp file") } t.Errorf("[%d] output mismatch (buffer length=%d, content=%v), want %v", i, buf.Len(), filename, c.out) } } } func TestZippyBrefAugment(t *testing.T) { var cases = []struct { a, b, out string err error }{ { a: "testdata/zippy/cB00a.json", b: "testdata/zippy/cB00b.json", out: "testdata/zippy/cB00r.json", err: nil, }, { a: "testdata/zippy/cB01a.json", b: "testdata/zippy/cB01b.json", out: "testdata/zippy/cB01r.json", err: nil, }, } T = clock.NewMock() for i, c := range cases { a, b, err := xio.OpenTwo(c.a, c.b) if err != nil { t.Errorf("failed to open test files: %v, %v", c.a, c.b) } var buf bytes.Buffer err = ZippyBrefAugment(a, b, &buf) if err != c.err { t.Errorf("[%d] got %v, want %v", i, err, c.err) } ok, err := testutil.BufferFileEquals(&buf, c.out) if err != nil { t.Errorf("failed to open test file: %v", c.out) } if !ok { filename, err := testutil.BufferToTemp(&buf) if err != nil { t.Logf("could not write temp file: %v", err) } t.Errorf("[%d] output mismatch (buffer length=%d, content=%v), want %v", i, buf.Len(), filename, c.out) } } } func TestCutBatch(t *testing.T) { var cases = []struct { lines []string column int result []string }{ { []string{}, 1, nil, }, { []string{}, 9, nil, }, { []string{"1\t2\n", "3\t4\n"}, 2, []string{"2", "4"}, }, } for _, c := range cases { result := CutBatch(c.lines, c.column) if !reflect.DeepEqual(result, c.result) { t.Fatalf("got %v (%d), want %v (%d)", result, len(result), c.result, len(c.result)) } } } func TestCutSep(t *testing.T) { var cases = []struct { line string sep string column int result string }{ {"", "\t", 1, ""}, {"", "\t", 2, ""}, {"a\tb", "\t", 1, "a"}, {"a\tb", "\t", 2, "b"}, {"a\tb", "\t", 3, ""}, {"a\t\tb", "\t", 1, "a"}, {"a\t\tb", "\t", 2, ""}, {"a\t\tb", "\t", 3, "b"}, {"\tb", "\t", 1, ""}, } for _, c := range cases { result := CutSep(c.line, c.sep, c.column) if !reflect.DeepEqual(result, c.result) { t.Fatalf("got %v, want %v", result, c.result) } } } func TestCleanOpenLibraryIdentifier(t *testing.T) { var cases = []struct { s string result string }{ {"", ""}, {"/books/OL31189321M", "OL31189321M"}, {"/b/OL31189321M", "OL31189321M"}, {"OL31189321M", "OL31189321M"}, {"OL123", "OL123"}, {"123", ""}, } for _, c := range cases { result := cleanOpenLibraryIdentifier(c.s) if result != c.result { t.Fatalf("got %v, want %v", result, c.result) } } }