diff options
-rw-r--r-- | skate/zippy.go | 11 | ||||
-rw-r--r-- | skate/zippy_test.go | 19 |
2 files changed, 25 insertions, 5 deletions
diff --git a/skate/zippy.go b/skate/zippy.go index 54038c0..7a6902c 100644 --- a/skate/zippy.go +++ b/skate/zippy.go @@ -16,6 +16,7 @@ import ( "log" "sort" "strings" + "time" "git.archive.org/martin/cgraph/skate/set" "git.archive.org/martin/cgraph/skate/zipkey" @@ -378,6 +379,7 @@ func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) [ continue } var bref BiblioRef + bref.IndexedTs = time.Now().UTC().Format(time.RFC3339) bref.Key = fmt.Sprintf("%s_%d", r.ReleaseIdent, r.Index) bref.RefIndex = r.Index bref.RefKey = r.Key @@ -397,7 +399,9 @@ func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) [ // uniqueMatches takes a list of bref docs (unserialized) and will return a // list of deserialized bref docs, containing unique matches only (e.g. filter -// out things duplicate matches, e.g. from exact and fuzzy). +// out things duplicate matches, e.g. from exact and fuzzy). We are including +// "skate-bref-id" post-processing here as well (but there is surely a better +// place for that). func uniqueMatches(docs []string, stats *statsAugment) (result []*BiblioRef, err error) { var brefs []*BiblioRef for _, doc := range docs { @@ -405,6 +409,11 @@ func uniqueMatches(docs []string, stats *statsAugment) (result []*BiblioRef, err if err := json.Unmarshal([]byte(doc), &bref); err != nil { return nil, err } + // On-the-fly add elasticsearch "_id" and indexed timestamp, if not already set. + if bref.Key == "" && bref.SourceReleaseIdent != "" { + bref.Key = fmt.Sprintf("%s_%d", bref.SourceReleaseIdent, bref.RefIndex) + bref.IndexedTs = time.Now().UTC().Format(time.RFC3339) + } brefs = append(brefs, &bref) } // Make sure exact matches come first. diff --git a/skate/zippy_test.go b/skate/zippy_test.go index d0ebbd4..3e64dd0 100644 --- a/skate/zippy_test.go +++ b/skate/zippy_test.go @@ -76,9 +76,11 @@ func TestUniqueMatches(t *testing.T) { { about: "a single doc is passed on", docs: []string{`{ + "_id": "s1_0", "source_release_ident": "s1", "target_release_ident": "t1"}`}, result: []*BiblioRef{&BiblioRef{ + Key: "s1_0", SourceReleaseIdent: "s1", TargetReleaseIdent: "t1", }}, @@ -87,14 +89,17 @@ func TestUniqueMatches(t *testing.T) { { about: "we want to keep the exact match, if available", docs: []string{` - {"source_release_ident": "s1", + {"_id": "s1_0", + "source_release_ident": "s1", "target_release_ident": "t1", "match_status": "fuzzy"}`, - `{"source_release_ident": "s1", + `{"_id": "s1_1", + "source_release_ident": "s1", "target_release_ident": "t1", "match_status": "exact"}`, }, result: []*BiblioRef{&BiblioRef{ + Key: "s1_1", SourceReleaseIdent: "s1", TargetReleaseIdent: "t1", MatchStatus: "exact", @@ -104,16 +109,19 @@ func TestUniqueMatches(t *testing.T) { { about: "if both are exact, we just take (any) one", docs: []string{` - {"source_release_ident": "s1", + {"_id": "s1_0", + "source_release_ident": "s1", "target_release_ident": "t1", "match_status": "exact", "match_reason": "a"}`, - `{"source_release_ident": "s1", + `{"_id": "s1_1", + "source_release_ident": "s1", "target_release_ident": "t1", "match_status": "exact", "match_reason": "b"}`, }, result: []*BiblioRef{&BiblioRef{ + Key: "s1_1", SourceReleaseIdent: "s1", TargetReleaseIdent: "t1", MatchStatus: "exact", @@ -237,6 +245,9 @@ func TestMatchedRefsExtend(t *testing.T) { } for i, c := range cases { result := matchedRefsExtend(c.matched, c.refs, &statsAugment{}) + for _, v := range result { + v.IndexedTs = "" // we do not want to mock out time, now + } if !reflect.DeepEqual(result, c.result) { t.Fatalf("[%d]: got %v, want %v (%v)", i+1, result, c.result, pretty.Diff(result, c.result)) |