aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--skate/zippy.go11
-rw-r--r--skate/zippy_test.go19
2 files changed, 25 insertions, 5 deletions
diff --git a/skate/zippy.go b/skate/zippy.go
index 54038c0..7a6902c 100644
--- a/skate/zippy.go
+++ b/skate/zippy.go
@@ -16,6 +16,7 @@ import (
"log"
"sort"
"strings"
+ "time"
"git.archive.org/martin/cgraph/skate/set"
"git.archive.org/martin/cgraph/skate/zipkey"
@@ -378,6 +379,7 @@ func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) [
continue
}
var bref BiblioRef
+ bref.IndexedTs = time.Now().UTC().Format(time.RFC3339)
bref.Key = fmt.Sprintf("%s_%d", r.ReleaseIdent, r.Index)
bref.RefIndex = r.Index
bref.RefKey = r.Key
@@ -397,7 +399,9 @@ func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) [
// uniqueMatches takes a list of bref docs (unserialized) and will return a
// list of deserialized bref docs, containing unique matches only (e.g. filter
-// out things duplicate matches, e.g. from exact and fuzzy).
+// out duplicate matches, e.g. from exact and fuzzy). We are including
+// "skate-bref-id" post-processing here as well (but there is surely a better
+// place for that).
func uniqueMatches(docs []string, stats *statsAugment) (result []*BiblioRef, err error) {
var brefs []*BiblioRef
for _, doc := range docs {
@@ -405,6 +409,11 @@ func uniqueMatches(docs []string, stats *statsAugment) (result []*BiblioRef, err
if err := json.Unmarshal([]byte(doc), &bref); err != nil {
return nil, err
}
+ // On-the-fly add elasticsearch "_id" and indexed timestamp, if not already set.
+ if bref.Key == "" && bref.SourceReleaseIdent != "" {
+ bref.Key = fmt.Sprintf("%s_%d", bref.SourceReleaseIdent, bref.RefIndex)
+ bref.IndexedTs = time.Now().UTC().Format(time.RFC3339)
+ }
brefs = append(brefs, &bref)
}
// Make sure exact matches come first.
diff --git a/skate/zippy_test.go b/skate/zippy_test.go
index d0ebbd4..3e64dd0 100644
--- a/skate/zippy_test.go
+++ b/skate/zippy_test.go
@@ -76,9 +76,11 @@ func TestUniqueMatches(t *testing.T) {
{
about: "a single doc is passed on",
docs: []string{`{
+ "_id": "s1_0",
"source_release_ident": "s1",
"target_release_ident": "t1"}`},
result: []*BiblioRef{&BiblioRef{
+ Key: "s1_0",
SourceReleaseIdent: "s1",
TargetReleaseIdent: "t1",
}},
@@ -87,14 +89,17 @@ func TestUniqueMatches(t *testing.T) {
{
about: "we want to keep the exact match, if available",
docs: []string{`
- {"source_release_ident": "s1",
+ {"_id": "s1_0",
+ "source_release_ident": "s1",
"target_release_ident": "t1",
"match_status": "fuzzy"}`,
- `{"source_release_ident": "s1",
+ `{"_id": "s1_1",
+ "source_release_ident": "s1",
"target_release_ident": "t1",
"match_status": "exact"}`,
},
result: []*BiblioRef{&BiblioRef{
+ Key: "s1_1",
SourceReleaseIdent: "s1",
TargetReleaseIdent: "t1",
MatchStatus: "exact",
@@ -104,16 +109,19 @@ func TestUniqueMatches(t *testing.T) {
{
about: "if both are exact, we just take (any) one",
docs: []string{`
- {"source_release_ident": "s1",
+ {"_id": "s1_0",
+ "source_release_ident": "s1",
"target_release_ident": "t1",
"match_status": "exact",
"match_reason": "a"}`,
- `{"source_release_ident": "s1",
+ `{"_id": "s1_1",
+ "source_release_ident": "s1",
"target_release_ident": "t1",
"match_status": "exact",
"match_reason": "b"}`,
},
result: []*BiblioRef{&BiblioRef{
+ Key: "s1_1",
SourceReleaseIdent: "s1",
TargetReleaseIdent: "t1",
MatchStatus: "exact",
@@ -237,6 +245,9 @@ func TestMatchedRefsExtend(t *testing.T) {
}
for i, c := range cases {
result := matchedRefsExtend(c.matched, c.refs, &statsAugment{})
+ for _, v := range result {
+ v.IndexedTs = "" // we do not want to mock out time, now
+ }
if !reflect.DeepEqual(result, c.result) {
t.Fatalf("[%d]: got %v, want %v (%v)",
i+1, result, c.result, pretty.Diff(result, c.result))