diff options
-rw-r--r-- | skate/reduce.go | 20 |
1 files changed, 14 insertions, 6 deletions
diff --git a/skate/reduce.go b/skate/reduce.go index e03bea1..3978ee9 100644 --- a/skate/reduce.go +++ b/skate/reduce.go @@ -54,6 +54,7 @@ func groupLogf(g *zipkey.Group, s string, vs ...interface{}) { // match result, e.g. for doi matches. func ZippyExact(releases, refs io.Reader, matchResult MatchResult, w io.Writer) error { var ( + ts = timeNow().UTC().Format(time.RFC3339) enc = json.NewEncoder(xio.NewSyncWriter(w)) keyer = makeKeyFunc("\t", 1) grouper = func(g *zipkey.Group) error { @@ -77,7 +78,7 @@ func ZippyExact(releases, refs io.Reader, matchResult MatchResult, w io.Writer) } bref.Reset() bref.Key = fmt.Sprintf("%s_%d", ref.ReleaseIdent, ref.Index+1) - bref.IndexedTs = timeNow().UTC().Format(time.RFC3339) + bref.IndexedTs = ts bref.SourceReleaseIdent = ref.ReleaseIdent bref.SourceWorkIdent = ref.WorkIdent bref.SourceReleaseStage = ref.ReleaseStage @@ -369,6 +370,7 @@ func ZippyVerifyRefsOpenLibrary(olr, refs io.Reader, w io.Writer) error { // write a bref document for each match. func ZippyWayback(refs, cdx io.Reader, w io.Writer) error { var ( + ts = timeNow().UTC().Format(time.RFC3339) enc = json.NewEncoder(xio.NewSyncWriter(w)) keyer = makeKeyFunc("\t", 1) grouper = func(g *zipkey.Group) error { @@ -390,7 +392,7 @@ func ZippyWayback(refs, cdx io.Reader, w io.Writer) error { _, _ = h.Write([]byte(cdx.Line)) hashedURL := fmt.Sprintf("%x", h.Sum(nil)) bref.Key = fmt.Sprintf("web_%s_%s", ref.ReleaseIdent, hashedURL) - bref.IndexedTs = timeNow().UTC().Format(time.RFC3339) + bref.IndexedTs = ts bref.SourceReleaseIdent = ref.ReleaseIdent bref.SourceWorkIdent = ref.WorkIdent bref.SourceReleaseStage = ref.ReleaseStage @@ -550,7 +552,10 @@ func deduplicateBrefs(brefs []*BiblioRef) []*BiblioRef { // did not result in a match (determined by e.g. ref key and index). XXX: We // may have duplicate refs as well - how to distinguish them? func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) []*BiblioRef { - seen := set.New() // store "key + index" of matched items + var ( + seen = set.New() // store "key + index" of matched items + ts = timeNow().UTC().Format(time.RFC3339) + ) for _, m := range matched { s := m.RefKey + fmt.Sprintf("%d", m.RefIndex) seen.Add(s) @@ -565,7 +570,7 @@ func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) [ } // Assemble bref from unmatched ref. var bref BiblioRef - bref.IndexedTs = timeNow().UTC().Format(time.RFC3339) + bref.IndexedTs = ts bref.Key = fmt.Sprintf("%s_%d", r.ReleaseIdent, r.Index) bref.RefIndex = r.Index bref.RefKey = r.Key @@ -625,7 +630,10 @@ func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) [ // "skate-bref-id" post-processing here as well (but there is surely a better // place for that). func uniqueMatches(docs []string, stats *statsAugment) (result []*BiblioRef, err error) { - var brefs []*BiblioRef + var ( + brefs []*BiblioRef + ts = timeNow().UTC().Format(time.RFC3339) + ) for _, doc := range docs { var bref BiblioRef if err := json.Unmarshal([]byte(doc), &bref); err != nil { @@ -634,7 +642,7 @@ func uniqueMatches(docs []string, stats *statsAugment) (result []*BiblioRef, err // On-the-fly add elasticsearch "_id" and indexed timestamp, if not already set. if bref.Key == "" && bref.SourceReleaseIdent != "" { bref.Key = fmt.Sprintf("%s_%d", bref.SourceReleaseIdent, bref.RefIndex) - bref.IndexedTs = timeNow().UTC().Format(time.RFC3339) + bref.IndexedTs = ts } brefs = append(brefs, &bref) } |