diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-07-27 10:41:40 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-07-27 10:41:40 +0200 |
commit | 4e98c1d73cb318791e3085e4e5fccace5a43b137 (patch) | |
tree | 6c80992610e24856a8d9c93e3fd1166490e506e9 | |
parent | f60974d1e7a3ceceaa677e2c2faa40bfdbd78bb3 (diff) | |
download | refcat-4e98c1d73cb318791e3085e4e5fccace5a43b137.tar.gz refcat-4e98c1d73cb318791e3085e4e5fccace5a43b137.zip |
reuse timestamps
While time.Now is not really slow, thanks to vDSO (cf.
https://git.io/J4SOH), it is even faster to call it just once at
the start of processing; see also:
https://twitter.com/davidcrawshaw/status/1414243408936280073
> Turns out time.Now was taking its usual amount of time on
linux, ~50 nanoseconds [...]
-rw-r--r-- | skate/reduce.go | 20 |
1 files changed, 14 insertions, 6 deletions
diff --git a/skate/reduce.go b/skate/reduce.go index e03bea1..3978ee9 100644 --- a/skate/reduce.go +++ b/skate/reduce.go @@ -54,6 +54,7 @@ func groupLogf(g *zipkey.Group, s string, vs ...interface{}) { // match result, e.g. for doi matches. func ZippyExact(releases, refs io.Reader, matchResult MatchResult, w io.Writer) error { var ( + ts = timeNow().UTC().Format(time.RFC3339) enc = json.NewEncoder(xio.NewSyncWriter(w)) keyer = makeKeyFunc("\t", 1) grouper = func(g *zipkey.Group) error { @@ -77,7 +78,7 @@ func ZippyExact(releases, refs io.Reader, matchResult MatchResult, w io.Writer) } bref.Reset() bref.Key = fmt.Sprintf("%s_%d", ref.ReleaseIdent, ref.Index+1) - bref.IndexedTs = timeNow().UTC().Format(time.RFC3339) + bref.IndexedTs = ts bref.SourceReleaseIdent = ref.ReleaseIdent bref.SourceWorkIdent = ref.WorkIdent bref.SourceReleaseStage = ref.ReleaseStage @@ -369,6 +370,7 @@ func ZippyVerifyRefsOpenLibrary(olr, refs io.Reader, w io.Writer) error { // write a bref document for each match. func ZippyWayback(refs, cdx io.Reader, w io.Writer) error { var ( + ts = timeNow().UTC().Format(time.RFC3339) enc = json.NewEncoder(xio.NewSyncWriter(w)) keyer = makeKeyFunc("\t", 1) grouper = func(g *zipkey.Group) error { @@ -390,7 +392,7 @@ func ZippyWayback(refs, cdx io.Reader, w io.Writer) error { _, _ = h.Write([]byte(cdx.Line)) hashedURL := fmt.Sprintf("%x", h.Sum(nil)) bref.Key = fmt.Sprintf("web_%s_%s", ref.ReleaseIdent, hashedURL) - bref.IndexedTs = timeNow().UTC().Format(time.RFC3339) + bref.IndexedTs = ts bref.SourceReleaseIdent = ref.ReleaseIdent bref.SourceWorkIdent = ref.WorkIdent bref.SourceReleaseStage = ref.ReleaseStage @@ -550,7 +552,10 @@ func deduplicateBrefs(brefs []*BiblioRef) []*BiblioRef { // did not result in a match (determined by e.g. ref key and index). XXX: We // may have duplicate refs as well - how to distinguish them? 
func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) []*BiblioRef { - seen := set.New() // store "key + index" of matched items + var ( + seen = set.New() // store "key + index" of matched items + ts = timeNow().UTC().Format(time.RFC3339) + ) for _, m := range matched { s := m.RefKey + fmt.Sprintf("%d", m.RefIndex) seen.Add(s) @@ -565,7 +570,7 @@ func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) [ } // Assemble bref from unmatched ref. var bref BiblioRef - bref.IndexedTs = timeNow().UTC().Format(time.RFC3339) + bref.IndexedTs = ts bref.Key = fmt.Sprintf("%s_%d", r.ReleaseIdent, r.Index) bref.RefIndex = r.Index bref.RefKey = r.Key @@ -625,7 +630,10 @@ func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) [ // "skate-bref-id" post-processing here as well (but there is surely a better // place for that). func uniqueMatches(docs []string, stats *statsAugment) (result []*BiblioRef, err error) { - var brefs []*BiblioRef + var ( + brefs []*BiblioRef + ts = timeNow().UTC().Format(time.RFC3339) + ) for _, doc := range docs { var bref BiblioRef if err := json.Unmarshal([]byte(doc), &bref); err != nil { @@ -634,7 +642,7 @@ func uniqueMatches(docs []string, stats *statsAugment) (result []*BiblioRef, err // On-the-fly add elasticsearch "_id" and indexed timestamp, if not already set. if bref.Key == "" && bref.SourceReleaseIdent != "" { bref.Key = fmt.Sprintf("%s_%d", bref.SourceReleaseIdent, bref.RefIndex) - bref.IndexedTs = timeNow().UTC().Format(time.RFC3339) + bref.IndexedTs = ts } brefs = append(brefs, &bref) } |