aboutsummaryrefslogtreecommitdiffstats
path: root/skate
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-07-27 10:41:40 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-07-27 10:41:40 +0200
commit 4e98c1d73cb318791e3085e4e5fccace5a43b137 (patch)
tree 6c80992610e24856a8d9c93e3fd1166490e506e9 /skate
parent f60974d1e7a3ceceaa677e2c2faa40bfdbd78bb3 (diff)
download refcat-4e98c1d73cb318791e3085e4e5fccace5a43b137.tar.gz
refcat-4e98c1d73cb318791e3085e4e5fccace5a43b137.zip
reuse timestamps
While time.Now is not really slow, thanks to vDSO (cf. https://git.io/J4SOH), it is even faster to call it once at the start of processing and reuse the formatted timestamp; see also: https://twitter.com/davidcrawshaw/status/1414243408936280073 > Turns out time.Now was taking its usual amount of time on linux, ~50 nanoseconds [...]
Diffstat (limited to 'skate')
-rw-r--r--skate/reduce.go20
1 files changed, 14 insertions, 6 deletions
diff --git a/skate/reduce.go b/skate/reduce.go
index e03bea1..3978ee9 100644
--- a/skate/reduce.go
+++ b/skate/reduce.go
@@ -54,6 +54,7 @@ func groupLogf(g *zipkey.Group, s string, vs ...interface{}) {
// match result, e.g. for doi matches.
func ZippyExact(releases, refs io.Reader, matchResult MatchResult, w io.Writer) error {
var (
+ ts = timeNow().UTC().Format(time.RFC3339)
enc = json.NewEncoder(xio.NewSyncWriter(w))
keyer = makeKeyFunc("\t", 1)
grouper = func(g *zipkey.Group) error {
@@ -77,7 +78,7 @@ func ZippyExact(releases, refs io.Reader, matchResult MatchResult, w io.Writer)
}
bref.Reset()
bref.Key = fmt.Sprintf("%s_%d", ref.ReleaseIdent, ref.Index+1)
- bref.IndexedTs = timeNow().UTC().Format(time.RFC3339)
+ bref.IndexedTs = ts
bref.SourceReleaseIdent = ref.ReleaseIdent
bref.SourceWorkIdent = ref.WorkIdent
bref.SourceReleaseStage = ref.ReleaseStage
@@ -369,6 +370,7 @@ func ZippyVerifyRefsOpenLibrary(olr, refs io.Reader, w io.Writer) error {
// write a bref document for each match.
func ZippyWayback(refs, cdx io.Reader, w io.Writer) error {
var (
+ ts = timeNow().UTC().Format(time.RFC3339)
enc = json.NewEncoder(xio.NewSyncWriter(w))
keyer = makeKeyFunc("\t", 1)
grouper = func(g *zipkey.Group) error {
@@ -390,7 +392,7 @@ func ZippyWayback(refs, cdx io.Reader, w io.Writer) error {
_, _ = h.Write([]byte(cdx.Line))
hashedURL := fmt.Sprintf("%x", h.Sum(nil))
bref.Key = fmt.Sprintf("web_%s_%s", ref.ReleaseIdent, hashedURL)
- bref.IndexedTs = timeNow().UTC().Format(time.RFC3339)
+ bref.IndexedTs = ts
bref.SourceReleaseIdent = ref.ReleaseIdent
bref.SourceWorkIdent = ref.WorkIdent
bref.SourceReleaseStage = ref.ReleaseStage
@@ -550,7 +552,10 @@ func deduplicateBrefs(brefs []*BiblioRef) []*BiblioRef {
// did not result in a match (determined by e.g. ref key and index). XXX: We
// may have duplicate refs as well - how to distinguish them?
func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) []*BiblioRef {
- seen := set.New() // store "key + index" of matched items
+ var (
+ seen = set.New() // store "key + index" of matched items
+ ts = timeNow().UTC().Format(time.RFC3339)
+ )
for _, m := range matched {
s := m.RefKey + fmt.Sprintf("%d", m.RefIndex)
seen.Add(s)
@@ -565,7 +570,7 @@ func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) [
}
// Assemble bref from unmatched ref.
var bref BiblioRef
- bref.IndexedTs = timeNow().UTC().Format(time.RFC3339)
+ bref.IndexedTs = ts
bref.Key = fmt.Sprintf("%s_%d", r.ReleaseIdent, r.Index)
bref.RefIndex = r.Index
bref.RefKey = r.Key
@@ -625,7 +630,10 @@ func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) [
// "skate-bref-id" post-processing here as well (but there is surely a better
// place for that).
func uniqueMatches(docs []string, stats *statsAugment) (result []*BiblioRef, err error) {
- var brefs []*BiblioRef
+ var (
+ brefs []*BiblioRef
+ ts = timeNow().UTC().Format(time.RFC3339)
+ )
for _, doc := range docs {
var bref BiblioRef
if err := json.Unmarshal([]byte(doc), &bref); err != nil {
@@ -634,7 +642,7 @@ func uniqueMatches(docs []string, stats *statsAugment) (result []*BiblioRef, err
// On-the-fly add elasticsearch "_id" and indexed timestamp, if not already set.
if bref.Key == "" && bref.SourceReleaseIdent != "" {
bref.Key = fmt.Sprintf("%s_%d", bref.SourceReleaseIdent, bref.RefIndex)
- bref.IndexedTs = timeNow().UTC().Format(time.RFC3339)
+ bref.IndexedTs = ts
}
brefs = append(brefs, &bref)
}