aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--skate/reduce.go8
1 files changed, 7 insertions, 1 deletions
diff --git a/skate/reduce.go b/skate/reduce.go
index 3bb2466..253b378 100644
--- a/skate/reduce.go
+++ b/skate/reduce.go
@@ -10,6 +10,8 @@
// the readers (and string groups): release, ref, ref-as-release, open library,
// wikipedia, ...
//
+// We sometimes refer to the biblioref schema as just bref.
+//
// TODO:
// * [ ] pass release stage through all match types
// * [ ] switch to faster logging, e.g. zerolog, https://github.com/rs/zerolog#benchmarks
@@ -375,11 +377,14 @@ func ZippyVerifyRefsOpenLibrary(olr, refs io.Reader, w io.Writer) error {
// refs, which have not been matched. It also gets rid of duplicate matches.
// Note: This operates on two streams: raw refs with about 2.5B (07/2021) and
// matches, which will be about 1B; in essence we have to iterate through about
-// 3.5B records; small tweak here may be worthwhile.
+// 3.5B records; small tweaks here may be worthwhile.
//
// We can identify, which docs have been matched by checking the source ident,
// ref index and key.
//
+// TODO: Iff there is no match and no unstructured string, pass through the
+// structured metadata fields as CSL JSON.
+//
// TODO: This needs to be completed and made fast.
func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error {
var (
@@ -502,6 +507,7 @@ func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) [
stats.skipMatchedRef, len(matched), r.ReleaseIdent, r.Biblio.Title, r.Key, r.Index)
continue
}
+ // Assemble bref from unmatched ref.
var bref BiblioRef
bref.IndexedTs = timeNow().UTC().Format(time.RFC3339)
bref.Key = fmt.Sprintf("%s_%d", r.ReleaseIdent, r.Index)