diff options
Diffstat (limited to 'skate/reduce.go')
-rw-r--r-- | skate/reduce.go | 8 |
1 files changed, 7 insertions, 1 deletions
diff --git a/skate/reduce.go b/skate/reduce.go
index 3bb2466..253b378 100644
--- a/skate/reduce.go
+++ b/skate/reduce.go
@@ -10,6 +10,8 @@
 // the readers (and string groups): release, ref, ref-as-release, open library,
 // wikipedia, ...
 //
+// We call the biblioref schema sometimes just bref.
+//
 // TODO:
 // * [ ] pass release stage through all match types
 // * [ ] switch to faster logging, e.g. zerolog, https://github.com/rs/zerolog#benchmarks
@@ -375,11 +377,14 @@ func ZippyVerifyRefsOpenLibrary(olr, refs io.Reader, w io.Writer) error {
 // refs, which have not been matched. It also gets rid of duplicate matches.
 // Note: This operates on two streams: raw refs with about 2.5B (07/2021) and
 // matches, which will be about 1B; in essence we have to iterate through about
-// 3.5B records; small tweak here may be worthwhile.
+// 3.5B records; small tweaks here may be worthwhile.
 //
 // We can identify, which docs have been matched by checking the source ident,
 // ref index and key.
 //
+// TODO: Iff no match, and no unstructured string, pass-through structured
+// metadata fields as CSL JSON.
+//
 // TODO: This needs to be completed and made fast.
 func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error {
 	var (
@@ -502,6 +507,7 @@ func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) [
 				stats.skipMatchedRef, len(matched), r.ReleaseIdent, r.Biblio.Title, r.Key, r.Index)
 			continue
 		}
+		// Assemble bref from unmatched ref.
 		var bref BiblioRef
 		bref.IndexedTs = timeNow().UTC().Format(time.RFC3339)
 		bref.Key = fmt.Sprintf("%s_%d", r.ReleaseIdent, r.Index)