diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-07-13 13:24:58 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-07-13 13:24:58 +0200 |
commit | 3db70f6cd71b0bd5e5f4732b3318dcb4c26ca881 (patch) | |
tree | 06c762f38e17ffba64b3d9e508d6b0cba005dcad | |
parent | 3b919c9ce8609611acc2b7a0f9a6a08629c10cdb (diff) | |
download | refcat-3db70f6cd71b0bd5e5f4732b3318dcb4c26ca881.tar.gz refcat-3db70f6cd71b0bd5e5f4732b3318dcb4c26ca881.zip |
update docs
-rw-r--r-- | skate/reduce.go | 8 |
1 files changed, 7 insertions, 1 deletions
```diff
diff --git a/skate/reduce.go b/skate/reduce.go
index 3bb2466..253b378 100644
--- a/skate/reduce.go
+++ b/skate/reduce.go
@@ -10,6 +10,8 @@
 // the readers (and string groups): release, ref, ref-as-release, open library,
 // wikipedia, ...
 //
+// We call the biblioref schema sometimes just bref.
+//
 // TODO:
 // * [ ] pass release stage through all match types
 // * [ ] switch to faster logging, e.g. zerolog, https://github.com/rs/zerolog#benchmarks
@@ -375,11 +377,14 @@ func ZippyVerifyRefsOpenLibrary(olr, refs io.Reader, w io.Writer) error {
 // refs, which have not been matched. It also gets rid of duplicate matches.
 // Note: This operates on two streams: raw refs with about 2.5B (07/2021) and
 // matches, which will be about 1B; in essence we have to iterate through about
-// 3.5B records; small tweak here may be worthwhile.
+// 3.5B records; small tweaks here may be worthwhile.
 //
 // We can identify, which docs have been matched by checking the source ident,
 // ref index and key.
 //
+// TODO: Iff no match, and no unstructured string, pass-through structured
+// metadata fields as CSL JSON.
+//
 // TODO: This needs to be completed and made fast.
 func ZippyBrefAugment(bref, raw io.Reader, w io.Writer) error {
 	var (
@@ -502,6 +507,7 @@ func matchedRefsExtend(matched []*BiblioRef, refs []*Ref, stats *statsAugment) [
 				stats.skipMatchedRef, len(matched), r.ReleaseIdent, r.Biblio.Title, r.Key, r.Index)
 			continue
 		}
+		// Assemble bref from unmatched ref.
 		var bref BiblioRef
 		bref.IndexedTs = timeNow().UTC().Format(time.RFC3339)
 		bref.Key = fmt.Sprintf("%s_%d", r.ReleaseIdent, r.Index)
```