about | summary | refs | log | tree | commit | diff | stats
path: root/skate
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-07-10 01:46:59 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-07-10 01:46:59 +0200
commit 738c7e99035b0548f6a8ea3082f03eeb1e36dc98 (patch)
tree b2c79d08c0e6a6c3b4152d64566970f0fc09fc69 /skate
parent 9095e6089b9f679c4d84be0613224b4edd02f0f4 (diff)
download refcat-738c7e99035b0548f6a8ea3082f03eeb1e36dc98.tar.gz
refcat-738c7e99035b0548f6a8ea3082f03eeb1e36dc98.zip
reduce: open library id tweaks
Diffstat (limited to 'skate')
-rw-r--r-- skate/reduce.go 32
1 files changed, 27 insertions, 5 deletions
diff --git a/skate/reduce.go b/skate/reduce.go
index 823255c..2a8ac1c 100644
--- a/skate/reduce.go
+++ b/skate/reduce.go
@@ -13,7 +13,7 @@
// TODO:
// * [ ] pass release stage through all match types
// * [ ] switch to faster logging, e.g. zerolog, https://github.com/rs/zerolog#benchmarks
-// * [ ] batch, parallelize
+// * [x] batch, parallelize
// * [ ] unify flags to "-a", "-b"
package skate
@@ -225,6 +225,7 @@ func ZippyVerifyRefs(releases, refs io.Reader, w io.Writer) error {
if result.Reason == ReasonDOI {
continue
}
+ // XXX: what should be the provenance?
br := generateBiblioRef(re, pivot, result, "fuzzy")
if err := enc.Encode(br); err != nil {
return err
@@ -242,7 +243,8 @@ func ZippyVerifyRefs(releases, refs io.Reader, w io.Writer) error {
}
// ZippyVerifyRefsOpenLibraryTable takes OL editions (as release) and refs (as
-// release) and emits a match table for manual inspection.
+// release) and emits a match table for manual inspection. This is mainly for
+// debugging.
func ZippyVerifyRefsOpenLibraryTable(olr, refs io.Reader, w io.Writer) error {
var (
keyer = makeKeyFunc("\t", 1)
@@ -287,8 +289,24 @@ func ZippyVerifyRefsOpenLibraryTable(olr, refs io.Reader, w io.Writer) error {
// release) and writes biblioref.
func ZippyVerifyRefsOpenLibrary(olr, refs io.Reader, w io.Writer) error {
var (
- enc = json.NewEncoder(xio.NewSingleWriter(w))
- keyer = makeKeyFunc("\t", 1)
+ enc = json.NewEncoder(xio.NewSingleWriter(w))
+ keyer = makeKeyFunc("\t", 1)
+ cleanIdentifier = func(s string) string {
+ // Turn ids like /books/OL31189321M into OL31189321M
+ s = strings.TrimSpace(s)
+ if len(s) == 0 {
+ return ""
+ }
+ var (
+ parts = strings.Split(s, "/")
+ last = parts[len(parts)-1]
+ )
+ if strings.HasPrefix(last, "OL") {
+ return last
+ }
+ log.Printf("warning: unexpected OL id: %s", s)
+ return ""
+ }
grouper = func(g *zipkey.Group) error {
var (
ref, pivot *Release // ref (reference), pivot (open library)
@@ -311,6 +329,10 @@ func ZippyVerifyRefsOpenLibrary(olr, refs io.Reader, w io.Writer) error {
result := Verify(pivot, ref)
switch result.Status {
case StatusExact, StatusStrong:
+ openLibraryWorkID := cleanIdentifier(pivot.WorkID)
+ if openLibraryWorkID == "" {
+ continue
+ }
var bref BiblioRef
bref.SourceReleaseIdent = ref.Ident
bref.SourceWorkIdent = ref.WorkID
@@ -318,7 +340,7 @@ func ZippyVerifyRefsOpenLibrary(olr, refs io.Reader, w io.Writer) error {
bref.SourceYear = fmt.Sprintf("%d", ref.ReleaseYear())
bref.RefIndex = ref.Extra.Skate.Ref.Index + 1 // we want 1-index (also helps with omitempty)
bref.RefKey = ref.Extra.Skate.Ref.Key
- bref.TargetOpenLibraryWork = pivot.WorkID
+ bref.TargetOpenLibraryWork = openLibraryWorkID
bref.MatchProvenance = ref.Extra.Skate.Ref.Source
bref.MatchStatus = result.Status.Short()
bref.MatchReason = result.Reason.Short()