From 98476056aa3666a6ed2499aee1da552c041c8564 Mon Sep 17 00:00:00 2001
From: Martin Czygan
Date: Fri, 4 Jun 2021 17:27:15 +0200
Subject: carry ref information over into release.extra.skate.ref

we need index, source, etc. in final assembly
---
 skate/cmd/skate-reduce/main.go | 10 +++++++--
 skate/schema.go                |  8 ++++++-
 skate/schema_test.go           |  1 +
 skate/zippy.go                 | 51 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 67 insertions(+), 3 deletions(-)

diff --git a/skate/cmd/skate-reduce/main.go b/skate/cmd/skate-reduce/main.go
index 605c9b6..c1d8fba 100644
--- a/skate/cmd/skate-reduce/main.go
+++ b/skate/cmd/skate-reduce/main.go
@@ -36,7 +36,7 @@
 // | $ skate-reduce -m wiki -L a.ndj -W b.ndj
 // |
 // |
-// * oledt | zippy mode for releases and OL inputs, dumps table.
+// * oledt | zippy mode for releases and OL inputs, dumps table for debugging.
 // |
 // | $ skate-reduce -m oled -F a.ndj -O b.ndj
 // |
@@ -151,7 +151,13 @@ func main() {
 			log.Fatal(err)
 		}
 	case "oled":
-		log.Fatalf("not yet implemented")
+		o, f, err := xio.OpenTwo(*openLibrary, *refs)
+		if err != nil {
+			log.Fatal(err)
+		}
+		if err := skate.ZippyVerifyRefsOpenLibrary(o, f, bw); err != nil {
+			log.Fatal(err)
+		}
 	default:
 		log.Fatalf("invalid mode")
 	}
diff --git a/skate/schema.go b/skate/schema.go
index e6c33ec..5696844 100644
--- a/skate/schema.go
+++ b/skate/schema.go
@@ -47,6 +47,7 @@ type Ref struct {
 	Biblio Biblio `json:"biblio"`
 	Index int64 `json:"index,omitempty"`
 	Key string `json:"key,omitempty"`
+	Locator string `json:"locator,omitempty"`
 	RefSource string `json:"ref_source,omitempty"`
 	ReleaseYear int `json:"release_year,omitempty"`
 	ReleaseIdent string `json:"release_ident,omitempty"`
@@ -110,6 +111,11 @@ func RefToRelease(ref *Ref) (*Release, error) {
 	if strings.Contains(strings.ToLower(ref.Biblio.Unstructured), "isbn") {
 		release.ExtIDs.ISBN = ParseIsbn(ref.Biblio.Unstructured)
 	}
+	// Extra info stashed into extra.skate.ref.
+	release.Extra.Skate.Ref.Index = ref.Index
+	release.Extra.Skate.Ref.Key = ref.Key
+	release.Extra.Skate.Ref.Locator = ref.Locator
+	release.Extra.Skate.Ref.Source = ref.RefSource
 	return &release, nil
 }
 
@@ -238,6 +244,7 @@ type Release struct {
 				Index int64 `json:"index,omitempty"`
 				Key string `json:"key,omitempty"`
 				Locator string `json:"locator,omitempty"`
+				Source string `json:"source,omitempty"`
 			} `json:"ref,omitempty"`
 			ResearchGate struct {
 				URL string `json:"url,omitempty"`
@@ -618,7 +625,6 @@ func OpenLibraryEditionToRelease(v *OpenLibraryEdition, authorMap map[string]str
 	// "marc:marc_loc_2016/BooksAll.2016.part25.utf8:104915596:921"
 	// ]
 	release.Extra.OpenLibrary.SourceRecords = v.SourceRecords
-
 	for _, l := range openLibraryDateLayouts {
 		t, err := time.Parse(l, v.PublishDate)
 		if err != nil {
diff --git a/skate/schema_test.go b/skate/schema_test.go
index 57c4700..fe59996 100644
--- a/skate/schema_test.go
+++ b/skate/schema_test.go
@@ -103,6 +103,7 @@ func TestOpenLibraryToRelease(t *testing.T) {
 							Index int64 `json:"index,omitempty"`
 							Key string `json:"key,omitempty"`
 							Locator string `json:"locator,omitempty"`
+							Source string `json:"source,omitempty"`
 						} `json:"ref,omitempty"`
 						ResearchGate struct {
 							URL string `json:"url,omitempty"`
diff --git a/skate/zippy.go b/skate/zippy.go
index 6034351..4d3aa04 100644
--- a/skate/zippy.go
+++ b/skate/zippy.go
@@ -200,6 +200,57 @@ func ZippyVerifyRefsOpenLibraryTable(olr, refs io.Reader, w io.Writer) error {
 	return zipper.Run()
 }
 
+// ZippyVerifyRefsOpenLibrary takes OL editions (as release) and refs (as
+// release) and emits one BiblioRef JSON document per exact or strong match.
+func ZippyVerifyRefsOpenLibrary(olr, refs io.Reader, w io.Writer) error {
+	var (
+		enc     = json.NewEncoder(w)
+		keyer   = makeKeyFunc("\t", 1)
+		grouper = func(g *zipkey.Group) error {
+			var (
+				ref, pivot *Release // ref (reference), pivot (open library)
+				err        error
+			)
+			if len(g.G0) == 0 || len(g.G1) == 0 {
+				return nil
+			}
+			// We take a single edition from OL.
+			if pivot, err = stringToRelease(Cut(g.G0[0], 2)); err != nil {
+				return err
+			}
+			for _, line := range g.G1 {
+				if ref, err = stringToRelease(Cut(line, 2)); err != nil {
+					return err
+				}
+				// The refs have a container name, but not a title, but here we
+				// compare against titles from open library.
+				result := Verify(pivot, ref)
+				switch result.Status {
+				case StatusExact, StatusStrong:
+					var bref BiblioRef
+					bref.SourceReleaseIdent = ref.Ident
+					bref.SourceWorkIdent = ref.WorkID
+					bref.SourceReleaseStage = ref.ReleaseStage
+					bref.SourceYear = fmt.Sprintf("%d", ref.ReleaseYear())
+					bref.RefIndex = ref.Extra.Skate.Ref.Index + 1 // we want 1-index (also helps with omitempty)
+					bref.RefKey = ref.Extra.Skate.Ref.Key
+					bref.TargetOpenLibraryWork = pivot.WorkID
+					bref.MatchProvenance = ref.Extra.Skate.Ref.Source
+					bref.MatchStatus = result.Status.Short()
+					bref.MatchReason = result.Reason.Short()
+					if err := enc.Encode(bref); err != nil {
+						return err
+					}
+				default:
+				}
+			}
+			return nil
+		}
+	)
+	zipper := zipkey.New(olr, refs, keyer, grouper)
+	return zipper.Run()
+}
+
 // Cut returns a specific column (1-indexed, like CutSep) from a tabular
 // file, returns empty string if column is invalid.
 func Cut(line string, column int) string {
-- 
cgit v1.2.3