aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-06-04 17:27:15 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-06-04 17:27:15 +0200
commit 98476056aa3666a6ed2499aee1da552c041c8564 (patch)
tree 1019e1265294b1a2e0b8f401296ae9c2b0020dd5
parent fe020f887fe3e4153bd6e234371142bf691f8509 (diff)
download refcat-98476056aa3666a6ed2499aee1da552c041c8564.tar.gz
refcat-98476056aa3666a6ed2499aee1da552c041c8564.zip
carry ref information over into release.extra.skate.ref
we need index, source, etc. in final assembly
-rw-r--r-- skate/cmd/skate-reduce/main.go 10
-rw-r--r-- skate/schema.go 8
-rw-r--r-- skate/schema_test.go 1
-rw-r--r-- skate/zippy.go 51
4 files changed, 67 insertions, 3 deletions
diff --git a/skate/cmd/skate-reduce/main.go b/skate/cmd/skate-reduce/main.go
index 605c9b6..c1d8fba 100644
--- a/skate/cmd/skate-reduce/main.go
+++ b/skate/cmd/skate-reduce/main.go
@@ -36,7 +36,7 @@
// | $ skate-reduce -m wiki -L a.ndj -W b.ndj
// |
// |
-// * oledt | zippy mode for releases and OL inputs, dumps table.
+// * oledt | zippy mode for releases and OL inputs, dumps table for debugging.
// |
// | $ skate-reduce -m oledt -F a.ndj -O b.ndj
// |
@@ -151,7 +151,13 @@ func main() {
log.Fatal(err)
}
case "oled":
- log.Fatalf("not yet implemented")
+ o, f, err := xio.OpenTwo(*openLibrary, *refs)
+ if err != nil {
+ log.Fatal(err)
+ }
+ if err := skate.ZippyVerifyRefsOpenLibrary(o, f, bw); err != nil {
+ log.Fatal(err)
+ }
default:
log.Fatalf("invalid mode")
}
diff --git a/skate/schema.go b/skate/schema.go
index e6c33ec..5696844 100644
--- a/skate/schema.go
+++ b/skate/schema.go
@@ -47,6 +47,7 @@ type Ref struct {
Biblio Biblio `json:"biblio"`
Index int64 `json:"index,omitempty"`
Key string `json:"key,omitempty"`
+ Locator string `json:"locator,omitempty"`
RefSource string `json:"ref_source,omitempty"`
ReleaseYear int `json:"release_year,omitempty"`
ReleaseIdent string `json:"release_ident,omitempty"`
@@ -110,6 +111,11 @@ func RefToRelease(ref *Ref) (*Release, error) {
if strings.Contains(strings.ToLower(ref.Biblio.Unstructured), "isbn") {
release.ExtIDs.ISBN = ParseIsbn(ref.Biblio.Unstructured)
}
+ // Extra info stashed into extra.skate.ref.
+ release.Extra.Skate.Ref.Index = ref.Index
+ release.Extra.Skate.Ref.Key = ref.Key
+ release.Extra.Skate.Ref.Locator = ref.Locator
+ release.Extra.Skate.Ref.Source = ref.RefSource
return &release, nil
}
@@ -238,6 +244,7 @@ type Release struct {
Index int64 `json:"index,omitempty"`
Key string `json:"key,omitempty"`
Locator string `json:"locator,omitempty"`
+ Source string `json:"source,omitempty"`
} `json:"ref,omitempty"`
ResearchGate struct {
URL string `json:"url,omitempty"`
@@ -618,7 +625,6 @@ func OpenLibraryEditionToRelease(v *OpenLibraryEdition, authorMap map[string]str
// "marc:marc_loc_2016/BooksAll.2016.part25.utf8:104915596:921"
// ]
release.Extra.OpenLibrary.SourceRecords = v.SourceRecords
-
for _, l := range openLibraryDateLayouts {
t, err := time.Parse(l, v.PublishDate)
if err != nil {
diff --git a/skate/schema_test.go b/skate/schema_test.go
index 57c4700..fe59996 100644
--- a/skate/schema_test.go
+++ b/skate/schema_test.go
@@ -103,6 +103,7 @@ func TestOpenLibraryToRelease(t *testing.T) {
Index int64 `json:"index,omitempty"`
Key string `json:"key,omitempty"`
Locator string `json:"locator,omitempty"`
+ Source string `json:"source,omitempty"`
} `json:"ref,omitempty"`
ResearchGate struct {
URL string `json:"url,omitempty"`
diff --git a/skate/zippy.go b/skate/zippy.go
index 6034351..4d3aa04 100644
--- a/skate/zippy.go
+++ b/skate/zippy.go
@@ -200,6 +200,57 @@ func ZippyVerifyRefsOpenLibraryTable(olr, refs io.Reader, w io.Writer) error {
return zipper.Run()
}
+// ZippyVerifyRefsOpenLibrary takes OL editions (as release) and refs (as
+// release) and writes one JSON BiblioRef to w per exact or strong match.
+func ZippyVerifyRefsOpenLibrary(olr, refs io.Reader, w io.Writer) error {
+ var (
+ enc = json.NewEncoder(w)
+ keyer = makeKeyFunc("\t", 1) // presumably keys on the first tab-separated column; confirm in makeKeyFunc
+ grouper = func(g *zipkey.Group) error {
+ var (
+ ref, pivot *Release // ref (reference), pivot (open library)
+ err error
+ )
+ if len(g.G0) == 0 || len(g.G1) == 0 { // nothing to compare unless both groups are non-empty
+ return nil
+ }
+ // We take a single edition from OL: first line of the group, column 2.
+ if pivot, err = stringToRelease(Cut(g.G0[0], 2)); err != nil {
+ return err
+ }
+ for _, line := range g.G1 {
+ if ref, err = stringToRelease(Cut(line, 2)); err != nil {
+ return err
+ }
+ // The refs have a container name, but not a title, but here we
+ // compare against titles from open library.
+ result := Verify(pivot, ref)
+ switch result.Status {
+ case StatusExact, StatusStrong:
+ var bref BiblioRef
+ bref.SourceReleaseIdent = ref.Ident
+ bref.SourceWorkIdent = ref.WorkID
+ bref.SourceReleaseStage = ref.ReleaseStage
+ bref.SourceYear = fmt.Sprintf("%d", ref.ReleaseYear())
+ bref.RefIndex = ref.Extra.Skate.Ref.Index + 1 // we want 1-index (also helps with omitempty)
+ bref.RefKey = ref.Extra.Skate.Ref.Key
+ bref.TargetOpenLibraryWork = pivot.WorkID
+ bref.MatchProvenance = ref.Extra.Skate.Ref.Source
+ bref.MatchStatus = result.Status.Short()
+ bref.MatchReason = result.Reason.Short()
+ if err := enc.Encode(bref); err != nil {
+ return err
+ }
+ default: // any other status is not emitted
+ }
+ }
+ return nil
+ }
+ )
+ zipper := zipkey.New(olr, refs, keyer, grouper)
+ return zipper.Run()
+}
+
// Cut returns a specific column (1-indexed, like CutSep) from a tabular
// file, returns empty string if column is invalid.
func Cut(line string, column int) string {