about summary refs log tree commit diff stats
path: root/skate/zippy.go
diff options
context:
space:
mode:
Diffstat (limited to 'skate/zippy.go')
-rw-r--r-- skate/zippy.go 171
1 files changed, 171 insertions, 0 deletions
diff --git a/skate/zippy.go b/skate/zippy.go
new file mode 100644
index 0000000..76f576d
--- /dev/null
+++ b/skate/zippy.go
@@ -0,0 +1,171 @@
+package skate
+
+import (
+ "fmt"
+ "io"
+ "strings"
+
+ "git.archive.org/martin/cgraph/skate/zipkey"
+ json "github.com/segmentio/encoding/json"
+)
+
+// This file contains the two-stream (zippy) matchers.
+
+// ZippyFixed joins a release stream and a refs stream on a shared key and
+// writes one JSON encoded BiblioRef per ref line to w. Both inputs are TSV
+// with three columns (ident, key, doc); the key is taken from column 2 and
+// the JSON document from column 3.
+//
+// Every emitted document gets the same status and reason, taken from mr, and
+// the given provenance. Groups that lack lines on either side are skipped.
+// The first decoding or encoding error stops processing and is returned.
+func ZippyFixed(releases, refs io.Reader, mr MatchResult, provenance string, w io.Writer) error {
+	enc := json.NewEncoder(w)
+	// emit handles all lines sharing one key.
+	emit := func(g *zipkey.Group) error {
+		if len(g.G0) < 1 || len(g.G1) < 1 {
+			return nil
+		}
+		// Only the first release line of the group serves as the target.
+		target, err := stringToRelease(lineColumn(g.G0[0], "\t", 3))
+		if err != nil {
+			return err
+		}
+		for _, refLine := range g.G1 {
+			ref, err := stringToRef(lineColumn(refLine, "\t", 3))
+			if err != nil {
+				return err
+			}
+			var bref BiblioRef
+			// Source side, copied from the reference.
+			bref.SourceReleaseIdent = ref.ReleaseIdent
+			bref.SourceWorkIdent = ref.WorkIdent
+			bref.SourceReleaseStage = ref.ReleaseStage
+			bref.SourceYear = fmt.Sprintf("%d", ref.ReleaseYear)
+			// Stored 1-based, so that the first reference does not end up
+			// as a zero value (which omitempty would drop).
+			bref.RefIndex = ref.Index + 1
+			bref.RefKey = ref.Key
+			// Target side, copied from the release.
+			bref.TargetReleaseIdent = target.Ident
+			bref.TargetWorkIdent = target.WorkID
+			// Fixed match information supplied by the caller.
+			bref.MatchProvenance = provenance
+			bref.MatchStatus = mr.Status.Short()
+			bref.MatchReason = mr.Reason.Short()
+			if err := enc.Encode(bref); err != nil {
+				return err
+			}
+		}
+		return nil
+	}
+	return zipkey.New(releases, refs, makeKeyFunc("\t", 2), emit).Run()
+}
+
+// ZippyFixedWiki joins a release stream and a wikipedia citation stream on a
+// shared key and writes one JSON encoded BiblioRef per wiki line to w. Both
+// inputs are TSV with three columns (ident, key, doc); the key is taken from
+// column 2 and the JSON document from column 3.
+//
+// Every emitted document gets the same status and reason, taken from mr, and
+// the given provenance. Groups that lack lines on either side are skipped.
+// The first decoding or encoding error stops processing and is returned.
+func ZippyFixedWiki(releases, wiki io.Reader, mr MatchResult, provenance string, w io.Writer) error {
+	enc := json.NewEncoder(w)
+	// emit handles all lines sharing one key.
+	emit := func(g *zipkey.Group) error {
+		if len(g.G0) < 1 || len(g.G1) < 1 {
+			return nil
+		}
+		// Only the first release line of the group serves as the target.
+		target, err := stringToRelease(lineColumn(g.G0[0], "\t", 3))
+		if err != nil {
+			return err
+		}
+		for _, pageLine := range g.G1 {
+			page, err := stringToWiki(lineColumn(pageLine, "\t", 3))
+			if err != nil {
+				return err
+			}
+			var bref BiblioRef
+			// XXX: open question which key to use; for now the slugified
+			// page title joined with the target release ident.
+			bref.Key = fmt.Sprintf("%s_%s", slugifyString(page.PageTitle), target.Ident)
+			bref.SourceWikipediaArticle = page.PageTitle
+			bref.TargetReleaseIdent = target.Ident
+			bref.TargetWorkIdent = target.WorkID
+			// Fixed match information supplied by the caller.
+			bref.MatchProvenance = provenance
+			bref.MatchStatus = mr.Status.Short()
+			bref.MatchReason = mr.Reason.Short()
+			if err := enc.Encode(bref); err != nil {
+				return err
+			}
+		}
+		return nil
+	}
+	return zipkey.New(releases, wiki, makeKeyFunc("\t", 2), emit).Run()
+}
+
+// ZippyVerifyRefs joins a release stream and a refs stream on a shared key,
+// verifies each ref against the release and writes a JSON encoded biblioref
+// to w for every sufficiently good match. Both inputs are TSV with three
+// columns (ident, key, doc); the key is taken from column 2 and the JSON
+// document from column 3. Note that the documents of the refs stream are
+// decoded as releases here.
+//
+// Only results with status exact or strong are written, and of those the
+// ones with reason DOI are left out. The first decoding or encoding error
+// stops processing and is returned.
+func ZippyVerifyRefs(releases, refs io.Reader, w io.Writer) error {
+	enc := json.NewEncoder(w)
+	// verifyGroup works on the releases and refs sharing one key: it runs the
+	// verification and writes out the generated bibliorefs.
+	verifyGroup := func(g *zipkey.Group) error {
+		if len(g.G0) < 1 || len(g.G1) < 1 {
+			return nil
+		}
+		// Only the first release line of the group is used as the pivot.
+		pivot, err := stringToRelease(lineColumn(g.G0[0], "\t", 3))
+		if err != nil {
+			return err
+		}
+		for _, refLine := range g.G1 {
+			candidate, err := stringToRelease(lineColumn(refLine, "\t", 3))
+			if err != nil {
+				return err
+			}
+			result := Verify(pivot, candidate)
+			if result.Status != StatusExact && result.Status != StatusStrong {
+				continue
+			}
+			// NOTE(review): DOI matches are skipped, presumably because they
+			// are produced by a different stage -- confirm.
+			if result.Reason == ReasonDOI {
+				continue
+			}
+			br := generateBiblioRef(candidate, pivot, result, "fuzzy")
+			if err := enc.Encode(br); err != nil {
+				return err
+			}
+		}
+		return nil
+	}
+	return zipkey.New(releases, refs, makeKeyFunc("\t", 2), verifyGroup).Run()
+}
+
+// makeKeyFunc creates a function that can be used as keyFunc. The returned
+// function splits a line on sep and yields the value of the given column
+// (1-indexed, like cut). It returns an error if that column is missing or
+// empty, since such a line cannot be assigned to any group.
+func makeKeyFunc(sep string, column int) func(string) (string, error) {
+	return func(s string) (string, error) {
+		// Use the separator and column the caller asked for; these used to
+		// be ignored in favor of a fixed tab and second column.
+		k := lineColumn(s, sep, column)
+		if k == "" {
+			return "", fmt.Errorf("cannot get key: %s", s)
+		}
+		return k, nil
+	}
+}
+
+// lineColumn returns a specific column (1-indexed, like cut) from a line of
+// a tabular file, using sep as the column separator. It returns the empty
+// string if the column is invalid, that is smaller than one or larger than
+// the number of columns in the line.
+//
+// Surrounding whitespace is trimmed from the line before splitting. With a
+// whitespace separator such as tab this also removes empty leading or
+// trailing columns.
+func lineColumn(line, sep string, column int) string {
+	// Guard against zero and negative columns, which would otherwise index
+	// out of range below.
+	if column < 1 {
+		return ""
+	}
+	parts := strings.Split(strings.TrimSpace(line), sep)
+	if column > len(parts) {
+		return ""
+	}
+	return parts[column-1]
+}
+
+// stringToRelease decodes a JSON document into a Release. On failure, the
+// decoding error is returned along with whatever has been decoded so far.
+func stringToRelease(s string) (r *Release, err error) {
+	if err = json.Unmarshal([]byte(s), &r); err != nil {
+		return r, err
+	}
+	return r, nil
+}
+
+// stringToRef decodes a JSON document into a Ref. On failure, the decoding
+// error is returned along with whatever has been decoded so far.
+func stringToRef(s string) (r *Ref, err error) {
+	if err = json.Unmarshal([]byte(s), &r); err != nil {
+		return r, err
+	}
+	return r, nil
+}
+
+// stringToWiki decodes a JSON document into a MinimalCitations value. On
+// failure, the decoding error is returned along with whatever has been
+// decoded so far.
+func stringToWiki(s string) (r *MinimalCitations, err error) {
+	if err = json.Unmarshal([]byte(s), &r); err != nil {
+		return r, err
+	}
+	return r, nil
+}