about summary refs log tree commit diff stats
path: root/skate
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-03-31 19:08:30 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-03-31 19:08:30 +0200
commit 9ce31a8187d336be1e11187ca7e9595dd987e3dc (patch)
tree f70e2ce9c277d3d9f5707806c06886d4f894b8c4 /skate
parent ac7548d85b414f1d34e13ef5ec46af4ad647040f (diff)
download refcat-9ce31a8187d336be1e11187ca7e9595dd987e3dc.tar.gz
refcat-9ce31a8187d336be1e11187ca7e9595dd987e3dc.zip
some streamlining
Diffstat (limited to 'skate')
-rw-r--r-- skate/verify.go 195
1 files changed, 95 insertions, 100 deletions
diff --git a/skate/verify.go b/skate/verify.go
index 1f59514..cfe31ec 100644
--- a/skate/verify.go
+++ b/skate/verify.go
@@ -229,7 +229,7 @@ func RefClusterToBiblioRef(p []byte) ([]byte, error) {
if result.Reason == ReasonDOI {
continue // Assume we already have the DOI matches.
}
- br = generateBiblioRef(re, pivot, result.Status, result.Reason, "fuzzy")
+ br = generateBiblioRef(re, pivot, result, "fuzzy")
return jsonMarshalLine(br)
default:
continue
@@ -239,7 +239,8 @@ func RefClusterToBiblioRef(p []byte) ([]byte, error) {
}
// generateBiblioRef generates a bibliographic schema document.
-func generateBiblioRef(source, target *Release, matchStatus Status, matchReason Reason, provenance string) *BiblioRef {
+func generateBiblioRef(source, target *Release,
+ matchResult MatchResult, provenance string) *BiblioRef {
var bref BiblioRef
bref.SourceReleaseIdent = source.Ident
bref.SourceWorkIdent = source.WorkID
@@ -252,56 +253,61 @@ func generateBiblioRef(source, target *Release, matchStatus Status, matchReason
bref.TargetReleaseIdent = target.Ident
bref.TargetWorkIdent = target.WorkID
bref.MatchProvenance = provenance
- bref.MatchStatus = matchStatus.Short()
- bref.MatchReason = matchReason.Short()
+ bref.MatchStatus = matchResult.Status.Short()
+ bref.MatchReason = matchResult.Reason.Short()
return &bref
}
-// ZipUnverified takes a release and refs reader (tsv, with ident, key, doc)
-// and assigns a fixed match result.
-func ZipUnverified(releases, refs io.Reader, mr MatchResult, provenance string, w io.Writer) error {
- // Define a grouper, working on one set of refs and releases with the same
- // key at a time. Here, we do verification and write out the generated
- // biblioref.
- enc := json.NewEncoder(w)
- keyer := func(s string) (string, error) {
+// makeKeyFunc returns a keyFunc selecting a column of a sep-separated line.
+// NOTE: the body hard-codes "\t" and column 2; sep and column are ignored.
+func makeKeyFunc(sep string, column int) func(string) (string, error) {
+ return func(s string) (string, error) {
if k := lineColumn(s, "\t", 2); k == "" {
return k, fmt.Errorf("cannot get key: %s", s)
} else {
return k, nil
}
}
- grouper := func(g *zipkey.Group) error {
- if len(g.G0) == 0 || len(g.G1) == 0 {
- return nil
- }
- target, err := stringToRelease(lineColumn(g.G0[0], "\t", 3))
- if err != nil {
- return err
- }
- for _, line := range g.G1 {
- ref, err := stringToRef(lineColumn(line, "\t", 3))
+}
+
+// ZipUnverified takes a release and refs reader (tsv, with ident, key, doc)
+// and assigns a fixed match result.
+func ZipUnverified(releases, refs io.Reader, mr MatchResult, provenance string, w io.Writer) error {
+ var (
+ enc = json.NewEncoder(w)
+ keyer = makeKeyFunc("\t", 2)
+ grouper = func(g *zipkey.Group) error {
+ if len(g.G0) == 0 || len(g.G1) == 0 {
+ return nil
+ }
+ target, err := stringToRelease(lineColumn(g.G0[0], "\t", 3))
if err != nil {
return err
}
- var bref BiblioRef
- bref.SourceReleaseIdent = ref.ReleaseIdent
- bref.SourceWorkIdent = ref.WorkIdent
- bref.SourceReleaseStage = ref.ReleaseStage
- bref.SourceYear = fmt.Sprintf("%d", ref.ReleaseYear)
- bref.RefIndex = ref.Index + 1 // we want 1-index (also helps with omitempty)
- bref.RefKey = ref.Key
- bref.TargetReleaseIdent = target.Ident
- bref.TargetWorkIdent = target.WorkID
- bref.MatchProvenance = provenance
- bref.MatchStatus = mr.Status.Short()
- bref.MatchReason = mr.Reason.Short()
- if err := enc.Encode(bref); err != nil {
- return err
+ for _, line := range g.G1 {
+ ref, err := stringToRef(lineColumn(line, "\t", 3))
+ if err != nil {
+ return err
+ }
+ var bref BiblioRef
+ bref.SourceReleaseIdent = ref.ReleaseIdent
+ bref.SourceWorkIdent = ref.WorkIdent
+ bref.SourceReleaseStage = ref.ReleaseStage
+ bref.SourceYear = fmt.Sprintf("%d", ref.ReleaseYear)
+ bref.RefIndex = ref.Index + 1 // we want 1-index (also helps with omitempty)
+ bref.RefKey = ref.Key
+ bref.TargetReleaseIdent = target.Ident
+ bref.TargetWorkIdent = target.WorkID
+ bref.MatchProvenance = provenance
+ bref.MatchStatus = mr.Status.Short()
+ bref.MatchReason = mr.Reason.Short()
+ if err := enc.Encode(bref); err != nil {
+ return err
+ }
}
+ return nil
}
- return nil
- }
+ )
zipper := zipkey.New(releases, refs, keyer, grouper)
return zipper.Run()
}
@@ -309,44 +315,37 @@ func ZipUnverified(releases, refs io.Reader, mr MatchResult, provenance string,
// ZipWikiUnverified takes a release and wiki reader (tsv, with ident, key, doc)
// and assigns a fixed match result.
func ZipWikiUnverified(releases, wiki io.Reader, mr MatchResult, provenance string, w io.Writer) error {
- // Define a grouper, working on one set of refs and releases with the same
- // key at a time. Here, we do verification and write out the generated
- // biblioref.
- enc := json.NewEncoder(w)
- keyer := func(s string) (string, error) {
- if k := lineColumn(s, "\t", 2); k == "" {
- return k, fmt.Errorf("cannot get key: %s", s)
- } else {
- return k, nil
- }
- }
- grouper := func(g *zipkey.Group) error {
- if len(g.G0) == 0 || len(g.G1) == 0 {
- return nil
- }
- target, err := stringToRelease(lineColumn(g.G0[0], "\t", 3))
- if err != nil {
- return err
- }
- for _, line := range g.G1 {
- wiki, err := stringToWiki(lineColumn(line, "\t", 3))
+ var (
+ enc = json.NewEncoder(w)
+ keyer = makeKeyFunc("\t", 2)
+ grouper = func(g *zipkey.Group) error {
+ if len(g.G0) == 0 || len(g.G1) == 0 {
+ return nil
+ }
+ target, err := stringToRelease(lineColumn(g.G0[0], "\t", 3))
if err != nil {
return err
}
- var bref BiblioRef
- bref.Key = fmt.Sprintf("%s_%s", slugifyString(wiki.PageTitle), target.Ident) // XXX: what should we use?
- bref.SourceWikipediaArticle = wiki.PageTitle
- bref.TargetReleaseIdent = target.Ident
- bref.TargetWorkIdent = target.WorkID
- bref.MatchProvenance = provenance
- bref.MatchStatus = mr.Status.Short()
- bref.MatchReason = mr.Reason.Short()
- if err := enc.Encode(bref); err != nil {
- return err
+ for _, line := range g.G1 {
+ wiki, err := stringToWiki(lineColumn(line, "\t", 3))
+ if err != nil {
+ return err
+ }
+ var bref BiblioRef
+ bref.Key = fmt.Sprintf("%s_%s", slugifyString(wiki.PageTitle), target.Ident) // XXX: what should we use?
+ bref.SourceWikipediaArticle = wiki.PageTitle
+ bref.TargetReleaseIdent = target.Ident
+ bref.TargetWorkIdent = target.WorkID
+ bref.MatchProvenance = provenance
+ bref.MatchStatus = mr.Status.Short()
+ bref.MatchReason = mr.Reason.Short()
+ if err := enc.Encode(bref); err != nil {
+ return err
+ }
}
+ return nil
}
- return nil
- }
+ )
zipper := zipkey.New(releases, wiki, keyer, grouper)
return zipper.Run()
}
@@ -357,41 +356,37 @@ func ZipVerifyRefs(releases, refs io.Reader, w io.Writer) error {
// Define a grouper, working on one set of refs and releases with the same
// key at a time. Here, we do verification and write out the generated
// biblioref.
- enc := json.NewEncoder(w)
- keyer := func(s string) (string, error) {
- if k := lineColumn(s, "\t", 2); k == "" {
- return k, fmt.Errorf("cannot get key: %s", s)
- } else {
- return k, nil
- }
- }
- grouper := func(g *zipkey.Group) error {
- if len(g.G0) == 0 || len(g.G1) == 0 {
- return nil
- }
- pivot, err := stringToRelease(lineColumn(g.G0[0], "\t", 3))
- if err != nil {
- return err
- }
- for _, line := range g.G1 {
- re, err := stringToRelease(lineColumn(line, "\t", 3))
+ var (
+ enc = json.NewEncoder(w)
+ keyer = makeKeyFunc("\t", 2)
+ grouper = func(g *zipkey.Group) error {
+ if len(g.G0) == 0 || len(g.G1) == 0 {
+ return nil
+ }
+ pivot, err := stringToRelease(lineColumn(g.G0[0], "\t", 3))
if err != nil {
return err
}
- result := Verify(pivot, re, 5)
- switch result.Status {
- case StatusExact, StatusStrong:
- if result.Reason == ReasonDOI {
- continue
- }
- br := generateBiblioRef(re, pivot, result.Status, result.Reason, "fuzzy")
- if err := enc.Encode(br); err != nil {
+ for _, line := range g.G1 {
+ re, err := stringToRelease(lineColumn(line, "\t", 3))
+ if err != nil {
return err
}
+ result := Verify(pivot, re, 5)
+ switch result.Status {
+ case StatusExact, StatusStrong:
+ if result.Reason == ReasonDOI {
+ continue
+ }
+ br := generateBiblioRef(re, pivot, result, "fuzzy")
+ if err := enc.Encode(br); err != nil {
+ return err
+ }
+ }
}
+ return nil
}
- return nil
- }
+ )
zipper := zipkey.New(releases, refs, keyer, grouper)
return zipper.Run()
}
@@ -399,7 +394,7 @@ func ZipVerifyRefs(releases, refs io.Reader, w io.Writer) error {
// lineColumn returns a specific column (1-indexed, like cut) from a tabular
// file, returns empty string if column is invalid.
func lineColumn(line, sep string, column int) string {
- var parts = strings.Split(strings.TrimSpace(line), sep)
+ parts := strings.Split(strings.TrimSpace(line), sep)
if len(parts) < column {
return ""
} else {