diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-03-31 19:08:30 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-03-31 19:08:30 +0200 |
commit | 9ce31a8187d336be1e11187ca7e9595dd987e3dc (patch) | |
tree | f70e2ce9c277d3d9f5707806c06886d4f894b8c4 | |
parent | ac7548d85b414f1d34e13ef5ec46af4ad647040f (diff) | |
download | refcat-9ce31a8187d336be1e11187ca7e9595dd987e3dc.tar.gz refcat-9ce31a8187d336be1e11187ca7e9595dd987e3dc.zip |
some streamlining
-rw-r--r-- | skate/verify.go | 195 |
1 file changed, 95 insertions, 100 deletions
diff --git a/skate/verify.go b/skate/verify.go index 1f59514..cfe31ec 100644 --- a/skate/verify.go +++ b/skate/verify.go @@ -229,7 +229,7 @@ func RefClusterToBiblioRef(p []byte) ([]byte, error) { if result.Reason == ReasonDOI { continue // Assume we already have the DOI matches. } - br = generateBiblioRef(re, pivot, result.Status, result.Reason, "fuzzy") + br = generateBiblioRef(re, pivot, result, "fuzzy") return jsonMarshalLine(br) default: continue @@ -239,7 +239,8 @@ func RefClusterToBiblioRef(p []byte) ([]byte, error) { } // generateBiblioRef generates a bibliographic schema document. -func generateBiblioRef(source, target *Release, matchStatus Status, matchReason Reason, provenance string) *BiblioRef { +func generateBiblioRef(source, target *Release, + matchResult MatchResult, provenance string) *BiblioRef { var bref BiblioRef bref.SourceReleaseIdent = source.Ident bref.SourceWorkIdent = source.WorkID @@ -252,56 +253,61 @@ func generateBiblioRef(source, target *Release, matchStatus Status, matchReason bref.TargetReleaseIdent = target.Ident bref.TargetWorkIdent = target.WorkID bref.MatchProvenance = provenance - bref.MatchStatus = matchStatus.Short() - bref.MatchReason = matchReason.Short() + bref.MatchStatus = matchResult.Status.Short() + bref.MatchReason = matchResult.Reason.Short() return &bref } -// ZipUnverified takes a release and refs reader (tsv, with ident, key, doc) -// and assigns a fixed match result. -func ZipUnverified(releases, refs io.Reader, mr MatchResult, provenance string, w io.Writer) error { - // Define a grouper, working on one set of refs and releases with the same - // key at a time. Here, we do verification and write out the generated - // biblioref. - enc := json.NewEncoder(w) - keyer := func(s string) (string, error) { +// makeKeyFunc creates a function that can be used as keyFunc, selecting a +// column from sep. 
+func makeKeyFunc(sep string, column int) func(string) (string, error) { + return func(s string) (string, error) { if k := lineColumn(s, "\t", 2); k == "" { return k, fmt.Errorf("cannot get key: %s", s) } else { return k, nil } } - grouper := func(g *zipkey.Group) error { - if len(g.G0) == 0 || len(g.G1) == 0 { - return nil - } - target, err := stringToRelease(lineColumn(g.G0[0], "\t", 3)) - if err != nil { - return err - } - for _, line := range g.G1 { - ref, err := stringToRef(lineColumn(line, "\t", 3)) +} + +// ZipUnverified takes a release and refs reader (tsv, with ident, key, doc) +// and assigns a fixed match result. +func ZipUnverified(releases, refs io.Reader, mr MatchResult, provenance string, w io.Writer) error { + var ( + enc = json.NewEncoder(w) + keyer = makeKeyFunc("\t", 2) + grouper = func(g *zipkey.Group) error { + if len(g.G0) == 0 || len(g.G1) == 0 { + return nil + } + target, err := stringToRelease(lineColumn(g.G0[0], "\t", 3)) if err != nil { return err } - var bref BiblioRef - bref.SourceReleaseIdent = ref.ReleaseIdent - bref.SourceWorkIdent = ref.WorkIdent - bref.SourceReleaseStage = ref.ReleaseStage - bref.SourceYear = fmt.Sprintf("%d", ref.ReleaseYear) - bref.RefIndex = ref.Index + 1 // we want 1-index (also helps with omitempty) - bref.RefKey = ref.Key - bref.TargetReleaseIdent = target.Ident - bref.TargetWorkIdent = target.WorkID - bref.MatchProvenance = provenance - bref.MatchStatus = mr.Status.Short() - bref.MatchReason = mr.Reason.Short() - if err := enc.Encode(bref); err != nil { - return err + for _, line := range g.G1 { + ref, err := stringToRef(lineColumn(line, "\t", 3)) + if err != nil { + return err + } + var bref BiblioRef + bref.SourceReleaseIdent = ref.ReleaseIdent + bref.SourceWorkIdent = ref.WorkIdent + bref.SourceReleaseStage = ref.ReleaseStage + bref.SourceYear = fmt.Sprintf("%d", ref.ReleaseYear) + bref.RefIndex = ref.Index + 1 // we want 1-index (also helps with omitempty) + bref.RefKey = ref.Key + 
bref.TargetReleaseIdent = target.Ident + bref.TargetWorkIdent = target.WorkID + bref.MatchProvenance = provenance + bref.MatchStatus = mr.Status.Short() + bref.MatchReason = mr.Reason.Short() + if err := enc.Encode(bref); err != nil { + return err + } } + return nil } - return nil - } + ) zipper := zipkey.New(releases, refs, keyer, grouper) return zipper.Run() } @@ -309,44 +315,37 @@ func ZipUnverified(releases, refs io.Reader, mr MatchResult, provenance string, // ZipWikiUnverified takes a release and wiki reader (tsv, with ident, key, doc) // and assigns a fixed match result. func ZipWikiUnverified(releases, wiki io.Reader, mr MatchResult, provenance string, w io.Writer) error { - // Define a grouper, working on one set of refs and releases with the same - // key at a time. Here, we do verification and write out the generated - // biblioref. - enc := json.NewEncoder(w) - keyer := func(s string) (string, error) { - if k := lineColumn(s, "\t", 2); k == "" { - return k, fmt.Errorf("cannot get key: %s", s) - } else { - return k, nil - } - } - grouper := func(g *zipkey.Group) error { - if len(g.G0) == 0 || len(g.G1) == 0 { - return nil - } - target, err := stringToRelease(lineColumn(g.G0[0], "\t", 3)) - if err != nil { - return err - } - for _, line := range g.G1 { - wiki, err := stringToWiki(lineColumn(line, "\t", 3)) + var ( + enc = json.NewEncoder(w) + keyer = makeKeyFunc("\t", 2) + grouper = func(g *zipkey.Group) error { + if len(g.G0) == 0 || len(g.G1) == 0 { + return nil + } + target, err := stringToRelease(lineColumn(g.G0[0], "\t", 3)) if err != nil { return err } - var bref BiblioRef - bref.Key = fmt.Sprintf("%s_%s", slugifyString(wiki.PageTitle), target.Ident) // XXX: what should we use? 
- bref.SourceWikipediaArticle = wiki.PageTitle - bref.TargetReleaseIdent = target.Ident - bref.TargetWorkIdent = target.WorkID - bref.MatchProvenance = provenance - bref.MatchStatus = mr.Status.Short() - bref.MatchReason = mr.Reason.Short() - if err := enc.Encode(bref); err != nil { - return err + for _, line := range g.G1 { + wiki, err := stringToWiki(lineColumn(line, "\t", 3)) + if err != nil { + return err + } + var bref BiblioRef + bref.Key = fmt.Sprintf("%s_%s", slugifyString(wiki.PageTitle), target.Ident) // XXX: what should we use? + bref.SourceWikipediaArticle = wiki.PageTitle + bref.TargetReleaseIdent = target.Ident + bref.TargetWorkIdent = target.WorkID + bref.MatchProvenance = provenance + bref.MatchStatus = mr.Status.Short() + bref.MatchReason = mr.Reason.Short() + if err := enc.Encode(bref); err != nil { + return err + } } + return nil } - return nil - } + ) zipper := zipkey.New(releases, wiki, keyer, grouper) return zipper.Run() } @@ -357,41 +356,37 @@ func ZipVerifyRefs(releases, refs io.Reader, w io.Writer) error { // Define a grouper, working on one set of refs and releases with the same // key at a time. Here, we do verification and write out the generated // biblioref. 
- enc := json.NewEncoder(w) - keyer := func(s string) (string, error) { - if k := lineColumn(s, "\t", 2); k == "" { - return k, fmt.Errorf("cannot get key: %s", s) - } else { - return k, nil - } - } - grouper := func(g *zipkey.Group) error { - if len(g.G0) == 0 || len(g.G1) == 0 { - return nil - } - pivot, err := stringToRelease(lineColumn(g.G0[0], "\t", 3)) - if err != nil { - return err - } - for _, line := range g.G1 { - re, err := stringToRelease(lineColumn(line, "\t", 3)) + var ( + enc = json.NewEncoder(w) + keyer = makeKeyFunc("\t", 2) + grouper = func(g *zipkey.Group) error { + if len(g.G0) == 0 || len(g.G1) == 0 { + return nil + } + pivot, err := stringToRelease(lineColumn(g.G0[0], "\t", 3)) if err != nil { return err } - result := Verify(pivot, re, 5) - switch result.Status { - case StatusExact, StatusStrong: - if result.Reason == ReasonDOI { - continue - } - br := generateBiblioRef(re, pivot, result.Status, result.Reason, "fuzzy") - if err := enc.Encode(br); err != nil { + for _, line := range g.G1 { + re, err := stringToRelease(lineColumn(line, "\t", 3)) + if err != nil { return err } + result := Verify(pivot, re, 5) + switch result.Status { + case StatusExact, StatusStrong: + if result.Reason == ReasonDOI { + continue + } + br := generateBiblioRef(re, pivot, result, "fuzzy") + if err := enc.Encode(br); err != nil { + return err + } + } } + return nil } - return nil - } + ) zipper := zipkey.New(releases, refs, keyer, grouper) return zipper.Run() } @@ -399,7 +394,7 @@ func ZipVerifyRefs(releases, refs io.Reader, w io.Writer) error { // lineColumn returns a specific column (1-indexed, like cut) from a tabular // file, returns empty string if column is invalid. func lineColumn(line, sep string, column int) string { - var parts = strings.Split(strings.TrimSpace(line), sep) + parts := strings.Split(strings.TrimSpace(line), sep) if len(parts) < column { return "" } else { |