diff options
Diffstat (limited to 'skate/verify.go')
-rw-r--r-- | skate/verify.go | 50 |
1 files changed, 50 insertions, 0 deletions
diff --git a/skate/verify.go b/skate/verify.go index cd40279..e6eb8b8 100644 --- a/skate/verify.go +++ b/skate/verify.go @@ -247,6 +247,51 @@ func ZipUnverified(releases, refs io.Reader, mr MatchResult, provenance string, return zipper.Run() } +// ZipWikiUnverified takes a release and wiki reader (tsv, with ident, key, doc) +// and assigns a fixed match result. +func ZipWikiUnverified(releases, wiki io.Reader, mr MatchResult, provenance string, w io.Writer) error { + // Define a grouper, working on one set of refs and releases with the same + // key at a time. Here, we do verification and write out the generated + // biblioref. + enc := json.NewEncoder(w) + keyer := func(s string) (string, error) { + if k := lineColumn(s, "\t", 2); k == "" { + return k, fmt.Errorf("cannot get key: %s", s) + } else { + return k, nil + } + } + grouper := func(g *zipkey.Group) error { + if len(g.G0) == 0 || len(g.G1) == 0 { + return nil + } + target, err := stringToRelease(lineColumn(g.G0[0], "\t", 3)) + if err != nil { + return err + } + for _, line := range g.G1 { + wiki, err := stringToWiki(lineColumn(line, "\t", 3)) + if err != nil { + return err + } + var bref BiblioRef + bref.Key = fmt.Sprintf("%s_%s", slugifyString(wiki.PageTitle), target.Ident) // XXX: what should we use? + bref.SourceWikipediaArticle = wiki.PageTitle + bref.TargetReleaseIdent = target.Ident + bref.TargetWorkIdent = target.WorkID + bref.MatchProvenance = provenance + bref.MatchStatus = mr.Status.Short() + bref.MatchReason = mr.Reason.Short() + if err := enc.Encode(bref); err != nil { + return err + } + } + return nil + } + zipper := zipkey.New(releases, wiki, keyer, grouper) + return zipper.Run() +} + // ZipVerifyRefs takes a release and refs reader (tsv, with ident, key, doc) // and will execute gf for each group found. func ZipVerifyRefs(releases, refs io.Reader, w io.Writer) error { @@ -313,6 +358,11 @@ func stringToRef(s string) (r *Ref, err error) { return } +func stringToWiki(s string) (r *MinimalCitations, err error) { + err = json.Unmarshal([]byte(s), &r) + return +} + // Verify follows the fuzzycat (Python) implementation of this function: it // compares two release entities. The Go version can be used for large batch // processing (where the Python version might take two or more days). |