diff options
Diffstat (limited to 'skate/cmd/skate-reduce')
-rw-r--r-- | skate/cmd/skate-reduce/main.go | 68 |
1 file changed, 32 insertions, 36 deletions
diff --git a/skate/cmd/skate-reduce/main.go b/skate/cmd/skate-reduce/main.go index 7918a28..6d743b7 100644 --- a/skate/cmd/skate-reduce/main.go +++ b/skate/cmd/skate-reduce/main.go @@ -1,6 +1,6 @@ -// skate-reduce takes prepared inputs (e.g. from skate-map or skate-cluster) -// and applies various verification and conversion functions. The output will -// often be a stream of biblioref schema docs. +// skate-reduce takes prepared inputs (e.g. from skate-map) and applies various +// verification and conversion functions. The output will often be a stream of +// biblioref schema docs. // // Support various "modes", e.g. exact, verify, ref, bref, wiki. Each mode may // work on one or two files, and may need extra args. @@ -9,38 +9,38 @@ // | will emit biblioref docs relating *one* element from releases with *all* // | elements from ref; this is for "doi", "pmid" and other id matches, where no // | further checks are necessary. The match reason, e.g. "doi" needs to be -// | supplied. +// | supplied // | // | $ skate-reduce -m exact -r doi -F a.tsv -L b.tsv // | // | // * fuzzy | takes two (key, doc) TSV files (one for release, one for refs), -// | runs verification within a group and will emit biblioref. +// | runs verification within a group and will emit biblioref // | // | $ skate-reduce -m fuzzy -F a.tsv -L b.tsv // | // | -// * wiki | zippy mode for releases and wikipedia inputs. +// * wiki | zippy mode for releases and wikipedia inputs // | // | $ skate-reduce -m wiki -L a.ndj -W b.ndj // | // | -// * oledt | zippy mode for releases and OL inputs, dumps table for debugging. +// * oledt | zippy mode for releases and OL inputs, dumps table for debugging // | // | $ skate-reduce -m oled -F a.ndj -O b.ndj // | // | -// * oled | zippy mode for releases and OL inputs, emit biblioref. 
+// * oled | zippy mode for releases and OL inputs, emit biblioref // | // | $ skate-reduce -m oled -F a.ndj -O b.ndj // | // | -// * rere | zippy mode for OL release and ref (as release) inputs, emit biblioref. +// * rere | zippy mode for OL release and ref (as release) inputs, emit biblioref // | // | $ skate-reduce -m rere -O a.ndj -F b.ndj // | // | -// * unmatched | join matched and unmatched reference data; do deduplicate on the fly +// * unmatched | join matched and unmatched reference data; deduplicate on the fly // | // | $ skate-reduce -m unmatched -B a.ndj -F b.ndj // | @@ -81,8 +81,8 @@ var ( cdxAdhoc = flag.String("C", "", "path to cdx adhoc schema") // Extra args. - reason = flag.String("r", "", "reason for match: doi, pmid, pmcid, arxiv, unknown") - reasonMap = map[string]skate.MatchResult{ + matchResult = flag.String("r", "", "reason for match: doi, pmid, pmcid, arxiv, unknown") + matchResultMap = map[string]skate.MatchResult{ "doi": skate.MatchResult{Status: skate.StatusExact, Reason: skate.ReasonDOI}, "pmid": skate.MatchResult{Status: skate.StatusExact, Reason: skate.ReasonPMID}, "pmcid": skate.MatchResult{Status: skate.StatusExact, Reason: skate.ReasonPMCID}, @@ -118,76 +118,72 @@ func main() { if *quite { log.SetOutput(ioutil.Discard) } - bw := bufio.NewWriter(os.Stdout) + var ( + bw = bufio.NewWriter(os.Stdout) + l, f, w, o, b, c *os.File + r skate.MatchResult + ok bool + err error + ) defer bw.Flush() switch *mode { case "exact": - l, f, err := xio.OpenTwo(*releases, *refs) - if err != nil { + if l, f, err = xio.OpenTwo(*releases, *refs); err != nil { log.Fatal(err) } - r, ok := reasonMap[*reason] - if !ok { - log.Fatalf("unknown reason: %v", *reason) + if r, ok = matchResultMap[*matchResult]; !ok { + log.Fatalf("unknown reason: %v", *matchResult) } if err := skate.ZippyExact(l, f, r, bw); err != nil { log.Fatal(err) } case "fuzzy": - l, f, err := xio.OpenTwo(*releases, *refs) - if err != nil { + if l, f, err = xio.OpenTwo(*releases, *refs); 
err != nil { log.Fatal(err) } if err := skate.ZippyVerifyRefs(l, f, bw); err != nil { log.Fatal(err) } case "wiki": - l, w, err := xio.OpenTwo(*releases, *wiki) - if err != nil { + if l, w, err = xio.OpenTwo(*releases, *wiki); err != nil { log.Fatal(err) } - if err := skate.ZippyExactWiki(l, w, reasonMap["doi"], bw); err != nil { + if err := skate.ZippyExactWiki(l, w, matchResultMap["doi"], bw); err != nil { log.Fatal(err) } case "oledt": - o, f, err := xio.OpenTwo(*openLibrary, *refs) - if err != nil { + if o, f, err = xio.OpenTwo(*openLibrary, *refs); err != nil { log.Fatal(err) } if err := skate.ZippyVerifyRefsOpenLibraryTable(o, f, bw); err != nil { log.Fatal(err) } case "oled": - o, f, err := xio.OpenTwo(*openLibrary, *refs) - if err != nil { + if o, f, err = xio.OpenTwo(*openLibrary, *refs); err != nil { log.Fatal(err) } if err := skate.ZippyVerifyRefsOpenLibrary(o, f, bw); err != nil { log.Fatal(err) } case "rere": - o, f, err := xio.OpenTwo(*releases, *refs) - if err != nil { + if o, f, err = xio.OpenTwo(*releases, *refs); err != nil { log.Fatal(err) } - r, ok := reasonMap[*reason] - if !ok { - log.Fatalf("unknown reason: %v", *reason) + if r, ok = matchResultMap[*matchResult]; !ok { + log.Fatalf("unknown reason: %v", *matchResult) } if err := skate.ZippyExactReleases(o, f, r, bw); err != nil { log.Fatal(err) } case "unmatched": - b, f, err := xio.OpenTwo(*bref, *refs) - if err != nil { + if b, f, err = xio.OpenTwo(*bref, *refs); err != nil { log.Fatal(err) } if err := skate.ZippyBrefAugment(b, f, bw); err != nil { log.Fatal(err) } case "wb": - f, c, err := xio.OpenTwo(*refs, *cdxAdhoc) - if err != nil { + if f, c, err = xio.OpenTwo(*refs, *cdxAdhoc); err != nil { log.Fatal(err) } if err := skate.ZippyWayback(f, c, bw); err != nil { |