about summary refs log tree commit diff stats
path: root/skate/cmd/skate-reduce
diff options
context:
space:
mode:
Diffstat (limited to 'skate/cmd/skate-reduce')
-rw-r--r-- skate/cmd/skate-reduce/main.go 68
1 files changed, 32 insertions, 36 deletions
diff --git a/skate/cmd/skate-reduce/main.go b/skate/cmd/skate-reduce/main.go
index 7918a28..6d743b7 100644
--- a/skate/cmd/skate-reduce/main.go
+++ b/skate/cmd/skate-reduce/main.go
@@ -1,6 +1,6 @@
-// skate-reduce takes prepared inputs (e.g. from skate-map or skate-cluster)
-// and applies various verification and conversion functions. The output will
-// often be a stream of biblioref schema docs.
+// skate-reduce takes prepared inputs (e.g. from skate-map) and applies various
+// verification and conversion functions. The output will often be a stream of
+// biblioref schema docs.
//
// Support various "modes", e.g. exact, verify, ref, bref, wiki. Each mode may
// work on one or two files, and may need extra args.
@@ -9,38 +9,38 @@
// | will emit biblioref docs relating *one* element from releases with *all*
// | elements from ref; this is for "doi", "pmid" and other id matches, where no
// | further checks are necessary. The match reason, e.g. "doi" needs to be
-// | supplied.
+// | supplied
// |
// | $ skate-reduce -m exact -r doi -F a.tsv -L b.tsv
// |
// |
// * fuzzy | takes two (key, doc) TSV files (one for release, one for refs),
-// | runs verification within a group and will emit biblioref.
+// | runs verification within a group and will emit biblioref
// |
// | $ skate-reduce -m fuzzy -F a.tsv -L b.tsv
// |
// |
-// * wiki | zippy mode for releases and wikipedia inputs.
+// * wiki | zippy mode for releases and wikipedia inputs
// |
// | $ skate-reduce -m wiki -L a.ndj -W b.ndj
// |
// |
-// * oledt | zippy mode for releases and OL inputs, dumps table for debugging.
+// * oledt | zippy mode for releases and OL inputs, dumps table for debugging
// |
// | $ skate-reduce -m oledt -F a.ndj -O b.ndj
// |
// |
-// * oled | zippy mode for releases and OL inputs, emit biblioref.
+// * oled | zippy mode for releases and OL inputs, emit biblioref
// |
// | $ skate-reduce -m oled -F a.ndj -O b.ndj
// |
// |
-// * rere | zippy mode for OL release and ref (as release) inputs, emit biblioref.
+// * rere | zippy mode for OL release and ref (as release) inputs, emit biblioref
// |
// | $ skate-reduce -m rere -O a.ndj -F b.ndj
// |
// |
-// * unmatched | join matched and unmatched reference data; do deduplicate on the fly
+// * unmatched | join matched and unmatched reference data; deduplicate on the fly
// |
// | $ skate-reduce -m unmatched -B a.ndj -F b.ndj
// |
@@ -81,8 +81,8 @@ var (
cdxAdhoc = flag.String("C", "", "path to cdx adhoc schema")
// Extra args.
- reason = flag.String("r", "", "reason for match: doi, pmid, pmcid, arxiv, unknown")
- reasonMap = map[string]skate.MatchResult{
+ matchResult = flag.String("r", "", "reason for match: doi, pmid, pmcid, arxiv, unknown")
+ matchResultMap = map[string]skate.MatchResult{
"doi": skate.MatchResult{Status: skate.StatusExact, Reason: skate.ReasonDOI},
"pmid": skate.MatchResult{Status: skate.StatusExact, Reason: skate.ReasonPMID},
"pmcid": skate.MatchResult{Status: skate.StatusExact, Reason: skate.ReasonPMCID},
@@ -118,76 +118,72 @@ func main() {
if *quite {
log.SetOutput(ioutil.Discard)
}
- bw := bufio.NewWriter(os.Stdout)
+ var (
+ bw = bufio.NewWriter(os.Stdout)
+ l, f, w, o, b, c *os.File
+ r skate.MatchResult
+ ok bool
+ err error
+ )
defer bw.Flush()
switch *mode {
case "exact":
- l, f, err := xio.OpenTwo(*releases, *refs)
- if err != nil {
+ if l, f, err = xio.OpenTwo(*releases, *refs); err != nil {
log.Fatal(err)
}
- r, ok := reasonMap[*reason]
- if !ok {
- log.Fatalf("unknown reason: %v", *reason)
+ if r, ok = matchResultMap[*matchResult]; !ok {
+ log.Fatalf("unknown reason: %v", *matchResult)
}
if err := skate.ZippyExact(l, f, r, bw); err != nil {
log.Fatal(err)
}
case "fuzzy":
- l, f, err := xio.OpenTwo(*releases, *refs)
- if err != nil {
+ if l, f, err = xio.OpenTwo(*releases, *refs); err != nil {
log.Fatal(err)
}
if err := skate.ZippyVerifyRefs(l, f, bw); err != nil {
log.Fatal(err)
}
case "wiki":
- l, w, err := xio.OpenTwo(*releases, *wiki)
- if err != nil {
+ if l, w, err = xio.OpenTwo(*releases, *wiki); err != nil {
log.Fatal(err)
}
- if err := skate.ZippyExactWiki(l, w, reasonMap["doi"], bw); err != nil {
+ if err := skate.ZippyExactWiki(l, w, matchResultMap["doi"], bw); err != nil {
log.Fatal(err)
}
case "oledt":
- o, f, err := xio.OpenTwo(*openLibrary, *refs)
- if err != nil {
+ if o, f, err = xio.OpenTwo(*openLibrary, *refs); err != nil {
log.Fatal(err)
}
if err := skate.ZippyVerifyRefsOpenLibraryTable(o, f, bw); err != nil {
log.Fatal(err)
}
case "oled":
- o, f, err := xio.OpenTwo(*openLibrary, *refs)
- if err != nil {
+ if o, f, err = xio.OpenTwo(*openLibrary, *refs); err != nil {
log.Fatal(err)
}
if err := skate.ZippyVerifyRefsOpenLibrary(o, f, bw); err != nil {
log.Fatal(err)
}
case "rere":
- o, f, err := xio.OpenTwo(*releases, *refs)
- if err != nil {
+ if o, f, err = xio.OpenTwo(*releases, *refs); err != nil {
log.Fatal(err)
}
- r, ok := reasonMap[*reason]
- if !ok {
- log.Fatalf("unknown reason: %v", *reason)
+ if r, ok = matchResultMap[*matchResult]; !ok {
+ log.Fatalf("unknown reason: %v", *matchResult)
}
if err := skate.ZippyExactReleases(o, f, r, bw); err != nil {
log.Fatal(err)
}
case "unmatched":
- b, f, err := xio.OpenTwo(*bref, *refs)
- if err != nil {
+ if b, f, err = xio.OpenTwo(*bref, *refs); err != nil {
log.Fatal(err)
}
if err := skate.ZippyBrefAugment(b, f, bw); err != nil {
log.Fatal(err)
}
case "wb":
- f, c, err := xio.OpenTwo(*refs, *cdxAdhoc)
- if err != nil {
+ if f, c, err = xio.OpenTwo(*refs, *cdxAdhoc); err != nil {
log.Fatal(err)
}
if err := skate.ZippyWayback(f, c, bw); err != nil {