diff options
Diffstat (limited to 'skate/cmd/skate-reduce')
-rw-r--r-- | skate/cmd/skate-reduce/main.go | 26 |
1 files changed, 13 insertions, 13 deletions
diff --git a/skate/cmd/skate-reduce/main.go b/skate/cmd/skate-reduce/main.go
index df72ef4..d0cc0e3 100644
--- a/skate/cmd/skate-reduce/main.go
+++ b/skate/cmd/skate-reduce/main.go
@@ -1,24 +1,25 @@
 // skate-reduce takes prepared inputs (e.g. from skate-map or skate-cluster)
 // and applies various verification and conversion functions. The output will
-// often be the biblioref schema.
+// often be a biblioref schema stream.
 //
-// Support various modes.
+// Support various modes, e.g. exact, verify, ref, bref, wiki. Each mode may
+// work on one or two files, and may need extra args.
 //
-// * exact: takes (key, doc) TSV files (one for releases, one for refs) and
-// will emit biblioref docs relating one element from releases with all
-// elements from ref; this is for "doi", "pmid" and other id matches, where no
-// further checks are necessary. The match reason, e.g. "doi" needs to be
-// supplied.
+// * exact: takes two (key, doc) TSV files (one for releases, one for refs) and
+// will emit biblioref docs relating *one* element from releases with *all*
+// elements from ref; this is for "doi", "pmid" and other id matches, where no
+// further checks are necessary. The match reason, e.g. "doi" needs to be
+// supplied.
 //
 // $ skate-reduce -m exact -r doi -F a.tsv -L b.tsv
 //
-// * verify: takes (key, doc) TSV files (one for release, one for refs), runs
-// verification within a group and will emit biblioref.
+// * verify: takes two (key, doc) TSV files (one for release, one for refs),
+// runs verification within a group and will emit biblioref.
 //
 // $ skate-reduce -m verify -F a.tsv -L b.tsv
 //
 // * ref: takes a single file with clusters containing releases and refs and
-// will emit verification results.
+// will emit verification results.
 //
 // $ skate-reduce -m ref < a.ndj
 //
@@ -48,13 +49,12 @@ import (
 var (
 	numWorkers = flag.Int("w", runtime.NumCPU(), "number of workers")
 	batchSize = flag.Int("b", 10000, "batch size")
-	// Each mode may work on one or two files, and may need extra args.
-	mode = flag.String("m", "ref", "mode, e.g. exact, verify, ref, bref, wiki")
+	mode = flag.String("m", "ref", "mode, e.g. exact, verify, ref, bref, wiki")
 	cpuProfile = flag.String("cpuprofile", "", "write cpu profile to file")
 	memProfile = flag.String("memprofile", "", "write heap profile to file (go tool pprof -png --alloc_objects program mem.pprof > mem.png)")
-	// Possible inputs, we could switch to a subcommand cli parser.
+	// Possible inputs -- we could switch to a subcommand cli parser?
 	refs = flag.String("F", "", "path to refs input")
 	releases = flag.String("L", "", "path to release input")
 	wiki = flag.String("W", "", "path to wiki input")