aboutsummaryrefslogtreecommitdiffstats
path: root/covid19_tool.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-04-09 17:49:08 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-04-09 17:49:08 -0700
commit 8b3b3e5892a10bf6748c7824549641d20e2447d7 (patch)
tree 493938a53995cf29f5e2f435271c309bd4ce4aa6 /covid19_tool.py
parent 042bd36c25206ff45e305d094028b6482a4c4074 (diff)
download fatcat-covid19-8b3b3e5892a10bf6748c7824549641d20e2447d7.tar.gz
fatcat-covid19-8b3b3e5892a10bf6748c7824549641d20e2447d7.zip
add dedupe and query-fatcat commands
Diffstat (limited to 'covid19_tool.py')
-rwxr-xr-x covid19_tool.py 30
1 files changed, 30 insertions, 0 deletions
diff --git a/covid19_tool.py b/covid19_tool.py
index 345aa6e..6b84f69 100755
--- a/covid19_tool.py
+++ b/covid19_tool.py
@@ -10,7 +10,9 @@ import sys
import argparse
from fatcat_covid19.parse import parse_cord19_file
+from fatcat_covid19.query import query_fatcat
from fatcat_covid19.enrich import enrich_fatcat_file
+from fatcat_covid19.dedupe import dedupe_file
from fatcat_covid19.derivatives import enrich_derivatives_file
from fatcat_covid19.transform import transform_es_file
@@ -36,6 +38,30 @@ def main():
type=argparse.FileType('w'),
default=sys.stdout)
+ sub_query_fatcat = subparsers.add_parser('query-fatcat',
+ help="query fatcat search index for releases")
+ sub_query_fatcat.set_defaults(
+ action='query-fatcat',
+ )
+ sub_query_fatcat.add_argument('--json-output',
+ help="file to write to",
+ type=argparse.FileType('w'),
+ default=sys.stdout)
+
+ sub_dedupe = subparsers.add_parser('dedupe',
+ help="emit only one JSON line per fatcat release_id")
+ sub_dedupe.set_defaults(
+ action='dedupe',
+ )
+ sub_dedupe.add_argument('--json-input',
+ help="input JSON rows file (eg, CORD-19 parsed JSON)",
+ type=argparse.FileType('r'),
+ default=sys.stdin)
+ sub_dedupe.add_argument('--json-output',
+ help="file to write to",
+ type=argparse.FileType('w'),
+ default=sys.stdout)
+
sub_enrich_fatcat = subparsers.add_parser('enrich-fatcat',
help="lookup fatcat releases from JSON metadata")
sub_enrich_fatcat.set_defaults(
@@ -98,6 +124,10 @@ def main():
if args.action == 'parse-cord19':
parse_cord19_file(args.csv_path, args.json_output)
+ elif args.action == 'query-fatcat':
+ query_fatcat(args.json_output)
+ elif args.action == 'dedupe':
+ dedupe_file(args.json_input, args.json_output)
elif args.action == 'enrich-fatcat':
enrich_fatcat_file(args.json_file, args.json_output)
elif args.action == 'enrich-derivatives':