diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-04-09 17:49:08 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-04-09 17:49:08 -0700 |
commit | 8b3b3e5892a10bf6748c7824549641d20e2447d7 (patch) | |
tree | 493938a53995cf29f5e2f435271c309bd4ce4aa6 /covid19_tool.py | |
parent | 042bd36c25206ff45e305d094028b6482a4c4074 (diff) | |
download | fatcat-covid19-8b3b3e5892a10bf6748c7824549641d20e2447d7.tar.gz fatcat-covid19-8b3b3e5892a10bf6748c7824549641d20e2447d7.zip |
add dedupe and query-fatcat commands
Diffstat (limited to 'covid19_tool.py')
-rwxr-xr-x | covid19_tool.py | 30 |
1 file changed, 30 insertions, 0 deletions
diff --git a/covid19_tool.py b/covid19_tool.py index 345aa6e..6b84f69 100755 --- a/covid19_tool.py +++ b/covid19_tool.py @@ -10,7 +10,9 @@ import sys import argparse from fatcat_covid19.parse import parse_cord19_file +from fatcat_covid19.query import query_fatcat from fatcat_covid19.enrich import enrich_fatcat_file +from fatcat_covid19.dedupe import dedupe_file from fatcat_covid19.derivatives import enrich_derivatives_file from fatcat_covid19.transform import transform_es_file @@ -36,6 +38,30 @@ def main(): type=argparse.FileType('w'), default=sys.stdout) + sub_query_fatcat = subparsers.add_parser('query-fatcat', + help="query fatcat search index for releases") + sub_query_fatcat.set_defaults( + action='query-fatcat', + ) + sub_query_fatcat.add_argument('--json-output', + help="file to write to", + type=argparse.FileType('w'), + default=sys.stdout) + + sub_dedupe = subparsers.add_parser('dedupe', + help="emit only one JSON line per fatcat release_id") + sub_dedupe.set_defaults( + action='dedupe', + ) + sub_dedupe.add_argument('--json-input', + help="input JSON rows file (eg, CORD-19 parsed JSON)", + type=argparse.FileType('r'), + default=sys.stdin) + sub_dedupe.add_argument('--json-output', + help="file to write to", + type=argparse.FileType('w'), + default=sys.stdout) + sub_enrich_fatcat = subparsers.add_parser('enrich-fatcat', help="lookup fatcat releases from JSON metadata") sub_enrich_fatcat.set_defaults( @@ -98,6 +124,10 @@ def main(): if args.action == 'parse-cord19': parse_cord19_file(args.csv_path, args.json_output) + elif args.action == 'query-fatcat': + query_fatcat(args.json_output) + elif args.action == 'dedupe': + dedupe_file(args.json_input, args.json_output) elif args.action == 'enrich-fatcat': enrich_fatcat_file(args.json_file, args.json_output) elif args.action == 'enrich-derivatives': |