about | summary | refs | log | tree | commit | diff | stats
path: root/covid19_tool.py
diff options
context:
space:
mode:
Diffstat (limited to 'covid19_tool.py')
-rwxr-xr-x covid19_tool.py 30
1 files changed, 30 insertions, 0 deletions
diff --git a/covid19_tool.py b/covid19_tool.py
index 345aa6e..6b84f69 100755
--- a/covid19_tool.py
+++ b/covid19_tool.py
@@ -10,7 +10,9 @@ import sys
import argparse
from fatcat_covid19.parse import parse_cord19_file
+from fatcat_covid19.query import query_fatcat
from fatcat_covid19.enrich import enrich_fatcat_file
+from fatcat_covid19.dedupe import dedupe_file
from fatcat_covid19.derivatives import enrich_derivatives_file
from fatcat_covid19.transform import transform_es_file
@@ -36,6 +38,30 @@ def main():
type=argparse.FileType('w'),
default=sys.stdout)
+ sub_query_fatcat = subparsers.add_parser('query-fatcat',
+ help="query fatcat search index for releases")
+ sub_query_fatcat.set_defaults(
+ action='query-fatcat',
+ )
+ sub_query_fatcat.add_argument('--json-output',
+ help="file to write to",
+ type=argparse.FileType('w'),
+ default=sys.stdout)
+
+ sub_dedupe = subparsers.add_parser('dedupe',
+ help="emit only one JSON line per fatcat release_id")
+ sub_dedupe.set_defaults(
+ action='dedupe',
+ )
+ sub_dedupe.add_argument('--json-input',
+ help="input JSON rows file (eg, CORD-19 parsed JSON)",
+ type=argparse.FileType('r'),
+ default=sys.stdin)
+ sub_dedupe.add_argument('--json-output',
+ help="file to write to",
+ type=argparse.FileType('w'),
+ default=sys.stdout)
+
sub_enrich_fatcat = subparsers.add_parser('enrich-fatcat',
help="lookup fatcat releases from JSON metadata")
sub_enrich_fatcat.set_defaults(
@@ -98,6 +124,10 @@ def main():
if args.action == 'parse-cord19':
parse_cord19_file(args.csv_path, args.json_output)
+ elif args.action == 'query-fatcat':
+ query_fatcat(args.json_output)
+ elif args.action == 'dedupe':
+ dedupe_file(args.json_input, args.json_output)
elif args.action == 'enrich-fatcat':
enrich_fatcat_file(args.json_file, args.json_output)
elif args.action == 'enrich-derivatives':