about summary refs log tree commit diff stats
path: root/python/fatcat_import.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_import.py')
-rwxr-xr-x python/fatcat_import.py 49
1 files changed, 36 insertions, 13 deletions
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index fe5b24a6..0e176b2c 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -1,14 +1,18 @@
#!/usr/bin/env python3
-import sys
-import argparse
+"""
+"""
+
+import os, sys, argparse
+from fatcat_tools import authenticated_api
from fatcat_tools.importers import CrossrefImporter, OrcidImporter, \
IssnImporter, MatchedImporter, GrobidMetadataImporter, make_kafka_consumer
def run_crossref(args):
- fci = CrossrefImporter(args.host_url, args.issn_map_file,
- args.extid_map_file, create_containers=(not args.no_create_containers),
+ fci = CrossrefImporter(args.api, args.issn_map_file,
+ extid_map_file=args.extid_map_file,
+ create_containers=(not args.no_create_containers),
check_existing=(not args.no_release_updates))
if args.kafka_mode:
consumer = make_kafka_consumer(
@@ -19,23 +23,23 @@ def run_crossref(args):
fci.describe_run()
def run_orcid(args):
- foi = OrcidImporter(args.host_url)
+ foi = OrcidImporter(args.api)
foi.process_batch(args.json_file, size=args.batch_size)
foi.describe_run()
def run_issn(args):
- fii = IssnImporter(args.host_url)
+ fii = IssnImporter(args.api)
fii.process_csv_batch(args.csv_file, size=args.batch_size)
fii.describe_run()
def run_matched(args):
- fmi = MatchedImporter(args.host_url,
+ fmi = MatchedImporter(args.api,
skip_file_updates=args.no_file_updates)
fmi.process_batch(args.json_file, size=args.batch_size)
fmi.describe_run()
def run_grobid_metadata(args):
- fmi = GrobidMetadataImporter(args.host_url)
+ fmi = GrobidMetadataImporter(args.api)
fmi.process_source(args.tsv_file, group_size=args.group_size)
fmi.describe_run()
@@ -56,7 +60,10 @@ def main():
subparsers = parser.add_subparsers()
sub_crossref = subparsers.add_parser('crossref')
- sub_crossref.set_defaults(func=run_crossref)
+ sub_crossref.set_defaults(
+ func=run_crossref,
+ auth_var="FATCAT_AUTH_WORKER_CROSSREF",
+ )
sub_crossref.add_argument('json_file',
help="crossref JSON file to import from",
default=sys.stdin, type=argparse.FileType('r'))
@@ -80,7 +87,10 @@ def main():
help="don't lookup existing DOIs, just insert (only for bootstrap)")
sub_orcid = subparsers.add_parser('orcid')
- sub_orcid.set_defaults(func=run_orcid)
+ sub_orcid.set_defaults(
+ func=run_orcid,
+ auth_var="FATCAT_AUTH_WORKER_ORCID"
+ )
sub_orcid.add_argument('json_file',
help="orcid JSON file to import from (or stdin)",
default=sys.stdin, type=argparse.FileType('r'))
@@ -89,7 +99,10 @@ def main():
default=50, type=int)
sub_issn = subparsers.add_parser('issn')
- sub_issn.set_defaults(func=run_issn)
+ sub_issn.set_defaults(
+ func=run_issn,
+ auth_var="FATCAT_AUTH_WORKER_ISSN",
+ )
sub_issn.add_argument('csv_file',
help="Journal ISSN CSV metadata file to import from (or stdin)",
default=sys.stdin, type=argparse.FileType('r'))
@@ -98,7 +111,10 @@ def main():
default=50, type=int)
sub_matched = subparsers.add_parser('matched')
- sub_matched.set_defaults(func=run_matched)
+ sub_matched.set_defaults(
+ func=run_matched,
+ auth_var="FATCAT_AUTH_WORKER_MATCHED",
+ )
sub_matched.add_argument('json_file',
help="JSON file to import from (or stdin)",
default=sys.stdin, type=argparse.FileType('r'))
@@ -110,7 +126,10 @@ def main():
default=50, type=int)
sub_grobid_metadata = subparsers.add_parser('grobid-metadata')
- sub_grobid_metadata.set_defaults(func=run_grobid_metadata)
+ sub_grobid_metadata.set_defaults(
+ func=run_grobid_metadata,
+ auth_var="FATCAT_AUTH_WORKER_GROBID_METADATA",
+ )
sub_grobid_metadata.add_argument('tsv_file',
help="TSV file to import from (or stdin)",
default=sys.stdin, type=argparse.FileType('r'))
@@ -122,6 +141,10 @@ def main():
if not args.__dict__.get("func"):
print("tell me what to do!")
sys.exit(-1)
+
+ args.api = authenticated_api(
+ args.host_url,
+ token=os.environ.get(args.auth_var))
args.func(args)
if __name__ == '__main__':