From 92db2c8bb2464db8455b61b245a007cb57f2c92f Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 17 Nov 2020 19:40:54 -0800
Subject: implement remainder of DOAJ article importer

---
 python/fatcat_import.py | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'python/fatcat_import.py')

diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index 19cf43ec..ff6c94dc 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -256,6 +256,24 @@ def run_datacite(args):
     else:
         JsonLinePusher(dci, args.json_file).run()
 
+def run_doaj_article(args):
+    dai = DoajArticleImporter(args.api,
+        args.issn_map_file,
+        edit_batch_size=args.batch_size,
+        do_updates=args.do_updates,
+    )
+    if args.kafka_mode:
+        KafkaJsonPusher(
+            dai,
+            args.kafka_hosts,
+            args.kafka_env,
+            "api-doaj",
+            "fatcat-{}-import-doaj".format(args.kafka_env),
+            consume_batch_size=args.batch_size,
+        ).run()
+    else:
+        JsonLinePusher(dai, args.json_file).run()
+
 def run_file_meta(args):
     # do_updates defaults to true for this importer
     fmi = FileMetaImporter(args.api,
@@ -606,6 +624,25 @@ def main():
         auth_var="FATCAT_AUTH_WORKER_DATACITE",
     )
 
+    sub_doaj_article = subparsers.add_parser('doaj-article',
+        help="import doaj.org article metadata")
+    sub_doaj_article.add_argument('json_file',
+        help="File with JSON lines from DOAJ API (or bulk dump) to import from",
+        default=sys.stdin, type=argparse.FileType('r'))
+    sub_doaj_article.add_argument('--issn-map-file',
+        help="ISSN to ISSN-L mapping file",
+        default=None, type=argparse.FileType('r'))
+    sub_doaj_article.add_argument('--kafka-mode',
+        action='store_true',
+        help="consume from kafka topic (not stdin)")
+    sub_doaj_article.add_argument('--do-updates',
+        action='store_true',
+        help="update any pre-existing release entities")
+    sub_doaj_article.set_defaults(
+        func=run_doaj_article,
+        auth_var="FATCAT_AUTH_WORKER_DOAJ",
+    )
+
     sub_file_meta = subparsers.add_parser('file-meta',
         help="simple update-only importer for file metadata")
     sub_file_meta.set_defaults(
-- 
cgit v1.2.3