diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-02-14 14:32:57 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-02-19 01:07:46 +0100 |
commit | 519b90d7f539b667e919c220a53626e7a4ac48bf (patch) | |
tree | b7c3beed283d7dca732a8f2ab5b1dfe283bb69f3 /python/fatcat_import.py | |
parent | 4cbc94cd708c1db80a232150ab2cf56dddf83e62 (diff) | |
download | fatcat-519b90d7f539b667e919c220a53626e7a4ac48bf.tar.gz fatcat-519b90d7f539b667e919c220a53626e7a4ac48bf.zip |
pubmed ftp harvest and KafkaBs4XmlPusher
* add PubmedFTPWorker
* utils are currently stored alongside pubmed (e.g. ftpretr, xmlstream)
  but may live elsewhere, as they are more generic
* add KafkaBs4XmlPusher
Diffstat (limited to 'python/fatcat_import.py')
-rwxr-xr-x | python/fatcat_import.py | 32 |
1 file changed, 16 insertions, 16 deletions
```diff
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index ad4de0e2..eaab9cfe 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -39,14 +39,13 @@ def run_arxiv(args):
     ari = ArxivRawImporter(args.api,
         edit_batch_size=args.batch_size)
     if args.kafka_mode:
-        raise NotImplementedError
-        #KafkaBs4XmlPusher(
-        #    ari,
-        #    args.kafka_hosts,
-        #    args.kafka_env,
-        #    "api-arxiv",
-        #    "fatcat-{}-import-arxiv".format(args.kafka_env),
-        #).run()
+        KafkaBs4XmlPusher(
+            ari,
+            args.kafka_hosts,
+            args.kafka_env,
+            "oaipmh-arxiv",
+            "fatcat-{}-import-arxiv".format(args.kafka_env),
+        ).run()
     else:
         Bs4XmlFilePusher(ari, args.xml_file, "record").run()
 
@@ -57,14 +56,13 @@ def run_pubmed(args):
         do_updates=args.do_updates,
         lookup_refs=(not args.no_lookup_refs))
     if args.kafka_mode:
-        raise NotImplementedError
-        #KafkaBs4XmlPusher(
-        #    pi,
-        #    args.kafka_hosts,
-        #    args.kafka_env,
-        #    "api-pubmed",
-        #    "fatcat-{}import-arxiv".format(args.kafka_env),
-        #).run()
+        KafkaBs4XmlPusher(
+            pi,
+            args.kafka_hosts,
+            args.kafka_env,
+            "oaipmh-pubmed",
+            "fatcat-{}-import-pubmed".format(args.kafka_env),
+        ).run()
     else:
         Bs4XmlLargeFilePusher(
             pi,
@@ -297,6 +295,7 @@ def main():
         auth_var="FATCAT_AUTH_WORKER_ARXIV",
     )
     sub_arxiv.add_argument('xml_file',
+        nargs='?',
         help="arXivRaw XML file to import from",
         default=sys.stdin, type=argparse.FileType('r'))
     sub_arxiv.add_argument('--kafka-mode',
@@ -310,6 +309,7 @@ def main():
         auth_var="FATCAT_AUTH_WORKER_PUBMED",
     )
     sub_pubmed.add_argument('xml_file',
+        nargs='?',
         help="Pubmed XML file to import from",
         default=sys.stdin, type=argparse.FileType('r'))
     sub_pubmed.add_argument('issn_map_file',
```