summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_import.py
diff options
context:
space:
mode:
author: Martin Czygan <martin.czygan@gmail.com> 2020-02-14 14:32:57 +0100
committer: Martin Czygan <martin.czygan@gmail.com> 2020-02-19 01:07:46 +0100
commit: 519b90d7f539b667e919c220a53626e7a4ac48bf (patch)
tree: b7c3beed283d7dca732a8f2ab5b1dfe283bb69f3 /python/fatcat_import.py
parent: 4cbc94cd708c1db80a232150ab2cf56dddf83e62 (diff)
download: fatcat-519b90d7f539b667e919c220a53626e7a4ac48bf.tar.gz
download: fatcat-519b90d7f539b667e919c220a53626e7a4ac48bf.zip
pubmed ftp harvest and KafkaBs4XmlPusher
* add PubmedFTPWorker
* utils are currently stored alongside pubmed (e.g. ftpretr, xmlstream) but may live elsewhere, as they are more generic
* add KafkaBs4XmlPusher
Diffstat (limited to 'python/fatcat_import.py')
-rwxr-xr-x python/fatcat_import.py 32
1 files changed, 16 insertions, 16 deletions
diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index ad4de0e2..eaab9cfe 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -39,14 +39,13 @@ def run_arxiv(args):
ari = ArxivRawImporter(args.api,
edit_batch_size=args.batch_size)
if args.kafka_mode:
- raise NotImplementedError
- #KafkaBs4XmlPusher(
- # ari,
- # args.kafka_hosts,
- # args.kafka_env,
- # "api-arxiv",
- # "fatcat-{}-import-arxiv".format(args.kafka_env),
- #).run()
+ KafkaBs4XmlPusher(
+ ari,
+ args.kafka_hosts,
+ args.kafka_env,
+ "oaipmh-arxiv",
+ "fatcat-{}-import-arxiv".format(args.kafka_env),
+ ).run()
else:
Bs4XmlFilePusher(ari, args.xml_file, "record").run()
@@ -57,14 +56,13 @@ def run_pubmed(args):
do_updates=args.do_updates,
lookup_refs=(not args.no_lookup_refs))
if args.kafka_mode:
- raise NotImplementedError
- #KafkaBs4XmlPusher(
- # pi,
- # args.kafka_hosts,
- # args.kafka_env,
- # "api-pubmed",
- # "fatcat-{}import-arxiv".format(args.kafka_env),
- #).run()
+ KafkaBs4XmlPusher(
+ pi,
+ args.kafka_hosts,
+ args.kafka_env,
+ "oaipmh-pubmed",
+ "fatcat-{}-import-pubmed".format(args.kafka_env),
+ ).run()
else:
Bs4XmlLargeFilePusher(
pi,
@@ -297,6 +295,7 @@ def main():
auth_var="FATCAT_AUTH_WORKER_ARXIV",
)
sub_arxiv.add_argument('xml_file',
+ nargs='?',
help="arXivRaw XML file to import from",
default=sys.stdin, type=argparse.FileType('r'))
sub_arxiv.add_argument('--kafka-mode',
@@ -310,6 +309,7 @@ def main():
auth_var="FATCAT_AUTH_WORKER_PUBMED",
)
sub_pubmed.add_argument('xml_file',
+ nargs='?',
help="Pubmed XML file to import from",
default=sys.stdin, type=argparse.FileType('r'))
sub_pubmed.add_argument('issn_map_file',