diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-02-14 14:32:57 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-02-19 01:07:46 +0100 |
commit | 519b90d7f539b667e919c220a53626e7a4ac48bf (patch) | |
tree | b7c3beed283d7dca732a8f2ab5b1dfe283bb69f3 /python/fatcat_harvest.py | |
parent | 4cbc94cd708c1db80a232150ab2cf56dddf83e62 (diff) | |
download | fatcat-519b90d7f539b667e919c220a53626e7a4ac48bf.tar.gz fatcat-519b90d7f539b667e919c220a53626e7a4ac48bf.zip |
pubmed ftp harvest and KafkaBs4XmlPusher
* add PubmedFTPWorker
* utils (e.g. ftpretr, xmlstream) are currently stored alongside the pubmed
  code, but may move elsewhere, as they are more generic
* add KafkaBs4XmlPusher
Diffstat (limited to 'python/fatcat_harvest.py')
-rwxr-xr-x | python/fatcat_harvest.py | 15 |
1 file changed, 11 insertions, 4 deletions
```diff
diff --git a/python/fatcat_harvest.py b/python/fatcat_harvest.py
index 58bef9ca..4c4f34a1 100755
--- a/python/fatcat_harvest.py
+++ b/python/fatcat_harvest.py
@@ -6,7 +6,7 @@ import datetime
 import raven
 from fatcat_tools.harvest import HarvestCrossrefWorker, HarvestDataciteWorker,\
     HarvestArxivWorker, HarvestPubmedWorker, HarvestDoajArticleWorker,\
-    HarvestDoajJournalWorker
+    HarvestDoajJournalWorker, PubmedFTPWorker
 
 # Yep, a global. Gets DSN from `SENTRY_DSN` environment variable
 sentry_client = raven.Client()
@@ -42,10 +42,17 @@ def run_arxiv(args):
     worker.run(continuous=args.continuous)
 
 def run_pubmed(args):
-    worker = HarvestPubmedWorker(
+    # worker = HarvestPubmedWorker(
+    #     kafka_hosts=args.kafka_hosts,
+    #     produce_topic="fatcat-{}.oaipmh-pubmed".format(args.env),
+    #     state_topic="fatcat-{}.oaipmh-pubmed-state".format(args.env),
+    #     start_date=args.start_date,
+    #     end_date=args.end_date)
+    # worker.run(continuous=args.continuous)
+    worker = PubmedFTPWorker(
         kafka_hosts=args.kafka_hosts,
-        produce_topic="fatcat-{}.oaipmh-pubmed".format(args.env),
-        state_topic="fatcat-{}.oaipmh-pubmed-state".format(args.env),
+        produce_topic="fatcat-{}.ftp-pubmed".format(args.env),
+        state_topic="fatcat-{}.ftp-pubmed-state".format(args.env),
         start_date=args.start_date,
         end_date=args.end_date)
     worker.run(continuous=args.continuous)
```