diff options
author | Martin Czygan <martin@archive.org> | 2020-03-10 15:33:17 +0000 |
---|---|---|
committer | Martin Czygan <martin@archive.org> | 2020-03-10 15:33:17 +0000 |
commit | 336630e1d445fb9d233447f9af4bac94473a12bf (patch) | |
tree | b2d4baa4ea6d3afac29b9b2760101c10d18ea30a /python/fatcat_harvest.py | |
parent | f4cce5a765a9f80f9c5e9c907689c06dc9ebf102 (diff) | |
parent | d18942d1ab4d394bdb275bcf9eb82d1cba814775 (diff) | |
download | fatcat-336630e1d445fb9d233447f9af4bac94473a12bf.tar.gz fatcat-336630e1d445fb9d233447f9af4bac94473a12bf.zip |
Merge branch 'martin-kafka-bs4-import' into 'master'
pubmed and arxiv harvest preparations
See merge request webgroup/fatcat!28
Diffstat (limited to 'python/fatcat_harvest.py')
-rwxr-xr-x | python/fatcat_harvest.py | 17 |
1 files changed, 12 insertions, 5 deletions
```diff
diff --git a/python/fatcat_harvest.py b/python/fatcat_harvest.py
index 58bef9ca..7ac0f16c 100755
--- a/python/fatcat_harvest.py
+++ b/python/fatcat_harvest.py
@@ -5,8 +5,8 @@ import argparse
 import datetime
 import raven
 from fatcat_tools.harvest import HarvestCrossrefWorker, HarvestDataciteWorker,\
-    HarvestArxivWorker, HarvestPubmedWorker, HarvestDoajArticleWorker,\
-    HarvestDoajJournalWorker
+    HarvestArxivWorker, HarvestDoajArticleWorker, HarvestDoajJournalWorker,\
+    PubmedFTPWorker
 
 # Yep, a global. Gets DSN from `SENTRY_DSN` environment variable
 sentry_client = raven.Client()
@@ -42,10 +42,17 @@ def run_arxiv(args):
     worker.run(continuous=args.continuous)
 
 def run_pubmed(args):
-    worker = HarvestPubmedWorker(
+    # worker = HarvestPubmedWorker(
+    #     kafka_hosts=args.kafka_hosts,
+    #     produce_topic="fatcat-{}.oaipmh-pubmed".format(args.env),
+    #     state_topic="fatcat-{}.oaipmh-pubmed-state".format(args.env),
+    #     start_date=args.start_date,
+    #     end_date=args.end_date)
+    # worker.run(continuous=args.continuous)
+    worker = PubmedFTPWorker(
         kafka_hosts=args.kafka_hosts,
-        produce_topic="fatcat-{}.oaipmh-pubmed".format(args.env),
-        state_topic="fatcat-{}.oaipmh-pubmed-state".format(args.env),
+        produce_topic="fatcat-{}.ftp-pubmed".format(args.env),
+        state_topic="fatcat-{}.ftp-pubmed-state".format(args.env),
         start_date=args.start_date,
         end_date=args.end_date)
     worker.run(continuous=args.continuous)
```