about summary refs log tree commit diff stats
path: root/python/fatcat_harvest.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_harvest.py')
-rwxr-xr-x python/fatcat_harvest.py 15
1 file changed, 11 insertions, 4 deletions
diff --git a/python/fatcat_harvest.py b/python/fatcat_harvest.py
index 58bef9ca..4c4f34a1 100755
--- a/python/fatcat_harvest.py
+++ b/python/fatcat_harvest.py
@@ -6,7 +6,7 @@ import datetime
import raven
from fatcat_tools.harvest import HarvestCrossrefWorker, HarvestDataciteWorker,\
HarvestArxivWorker, HarvestPubmedWorker, HarvestDoajArticleWorker,\
- HarvestDoajJournalWorker
+ HarvestDoajJournalWorker, PubmedFTPWorker
# Yep, a global. Gets DSN from `SENTRY_DSN` environment variable
sentry_client = raven.Client()
@@ -42,10 +42,17 @@ def run_arxiv(args):
worker.run(continuous=args.continuous)
def run_pubmed(args):
- worker = HarvestPubmedWorker(
+ # worker = HarvestPubmedWorker(
+ # kafka_hosts=args.kafka_hosts,
+ # produce_topic="fatcat-{}.oaipmh-pubmed".format(args.env),
+ # state_topic="fatcat-{}.oaipmh-pubmed-state".format(args.env),
+ # start_date=args.start_date,
+ # end_date=args.end_date)
+ # worker.run(continuous=args.continuous)
+ worker = PubmedFTPWorker(
kafka_hosts=args.kafka_hosts,
- produce_topic="fatcat-{}.oaipmh-pubmed".format(args.env),
- state_topic="fatcat-{}.oaipmh-pubmed-state".format(args.env),
+ produce_topic="fatcat-{}.ftp-pubmed".format(args.env),
+ state_topic="fatcat-{}.ftp-pubmed-state".format(args.env),
start_date=args.start_date,
end_date=args.end_date)
worker.run(continuous=args.continuous)