summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_harvest.py
diff options
context:
space:
mode:
author Martin Czygan <martin@archive.org> 2020-03-10 15:33:17 +0000
committer Martin Czygan <martin@archive.org> 2020-03-10 15:33:17 +0000
commit 336630e1d445fb9d233447f9af4bac94473a12bf (patch)
tree b2d4baa4ea6d3afac29b9b2760101c10d18ea30a /python/fatcat_harvest.py
parent f4cce5a765a9f80f9c5e9c907689c06dc9ebf102 (diff)
parent d18942d1ab4d394bdb275bcf9eb82d1cba814775 (diff)
download fatcat-336630e1d445fb9d233447f9af4bac94473a12bf.tar.gz
download fatcat-336630e1d445fb9d233447f9af4bac94473a12bf.zip
Merge branch 'martin-kafka-bs4-import' into 'master'
pubmed and arxiv harvest preparations

See merge request webgroup/fatcat!28
Diffstat (limited to 'python/fatcat_harvest.py')
-rwxr-xr-x python/fatcat_harvest.py 17
1 files changed, 12 insertions, 5 deletions
diff --git a/python/fatcat_harvest.py b/python/fatcat_harvest.py
index 58bef9ca..7ac0f16c 100755
--- a/python/fatcat_harvest.py
+++ b/python/fatcat_harvest.py
@@ -5,8 +5,8 @@ import argparse
import datetime
import raven
from fatcat_tools.harvest import HarvestCrossrefWorker, HarvestDataciteWorker,\
- HarvestArxivWorker, HarvestPubmedWorker, HarvestDoajArticleWorker,\
- HarvestDoajJournalWorker
+ HarvestArxivWorker, HarvestDoajArticleWorker, HarvestDoajJournalWorker,\
+ PubmedFTPWorker
# Yep, a global. Gets DSN from `SENTRY_DSN` environment variable
sentry_client = raven.Client()
@@ -42,10 +42,17 @@ def run_arxiv(args):
worker.run(continuous=args.continuous)
def run_pubmed(args):
- worker = HarvestPubmedWorker(
+ # worker = HarvestPubmedWorker(
+ # kafka_hosts=args.kafka_hosts,
+ # produce_topic="fatcat-{}.oaipmh-pubmed".format(args.env),
+ # state_topic="fatcat-{}.oaipmh-pubmed-state".format(args.env),
+ # start_date=args.start_date,
+ # end_date=args.end_date)
+ # worker.run(continuous=args.continuous)
+ worker = PubmedFTPWorker(
kafka_hosts=args.kafka_hosts,
- produce_topic="fatcat-{}.oaipmh-pubmed".format(args.env),
- state_topic="fatcat-{}.oaipmh-pubmed-state".format(args.env),
+ produce_topic="fatcat-{}.ftp-pubmed".format(args.env),
+ state_topic="fatcat-{}.ftp-pubmed-state".format(args.env),
start_date=args.start_date,
end_date=args.end_date)
worker.run(continuous=args.continuous)