aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/harvest/oaipmh.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_tools/harvest/oaipmh.py')
-rw-r--r--python/fatcat_tools/harvest/oaipmh.py13
1 files changed, 8 insertions, 5 deletions
diff --git a/python/fatcat_tools/harvest/oaipmh.py b/python/fatcat_tools/harvest/oaipmh.py
index 3e3bea03..f908ba83 100644
--- a/python/fatcat_tools/harvest/oaipmh.py
+++ b/python/fatcat_tools/harvest/oaipmh.py
@@ -39,10 +39,7 @@ class HarvestOaiPmhWorker:
self.state_topic = state_topic
self.kafka_config = {
'bootstrap.servers': kafka_hosts,
- 'delivery.report.only.error': True,
'message.max.bytes': 20000000, # ~20 MBytes; broker is ~50 MBytes
- 'default.topic.config':
- {'request.required.acks': 'all'},
}
self.loop_sleep = 60*60 # how long to wait, in seconds, between date checks
@@ -62,7 +59,14 @@ class HarvestOaiPmhWorker:
# TODO: should it be sys.exit(-1)?
raise KafkaException(err)
- producer = Producer(self.kafka_config)
+ producer_conf = self.kafka_config.copy()
+ producer_conf.update({
+ 'delivery.report.only.error': True,
+ 'default.topic.config': {
+ 'request.required.acks': -1, # all brokers must confirm
+ },
+ })
+ producer = Producer(producer_conf)
api = sickle.Sickle(self.endpoint_url)
date_str = date.isoformat()
@@ -88,7 +92,6 @@ class HarvestOaiPmhWorker:
item.raw.encode('utf-8'),
key=item.header.identifier.encode('utf-8'),
on_delivery=fail_fast)
- producer.poll(0)
producer.flush()
def run(self, continuous=False):