author     Bryan Newbold <bnewbold@robocracy.org>  2019-04-08 14:58:20 -0700
committer  Bryan Newbold <bnewbold@robocracy.org>  2019-09-20 11:21:10 -0700
commit     47a90c135b06c18d38a1c92384ff474cb262d5d8 (patch)
tree       4f6290d7725dbb310a4a807af226c917e34c34e4 /python/fatcat_tools/harvest/doi_registrars.py
parent     dcc6c34af5b57d3d44bbf0a08002e2a33046ad99 (diff)
fixes to confluent-kafka harvesters
Diffstat (limited to 'python/fatcat_tools/harvest/doi_registrars.py')
-rw-r--r--  python/fatcat_tools/harvest/doi_registrars.py | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/python/fatcat_tools/harvest/doi_registrars.py b/python/fatcat_tools/harvest/doi_registrars.py
index 1483266c..a5d9a04d 100644
--- a/python/fatcat_tools/harvest/doi_registrars.py
+++ b/python/fatcat_tools/harvest/doi_registrars.py
@@ -57,8 +57,9 @@ class HarvestCrossrefWorker:
         self.kafka_config = {
             'bootstrap.servers': kafka_hosts,
             'delivery.report.only.error': True,
-            'default.topic.config':
-                {'request.required.acks': 'all'},
+            'default.topic.config': {
+                'request.required.acks': 'all',
+            },
         }
 
         self.state = HarvestState(start_date, end_date)
@@ -86,15 +87,15 @@ class HarvestCrossrefWorker:
     def extract_key(self, obj):
         return obj['DOI'].encode('utf-8')
 
-    def kafka_produce_delivery_callback(err, msg):
-        if err is not None:
-            print("Kafka producer delivery error: {}".format(err))
-            print("Bailing out...")
-            # TODO: should it be sys.exit(-1)?
-            raise KafkaException(err)
-
     def fetch_date(self, date):
 
+        def fail_fast(err, msg):
+            if err is not None:
+                print("Kafka producer delivery error: {}".format(err))
+                print("Bailing out...")
+                # TODO: should it be sys.exit(-1)?
+                raise KafkaException(err)
+
         producer = Producer(self.kafka_config)
 
         date_str = date.isoformat()
@@ -125,7 +126,7 @@ class HarvestCrossrefWorker:
                 self.produce_topic,
                 json.dumps(work).encode('utf-8'),
                 key=self.extract_key(work),
-                on_delivery=self.kafka_produce_delivery_callback)
+                on_delivery=fail_fast)
             producer.poll(0)
             if len(items) < self.api_batch_size:
                 break
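
For reference, a minimal sketch of the producer pattern this patch settles on, using the confluent-kafka Python client. The broker address, the topic argument, and the produce_batch wrapper are illustrative assumptions, not code from the harvester:

# Sketch of the delivery-callback pattern adopted in this patch (confluent-kafka).
# kafka_hosts and topic are placeholder values; produce_batch is a hypothetical
# wrapper, not a function from the harvester itself.
import json

from confluent_kafka import KafkaException, Producer

def produce_batch(items, topic, kafka_hosts='localhost:9092'):

    def fail_fast(err, msg):
        # Delivery report callback: runs from poll()/flush(). Raising here
        # aborts the harvest instead of silently dropping failed messages.
        if err is not None:
            print("Kafka producer delivery error: {}".format(err))
            raise KafkaException(err)

    producer = Producer({
        'bootstrap.servers': kafka_hosts,
        'delivery.report.only.error': True,
        'default.topic.config': {
            'request.required.acks': 'all',
        },
    })
    for item in items:
        producer.produce(
            topic,
            json.dumps(item).encode('utf-8'),
            key=item['DOI'].encode('utf-8'),
            on_delivery=fail_fast)
        # Serve delivery callbacks without blocking.
        producer.poll(0)
    # Wait for all outstanding deliveries; fail_fast raises on any error.
    producer.flush()

Moving the callback into fetch_date as a plain closure fixes the signature bug in the removed code: kafka_produce_delivery_callback was declared without self, so passing the bound method self.kafka_produce_delivery_callback as on_delivery would have invoked a two-parameter function with three arguments. Note also that delivery callbacks only run during poll() or flush(), which is why the loop calls producer.poll(0) after each produce().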