From c0af75e31983852b15e430260dd67f220f31981a Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 10 Dec 2019 10:20:21 -0800 Subject: add another ingest request source to whitelist --- python/fatcat_tools/importers/ingest.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'python/fatcat_tools') diff --git a/python/fatcat_tools/importers/ingest.py b/python/fatcat_tools/importers/ingest.py index 7dad13ce..deb4ef51 100644 --- a/python/fatcat_tools/importers/ingest.py +++ b/python/fatcat_tools/importers/ingest.py @@ -28,13 +28,16 @@ class IngestFileResultImporter(EntityImporter): print("Requiring GROBID status == 200") else: print("NOT checking GROBID success") - self.ingest_request_source_whitelist = ['fatcat-changelog'] + self.ingest_request_source_whitelist = [ + 'fatcat-changelog', + 'fatcat-ingest-container', + ] if kwargs.get('skip_source_whitelist', False): self.ingest_request_source_whitelist = [] def want(self, row): """ - Logic here probably needs work: + Logic here probably needs work (TODO): - Direct ingests via DOI from fatcat-changelog should probably go through regardless of GROBID status -- cgit v1.2.3 From a7736d91665f6a98090cd448d02f1542aec6c180 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 10 Dec 2019 10:21:21 -0800 Subject: factor out some basic kafka helpers --- python/fatcat_tools/__init__.py | 1 + python/fatcat_tools/kafka.py | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 python/fatcat_tools/kafka.py (limited to 'python/fatcat_tools') diff --git a/python/fatcat_tools/__init__.py b/python/fatcat_tools/__init__.py index f2798f0b..78c5c90b 100644 --- a/python/fatcat_tools/__init__.py +++ b/python/fatcat_tools/__init__.py @@ -2,3 +2,4 @@ from .api_auth import authenticated_api, public_api from .fcid import fcid2uuid, uuid2fcid from .transforms import * +from .kafka import simple_kafka_producer, kafka_fail_fast diff --git a/python/fatcat_tools/kafka.py b/python/fatcat_tools/kafka.py new file mode 100644 index 00000000..53b62a37 --- /dev/null +++ b/python/fatcat_tools/kafka.py @@ -0,0 +1,22 @@ + +from confluent_kafka import Consumer, Producer, KafkaException + + +def kafka_fail_fast(err, msg): + if err is not None: + print("Kafka producer delivery error: {}".format(err)) + print("Bailing out...") + # TODO: should it be sys.exit(-1)? + raise KafkaException(err) + +def simple_kafka_producer(kafka_hosts): + + kafka_config = { + 'bootstrap.servers': kafka_hosts, + 'message.max.bytes': 20000000, # ~20 MBytes; broker-side max is ~50 MBytes + 'delivery.report.only.error': True, + 'default.topic.config': { + 'request.required.acks': -1, + }, + } + return Producer(kafka_config) -- cgit v1.2.3