From 519c7e77cf3a54b9620adef07fedac9b37a5f9f2 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 30 Sep 2021 15:31:03 -0700 Subject: default ingest request topic now '-daily'; configurable for ingest_tool.py --- python/example.env | 2 +- python/fatcat_ingest.py | 7 ++++++- python/fatcat_web/web_config.py | 2 +- python/fatcat_worker.py | 2 +- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/python/example.env b/python/example.env index 3a83d5b3..79cbe3ad 100644 --- a/python/example.env +++ b/python/example.env @@ -9,7 +9,7 @@ ELASTICSEARCH_CONTAINER_INDEX="fatcat_container" # for local dev use: #KAFKA_PIXY_ENDPOINT="http://localhost:19092" KAFKA_PIXY_ENDPOINT="" -KAFKA_SAVEPAPERNOW_TOPIC="sandcrawler-dev.ingest-file-requests" +KAFKA_SAVEPAPERNOW_TOPIC="sandcrawler-dev.ingest-file-requests-priority" GITLAB_CLIENT_ID="" GITLAB_CLIENT_SECRET="" IA_XAUTH_CLIENT_ID="" diff --git a/python/fatcat_ingest.py b/python/fatcat_ingest.py index 1a53a7dc..b62cb8a4 100755 --- a/python/fatcat_ingest.py +++ b/python/fatcat_ingest.py @@ -37,7 +37,10 @@ def _run_search_dump(args, search): print("=== THIS IS A DRY RUN ===") kafka_producer = None - ingest_file_request_topic = "sandcrawler-{}.ingest-file-requests".format(args.env) + if args.kafka_request_topic: + ingest_file_request_topic = args.kafka_request_topic + else: + ingest_file_request_topic = "sandcrawler-{}.ingest-file-requests-daily".format(args.env) if args.enqueue_kafka: print("Will send ingest requests to kafka topic: {}".format(ingest_file_request_topic), file=sys.stderr) kafka_producer = simple_kafka_producer(args.kafka_hosts) @@ -185,6 +188,8 @@ def main(): parser.add_argument('--kafka-hosts', default="localhost:9092", help="list of Kafka brokers (host/port) to use") + parser.add_argument('--kafka-request-topic', + help="exact Kafka ingest request topic to use") parser.add_argument('--elasticsearch-endpoint', default="https://search.fatcat.wiki", help="elasticsearch API. internal endpoint preferred, but public is default") diff --git a/python/fatcat_web/web_config.py b/python/fatcat_web/web_config.py index 98b89aea..f48a7b60 100644 --- a/python/fatcat_web/web_config.py +++ b/python/fatcat_web/web_config.py @@ -34,7 +34,7 @@ class Config(object): # for save-paper-now. set to None if not configured, so we don't display forms/links KAFKA_PIXY_ENDPOINT = os.environ.get("KAFKA_PIXY_ENDPOINT", default=None) or None - KAFKA_SAVEPAPERNOW_TOPIC = os.environ.get("KAFKA_SAVEPAPERNOW_TOPIC", default="sandcrawler-dev.ingest-file-requests") + KAFKA_SAVEPAPERNOW_TOPIC = os.environ.get("KAFKA_SAVEPAPERNOW_TOPIC", default="sandcrawler-dev.ingest-file-requests-priority") # for flask things, like session cookies FLASK_SECRET_KEY = os.environ.get("FLASK_SECRET_KEY", default=None) diff --git a/python/fatcat_worker.py b/python/fatcat_worker.py index 95f5024a..c3efb5f4 100755 --- a/python/fatcat_worker.py +++ b/python/fatcat_worker.py @@ -23,7 +23,7 @@ def run_entity_updates(args): file_topic = "fatcat-{}.file-updates".format(args.env) container_topic = "fatcat-{}.container-updates".format(args.env) work_ident_topic = "fatcat-{}.work-ident-updates".format(args.env) - ingest_file_request_topic = "sandcrawler-{}.ingest-file-requests".format(args.env) + ingest_file_request_topic = "sandcrawler-{}.ingest-file-requests-daily".format(args.env) worker = EntityUpdatesWorker(args.api, args.kafka_hosts, changelog_topic, release_topic=release_topic, -- cgit v1.2.3