diff options
Diffstat (limited to 'kafka')
-rw-r--r-- | kafka/howto_rebalance.md | 3 | ||||
-rw-r--r-- | kafka/monitoring_commands.md | 4 | ||||
-rw-r--r-- | kafka/topics.md | 14 |
3 files changed, 14 insertions, 7 deletions
diff --git a/kafka/howto_rebalance.md b/kafka/howto_rebalance.md index d68b205..093740a 100644 --- a/kafka/howto_rebalance.md +++ b/kafka/howto_rebalance.md @@ -27,7 +27,8 @@ On a kafka broker, go to `/srv/kafka-broker/kafka-*/bin`, generate a plan, then inspect the output: ./kafka-reassign-partitions.sh --zookeeper localhost:2181 --broker-list "280,281,284,285,263" --topics-to-move-json-file /tmp/topics_to_move.json --generate > /tmp/reassignment-plan.json - cat /tmp/reassignment-plan.json | rg '^\{' | tail -n1 > /tmp/new-plan.json + cat /tmp/reassignment-plan.json | rg '^\{' | head -n1 | jq . > /tmp/old-plan.json + cat /tmp/reassignment-plan.json | rg '^\{' | tail -n1 | jq . > /tmp/new-plan.json cat /tmp/reassignment-plan.json | rg '^\{' | jq . If that looks good, start the rebalance: diff --git a/kafka/monitoring_commands.md b/kafka/monitoring_commands.md new file mode 100644 index 0000000..c0c330f --- /dev/null +++ b/kafka/monitoring_commands.md @@ -0,0 +1,4 @@ + + kafkacat -C -b wbgrp-svc284.us.archive.org:9092 -t sandcrawler-prod.ingest-file-results -o end | jq '[.status, .base_url]' -c + + kafkacat -C -b wbgrp-svc284.us.archive.org:9092 -t sandcrawler-prod.ingest-file-results -o end | jq '[.request.ingest_request_source, .status, .request.base_url, .terminal.terminal_url]' -c diff --git a/kafka/topics.md b/kafka/topics.md index 06faf8e..a699e16 100644 --- a/kafka/topics.md +++ b/kafka/topics.md @@ -25,7 +25,8 @@ retention (on both a size and time basis). => fewer partitions with batch mode, but still a bunch (24?) => key is sha1hex of PDF. enable time compaction (6 months?) - sandcrawler-ENV.ingest-file-requests + sandcrawler-ENV.ingest-file-requests-daily + => was ingest-file-requests previously, but renamed/rebalanced => ingest requests from multiple sources; mostly continuous or pseudo-interactive => schema is JSON; see ingest proposal for fields. small objects. => fewer partitions with batch mode, but still a bunch (24) @@ -35,6 +36,10 @@ retention (on both a size and time basis). => ingest requests from bulk crawl sources; background processing => same as ingest-file-requests + sandcrawler-ENV.ingest-file-requests-priority + => ingest requests from bulk crawl sources; background processing + => same as ingest-file-requests + sandcrawler-ENV.ingest-file-results => ingest requests from multiple sources => schema is JSON; see ingest proposal for fields. small objects. @@ -113,9 +118,6 @@ retention (on both a size and time basis). => v03 is newer v0.3.0 API schema (backwards incompatible) => key: fcid => 8x partitions - fatcat-ENV.work-updates - => key: fcid - => 8x partitions fatcat-ENV.container-updates => key: fcid => 4x partitions @@ -174,15 +176,15 @@ exists`; this seems safe, and the settings won't be over-ridden. ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 24 --topic sandcrawler-qa.ungrobided-pg ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 12 --topic sandcrawler-qa.grobid-output-pg --config compression.type=gzip --config cleanup.policy=compact - ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 24 --topic sandcrawler-qa.ingest-file-requests --config retention.ms=7889400000 --config cleanup.policy=delete + ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 24 --topic sandcrawler-qa.ingest-file-requests-daily --config retention.ms=7889400000 --config cleanup.policy=delete ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 12 --topic sandcrawler-qa.ingest-file-requests-bulk --config retention.ms=7889400000 --config cleanup.policy=delete + ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 6 --topic sandcrawler-qa.ingest-file-requests-priority --config retention.ms=7889400000 --config cleanup.policy=delete ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 6 --topic sandcrawler-qa.ingest-file-results ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 6 --topic sandcrawler-qa.pdftrio-output --config cleanup.policy=compact ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 1 --topic fatcat-qa.changelog ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 8 --topic fatcat-qa.release-updates-v03 - ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 8 --topic fatcat-qa.work-updates ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 4 --topic fatcat-qa.file-updates ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 4 --topic fatcat-qa.container-updates ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 6 --topic fatcat-qa.work-ident-updates |