aboutsummaryrefslogtreecommitdiffstats
path: root/kafka
diff options
context:
space:
mode:
Diffstat (limited to 'kafka')
-rw-r--r--kafka/topics.md20
1 files changed, 20 insertions, 0 deletions
diff --git a/kafka/topics.md b/kafka/topics.md
index fa1bd6d..06faf8e 100644
--- a/kafka/topics.md
+++ b/kafka/topics.md
@@ -122,6 +122,22 @@ retention (on both a size and time basis).
fatcat-ENV.file-updates
=> key: fcid
=> 4x partitions
+ fatcat-ENV.work-ident-updates
+ => work identifiers that have been updated and need re-indexing (e.g., in scholar)
+ => 6x partitions
+ => key: doc ident ("work_{ident}")
+ => key compaction possible; long retention
+
+ scholar-ENV.sim-updates
+ => 6x partitions
+ => key: "sim_item_{}"
+ => key compaction possible; long retention
+ scholar-ENV.update-docs
+ => 12x partitions
+ => key: scholar doc identifier
+ => gzip compression
+ => key compaction possible
+ => short time-based retention (about 3 months; retention.ms=7889400000)
### Deprecated/Unused Topics
@@ -169,6 +185,7 @@ exists`; this seems safe, and the settings won't be over-ridden.
./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 8 --topic fatcat-qa.work-updates
./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 4 --topic fatcat-qa.file-updates
./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 4 --topic fatcat-qa.container-updates
+ ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 6 --topic fatcat-qa.work-ident-updates
./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 8 --topic fatcat-qa.api-crossref
./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 8 --topic fatcat-qa.api-datacite --config cleanup.policy=compact
@@ -187,6 +204,9 @@ exists`; this seems safe, and the settings won't be over-ridden.
./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 12 --topic sandcrawler-qa.pdf-thumbnail-180px-jpg --config cleanup.policy=compact
./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 24 --topic sandcrawler-qa.unextracted
+ ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 6 --topic scholar-qa.sim-updates
+ ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 12 --topic scholar-qa.update-docs --config compression.type=gzip --config cleanup.policy=compact --config retention.ms=7889400000
+
./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 6 --topic sandcrawler-qa.xml-doc --config compression.type=gzip --config cleanup.policy=compact
./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 6 --topic sandcrawler-qa.html-teixml --config compression.type=gzip --config cleanup.policy=compact