From 750b5d4c53d1075ddd31c3357dd5f690eb5951e0 Mon Sep 17 00:00:00 2001
From: Martin Czygan
Date: Thu, 12 Mar 2020 15:12:22 +0100
Subject: topics: add pubmed ftp topic

PubmedFTPWorker replaced OAI recently. This documents the new topic.
---
 kafka/topics.md | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'kafka')

diff --git a/kafka/topics.md b/kafka/topics.md
index 0ce8610..9cd43bd 100644
--- a/kafka/topics.md
+++ b/kafka/topics.md
@@ -55,8 +55,15 @@ retention (on both a size and time basis).
       => ~1TB capacity; 8x crossref partitions, 4x datacite
       => key compaction possible
 
+    fatcat-ENV.ftp-pubmed
+      => new citations from FTP server, from: ftp://ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/
+      => raw XML, one record per message (PubmedArticle, up to 25k records/day and 650MB/day)
+      => key: PMID
+      => key compaction possible
+
     fatcat-ENV.api-crossref-state
     fatcat-ENV.api-datacite-state
+    fatcat-ENV.ftp-pubmed-state
     fatcat-ENV.oaipmh-pubmed-state
     fatcat-ENV.oaipmh-arxiv-state
     fatcat-ENV.oaipmh-doaj-journals-state (DISABLED)
@@ -135,11 +142,12 @@ exists`; this seems safe, and the settings won't be over-ridden.
 
     ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 8 --topic fatcat-qa.api-crossref
     ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 8 --topic fatcat-qa.api-datacite --config cleanup.policy=compact
+    ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 8 --topic fatcat-qa.ftp-pubmed --config cleanup.policy=compact
     ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 1 --topic fatcat-qa.api-crossref-state
     ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 1 --topic fatcat-qa.api-datacite-state
+    ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 1 --topic fatcat-qa.ftp-pubmed-state
 
     ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 4 --topic fatcat-qa.oaipmh-pubmed
     ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 4 --topic fatcat-qa.oaipmh-arxiv
     ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 1 --topic fatcat-qa.oaipmh-pubmed-state
     ./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 2 --partitions 1 --topic fatcat-qa.oaipmh-arxiv-state
-
-- 
cgit v1.2.3