about | summary | refs | log | tree | commit | diff | stats
path: root/notes/ingest
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2022-07-07 13:19:40 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2022-07-07 13:19:40 -0700
commit: 8f85ab294eae50e31efa9e31bb0bca1bca76cf8b (patch)
tree: 6bf44043c7ff3c5087efbde48d0f4dc1903a8e47 /notes/ingest
parent: bf1826f8e8d203f732cbdda008e0c5944cbdae60 (diff)
download: sandcrawler-8f85ab294eae50e31efa9e31bb0bca1bca76cf8b.tar.gz
sandcrawler-8f85ab294eae50e31efa9e31bb0bca1bca76cf8b.zip
ingest: targeted 2022-04 notes
Diffstat (limited to 'notes/ingest')
-rw-r--r-- notes/ingest/2022-04_targeted.md | 4
1 file changed, 3 insertions, 1 deletion
diff --git a/notes/ingest/2022-04_targeted.md b/notes/ingest/2022-04_targeted.md
index 89fe40a..23fd35f 100644
--- a/notes/ingest/2022-04_targeted.md
+++ b/notes/ingest/2022-04_targeted.md
@@ -138,5 +138,7 @@ TODO: are there any cases where we do a bulk ingest request, fail, and `terminal
cd /srv/sandcrawler/src/python
sudo su sandcrawler
pipenv run ./scripts/ingestrequest_row2json.py /srv/sandcrawler/tasks/patch_ingest_request_$PATCHDATE.rows.json | pv -l > /srv/sandcrawler/tasks/patch_ingest_request_$PATCHDATE.ingest_request.json
+ => 4.84M 0:03:14 [24.9k/s]
- cat /srv/sandcrawler/tasks/patch_ingest_request_$PATCHDATE.ingest_request.json | rg -v "\\\\" | jq . -c | kafkacat -P -b wbgrp-svc263.us.archive.org -t sandcrawler-prod.ingest-file-requests-bulk -p -1
+ cat /srv/sandcrawler/tasks/patch_ingest_request_$PATCHDATE.ingest_request.json | rg -v "\\\\" | jq . -c | kafkacat -P -b wbgrp-svc350.us.archive.org -t sandcrawler-prod.ingest-file-requests-bulk -p -1
+ => started 2022-05-11