diff options
author | Bryan Newbold <bnewbold@archive.org> | 2022-07-07 13:19:40 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2022-07-07 13:19:40 -0700 |
commit | 8f85ab294eae50e31efa9e31bb0bca1bca76cf8b (patch) | |
tree | 6bf44043c7ff3c5087efbde48d0f4dc1903a8e47 | |
parent | bf1826f8e8d203f732cbdda008e0c5944cbdae60 (diff) | |
download | sandcrawler-8f85ab294eae50e31efa9e31bb0bca1bca76cf8b.tar.gz sandcrawler-8f85ab294eae50e31efa9e31bb0bca1bca76cf8b.zip |
ingest: targeted 2022-04 notes
-rw-r--r-- | notes/ingest/2022-04_targeted.md | 4 |
1 file changed, 3 insertions, 1 deletion
```diff
diff --git a/notes/ingest/2022-04_targeted.md b/notes/ingest/2022-04_targeted.md
index 89fe40a..23fd35f 100644
--- a/notes/ingest/2022-04_targeted.md
+++ b/notes/ingest/2022-04_targeted.md
@@ -138,5 +138,7 @@ TODO: are there any cases where we do a bulk ingest request, fail, and `terminal
 cd /srv/sandcrawler/src/python
 sudo su sandcrawler
 pipenv run ./scripts/ingestrequest_row2json.py /srv/sandcrawler/tasks/patch_ingest_request_$PATCHDATE.rows.json | pv -l > /srv/sandcrawler/tasks/patch_ingest_request_$PATCHDATE.ingest_request.json
+ => 4.84M 0:03:14 [24.9k/s]
- cat /srv/sandcrawler/tasks/patch_ingest_request_$PATCHDATE.ingest_request.json | rg -v "\\\\" | jq . -c | kafkacat -P -b wbgrp-svc263.us.archive.org -t sandcrawler-prod.ingest-file-requests-bulk -p -1
+ cat /srv/sandcrawler/tasks/patch_ingest_request_$PATCHDATE.ingest_request.json | rg -v "\\\\" | jq . -c | kafkacat -P -b wbgrp-svc350.us.archive.org -t sandcrawler-prod.ingest-file-requests-bulk -p -1
+ => started 2022-05-11
```