blob: d2e24441516e5b0c3d024db563cef4c8b6119a33 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
|
#!/bin/bash
#
# Weekly re-ingest job:
#   1. run dump_reingest_weekly.sql against the `sandcrawler` database,
#   2. convert the rows file to ingest-request JSON with
#      ingestrequest_row2json.py,
#   3. publish a random sample of at most SAMPLE_SIZE requests to Kafka.
#
# The SQL file and the ../python directory are resolved relative to the
# current working directory, so the script must be started from the directory
# that holds dump_reingest_weekly.sql.
# NOTE(review): presumably the SQL script is what writes
# reingest_weekly_current.rows.json -- confirm against the .sql file.

set -e # fail on error
set -u # fail if variable not set in substitution
set -o pipefail # fail if part of a '|' command fails

readonly TASK_DIR=/srv/sandcrawler/tasks
readonly SAMPLE_SIZE=80000

sudo -u postgres psql sandcrawler < dump_reingest_weekly.sql

cd ../python
# The output redirection is performed by the invoking shell, not by the
# `sandcrawler` user that sudo switches to for the pipenv command.
sudo -u sandcrawler pipenv run \
  ./scripts/ingestrequest_row2json.py "${TASK_DIR}/reingest_weekly_current.rows.json" \
  > "${TASK_DIR}/reingest_weekly_current.json"

# Sample with `shuf -n` rather than `shuf | head -n`: head exits as soon as it
# has read its quota, which kills the still-writing shuf with SIGPIPE, and
# under `pipefail` that makes the whole script report failure even though the
# requests were published.
# NOTE(review): the topic name ends in "-daily" although this is the weekly
# job -- confirm that this is the intended topic.
shuf -n "${SAMPLE_SIZE}" "${TASK_DIR}/reingest_weekly_current.json" \
  | jq . -c \
  | kafkacat -P -b wbgrp-svc350.us.archive.org -t sandcrawler-prod.ingest-file-requests-daily -p -1
|