aboutsummaryrefslogtreecommitdiffstats
path: root/extra/sitemap/container_url_lists.sh
blob: fcc0f4b6f8c8f629cdc7d834c4a24f149aa011ca (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#!/usr/bin/env bash
#
# Build gzip-compressed URL lists of containers for sitemap generation.
#
# Usage: container_url_lists.sh <date> <container-export.json.gz>
#
#   <date>    label embedded in the output file names, eg 2020-08-19
#   <export>  gzip-compressed JSON-lines file; the "ident" field of each
#             record is turned into a https://fatcat.wiki/container/ URL
#
# Output (written to the current directory):
#   sitemap-containers-<date>-NNNNN.txt.gz, at most 20000 URLs per file.

set -e              # fail on error
set -u              # fail if variable not set in substitution
set -o pipefail     # fail if part of a '|' command fails

# Abort with a message on stderr when an argument is missing. The expansions
# are quoted so that a supplied value is never word-split or glob-expanded.
: "${1?' You did not supply a date argument'}"
: "${2?' You did not supply an input file (JSON gzip)'}"

# eg, 2020-08-19
DATE="$1"
# eg, container_export.json.gz
EXPORT_FILE_GZ="$2"

if [[ ! -f "$EXPORT_FILE_GZ" ]]; then
  # Diagnostics go to stderr; exit is a separate statement so it always runs
  # even if the write to stderr itself fails.
  echo "Input file not found: $EXPORT_FILE_GZ" >&2
  exit 1
fi

# Decompress the export, pull out each record's ident as a raw string, prefix
# it with the container URL and cut the stream into numbered 20000-line
# chunks (5-digit numeric suffix followed by ".txt").
zcat "$EXPORT_FILE_GZ" \
    | jq .ident -r \
    | awk '{print "https://fatcat.wiki/container/" $1 }' \
    | split --lines 20000 -d -a 5 --additional-suffix .txt - "sitemap-containers-${DATE}-"

# Compress every sitemap-containers-*.txt file in the current directory.
gzip -- sitemap-containers-*.txt