aboutsummaryrefslogtreecommitdiffstats
path: root/extra/sitemap/container_url_lists.sh
blob: 1a37c220414e7773075d2572d6ee56a52d1ea982 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#!/usr/bin/env bash
#
# Build gzip-compressed sitemap URL lists for containers.
#
# Usage: container_url_lists.sh DATE EXPORT_FILE_GZ
#   DATE            label embedded in the output file names, eg, 2020-08-19
#   EXPORT_FILE_GZ  gzip-compressed JSON export, eg, container_export.json.gz
#
# The `.ident` field of every JSON value in the export is turned into a
# https://fatcat.wiki/container/<ident> URL. The URLs are written to the
# current directory in chunks of 20000 lines named
# sitemap-containers-DATE-NNNNN.txt, and every sitemap-containers-*.txt file
# found there is then gzip-compressed.

set -e              # fail on error
set -u              # fail if variable not set in substitution
set -o pipefail     # fail if part of a '|' command fails

# Use ':?' (rather than '?') so that empty arguments are rejected as well as
# missing ones; an empty DATE would otherwise yield malformed file names.
: "${1:?' You did not supply a date argument'}"
: "${2:?' You did not supply an input file (JSON gzip)'}"
if [[ ! -f "$2" ]]; then
  # Diagnostics belong on stderr so they never mix with regular output.
  echo "Input file not found: $2" >&2
  exit 1
fi

# eg, 2020-08-19
DATE="$1"
# eg, container_export.json.gz
EXPORT_FILE_GZ="$2"

# TODO: remove stubs? only if we have releases?
# Expansions are quoted so paths and dates containing whitespace or glob
# characters are passed through intact; '--' stops option parsing for file
# names that begin with a dash.
zcat -- "$EXPORT_FILE_GZ" \
  | jq -r .ident \
  | awk '{print "https://fatcat.wiki/container/" $1 }' \
  | split --lines 20000 -d -a 5 --additional-suffix .txt - "sitemap-containers-${DATE}-"

gzip -- sitemap-containers-*.txt