aboutsummaryrefslogtreecommitdiffstats
path: root/extra/abbrev/parse_all_pages.sh
blob: 3d8e481b13ea5edcd1e3db6ea2f115d3c889fca5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#!/bin/bash
#
# Scrape abbreviations from https://www.library.caltech.edu/journal-title-abbreviations
#
# We may want these for reference snooping.

# Strict mode: stop on the first failing command, on use of an unset
# variable, and when any stage of a pipeline fails.
set -euo pipefail

# Refuse to start unless every external program invoked below can be
# resolved by the shell; report the first one that cannot and exit 1.
for tool in curl pup jq python; do
	if ! command -v "$tool" >/dev/null 2>&1; then
		echo "missing $tool" >&2
		exit 1
	fi
done

# Main loop: fetch the index page, keep every link whose href contains
# "help/WOS", and process those links one at a time.
#
# For each link the URL is logged to stderr, the page is downloaded, its
# <dl> elements are turned into JSON by pup, and every child of the first
# element is emitted by jq as one compact JSON document per line. Only
# lines mentioning "children" are passed on to parse_dls.py via stdin.
#
# parse_dls.py is looked up relative to the current working directory, so
# the script has to be started from the directory containing it.
#
# NOTE(review): grep exits non-zero when no line matches; together with
# the pipefail/errexit settings at the top of the file this ends the whole
# run at the first page that yields no matching line - confirm intended.
#
# -r keeps backslashes in an href literal. IFS is deliberately left at its
# default so surrounding whitespace is still trimmed from each href.
while read -r url; do
	echo >&2 "$url"
	# Pause between requests so the remote server is not hammered.
	sleep 1
	# The jq filter must be quoted: unquoted, "[0]" is a glob bracket
	# expression and the word is subject to pathname expansion.
	curl -sL "$url" |
		pup 'dl json{}' |
		jq -rc '.[0].children[]' |
		grep "children" |
		python parse_dls.py
done < <(curl -sL "https://www.library.caltech.edu/journal-title-abbreviations" |
	pup 'a attr{href}' |
	grep "help/WOS")