# Ad-hoc statistics over a gzip-compressed citations dump.
#
# Each pipeline below decompresses $CITEFILE and summarises the `refs`
# array of the JSON records it contains. The line-based counts (wc -l, rg)
# assume one JSON record (article) per line -- TODO confirm against the
# producer of this file.
#
# Requires: zcat, jq, rg (ripgrep), sort, uniq, wc.
#
# NOTE: YYYYMMDD appears to be a placeholder for the dump date; substitute
# the real file name before running.
export CITEFILE="enwiki-YYYYMMDD-pages-articles.citations.json.gz"

# total number of articles processed
zcat "$CITEFILE" | wc -l

# articles with one or more refs
zcat "$CITEFILE" | rg '"CitationClass"' | wc -l

# total number of refs
zcat "$CITEFILE" \
  | jq -r '.refs[].CitationClass' \
  | wc -l

# refs by type
zcat "$CITEFILE" \
  | jq -r '.refs[].CitationClass' \
  | sort | uniq -c | sort -nr

# identifiers present
zcat "$CITEFILE" \
  | jq -r '.refs[] | select(.ID_list != null) | .ID_list | keys[]' \
  | sort | uniq -c | sort -nr

# refs with URL, by type
zcat "$CITEFILE" \
  | jq -r '.refs[] | select(.URL != null) | .CitationClass' \
  | sort | uniq -c | sort -nr

# refs with ArchiveURL, by type
zcat "$CITEFILE" \
  | jq -r '.refs[] | select(.ArchiveURL != null) | .CitationClass' \
  | sort | uniq -c | sort -nr