aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2019-12-26 12:03:59 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2019-12-26 12:03:59 -0800
commit: 191aca7f4e75d8c659b81cdb66b7c15f1a6bc1b1 (patch)
tree: 2091dd727c821c4a690f4224f9b69ad8f8f8e5ab
parent: 9185e69f70a1085045d6e91295be73595b55291c (diff)
download: chocula-191aca7f4e75d8c659b81cdb66b7c15f1a6bc1b1.tar.gz
download: chocula-191aca7f4e75d8c659b81cdb66b7c15f1a6bc1b1.zip
update URL crawl status snapshot
-rwxr-xr-x chocula.py 2
-rwxr-xr-x data/fetch.sh 5
2 files changed, 2 insertions, 5 deletions
diff --git a/chocula.py b/chocula.py
index 597f06b..aa60e3a 100755
--- a/chocula.py
+++ b/chocula.py
@@ -83,7 +83,7 @@ WIKIDATA_SPARQL_FILE = 'data/wikidata_journals_sparql.2019-12-20.tsv'
OPENAPC_FILE = 'data/apc_de.2019-12-20.csv'
FATCAT_CONTAINER_FILE = 'data/container_export.2019-12-13.json'
-IA_CRAWL_FILE = 'data/url_status.2019-07-31.partial-ia.json'
+IA_CRAWL_FILE = 'data/url_status.20191223.json'
FATCAT_STATS_FILE = 'data/container_stats.20191213.json'
diff --git a/data/fetch.sh b/data/fetch.sh
index 93544ff..6d261fe 100755
--- a/data/fetch.sh
+++ b/data/fetch.sh
@@ -39,11 +39,8 @@ wget -c https://archive.org/download/ISSN-GOLD-OA-3/ISSN_Gold-OA_3.0.csv
wget -c https://archive.org/download/openapc-dataset/apc_de.2019-12-20.csv
wget -c https://archive.org/download/wikidata-journal-metadata/wikidata_journals_sparql.2019-12-20.tsv
-#wget -c https://archive.org/download/chocula-journal-counts/url_status.2019-07-31.partial-ia.json
-#wget -c https://archive.org/download/chocula-journal-counts/url_status.2019-07-31.partial-robocracy.json
-
wget -c https://archive.org/download/fatcat_bulk_exports_2019-12-13/container_export.json.gz
zcat container_export.json.gz > container_export.2019-12-13.json
wget -c https://archive.org/download/fatcat_bulk_exports_2019-12-13/container_stats.20191213.json
-wget -c https://archive.org/download/chocula-journal-counts/url_status.2019-07-31.partial-ia.json
+wget -c https://archive.org/download/chocula-journal-counts/url_status.20191223.json