about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2019-12-24 13:39:44 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2019-12-24 13:39:44 -0800
commit: 88751285e1759e8e415d4e2e9784b54051cf1fbb (patch)
tree: 9b598303e786d839e8d7ff7a787d4de5cde69b9a
parent: e2ed93db0c3c6bb7824ea17204b746d33573e093 (diff)
download: chocula-88751285e1759e8e415d4e2e9784b54051cf1fbb.tar.gz
chocula-88751285e1759e8e415d4e2e9784b54051cf1fbb.zip
add stats and URL crawl status files
-rwxr-xr-x chocula.py 5
-rwxr-xr-x data/fetch.sh 3
2 files changed, 6 insertions, 2 deletions
diff --git a/chocula.py b/chocula.py
index 7f7626b..597f06b 100755
--- a/chocula.py
+++ b/chocula.py
@@ -75,7 +75,6 @@ CLOCKSS_FILE = 'data/kbart_CLOCKSS.txt'
PORTICO_FILE = 'data/Portico_Holding_KBart.txt'
JSTOR_FILE = 'data/JSTOR_Global_AllArchiveTitles_2019-12-21.txt'
SIM_FILE = 'data/MASTER TITLE_METADATA_LIST_20171019.converted.csv'
-IA_CRAWL_FILE = 'data/url_status.2019-07-31.partial-ia.json'
SZCZEPANSKI_DATE = '2018'
SZCZEPANSKI_FILE = 'data/Jan-Szczepanski-Open-Access-Journals-2018_0.fixed.json'
EZB_FILE = 'data/ezb_metadata.json'
@@ -83,7 +82,9 @@ GOLD_OA_FILE = 'data/ISSN_Gold-OA_3.0.csv'
WIKIDATA_SPARQL_FILE = 'data/wikidata_journals_sparql.2019-12-20.tsv'
OPENAPC_FILE = 'data/apc_de.2019-12-20.csv'
FATCAT_CONTAINER_FILE = 'data/container_export.2019-12-13.json'
-FATCAT_STATS_FILE = 'data/container_stats.json'
+
+IA_CRAWL_FILE = 'data/url_status.2019-07-31.partial-ia.json'
+FATCAT_STATS_FILE = 'data/container_stats.20191213.json'
################### Utilities
diff --git a/data/fetch.sh b/data/fetch.sh
index d6b3bab..93544ff 100755
--- a/data/fetch.sh
+++ b/data/fetch.sh
@@ -44,3 +44,6 @@ wget -c https://archive.org/download/wikidata-journal-metadata/wikidata_journals
wget -c https://archive.org/download/fatcat_bulk_exports_2019-12-13/container_export.json.gz
zcat container_export.json.gz > container_export.2019-12-13.json
+
+wget -c https://archive.org/download/fatcat_bulk_exports_2019-12-13/container_stats.20191213.json
+wget -c https://archive.org/download/chocula-journal-counts/url_status.2019-07-31.partial-ia.json