From 88751285e1759e8e415d4e2e9784b54051cf1fbb Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 24 Dec 2019 13:39:44 -0800 Subject: add stats and URL crawl status files --- chocula.py | 5 +++-- data/fetch.sh | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/chocula.py b/chocula.py index 7f7626b..597f06b 100755 --- a/chocula.py +++ b/chocula.py @@ -75,7 +75,6 @@ CLOCKSS_FILE = 'data/kbart_CLOCKSS.txt' PORTICO_FILE = 'data/Portico_Holding_KBart.txt' JSTOR_FILE = 'data/JSTOR_Global_AllArchiveTitles_2019-12-21.txt' SIM_FILE = 'data/MASTER TITLE_METADATA_LIST_20171019.converted.csv' -IA_CRAWL_FILE = 'data/url_status.2019-07-31.partial-ia.json' SZCZEPANSKI_DATE = '2018' SZCZEPANSKI_FILE = 'data/Jan-Szczepanski-Open-Access-Journals-2018_0.fixed.json' EZB_FILE = 'data/ezb_metadata.json' @@ -83,7 +82,9 @@ GOLD_OA_FILE = 'data/ISSN_Gold-OA_3.0.csv' WIKIDATA_SPARQL_FILE = 'data/wikidata_journals_sparql.2019-12-20.tsv' OPENAPC_FILE = 'data/apc_de.2019-12-20.csv' FATCAT_CONTAINER_FILE = 'data/container_export.2019-12-13.json' -FATCAT_STATS_FILE = 'data/container_stats.json' + +IA_CRAWL_FILE = 'data/url_status.2019-07-31.partial-ia.json' +FATCAT_STATS_FILE = 'data/container_stats.20191213.json' ################### Utilities diff --git a/data/fetch.sh b/data/fetch.sh index d6b3bab..93544ff 100755 --- a/data/fetch.sh +++ b/data/fetch.sh @@ -44,3 +44,6 @@ wget -c https://archive.org/download/wikidata-journal-metadata/wikidata_journals wget -c https://archive.org/download/fatcat_bulk_exports_2019-12-13/container_export.json.gz zcat container_export.json.gz > container_export.2019-12-13.json + +wget -c https://archive.org/download/fatcat_bulk_exports_2019-12-13/container_stats.20191213.json +wget -c https://archive.org/download/chocula-journal-counts/url_status.2019-07-31.partial-ia.json -- cgit v1.2.3