diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-12-26 12:03:59 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-12-26 12:03:59 -0800 |
commit | 191aca7f4e75d8c659b81cdb66b7c15f1a6bc1b1 (patch) | |
tree | 2091dd727c821c4a690f4224f9b69ad8f8f8e5ab | |
parent | 9185e69f70a1085045d6e91295be73595b55291c (diff) | |
download | chocula-191aca7f4e75d8c659b81cdb66b7c15f1a6bc1b1.tar.gz chocula-191aca7f4e75d8c659b81cdb66b7c15f1a6bc1b1.zip |
update URL crawl status snapshot
-rwxr-xr-x | chocula.py | 2 | ||||
-rwxr-xr-x | data/fetch.sh | 5 |
2 files changed, 2 insertions, 5 deletions
```diff
@@ -83,7 +83,7 @@ WIKIDATA_SPARQL_FILE = 'data/wikidata_journals_sparql.2019-12-20.tsv'
 OPENAPC_FILE = 'data/apc_de.2019-12-20.csv'
 FATCAT_CONTAINER_FILE = 'data/container_export.2019-12-13.json'
-IA_CRAWL_FILE = 'data/url_status.2019-07-31.partial-ia.json'
+IA_CRAWL_FILE = 'data/url_status.20191223.json'
 FATCAT_STATS_FILE = 'data/container_stats.20191213.json'
diff --git a/data/fetch.sh b/data/fetch.sh
index 93544ff..6d261fe 100755
--- a/data/fetch.sh
+++ b/data/fetch.sh
@@ -39,11 +39,8 @@ wget -c https://archive.org/download/ISSN-GOLD-OA-3/ISSN_Gold-OA_3.0.csv
 wget -c https://archive.org/download/openapc-dataset/apc_de.2019-12-20.csv
 wget -c https://archive.org/download/wikidata-journal-metadata/wikidata_journals_sparql.2019-12-20.tsv
-#wget -c https://archive.org/download/chocula-journal-counts/url_status.2019-07-31.partial-ia.json
-#wget -c https://archive.org/download/chocula-journal-counts/url_status.2019-07-31.partial-robocracy.json
-
 wget -c https://archive.org/download/fatcat_bulk_exports_2019-12-13/container_export.json.gz
 zcat container_export.json.gz > container_export.2019-12-13.json
 wget -c https://archive.org/download/fatcat_bulk_exports_2019-12-13/container_stats.20191213.json
-wget -c https://archive.org/download/chocula-journal-counts/url_status.2019-07-31.partial-ia.json
+wget -c https://archive.org/download/chocula-journal-counts/url_status.20191223.json
```