about | summary | refs | log | tree | commit | diff | stats
path: root/data
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2019-12-23 19:11:36 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2019-12-23 19:11:38 -0800
commit: faaefd2a2a998551b50b7de5c8e231d53b55882a (patch)
tree: 29831a6c152733b56e8c4f21f984e096b192b658 /data
parent: 55a00912586a2aea705687472578dc9e8486be5e (diff)
download: chocula-faaefd2a2a998551b50b7de5c8e231d53b55882a.tar.gz
download: chocula-faaefd2a2a998551b50b7de5c8e231d53b55882a.zip
update chocula input data files
Including updating fetch script, README links, and chocula.py path references.
Diffstat (limited to 'data')
-rwxr-xr-x data/fetch.sh 39
1 file changed, 16 insertions, 23 deletions
diff --git a/data/fetch.sh b/data/fetch.sh
index 182953d..d6b3bab 100755
--- a/data/fetch.sh
+++ b/data/fetch.sh
@@ -6,13 +6,11 @@ set -eu
#unzip -n road-2018-01-24-export-issn.zip
wget -c https://archive.org/download/road-issn-2018/road-2018-01-24.tsv
-#wget -c https://archive.org/download/doaj_bulk_metadata_2019/doaj_20190124.csv
-wget -c https://archive.org/download/doaj_bulk_metadata_2019/journalcsv__doaj_20190731_0130_utf8.csv
+wget -c https://archive.org/download/doaj_bulk_metadata_2019/journalcsv__doaj_20191221_0135_utf8.csv
-#wget -c https://archive.org/download/issn_issnl_mappings/20190129.ISSN-to-ISSN-L.txt
-wget -c https://archive.org/download/issn_issnl_mappings/20190730.ISSN-to-ISSN-L.txt
+wget -c https://archive.org/download/issn_issnl_mappings/20191220.ISSN-to-ISSN-L.txt
-wget -c https://archive.org/download/crossref_doi_titles/doi_titles_file_2019-08-17.csv
+wget -c https://archive.org/download/crossref_doi_titles/doi_titles_file_2019-12-20.csv
#wget -c https://archive.org/download/ncbi-entrez-2019/J_Entrez.txt -O ncbi-entrez-2019.txt
@@ -20,21 +18,16 @@ wget -c https://archive.org/download/moreo.info-2018-12-20/romeo-journals.csv
wget -c https://archive.org/download/moreo.info-2018-12-20/romeo-policies.csv
wget -c https://archive.org/download/moreo.info-2018-12-20/entrez-journals.csv
-wget -c https://archive.org/download/doaj_bulk_metadata_2019/doaj_20190124.csv
+wget -c https://archive.org/download/keepers_reports_201912/JSTOR_Global_AllArchiveTitles_2019-12-21.txt
+#wget -c https://archive.org/download/keepers_reports_201901/JSTOR_Global_AllCurrentJournalTitles_2019-01-07.txt
+#wget -c https://archive.org/download/keepers_reports_201901/JSTOR_Global_EarlyJournalContent_2017-06-08.txt
+wget -c https://archive.org/download/keepers_reports_201912/kbart_CLOCKSS.txt
+wget -c https://archive.org/download/keepers_reports_201912/kbart_LOCKSS.txt
+wget -c https://archive.org/download/keepers_reports_201912/Portico_Holding_KBart.txt
-wget -c https://archive.org/download/keepers_reports_201901/jstor_all-archive-titles.txt
-wget -c https://archive.org/download/keepers_reports_201901/JSTOR_Global_AllCurrentJournalTitles_2019-01-07.txt
-wget -c https://archive.org/download/keepers_reports_201901/JSTOR_Global_EarlyJournalContent_2017-06-08.txt
-wget -c https://archive.org/download/keepers_reports_201901/kbart_CLOCKSS.txt
-wget -c https://archive.org/download/keepers_reports_201901/kbart_LOCKSS.txt
-wget -c https://archive.org/download/keepers_reports_201901/Portico_Holding_KBart.txt
-
-wget -c https://archive.org/download/ia_journal_metadata_explore_2018-04-05/journal_homepage_results.partial.tsv
-
-#wget -c https://archive.org/download/SerialsOnMicrofilmCollection/MASTER%20TITLE_METADATA_LIST_20171019.xlsx
wget -c https://archive.org/download/SerialsOnMicrofilmCollection/MASTER%20TITLE_METADATA_LIST_20171019.converted.csv
-wget -c https://archive.org/download/norwegian_register_journals/2018-03-02%20Norwegian%20Register%20for%20Scientific%20Journals%20and%20Series.csv
+wget -c https://archive.org/download/norwegian_register_journals/2019-12-21%20Norwegian%20Register%20for%20Scientific%20Journals%20and%20Series.csv
#wget -c https://archive.org/download/open_academic_graph_2019/mag_venues.zip
#unzip mag_venues.zip
@@ -43,11 +36,11 @@ wget -c https://archive.org/download/szczepanski-oa-journal-list-2018/Jan-Szczep
wget -c https://archive.org/download/ezb_snapshot_2019-07-11/ezb_metadata.json
wget -c https://archive.org/download/ISSN-GOLD-OA-3/ISSN_Gold-OA_3.0.csv
-wget -c https://archive.org/download/openapc-dataset/apc_de.2019-07-30.csv
-wget -c https://archive.org/download/wikidata-journal-metadata/wikidata_journals_sparql.2019-07-30.tsv
+wget -c https://archive.org/download/openapc-dataset/apc_de.2019-12-20.csv
+wget -c https://archive.org/download/wikidata-journal-metadata/wikidata_journals_sparql.2019-12-20.tsv
-wget -c https://archive.org/download/chocula-journal-counts/url_status.2019-07-31.partial-ia.json
-wget -c https://archive.org/download/chocula-journal-counts/url_status.2019-07-31.partial-robocracy.json
+#wget -c https://archive.org/download/chocula-journal-counts/url_status.2019-07-31.partial-ia.json
+#wget -c https://archive.org/download/chocula-journal-counts/url_status.2019-07-31.partial-robocracy.json
-wget -c https://archive.org/download/fatcat_bulk_exports_2019-07-07/container_export.2019-09-03.json.gz
-zcat container_export.2019-09-03.json.gz > container_export.2019-09-03.json
+wget -c https://archive.org/download/fatcat_bulk_exports_2019-12-13/container_export.json.gz
+zcat container_export.json.gz > container_export.2019-12-13.json