From faaefd2a2a998551b50b7de5c8e231d53b55882a Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Mon, 23 Dec 2019 19:11:36 -0800
Subject: update chocula input data files

Including updating fetch script, README links, and chocula.py path references.
---
 data/fetch.sh | 39 ++++++++++++++++-----------------------
 1 file changed, 16 insertions(+), 23 deletions(-)

(limited to 'data')

diff --git a/data/fetch.sh b/data/fetch.sh
index 182953d..d6b3bab 100755
--- a/data/fetch.sh
+++ b/data/fetch.sh
@@ -6,13 +6,11 @@ set -eu
 #unzip -n road-2018-01-24-export-issn.zip
 wget -c https://archive.org/download/road-issn-2018/road-2018-01-24.tsv
 
-#wget -c https://archive.org/download/doaj_bulk_metadata_2019/doaj_20190124.csv
-wget -c https://archive.org/download/doaj_bulk_metadata_2019/journalcsv__doaj_20190731_0130_utf8.csv
+wget -c https://archive.org/download/doaj_bulk_metadata_2019/journalcsv__doaj_20191221_0135_utf8.csv
 
-#wget -c https://archive.org/download/issn_issnl_mappings/20190129.ISSN-to-ISSN-L.txt
-wget -c https://archive.org/download/issn_issnl_mappings/20190730.ISSN-to-ISSN-L.txt
+wget -c https://archive.org/download/issn_issnl_mappings/20191220.ISSN-to-ISSN-L.txt
 
-wget -c https://archive.org/download/crossref_doi_titles/doi_titles_file_2019-08-17.csv
+wget -c https://archive.org/download/crossref_doi_titles/doi_titles_file_2019-12-20.csv
 
 #wget -c https://archive.org/download/ncbi-entrez-2019/J_Entrez.txt -O ncbi-entrez-2019.txt
 
@@ -20,21 +18,16 @@ wget -c https://archive.org/download/moreo.info-2018-12-20/romeo-journals.csv
 wget -c https://archive.org/download/moreo.info-2018-12-20/romeo-policies.csv
 wget -c https://archive.org/download/moreo.info-2018-12-20/entrez-journals.csv
 
-wget -c https://archive.org/download/doaj_bulk_metadata_2019/doaj_20190124.csv
+wget -c https://archive.org/download/keepers_reports_201912/JSTOR_Global_AllArchiveTitles_2019-12-21.txt
+#wget -c https://archive.org/download/keepers_reports_201901/JSTOR_Global_AllCurrentJournalTitles_2019-01-07.txt
+#wget -c https://archive.org/download/keepers_reports_201901/JSTOR_Global_EarlyJournalContent_2017-06-08.txt
+wget -c https://archive.org/download/keepers_reports_201912/kbart_CLOCKSS.txt
+wget -c https://archive.org/download/keepers_reports_201912/kbart_LOCKSS.txt
+wget -c https://archive.org/download/keepers_reports_201912/Portico_Holding_KBart.txt
 
-wget -c https://archive.org/download/keepers_reports_201901/jstor_all-archive-titles.txt
-wget -c https://archive.org/download/keepers_reports_201901/JSTOR_Global_AllCurrentJournalTitles_2019-01-07.txt
-wget -c https://archive.org/download/keepers_reports_201901/JSTOR_Global_EarlyJournalContent_2017-06-08.txt
-wget -c https://archive.org/download/keepers_reports_201901/kbart_CLOCKSS.txt
-wget -c https://archive.org/download/keepers_reports_201901/kbart_LOCKSS.txt
-wget -c https://archive.org/download/keepers_reports_201901/Portico_Holding_KBart.txt
-
-wget -c https://archive.org/download/ia_journal_metadata_explore_2018-04-05/journal_homepage_results.partial.tsv
-
-#wget -c https://archive.org/download/SerialsOnMicrofilmCollection/MASTER%20TITLE_METADATA_LIST_20171019.xlsx
 wget -c https://archive.org/download/SerialsOnMicrofilmCollection/MASTER%20TITLE_METADATA_LIST_20171019.converted.csv
 
-wget -c https://archive.org/download/norwegian_register_journals/2018-03-02%20Norwegian%20Register%20for%20Scientific%20Journals%20and%20Series.csv
+wget -c https://archive.org/download/norwegian_register_journals/2019-12-21%20Norwegian%20Register%20for%20Scientific%20Journals%20and%20Series.csv
 
 #wget -c https://archive.org/download/open_academic_graph_2019/mag_venues.zip
 #unzip mag_venues.zip
@@ -43,11 +36,11 @@ wget -c https://archive.org/download/szczepanski-oa-journal-list-2018/Jan-Szczep
 wget -c https://archive.org/download/ezb_snapshot_2019-07-11/ezb_metadata.json
 wget -c https://archive.org/download/ISSN-GOLD-OA-3/ISSN_Gold-OA_3.0.csv
 
-wget -c https://archive.org/download/openapc-dataset/apc_de.2019-07-30.csv
-wget -c https://archive.org/download/wikidata-journal-metadata/wikidata_journals_sparql.2019-07-30.tsv
+wget -c https://archive.org/download/openapc-dataset/apc_de.2019-12-20.csv
+wget -c https://archive.org/download/wikidata-journal-metadata/wikidata_journals_sparql.2019-12-20.tsv
 
-wget -c https://archive.org/download/chocula-journal-counts/url_status.2019-07-31.partial-ia.json
-wget -c https://archive.org/download/chocula-journal-counts/url_status.2019-07-31.partial-robocracy.json
+#wget -c https://archive.org/download/chocula-journal-counts/url_status.2019-07-31.partial-ia.json
+#wget -c https://archive.org/download/chocula-journal-counts/url_status.2019-07-31.partial-robocracy.json
 
-wget -c https://archive.org/download/fatcat_bulk_exports_2019-07-07/container_export.2019-09-03.json.gz
-zcat container_export.2019-09-03.json.gz > container_export.2019-09-03.json
+wget -c https://archive.org/download/fatcat_bulk_exports_2019-12-13/container_export.json.gz
+zcat container_export.json.gz > container_export.2019-12-13.json
-- 
cgit v1.2.3