From 6a00a6f4ac3a2d1f029bae6402d1dd117a58416e Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 30 Jul 2019 19:34:52 -0700 Subject: chocula: updated fetches, new ISSN-L and DOAJ files --- chocula.py | 6 +++--- data/fetch.sh | 11 +++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/chocula.py b/chocula.py index bbec5c6..6049bb5 100755 --- a/chocula.py +++ b/chocula.py @@ -56,13 +56,13 @@ import pycountry ################### File Config -ISSNL_FILE = 'data/20190220.ISSN-to-ISSN-L.txt' +ISSNL_FILE = 'data/20190730.ISSN-to-ISSN-L.txt' ENTREZ_FILE = 'data/entrez-journals.csv' ROAD_FILE = 'data/road-2018-01-24.tsv' ROAD_DATE = '2018-01-24' -DOAJ_FILE = 'data/doaj_20190124.csv' -DOAJ_DATE = '2019-01-24' +DOAJ_FILE = 'data/journalcsv__doaj_20190731_0130_utf8.csv' +DOAJ_DATE = '2019-07-31' CROSSREF_FILE = 'data/doi_titles_file_2019-01-24.csv' SHERPA_ROMEO_JOURNAL_FILE = 'data/romeo-journals.csv' SHERPA_ROMEO_POLICY_FILE = 'data/romeo-policies.csv' diff --git a/data/fetch.sh b/data/fetch.sh index 5d315a7..f55f934 100755 --- a/data/fetch.sh +++ b/data/fetch.sh @@ -9,7 +9,7 @@ wget -c https://archive.org/download/road-issn-2018/road-2018-01-24.tsv wget -c https://archive.org/download/doaj_bulk_metadata_2019/doaj_20190124.csv #wget -c https://archive.org/download/issn_issnl_mappings/20190129.ISSN-to-ISSN-L.txt -wget -c https://archive.org/download/issn_issnl_mappings/20190220.ISSN-to-ISSN-L.txt +wget -c https://archive.org/download/issn_issnl_mappings/20190730.ISSN-to-ISSN-L.txt wget -c https://archive.org/download/crossref_doi_titles/doi_titles_file_2019-01-24.csv @@ -38,7 +38,10 @@ wget -c https://archive.org/download/norwegian_register_journals/2018-03-02%20No #wget -c https://archive.org/download/open_academic_graph_2019/mag_venues.zip #unzip mag_venues.zip -#wget -c https://archive.org/download/szczepanski-oa-journal-list-2018/Jan-Szczepanski-Open-Access-Journals-2018_0.docx -#wget -c https://archive.org/download/szczepanski-oa-journal-list-2018/Jan-Szczepanski-Open-Access-Journals-2018_0.converted.csv - wget -c https://archive.org/download/szczepanski-oa-journal-list-2018/Jan-Szczepanski-Open-Access-Journals-2018_0.fixed.json + +wget -c https://archive.org/download/ezb_snapshot_2019-07-11/ezb_metadata.json +wget -c https://archive.org/download/ISSN-GOLD-OA-3/ISSN_Gold-OA_3.0.csv +wget -c https://archive.org/download/openapc-dataset/apc_de.2019-07-30.csv +wget -c https://archive.org/download/wikidata-journal-metadata/wikidata_journals_sparql.2019-07-30.tsv + -- cgit v1.2.3