From 92189ad99ae7f799377a0fcbb928e09ff1f82a79 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Thu, 24 Jan 2019 13:06:09 -0800
Subject: first-pass journal metadata munger

---
 extra/journal_metadata/data/.gitignore |  3 +++
 extra/journal_metadata/data/fetch.sh   | 38 ++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)
 create mode 100644 extra/journal_metadata/data/.gitignore
 create mode 100755 extra/journal_metadata/data/fetch.sh

(limited to 'extra/journal_metadata/data')

diff --git a/extra/journal_metadata/data/.gitignore b/extra/journal_metadata/data/.gitignore
new file mode 100644
index 00000000..c3f104a6
--- /dev/null
+++ b/extra/journal_metadata/data/.gitignore
@@ -0,0 +1,3 @@
+*
+!.gitignore
+!fetch.sh
diff --git a/extra/journal_metadata/data/fetch.sh b/extra/journal_metadata/data/fetch.sh
new file mode 100755
index 00000000..b087d864
--- /dev/null
+++ b/extra/journal_metadata/data/fetch.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+set -eu
+
+#wget -c https://archive.org/download/road-issn-2018/2018-01-24/export-issn.zip -O road-2018-01-24-export-issn.zip
+#unzip -n road-2018-01-24-export-issn.zip
+wget -c https://archive.org/download/road-issn-2018/road-2018-01-24.tsv
+
+wget -c https://archive.org/download/doaj_bulk_metadata_2019/doaj_20190124.csv
+
+wget -c https://archive.org/download/issn_issnl_mappings/20181203.ISSN-to-ISSN-L.txt
+
+wget -c https://archive.org/download/crossref_doi_titles/doi_titles_file_2019-01-24.csv
+
+#wget -c https://archive.org/download/ncbi-entrez-2019/J_Entrez.txt -O ncbi-entrez-2019.txt
+
+wget -c https://archive.org/download/moreo.info-2018-12-20/romeo-journals.csv
+wget -c https://archive.org/download/moreo.info-2018-12-20/romeo-policies.csv
+wget -c https://archive.org/download/moreo.info-2018-12-20/entrez-journals.csv
+
+wget -c https://archive.org/download/doaj_bulk_metadata_2019/doaj_20190124.csv
+
+wget -c https://archive.org/download/keepers_reports_201901/jstor_all-archive-titles.txt
+wget -c https://archive.org/download/keepers_reports_201901/JSTOR_Global_AllCurrentJournalTitles_2019-01-07.txt
+wget -c https://archive.org/download/keepers_reports_201901/JSTOR_Global_EarlyJournalContent_2017-06-08.txt
+wget -c https://archive.org/download/keepers_reports_201901/kbart_CLOCKSS.txt
+wget -c https://archive.org/download/keepers_reports_201901/kbart_LOCKSS.txt
+wget -c https://archive.org/download/keepers_reports_201901/Portico_Holding_KBart.txt
+
+wget -c https://archive.org/download/ia_journal_metadata_explore_2018-04-05/journal_homepage_results.partial.tsv
+
+#wget -c https://archive.org/download/SerialsOnMicrofilmCollection/MASTER%20TITLE_METADATA_LIST_20171019.xlsx
+wget -c https://archive.org/download/SerialsOnMicrofilmCollection/MASTER%20TITLE_METADATA_LIST_20171019.converted.csv
+
+wget -c https://archive.org/download/norwegian_register_journals/2018-03-02%20Norwegian%20Register%20for%20Scientific%20Journals%20and%20Series.csv
+
+#wget -c https://archive.org/download/szczepanski-oa-journal-list-2018/Jan-Szczepanski-Open-Access-Journals-2018_0.docx
+#wget -c https://archive.org/download/szczepanski-oa-journal-list-2018/Jan-Szczepanski-Open-Access-Journals-2018_0.converted.csv
-- 
cgit v1.2.3