aboutsummaryrefslogtreecommitdiffstats
path: root/data
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2019-01-24 13:06:09 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2019-01-24 13:06:09 -0800
commit c46d078d8e2a8ff6a6ced0530ab5a6293214f2d5 (patch)
tree c13132761bb5df456f00d4d039fc80200a4f0e6e /data
download chocula-c46d078d8e2a8ff6a6ced0530ab5a6293214f2d5.tar.gz
chocula-c46d078d8e2a8ff6a6ced0530ab5a6293214f2d5.zip
first-pass journal metadata munger
Diffstat (limited to 'data')
-rw-r--r-- data/.gitignore 3
-rwxr-xr-x data/fetch.sh 38
2 files changed, 41 insertions, 0 deletions
diff --git a/data/.gitignore b/data/.gitignore
new file mode 100644
index 0000000..c3f104a
--- /dev/null
+++ b/data/.gitignore
@@ -0,0 +1,3 @@
+*
+!.gitignore
+!fetch.sh
diff --git a/data/fetch.sh b/data/fetch.sh
new file mode 100755
index 0000000..b087d86
--- /dev/null
+++ b/data/fetch.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+# Download the raw data files from archive.org into the current directory.
+# wget -c resumes partial files; set -eu aborts on the first failed download.
+set -eu
+
+#wget -c https://archive.org/download/road-issn-2018/2018-01-24/export-issn.zip -O road-2018-01-24-export-issn.zip
+#unzip -n road-2018-01-24-export-issn.zip
+wget -c https://archive.org/download/road-issn-2018/road-2018-01-24.tsv
+
+wget -c https://archive.org/download/doaj_bulk_metadata_2019/doaj_20190124.csv
+
+wget -c https://archive.org/download/issn_issnl_mappings/20181203.ISSN-to-ISSN-L.txt
+
+wget -c https://archive.org/download/crossref_doi_titles/doi_titles_file_2019-01-24.csv
+
+#wget -c https://archive.org/download/ncbi-entrez-2019/J_Entrez.txt -O ncbi-entrez-2019.txt
+
+wget -c https://archive.org/download/moreo.info-2018-12-20/romeo-journals.csv
+wget -c https://archive.org/download/moreo.info-2018-12-20/romeo-policies.csv
+wget -c https://archive.org/download/moreo.info-2018-12-20/entrez-journals.csv
+
+wget -c https://archive.org/download/keepers_reports_201901/jstor_all-archive-titles.txt
+wget -c https://archive.org/download/keepers_reports_201901/JSTOR_Global_AllCurrentJournalTitles_2019-01-07.txt
+wget -c https://archive.org/download/keepers_reports_201901/JSTOR_Global_EarlyJournalContent_2017-06-08.txt
+wget -c https://archive.org/download/keepers_reports_201901/kbart_CLOCKSS.txt
+wget -c https://archive.org/download/keepers_reports_201901/kbart_LOCKSS.txt
+wget -c https://archive.org/download/keepers_reports_201901/Portico_Holding_KBart.txt
+
+wget -c https://archive.org/download/ia_journal_metadata_explore_2018-04-05/journal_homepage_results.partial.tsv
+
+#wget -c https://archive.org/download/SerialsOnMicrofilmCollection/MASTER%20TITLE_METADATA_LIST_20171019.xlsx
+wget -c https://archive.org/download/SerialsOnMicrofilmCollection/MASTER%20TITLE_METADATA_LIST_20171019.converted.csv
+
+wget -c https://archive.org/download/norwegian_register_journals/2018-03-02%20Norwegian%20Register%20for%20Scientific%20Journals%20and%20Series.csv
+
+#wget -c https://archive.org/download/szczepanski-oa-journal-list-2018/Jan-Szczepanski-Open-Access-Journals-2018_0.docx
+#wget -c https://archive.org/download/szczepanski-oa-journal-list-2018/Jan-Szczepanski-Open-Access-Journals-2018_0.converted.csv