summary refs log tree commit diff stats
path: root/python
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2020-11-20 13:28:37 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2020-11-20 13:28:37 -0800
commit 0138ae0157f70edefb9670f67e3c92e861e8dd35 (patch)
tree 0815b49fc347386a09057883c5d9964e80d4c368 /python
parent 4e0f8bd7796eaa419490c082bbf92558a39c0718 (diff)
download fatcat-0138ae0157f70edefb9670f67e3c92e861e8dd35.tar.gz
fatcat-0138ae0157f70edefb9670f67e3c92e861e8dd35.zip
DOAJ: update importer README with example invocation
Diffstat (limited to 'python')
-rw-r--r-- python/README_import.md | 7
1 files changed, 7 insertions, 0 deletions
diff --git a/python/README_import.md b/python/README_import.md
index 65c08f8b..71b15eee 100644
--- a/python/README_import.md
+++ b/python/README_import.md
@@ -126,3 +126,10 @@ Run import in parallel:
     zcat /srv/fatcat/datasets/crossref-pre-1923-scholarly-works.matched.json.gz | time parallel -j12 --round-robin --pipe ./fatcat_import.py matched - --default-mime 'application/pdf'
+## DOAJ
+
+Takes a few hours.
+
+    export FATCAT_API_AUTH_TOKEN=... (FATCAT_AUTH_WORKER_DOAJ)
+
+    zcat /srv/fatcat/datasets/doaj_article_data_2020-11-13_all.json.gz | pv -l | parallel -j12 --round-robin --pipe ./fatcat_import.py doaj-article --issn-map-file /srv/fatcat/datasets/ISSN-to-ISSN-L.txt -