From 0138ae0157f70edefb9670f67e3c92e861e8dd35 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 20 Nov 2020 13:28:37 -0800 Subject: DOAJ: update importer README with example invocation --- python/README_import.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/README_import.md b/python/README_import.md index 65c08f8b..71b15eee 100644 --- a/python/README_import.md +++ b/python/README_import.md @@ -126,3 +126,10 @@ Run import in parallel: zcat /srv/fatcat/datasets/crossref-pre-1923-scholarly-works.matched.json.gz | time parallel -j12 --round-robin --pipe ./fatcat_import.py matched - --default-mime 'application/pdf' +## DOAJ + +Takes a few hours. + + export FATCAT_API_AUTH_TOKEN=... # use the FATCAT_AUTH_WORKER_DOAJ token + + zcat /srv/fatcat/datasets/doaj_article_data_2020-11-13_all.json.gz | pv -l | parallel -j12 --round-robin --pipe ./fatcat_import.py doaj-article --issn-map-file /srv/fatcat/datasets/ISSN-to-ISSN-L.txt - -- cgit v1.2.3