summaryrefslogtreecommitdiffstats
path: root/python/README_import.md
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2020-12-17 01:56:27 -0800
committerBryan Newbold <bnewbold@robocracy.org>2020-12-17 23:03:08 -0800
commit9451b3063c2d446748db74027c40c13ee69c24fb (patch)
treea62a4170469b9d8d9bfeec540a7e8c75f1a704a2 /python/README_import.md
parent58ff361eb481bee9d2ef7249f48f94729d2a830d (diff)
downloadfatcat-9451b3063c2d446748db74027c40c13ee69c24fb.tar.gz
fatcat-9451b3063c2d446748db74027c40c13ee69c24fb.zip
improve dblp release import
Diffstat (limited to 'python/README_import.md')
-rw-r--r--python/README_import.md11
1 files changed, 11 insertions, 0 deletions
diff --git a/python/README_import.md b/python/README_import.md
index 71b15eee..6853a4d7 100644
--- a/python/README_import.md
+++ b/python/README_import.md
@@ -133,3 +133,14 @@ Takes a few hours.
export FATCAT_API_AUTH_TOKEN=... (FATCAT_AUTH_WORKER_DOAJ)
zcat /srv/fatcat/datasets/doaj_article_data_2020-11-13_all.json.gz | pv -l | parallel -j12 --round-robin --pipe ./fatcat_import.py doaj-article --issn-map-file /srv/fatcat/datasets/ISSN-to-ISSN-L.txt -
+
+## dblp
+
+See `extra/dblp/README.md` for notes about first importing container metadata
+and getting a TSV mapping file to help with import. This is needed because
+there is not (yet) a lookup mechanism for `dblp_prefix` as an identifier of
+container entities.
+
+ export FATCAT_AUTH_WORKER_DBLP=...
+ ./fatcat_import.py dblp-release --dblp-container-map-file /data/dblp/all_dblp_containers.tsv /data/dblp/dblp.xml
+