diff options
Diffstat (limited to 'python/README_import.md')
-rw-r--r-- | python/README_import.md | 11 |
1 files changed, 11 insertions, 0 deletions
diff --git a/python/README_import.md b/python/README_import.md index 71b15eee..6853a4d7 100644 --- a/python/README_import.md +++ b/python/README_import.md @@ -133,3 +133,14 @@ Takes a few hours. export FATCAT_API_AUTH_TOKEN=... (FATCAT_AUTH_WORKER_DOAJ) zcat /srv/fatcat/datasets/doaj_article_data_2020-11-13_all.json.gz | pv -l | parallel -j12 --round-robin --pipe ./fatcat_import.py doaj-article --issn-map-file /srv/fatcat/datasets/ISSN-to-ISSN-L.txt - + +## dblp + +See `extra/dblp/README.md` for notes about first importing container metadata +and getting a TSV mapping file to help with import. This is needed because +there is not (yet) a lookup mechanism for `dblp_prefix` as an identifier of +container entities. + + export FATCAT_AUTH_WORKER_DBLP=... + ./fatcat_import.py dblp-release --dblp-container-map-file /data/dblp/all_dblp_containers.tsv /data/dblp/dblp.xml + |