From c7687e259cd003b3737a8bd9dd1ae51bf1f15a1e Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 21 Jun 2018 18:24:18 -0700 Subject: update import numbers --- python/README_import.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/python/README_import.md b/python/README_import.md index 60f91cf2..7301d72e 100644 --- a/python/README_import.md +++ b/python/README_import.md @@ -25,9 +25,11 @@ From CSV file: time ./client.py import-issn /srv/datasets/journal_extra_metadata.csv +Pretty quick, a few minutes. + ## ORCID -Directly from compressed tarball: +Directly from compressed tarball; takes about 2 hours in production: tar xf /srv/datasets/public_profiles_API-2.0_2017_10_json.tar.gz -O | jq -c . | grep '"person":' | time parallel -j12 --pipe --round-robin ./client.py import-orcid - @@ -66,6 +68,14 @@ for ~9k files: => 203/second +For the full batch, on production machine with 12 threads, around 3.8 million records: + + 3550.76 user + 190.16 system + 1:40:01 elapsed + + => 644/second + ## Crossref From compressed: -- cgit v1.2.3