From 4eab53da4b89d0ef4d90140f9429a3bdfcc7761e Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Fri, 18 Jan 2019 17:28:59 -0800
Subject: update import README with times

---
 python/README_import.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/README_import.md b/python/README_import.md
index 9dda725d..2465940b 100644
--- a/python/README_import.md
+++ b/python/README_import.md
@@ -56,13 +56,14 @@ Usually 24 hours or so on fast production machine.
 
 ## Matched
 
-Unknown speed!
+These each take 2-4 hours:
 
     # No file update for the first import...
-    zcat /srv/fatcat/datasets/ia_papers_manifest_2018-01-25.matched.json.gz | pv -l | time parallel -j12 --round-robin --pipe ./fatcat_import.py matched --no-file-updates -
+    time zcat /srv/fatcat/datasets/ia_papers_manifest_2018-01-25.matched.json.gz | pv -l | time parallel -j12 --round-robin --pipe ./fatcat_import.py matched --no-file-updates -
 
     # ... but do on the second
     zcat /srv/fatcat/datasets/2018-08-27-2352.17-matchcrossref.insertable.json.gz | pv -l | time parallel -j12 --round-robin --pipe ./fatcat_import.py matched -
 
     # GROBID extracted (release+file)
     time zcat /srv/fatcat/datasets/2018-09-23-0405.30-dumpgrobidmetainsertable.longtail_join.filtered.tsv.gz | pv -l | time parallel -j12 --round-robin --pipe ./fatcat_import.py grobid-metadata -
+
-- 
cgit v1.2.3