diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2018-09-28 18:01:21 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-09-28 18:01:21 -0700 |
commit | 222d7f5488f30c46333e5298fd37587994e58e58 (patch) | |
tree | 0d76219844d9b09941c55b8f51ac32be37364fe7 /python | |
parent | 58b1f6fbbf42c3d64cbce1e9f250302a2be1f6da (diff) | |
download | fatcat-222d7f5488f30c46333e5298fd37587994e58e58.tar.gz fatcat-222d7f5488f30c46333e5298fd37587994e58e58.zip |
update README_import with GROBID command
Diffstat (limited to 'python')
-rw-r--r-- | python/README_import.md | 2 |
1 file changed, 2 insertions, 0 deletions
diff --git a/python/README_import.md b/python/README_import.md
index 38064a97..d3bbaddd 100644
--- a/python/README_import.md
+++ b/python/README_import.md
@@ -61,3 +61,5 @@ Unknown speed!
 # ... but do on the second
 zcat /srv/fatcat/datasets/2018-08-27-2352.17-matchcrossref.insertable.json.gz | pv -l | time parallel -j12 --round-robin --pipe ./fatcat_import.py import-matched -
+ # GROBID extracted (release+file)
+ time zcat /srv/fatcat/datasets/2018-09-23-0405.30-dumpgrobidmetainsertable.longtail_join.filtered.tsv.gz | pv -l | time parallel -j12 --round-robin --pipe ./fatcat_import.py import-grobid-metadata -