diff options
Diffstat (limited to 'python/README_import.md')
-rw-r--r-- | python/README_import.md | 12 |
1 files changed, 6 insertions, 6 deletions
diff --git a/python/README_import.md b/python/README_import.md index 0264610b..6334dbc6 100644 --- a/python/README_import.md +++ b/python/README_import.md @@ -36,7 +36,7 @@ the others: From CSV file: # See "start off with" command above - time ./fatcat_import.py import-issn /srv/fatcat/datasets/journal_extra_metadata.csv + time ./fatcat_import.py issn /srv/fatcat/datasets/journal_extra_metadata.csv Usually a couple minutes at most on fast production machine. @@ -44,23 +44,23 @@ Usually a couple minutes at most on fast production machine. Usually tens of minutes on fast production machine. - time parallel --bar --pipepart -j8 -a /srv/fatcat/datasets/public_profiles_1_2_json.all.json ./fatcat_import.py import-orcid - + time parallel --bar --pipepart -j8 -a /srv/fatcat/datasets/public_profiles_1_2_json.all.json ./fatcat_import.py orcid - ## Crossref Usually 24 hours or so on fast production machine. - time xzcat /srv/fatcat/datasets/crossref-works.2018-09-05.json.xz | time parallel -j20 --round-robin --pipe ./fatcat_import.py import-crossref - /srv/fatcat/datasets/20180216.ISSN-to-ISSN-L.txt /srv/fatcat/datasets/release_ids.ia_munge_20180908.sqlite3 + time xzcat /srv/fatcat/datasets/crossref-works.2018-09-05.json.xz | time parallel -j20 --round-robin --pipe ./fatcat_import.py crossref - /srv/fatcat/datasets/20180216.ISSN-to-ISSN-L.txt /srv/fatcat/datasets/release_ids.ia_munge_20180908.sqlite3 ## Matched Unknown speed! # No file update for the first import... - zcat /srv/fatcat/datasets/ia_papers_manifest_2018-01-25.matched.json.gz | pv -l | time parallel -j12 --round-robin --pipe ./fatcat_import.py import-matched --no-file-update - + zcat /srv/fatcat/datasets/ia_papers_manifest_2018-01-25.matched.json.gz | pv -l | time parallel -j12 --round-robin --pipe ./fatcat_import.py matched --no-file-update - # ... but do on the second - zcat /srv/fatcat/datasets/2018-08-27-2352.17-matchcrossref.insertable.json.gz | pv -l | time parallel -j12 --round-robin --pipe ./fatcat_import.py import-matched - + zcat /srv/fatcat/datasets/2018-08-27-2352.17-matchcrossref.insertable.json.gz | pv -l | time parallel -j12 --round-robin --pipe ./fatcat_import.py matched - # GROBID extracted (release+file) - time zcat /srv/fatcat/datasets/2018-09-23-0405.30-dumpgrobidmetainsertable.longtail_join.filtered.tsv.gz | pv -l | time parallel -j12 --round-robin --pipe ./fatcat_import.py import-grobid-metadata - + time zcat /srv/fatcat/datasets/2018-09-23-0405.30-dumpgrobidmetainsertable.longtail_join.filtered.tsv.gz | pv -l | time parallel -j12 --round-robin --pipe ./fatcat_import.py grobid-metadata - |