From f41ba2dc9ab8e5d4c9497d8845ab1c16e08ccd05 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 25 Sep 2018 15:36:27 -0700 Subject: better default file names --- extra/partition_dumps/README.md | 14 +++++++------- rust/README.export.md | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/extra/partition_dumps/README.md b/extra/partition_dumps/README.md index 2e26a41b..5e42ff48 100644 --- a/extra/partition_dumps/README.md +++ b/extra/partition_dumps/README.md @@ -5,9 +5,9 @@ journal/container. Example parititoning a sample by release type: - cat release_dump_expanded_sample.json | jq .release_type -r > release_dump_expanded_sample.release_type - cat release_dump_expanded_sample.release_type | sort | uniq -c | sort -nr > release_dump_expanded_sample.release_type.counts - cat release_dump_expanded_sample.json | paste release_dump_expanded_sample.release_type - | sort > out + cat release_export_expanded_sample.json | jq .release_type -r > release_export_expanded_sample.release_type + cat release_export_expanded_sample.release_type | sort | uniq -c | sort -nr > release_export_expanded_sample.release_type.counts + cat release_export_expanded_sample.json | paste release_export_expanded_sample.release_type - | sort > out More production-y example using ISSN-L: @@ -15,11 +15,11 @@ More production-y example using ISSN-L: rm -rf ./partitioned # it's a pretty huge sort, will need 300+ GB scratch space? this might not scale. - zcat release_dump_expanded.json.gz | jq .container.issnl -r > release_dump_expanded.issnl - zcat release_dump_expanded.json.gz | paste release_dump_expanded.issnl - | sort | ./partition_script.py + zcat release_export_expanded.json.gz | jq .container.issnl -r > release_export_expanded.issnl + zcat release_export_expanded.json.gz | paste release_export_expanded.issnl - | sort | ./partition_script.py # for verification/stats - cat release_dump_expanded.issnl | sort | uniq -c | sort -nr > release_dump_expanded.issnl.counts + cat release_export_expanded.issnl | sort | uniq -c | sort -nr > release_export_expanded.issnl.counts # cleanup - rm release_dump_expanded.issnl + rm release_export_expanded.issnl diff --git a/rust/README.export.md b/rust/README.export.md index 1b083c68..65c32b31 100644 --- a/rust/README.export.md +++ b/rust/README.export.md @@ -9,5 +9,5 @@ Then dump: Or, perhaps, in production: - cat /tmp/fatcat_ident_releases.tsv | ./target/release/fatcat-export release --expand files,container -j8 | pv | gzip > all_releases.json.gz + cat /tmp/fatcat_ident_releases.tsv | ./target/release/fatcat-export release --expand files,container -j8 | pv | gzip > release_export_expanded.json.gz -- cgit v1.2.3