diff options
Diffstat (limited to 'extra')
| -rw-r--r-- | extra/partition_dumps/README.md | 14 | 
1 files changed, 7 insertions, 7 deletions
| diff --git a/extra/partition_dumps/README.md b/extra/partition_dumps/README.md index 2e26a41b..5e42ff48 100644 --- a/extra/partition_dumps/README.md +++ b/extra/partition_dumps/README.md @@ -5,9 +5,9 @@ journal/container.  Example partitioning a sample by release type: -    cat release_dump_expanded_sample.json | jq .release_type -r > release_dump_expanded_sample.release_type -    cat release_dump_expanded_sample.release_type | sort | uniq -c | sort -nr > release_dump_expanded_sample.release_type.counts -    cat release_dump_expanded_sample.json | paste release_dump_expanded_sample.release_type - | sort > out +    cat release_export_expanded_sample.json | jq .release_type -r > release_export_expanded_sample.release_type +    cat release_export_expanded_sample.release_type | sort | uniq -c | sort -nr > release_export_expanded_sample.release_type.counts +    cat release_export_expanded_sample.json | paste release_export_expanded_sample.release_type - | sort > out  More production-y example using ISSN-L: @@ -15,11 +15,11 @@ More production-y example using ISSN-L:      rm -rf ./partitioned      # it's a pretty huge sort, will need 300+ GB scratch space? this might not scale. -    zcat release_dump_expanded.json.gz | jq .container.issnl -r > release_dump_expanded.issnl -    zcat release_dump_expanded.json.gz | paste release_dump_expanded.issnl - | sort  | ./partition_script.py +    zcat release_export_expanded.json.gz | jq .container.issnl -r > release_export_expanded.issnl +    zcat release_export_expanded.json.gz | paste release_export_expanded.issnl - | sort  | ./partition_script.py      # for verification/stats -    cat release_dump_expanded.issnl | sort | uniq -c | sort -nr > release_dump_expanded.issnl.counts +    cat release_export_expanded.issnl | sort | uniq -c | sort -nr > release_export_expanded.issnl.counts      # cleanup -    rm release_dump_expanded.issnl +    rm release_export_expanded.issnl | 
