about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-07-03 01:34:38 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-07-03 01:34:38 +0200
commit 8299755d7d415ae4084e822aa1b537a6d6706fb9 (patch)
tree 4e6a8c6e06ca48cce6b2afec357ab49252370ed1
parent f990bb747c48b2c6c672ac1c913cf15f38a9bf8a (diff)
download refcat-8299755d7d415ae4084e822aa1b537a6d6706fb9.tar.gz
refcat-8299755d7d415ae4084e822aa1b537a6d6706fb9.zip
tasks: compress tmp files
-rw-r--r-- python/refcat/tasks.py 42
1 file changed, 21 insertions, 21 deletions
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 48c4226..01f879b 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -457,7 +457,7 @@ class URLTabs(Refcat):
output = shellout("""
zstdcat -T0 {input} |
skate-map -m ru -skip-on-empty 3 |
- LC_ALL=C sort -T {tmpdir} -k3,3 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k3,3 -S25% --compress-program=zstd |
zstd -T0 -c > {output}
""",
n=self.n,
@@ -480,7 +480,7 @@ class URLTabsCleaned(Refcat):
output = shellout("""
zstdcat -T0 {input} |
skate-cleanup -c url -allow http,https -X -B -S -f 3 |
- LC_ALL=C sort -T {tmpdir} -k3,3 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k3,3 -S25% --compress-program=zstd |
zstd -T0 -c > {output}
""",
n=self.n,
@@ -504,7 +504,7 @@ class URLList(Refcat):
zstdcat -T0 {input} |
cut -f 3 |
skate-cleanup -X -c url -B -S -f 1 |
- LC_ALL=C sort -u -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -u -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
LC_ALL=C grep -E '^https?://' |
zstd -T0 -c > {output}
""",
@@ -535,7 +535,7 @@ class RefsDOI(Refcat):
zstdcat -T0 {input} |
skate-map -m ff -x biblio.doi -skip-on-empty 1 |
skate-cleanup -S -c doi -f 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
zstd -T0 -c > {output}
""",
n=self.n,
@@ -559,7 +559,7 @@ class RefsPMID(Refcat):
output = shellout("""
zstdcat -T0 {input} |
skate-map -m ff -x biblio.pmid -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
zstd -T0 -c > {output}
""",
n=self.n,
@@ -583,7 +583,7 @@ class RefsPMCID(Refcat):
output = shellout("""
zstdcat -T0 {input} |
skate-map -m ff -x biblio.pmcid -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
zstd -T0 -c > {output}
""",
n=self.n,
@@ -606,7 +606,7 @@ class RefsArxiv(Refcat):
output = shellout("""
zstdcat -T0 {input} |
skate-map -m ff -x biblio.arxiv_id -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
zstd -T0 -c > {output}
""",
n=self.n,
@@ -636,7 +636,7 @@ class FatcatDOI(Refcat):
zstdcat -T0 {input} |
skate-map -m ff -x ext_ids.doi -skip-on-empty 1 |
skate-cleanup -S -c doi -f 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
zstd -T0 -c > {output}
""",
n=self.n,
@@ -659,7 +659,7 @@ class FatcatPMID(Refcat):
output = shellout("""
zstdcat -T0 {input} |
skate-map -m ff -x ext_ids.pmid -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
zstd -T0 -c > {output}
""",
n=self.n,
@@ -682,7 +682,7 @@ class FatcatPMCID(Refcat):
output = shellout("""
zstdcat -T0 {input} |
skate-map -m ff -x ext_ids.pmcid -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
zstd -T0 -c > {output}
""",
n=self.n,
@@ -705,7 +705,7 @@ class FatcatArxiv(Refcat):
output = shellout("""
zstdcat -T0 {input} |
skate-map -m ff -x extra.arxiv.base_id -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
zstd -T0 -c > {output}
""",
n=self.n,
@@ -736,7 +736,7 @@ class FatcatMapped(Refcat):
output = shellout("""
zstdcat -T0 {input} |
skate-map -m {mapper} -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
zstd -T0 -c > {output}
""",
mapper=self.mapper,
@@ -782,7 +782,7 @@ class RefsMapped(Refcat):
output = shellout("""
zstdcat -T0 {input} |
skate-map -m {mapper} -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
zstd -T0 -c > {output}
""",
n=self.n,
@@ -943,7 +943,7 @@ class OpenLibraryEditionsByWork(Refcat):
zstdcat -T0 {input} |
cut -f 5 |
skate-map -skip-on-empty 1 -m ff -x 'works.0.key' |
- LC_ALL=C sort -T {tmpdir} -S25% -k1,1 |
+ LC_ALL=C sort -T {tmpdir} -S25% -k1,1 --compress-program=zstd |
zstd -T0 -c > {output}
""",
tmpdir=self.tmpdir,
@@ -965,7 +965,7 @@ class OpenLibraryWorksSorted(Refcat):
output = shellout("""
zstdcat -T0 {input} |
cut -f 2,5 |
- LC_ALL=C sort -T {tmpdir} -S25% -k1,1 |
+ LC_ALL=C sort -T {tmpdir} -S25% -k1,1 --compress-program=zstd |
zstd -T0 -c > {output}
""",
tmpdir=self.tmpdir,
@@ -1047,7 +1047,7 @@ class OpenLibraryEditionsMapped(Refcat):
output = shellout("""
zstdcat -T0 {input} |
skate-map -m {mapper} -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
zstd -T0 -c > {output}
""",
n=self.n,
@@ -1102,7 +1102,7 @@ class UnmatchedMapped(Refcat):
zstdcat -T0 {input} |
skate-conv -f ref |
skate-map -m rcns -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -S25% -k1,1 |
+ LC_ALL=C sort -T {tmpdir} -S25% -k1,1 --compress-program=zstd |
zstd -T0 -c > {output}
""",
tmpdir=self.tmpdir,
@@ -1185,7 +1185,7 @@ class OpenLibraryReleaseMapped(Refcat):
output = shellout("""
zstdcat -T0 {input} |
skate-map -m {mapper} -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
zstd -T0 -c > {output}
""",
mapper=self.mapper,
@@ -1247,7 +1247,7 @@ class BrefSortedByWorkID(Refcat):
output = shellout("""
zstdcat -T0 {bref} |
skate-map -B -m ff -x source_work_ident |
- LC_ALL=C sort -T {tmpdir} -S25% -k1,1 | zstd -c -T0 > {output}
+ LC_ALL=C sort -T {tmpdir} -S25% -k1,1 --compress-program=zstd | zstd -c -T0 > {output}
""",
tmpdir=self.tmpdir,
bref=self.input().path)
@@ -1271,7 +1271,7 @@ class RefsByWorkID(Refcat):
output = shellout("""
zstdcat -T0 {input} |
skate-map -m ff -x work_ident |
- LC_ALL=C sort -T {tmpdir} -S25% -k1,1 |
+ LC_ALL=C sort -T {tmpdir} -S25% -k1,1 --compress-program=zstd |
zstd -c -T0 > {output}
""",
tmpdir=self.tmpdir,
@@ -1376,7 +1376,7 @@ class UnmatchedResolveJournalNamesMapped(Refcat):
output = shellout("""
zstdcat -T0 {input} |
skate-map -m vcns -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
zstd -T0 -c > {output}
""",
tmpdir=self.tmpdir,