diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-07-03 01:34:38 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-07-03 01:34:38 +0200 |
commit | 8299755d7d415ae4084e822aa1b537a6d6706fb9 (patch) | |
tree | 4e6a8c6e06ca48cce6b2afec357ab49252370ed1 | |
parent | f990bb747c48b2c6c672ac1c913cf15f38a9bf8a (diff) | |
download | refcat-8299755d7d415ae4084e822aa1b537a6d6706fb9.tar.gz refcat-8299755d7d415ae4084e822aa1b537a6d6706fb9.zip |
tasks: compress tmp files
-rw-r--r-- | python/refcat/tasks.py | 42 |
1 files changed, 21 insertions, 21 deletions
```diff
diff --git a/python/refcat/tasks.py b/python/refcat/tasks.py
index 48c4226..01f879b 100644
--- a/python/refcat/tasks.py
+++ b/python/refcat/tasks.py
@@ -457,7 +457,7 @@ class URLTabs(Refcat):
  output = shellout("""
  zstdcat -T0 {input} |
  skate-map -m ru -skip-on-empty 3 |
- LC_ALL=C sort -T {tmpdir} -k3,3 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k3,3 -S25% --compress-program=zstd |
  zstd -T0 -c > {output}
  """,
  n=self.n,
@@ -480,7 +480,7 @@ class URLTabsCleaned(Refcat):
  output = shellout("""
  zstdcat -T0 {input} |
  skate-cleanup -c url -allow http,https -X -B -S -f 3 |
- LC_ALL=C sort -T {tmpdir} -k3,3 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k3,3 -S25% --compress-program=zstd |
  zstd -T0 -c > {output}
  """,
  n=self.n,
@@ -504,7 +504,7 @@ class URLList(Refcat):
  zstdcat -T0 {input} |
  cut -f 3 |
  skate-cleanup -X -c url -B -S -f 1 |
- LC_ALL=C sort -u -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -u -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
  LC_ALL=C grep -E '^https?://' |
  zstd -T0 -c > {output}
  """,
@@ -535,7 +535,7 @@ class RefsDOI(Refcat):
  zstdcat -T0 {input} |
  skate-map -m ff -x biblio.doi -skip-on-empty 1 |
  skate-cleanup -S -c doi -f 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
  zstd -T0 -c > {output}
  """,
  n=self.n,
@@ -559,7 +559,7 @@ class RefsPMID(Refcat):
  output = shellout("""
  zstdcat -T0 {input} |
  skate-map -m ff -x biblio.pmid -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
  zstd -T0 -c > {output}
  """,
  n=self.n,
@@ -583,7 +583,7 @@ class RefsPMCID(Refcat):
  output = shellout("""
  zstdcat -T0 {input} |
  skate-map -m ff -x biblio.pmcid -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
  zstd -T0 -c > {output}
  """,
  n=self.n,
@@ -606,7 +606,7 @@ class RefsArxiv(Refcat):
  output = shellout("""
  zstdcat -T0 {input} |
  skate-map -m ff -x biblio.arxiv_id -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
  zstd -T0 -c > {output}
  """,
  n=self.n,
@@ -636,7 +636,7 @@ class FatcatDOI(Refcat):
  zstdcat -T0 {input} |
  skate-map -m ff -x ext_ids.doi -skip-on-empty 1 |
  skate-cleanup -S -c doi -f 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
  zstd -T0 -c > {output}
  """,
  n=self.n,
@@ -659,7 +659,7 @@ class FatcatPMID(Refcat):
  output = shellout("""
  zstdcat -T0 {input} |
  skate-map -m ff -x ext_ids.pmid -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
  zstd -T0 -c > {output}
  """,
  n=self.n,
@@ -682,7 +682,7 @@ class FatcatPMCID(Refcat):
  output = shellout("""
  zstdcat -T0 {input} |
  skate-map -m ff -x ext_ids.pmcid -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
  zstd -T0 -c > {output}
  """,
  n=self.n,
@@ -705,7 +705,7 @@ class FatcatArxiv(Refcat):
  output = shellout("""
  zstdcat -T0 {input} |
  skate-map -m ff -x extra.arxiv.base_id -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
  zstd -T0 -c > {output}
  """,
  n=self.n,
@@ -736,7 +736,7 @@ class FatcatMapped(Refcat):
  output = shellout("""
  zstdcat -T0 {input} |
  skate-map -m {mapper} -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
  zstd -T0 -c > {output}
  """,
  mapper=self.mapper,
@@ -782,7 +782,7 @@ class RefsMapped(Refcat):
  output = shellout("""
  zstdcat -T0 {input} |
  skate-map -m {mapper} -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
  zstd -T0 -c > {output}
  """,
  n=self.n,
@@ -943,7 +943,7 @@ class OpenLibraryEditionsByWork(Refcat):
  zstdcat -T0 {input} |
  cut -f 5 |
  skate-map -skip-on-empty 1 -m ff -x 'works.0.key' |
- LC_ALL=C sort -T {tmpdir} -S25% -k1,1 |
+ LC_ALL=C sort -T {tmpdir} -S25% -k1,1 --compress-program=zstd |
  zstd -T0 -c > {output}
  """,
  tmpdir=self.tmpdir,
@@ -965,7 +965,7 @@ class OpenLibraryWorksSorted(Refcat):
  output = shellout("""
  zstdcat -T0 {input} |
  cut -f 2,5 |
- LC_ALL=C sort -T {tmpdir} -S25% -k1,1 |
+ LC_ALL=C sort -T {tmpdir} -S25% -k1,1 --compress-program=zstd |
  zstd -T0 -c > {output}
  """,
  tmpdir=self.tmpdir,
@@ -1047,7 +1047,7 @@ class OpenLibraryEditionsMapped(Refcat):
  output = shellout("""
  zstdcat -T0 {input} |
  skate-map -m {mapper} -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
  zstd -T0 -c > {output}
  """,
  n=self.n,
@@ -1102,7 +1102,7 @@ class UnmatchedMapped(Refcat):
  zstdcat -T0 {input} |
  skate-conv -f ref |
  skate-map -m rcns -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -S25% -k1,1 |
+ LC_ALL=C sort -T {tmpdir} -S25% -k1,1 --compress-program=zstd |
  zstd -T0 -c > {output}
  """,
  tmpdir=self.tmpdir,
@@ -1185,7 +1185,7 @@ class OpenLibraryReleaseMapped(Refcat):
  output = shellout("""
  zstdcat -T0 {input} |
  skate-map -m {mapper} -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
  zstd -T0 -c > {output}
  """,
  mapper=self.mapper,
@@ -1247,7 +1247,7 @@ class BrefSortedByWorkID(Refcat):
  output = shellout("""
  zstdcat -T0 {bref} |
  skate-map -B -m ff -x source_work_ident |
- LC_ALL=C sort -T {tmpdir} -S25% -k1,1 | zstd -c -T0 > {output}
+ LC_ALL=C sort -T {tmpdir} -S25% -k1,1 --compress-program=zstd | zstd -c -T0 > {output}
  """,
  tmpdir=self.tmpdir,
  bref=self.input().path)
@@ -1271,7 +1271,7 @@ class RefsByWorkID(Refcat):
  output = shellout("""
  zstdcat -T0 {input} |
  skate-map -m ff -x work_ident |
- LC_ALL=C sort -T {tmpdir} -S25% -k1,1 |
+ LC_ALL=C sort -T {tmpdir} -S25% -k1,1 --compress-program=zstd |
  zstd -c -T0 > {output}
  """,
  tmpdir=self.tmpdir,
@@ -1376,7 +1376,7 @@ class UnmatchedResolveJournalNamesMapped(Refcat):
  output = shellout("""
  zstdcat -T0 {input} |
  skate-map -m vcns -skip-on-empty 1 |
- LC_ALL=C sort -T {tmpdir} -k1,1 -S25% |
+ LC_ALL=C sort -T {tmpdir} -k1,1 -S25% --compress-program=zstd |
  zstd -T0 -c > {output}
  """,
  tmpdir=self.tmpdir,
```