diff options
-rw-r--r-- | fuzzycat/__main__.py | 5 | ||||
-rw-r--r-- | fuzzycat/cluster.py | 2 | ||||
-rw-r--r-- | fuzzycat/utils.py | 54 |
3 files changed, 59 insertions, 2 deletions
diff --git a/fuzzycat/__main__.py b/fuzzycat/__main__.py index 7845299..cd94f9c 100644 --- a/fuzzycat/__main__.py +++ b/fuzzycat/__main__.py @@ -218,7 +218,10 @@ if __name__ == '__main__': sub_cluster = subparsers.add_parser('cluster', help='group entities', parents=[parser]) sub_cluster.set_defaults(func=run_cluster) - sub_cluster.add_argument('-C', '--compress', action="store_true", help='compress intermediate results') + sub_cluster.add_argument('-C', + '--compress', + action="store_true", + help='compress intermediate results') sub_cluster.add_argument('-f', '--files', default="-", help='input files') sub_cluster.add_argument('--key-denylist', help='file path to key denylist') sub_cluster.add_argument('--min-cluster-size', diff --git a/fuzzycat/cluster.py b/fuzzycat/cluster.py index 1c9d79f..3228247 100644 --- a/fuzzycat/cluster.py +++ b/fuzzycat/cluster.py @@ -78,7 +78,7 @@ import fuzzy import regex from zstandard import ZstdCompressor -from fuzzycat.utils import cut, slugify_string, zstdlines +from fuzzycat.utils import cut, shellout, slugify_string, zstdlines __all__ = [ "release_key_title", diff --git a/fuzzycat/utils.py b/fuzzycat/utils.py index 55729a1..b43cbcf 100644 --- a/fuzzycat/utils.py +++ b/fuzzycat/utils.py @@ -5,6 +5,8 @@ import os import random import re import string +import subprocess +import tempfile import requests from glom import PathAccessError, glom @@ -200,3 +202,55 @@ def zstdlines(filename): line = prev_line + line yield line prev_line = lines[-1] + + +def shellout(template, + preserve_whitespace=False, + executable='/bin/bash', + ignoremap=None, + encoding=None, + pipefail=True, + **kwargs): + """ + Takes a shell command template and executes it. The template must use the + new (2.6+) format mini language. `kwargs` must contain any defined + placeholder, only `output` is optional and will be autofilled with a + temporary file if it used, but not specified explicitly. + + If `pipefail` is `False` no subshell environment will be spawned, where a + failed pipe will cause an error as well. If `preserve_whitespace` is `True`, + no whitespace normalization is performed. A custom shell executable name can + be passed in `executable` and defaults to `/bin/bash`. + + Raises RuntimeError on nonzero exit codes. To ignore certain errors, pass a + dictionary in `ignoremap`, with the error code to ignore as key and a string + message as value. + + Simple template: + + wc -l < {input} > {output} + + Quoted curly braces: + + ps ax|awk '{{print $1}}' > {output} + + """ + if not 'output' in kwargs: + kwargs.update({'output': tempfile.mkstemp(prefix='gluish-')[1]}) + if ignoremap is None: + ignoremap = {} + if encoding: + command = template.decode(encoding).format(**kwargs) + else: + command = template.format(**kwargs) + if not preserve_whitespace: + command = re.sub('[ \t\n]+', ' ', command) + if pipefail: + command = '(set -o pipefail && %s)' % command + code = subprocess.call([command], shell=True, executable=executable) + if not code == 0: + if code not in ignoremap: + error = RuntimeError('%s exitcode: %s' % (command, code)) + error.code = code + raise error + return kwargs.get('output') |