aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fuzzycat/__main__.py5
-rw-r--r--fuzzycat/cluster.py2
-rw-r--r--fuzzycat/utils.py54
3 files changed, 59 insertions, 2 deletions
diff --git a/fuzzycat/__main__.py b/fuzzycat/__main__.py
index 7845299..cd94f9c 100644
--- a/fuzzycat/__main__.py
+++ b/fuzzycat/__main__.py
@@ -218,7 +218,10 @@ if __name__ == '__main__':
sub_cluster = subparsers.add_parser('cluster', help='group entities', parents=[parser])
sub_cluster.set_defaults(func=run_cluster)
- sub_cluster.add_argument('-C', '--compress', action="store_true", help='compress intermediate results')
+ sub_cluster.add_argument('-C',
+ '--compress',
+ action="store_true",
+ help='compress intermediate results')
sub_cluster.add_argument('-f', '--files', default="-", help='input files')
sub_cluster.add_argument('--key-denylist', help='file path to key denylist')
sub_cluster.add_argument('--min-cluster-size',
diff --git a/fuzzycat/cluster.py b/fuzzycat/cluster.py
index 1c9d79f..3228247 100644
--- a/fuzzycat/cluster.py
+++ b/fuzzycat/cluster.py
@@ -78,7 +78,7 @@ import fuzzy
import regex
from zstandard import ZstdCompressor
-from fuzzycat.utils import cut, slugify_string, zstdlines
+from fuzzycat.utils import cut, shellout, slugify_string, zstdlines
__all__ = [
"release_key_title",
diff --git a/fuzzycat/utils.py b/fuzzycat/utils.py
index 55729a1..b43cbcf 100644
--- a/fuzzycat/utils.py
+++ b/fuzzycat/utils.py
@@ -5,6 +5,8 @@ import os
import random
import re
import string
+import subprocess
+import tempfile
import requests
from glom import PathAccessError, glom
@@ -200,3 +202,55 @@ def zstdlines(filename):
line = prev_line + line
yield line
prev_line = lines[-1]
+
+
+def shellout(template,
+             preserve_whitespace=False,
+             executable='/bin/bash',
+             ignoremap=None,
+             encoding=None,
+             pipefail=True,
+             **kwargs):
+    """
+    Format the shell command `template` with `kwargs`, run it and return the
+    path stored under `output`. Placeholders use the `str.format` mini
+    language; if `output` is not passed, an empty temporary file is created
+    for it (even when the template never refers to `{output}`).
+
+    If `encoding` is set and `template` is bytes, it is decoded first. Unless
+    `preserve_whitespace` is true, runs of spaces, tabs and newlines are
+    collapsed into one space. With `pipefail` (the default) the command runs
+    in a subshell after `set -o pipefail`, so a failing stage of a pipeline
+    fails the whole command. `executable` is the shell used, `/bin/bash` by
+    default.
+
+    Raises RuntimeError (exit status in its `code` attribute) on a nonzero
+    exit code, unless that code is a key of `ignoremap`.
+
+    Simple template:
+
+        wc -l < {input} > {output}
+
+    Quoted curly braces:
+
+        ps ax|awk '{{print $1}}' > {output}
+    """
+    if 'output' not in kwargs:
+        # Close the handle right away; a bare mkstemp() would leak its fd.
+        with tempfile.NamedTemporaryFile(prefix='gluish-', delete=False) as tmp:
+            kwargs['output'] = tmp.name
+    if ignoremap is None:
+        ignoremap = {}
+    if encoding and isinstance(template, bytes):
+        template = template.decode(encoding)
+    command = template.format(**kwargs)
+    if not preserve_whitespace:
+        command = re.sub('[ \t\n]+', ' ', command)
+    if pipefail:
+        command = '(set -o pipefail && %s)' % command
+    code = subprocess.call(command, shell=True, executable=executable)
+    if code != 0 and code not in ignoremap:
+        error = RuntimeError('%s exitcode: %s' % (command, code))
+        error.code = code
+        raise error
+    return kwargs.get('output')