aboutsummaryrefslogtreecommitdiffstats
path: root/fuzzycat/utils.py
diff options
context:
space:
mode:
author: Martin Czygan <martin.czygan@gmail.com> 2020-11-25 01:22:32 +0100
committer: Martin Czygan <martin.czygan@gmail.com> 2020-11-25 01:22:32 +0100
commit: 6bf0cb8a908122eed9cccd7f9fae35377a692c1d (patch)
tree: 587b5c4e9c02fbdceb86001bd3bfd269a372cd1b /fuzzycat/utils.py
parent: 17582f0b1d5e6a33ec353f3ff63f37f0a2764c0c (diff)
downloadfuzzycat-6bf0cb8a908122eed9cccd7f9fae35377a692c1d.tar.gz
fuzzycat-6bf0cb8a908122eed9cccd7f9fae35377a692c1d.zip
extend test coverage
Diffstat (limited to 'fuzzycat/utils.py')
-rw-r--r-- fuzzycat/utils.py 26
1 files changed, 26 insertions, 0 deletions
diff --git a/fuzzycat/utils.py b/fuzzycat/utils.py
new file mode 100644
index 0000000..f269b11
--- /dev/null
+++ b/fuzzycat/utils.py
@@ -0,0 +1,26 @@
+import io
+import string
+
# Characters that survive slugification: ASCII digits, ASCII letters (both
# cases) and ASCII whitespace. Punctuation and non-ASCII are excluded.
printable_no_punct = string.digits + string.ascii_letters + string.whitespace


def slugify_string(s: str) -> str:
    """
    Keeps ascii chars and single whitespace only.

    The input is lowercased, every character that is not an ASCII letter,
    digit or whitespace is dropped, and each run of whitespace (spaces, tabs,
    newlines, ...) is collapsed into a single space. Leading and trailing
    whitespace is removed. An empty or all-punctuation input yields "".
    """
    kept = ''.join(c for c in s.lower() if c in printable_no_punct)
    # str.split() without arguments splits on runs of whitespace and discards
    # empty leading/trailing fields, so re-joining with one space normalizes
    # the whitespace as promised by the docstring.
    return ' '.join(kept.split())
+
+
def cut(f: int = 0, sep: str = '\t', ignore_missing_column: bool = True):
    """
    Return a callable that extracts a given column from a line.

    f is the index of the column to extract (negative values count from the
    end, as with list indexing), sep is the column separator. The returned
    callable takes a single line (str) and returns the column as str.

    If the line does not have the requested column, the callable returns the
    empty string when ignore_missing_column is true and raises ValueError
    otherwise.
    """
    # Surrounding whitespace (e.g. the line terminator) is trimmed before
    # splitting. Characters that occur in the separator must not be trimmed,
    # otherwise an empty first or last column would disappear and all column
    # indices would shift (e.g. "\tb\tc" with a tab separator).
    trimmable = ''.join(c for c in string.whitespace if c not in (sep or ''))

    def func(value):
        parts = value.strip(trimmable).split(sep)
        # Check both directions, so that an out-of-range negative index is
        # reported like any other missing column instead of an IndexError.
        if not -len(parts) <= f < len(parts):
            if ignore_missing_column:
                return ""
            raise ValueError('cannot split value {} into {} parts'.format(value, f))
        return parts[f]

    return func