diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-11-25 01:22:32 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-11-25 01:22:32 +0100 |
commit | 6bf0cb8a908122eed9cccd7f9fae35377a692c1d (patch) | |
tree | 587b5c4e9c02fbdceb86001bd3bfd269a372cd1b /fuzzycat/utils.py | |
parent | 17582f0b1d5e6a33ec353f3ff63f37f0a2764c0c (diff) | |
download | fuzzycat-6bf0cb8a908122eed9cccd7f9fae35377a692c1d.tar.gz fuzzycat-6bf0cb8a908122eed9cccd7f9fae35377a692c1d.zip |
extend test coverage
Diffstat (limited to 'fuzzycat/utils.py')
-rw-r--r-- | fuzzycat/utils.py | 26 |
1 files changed, 26 insertions, 0 deletions
# fuzzycat/utils.py -- small string helpers.
import io
import string
from typing import Callable

# Characters retained by slugify_string: ASCII digits, ASCII letters and
# ASCII whitespace (i.e. printable characters minus punctuation).
printable_no_punct = string.digits + string.ascii_letters + string.whitespace


def slugify_string(s: str) -> str:
    """
    Lowercase s and drop every character that is not an ASCII letter, an
    ASCII digit or an ASCII whitespace character.

    Whitespace is kept as is; runs of whitespace are not collapsed.
    """
    return ''.join(c for c in s.lower() if c in printable_no_punct)


def cut(f: int = 0, sep: str = '\t', ignore_missing_column: bool = True) -> Callable[[str], str]:
    """
    Return a callable that extracts a given column from a line.

    The returned callable strips surrounding whitespace from the line, splits
    it on sep and returns the column at index f. Negative values of f count
    from the last column, as with regular sequence indexing.

    Note that because the line is stripped first, a whitespace separator
    (such as the default tab) at the very start or end of the line does not
    produce an empty leading or trailing column.

    If the line does not have the requested column, the callable returns the
    empty string when ignore_missing_column is true and raises ValueError
    otherwise.
    """
    def func(value: str) -> str:
        parts = value.strip().split(sep)
        # Check both bounds: a negative index that reaches before the first
        # column is just as missing as a positive one past the last column,
        # and must not escape as a bare IndexError.
        if not -len(parts) <= f < len(parts):
            if ignore_missing_column:
                return ""
            raise ValueError('cannot split value {} into {} parts'.format(value, f))
        return parts[f]

    return func