diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-11-04 01:11:05 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-11-04 01:11:05 +0100 |
commit | 99228171f2e0668de744dae71ad66085234be356 (patch) | |
tree | d64a823f4af9da2339893c061db20688dafe490c | |
parent | 99597f7ca518ca752b4f677dd1a6fb2fab3e0ab6 (diff) | |
download | fuzzycat-99228171f2e0668de744dae71ad66085234be356.tar.gz fuzzycat-99228171f2e0668de744dae71ad66085234be356.zip |
ignore missing column values
-rw-r--r-- | fuzzycat/cluster.py | 7 |
1 files changed, 5 insertions, 2 deletions
```diff
diff --git a/fuzzycat/cluster.py b/fuzzycat/cluster.py
index da86f6c..4d5580d 100644
--- a/fuzzycat/cluster.py
+++ b/fuzzycat/cluster.py
@@ -77,7 +77,7 @@ def group_by(filename, key=None, value=None, comment=""):
             yield doc
 
 
-def cut(f=0, sep='\t'):
+def cut(f=0, sep='\t', ignore_missing_column=True):
     """
     Return a callable, that extracts a given column from a file with a specific
     separator. TODO: move this into more generic place.
@@ -85,7 +85,10 @@ def cut(f=0, sep='\t'):
     def func(value):
         parts = value.strip().split(sep)
         if f >= len(parts):
-            raise ValueError('cannot split value {} into {} parts'.format(value, f))
+            if ignore_missing_column:
+                return ""
+            else:
+                raise ValueError('cannot split value {} into {} parts'.format(value, f))
         return parts[f]
     return func
 
```