about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2020-11-04 01:11:05 +0100
committer Martin Czygan <martin.czygan@gmail.com> 2020-11-04 01:11:05 +0100
commit 99228171f2e0668de744dae71ad66085234be356 (patch)
tree d64a823f4af9da2339893c061db20688dafe490c
parent 99597f7ca518ca752b4f677dd1a6fb2fab3e0ab6 (diff)
download fuzzycat-99228171f2e0668de744dae71ad66085234be356.tar.gz
fuzzycat-99228171f2e0668de744dae71ad66085234be356.zip
ignore missing column values
-rw-r--r-- fuzzycat/cluster.py 7
1 files changed, 5 insertions, 2 deletions
diff --git a/fuzzycat/cluster.py b/fuzzycat/cluster.py
index da86f6c..4d5580d 100644
--- a/fuzzycat/cluster.py
+++ b/fuzzycat/cluster.py
@@ -77,7 +77,7 @@ def group_by(filename, key=None, value=None, comment=""):
yield doc
-def cut(f=0, sep='\t'):
+def cut(f=0, sep='\t', ignore_missing_column=True):
"""
Return a callable, that extracts a given column from a file with a specific
separator. TODO: move this into more generic place.
@@ -85,7 +85,10 @@ def cut(f=0, sep='\t'):
def func(value):
parts = value.strip().split(sep)
if f >= len(parts):
- raise ValueError('cannot split value {} into {} parts'.format(value, f))
+ if ignore_missing_column:
+ return ""
+ else:
+ raise ValueError('cannot split value {} into {} parts'.format(value, f))
return parts[f]
return func