diff options
| author | Martin Czygan <martin.czygan@gmail.com> | 2020-11-04 01:11:05 +0100 | 
|---|---|---|
| committer | Martin Czygan <martin.czygan@gmail.com> | 2020-11-04 01:11:05 +0100 | 
| commit | 99228171f2e0668de744dae71ad66085234be356 (patch) | |
| tree | d64a823f4af9da2339893c061db20688dafe490c | |
| parent | 99597f7ca518ca752b4f677dd1a6fb2fab3e0ab6 (diff) | |
| download | fuzzycat-99228171f2e0668de744dae71ad66085234be356.tar.gz fuzzycat-99228171f2e0668de744dae71ad66085234be356.zip  | |
ignore missing column values
| -rw-r--r-- | fuzzycat/cluster.py | 7 | 
1 files changed, 5 insertions, 2 deletions
```diff
diff --git a/fuzzycat/cluster.py b/fuzzycat/cluster.py
index da86f6c..4d5580d 100644
--- a/fuzzycat/cluster.py
+++ b/fuzzycat/cluster.py
@@ -77,7 +77,7 @@ def group_by(filename, key=None, value=None, comment=""):
             yield doc
 
 
-def cut(f=0, sep='\t'):
+def cut(f=0, sep='\t', ignore_missing_column=True):
     """
     Return a callable, that extracts a given column from a file with a specific
     separator. TODO: move this into more generic place.
@@ -85,7 +85,10 @@ def cut(f=0, sep='\t'):
     def func(value):
         parts = value.strip().split(sep)
         if f >= len(parts):
-            raise ValueError('cannot split value {} into {} parts'.format(value, f))
+            if ignore_missing_column:
+                return ""
+            else:
+                raise ValueError('cannot split value {} into {} parts'.format(value, f))
         return parts[f]
 
     return func
```
