diff options
author | Bryan Newbold <bnewbold@archive.org> | 2018-11-21 17:53:32 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2018-11-21 17:53:32 -0800 |
commit | 6591acdda8b09289fabfa913b2f6bb51642fd38f (patch) | |
tree | 99a0e373d21f0a3380d86ac31d4ead21e908deb4 /python | |
parent | 3054e1ce5395c70d75c7750a77ba8a49648bc504 (diff) | |
download | sandcrawler-6591acdda8b09289fabfa913b2f6bb51642fd38f.tar.gz sandcrawler-6591acdda8b09289fabfa913b2f6bb51642fd38f.zip |
cherry-pick: correct HBase column filtering
Diffstat (limited to 'python')
-rwxr-xr-x | python/extraction_ungrobided.py | 2 |
1 file changed, 1 insertion, 1 deletion
```diff
diff --git a/python/extraction_ungrobided.py b/python/extraction_ungrobided.py
index af38cea..4b558dd 100755
--- a/python/extraction_ungrobided.py
+++ b/python/extraction_ungrobided.py
@@ -242,7 +242,7 @@ class MRExtractUnGrobided(MRJob):
         # Basically, don't overwrite backfill fields.
         grobid_status_code = info.get('grobid0:status_code', None)
         for k in list(info.keys()):
-            if k.encode('utf-8') in ('f:c', 'file:mime', 'file:cdx'):
+            if k in ('f:c', 'file:mime', 'file:cdx'):
                 info.pop(k)
 
         # Convert fields to binary
```