diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2018-09-28 11:47:30 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-09-28 11:47:30 -0700 |
commit | 9857b3347586608cf6d83dc096d5a2f1fc90ed62 (patch) | |
tree | 0ba1a2f3774dbc8391ecb80880c6c37cbd8e282a /python | |
parent | a0fe8eb84ef6c2607833e5e189596168e8096997 (diff) | |
download | fatcat-9857b3347586608cf6d83dc096d5a2f1fc90ed62.tar.gz fatcat-9857b3347586608cf6d83dc096d5a2f1fc90ed62.zip |
hack to fix search results for now
Diffstat (limited to 'python')
-rw-r--r-- | python/fatcat/search.py | 8 |
1 files changed, 6 insertions, 2 deletions
```diff
diff --git a/python/fatcat/search.py b/python/fatcat/search.py
index 0b038859..c179b12e 100644
--- a/python/fatcat/search.py
+++ b/python/fatcat/search.py
@@ -41,8 +41,12 @@ def do_search(q, limit=20):
         # Ensure 'contrib_names' is a list, not a single string
         if type(h['contrib_names']) is not list:
             h['contrib_names'] = [h['contrib_names'], ]
-        # TODO: a total hack; why is elastic sending weird surrogate
-        # characters?
+        # Handle surrogate strings that elasticsearch returns sometimes,
+        # probably due to mangled data processing in some pipeline.
+        # "Crimes against Unicode"; production workaround
+        for key in h:
+            if type(h[key]) is str:
+                h[key] = h[key].encode('utf8', 'ignore').decode('utf8')
         h['contrib_names'] = [name.encode('utf8', 'ignore').decode('utf8') for name in h['contrib_names']]
 
     found = content['hits']['total']
```