aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2018-09-28 11:47:30 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2018-09-28 11:47:30 -0700
commit 9857b3347586608cf6d83dc096d5a2f1fc90ed62 (patch)
tree 0ba1a2f3774dbc8391ecb80880c6c37cbd8e282a
parent a0fe8eb84ef6c2607833e5e189596168e8096997 (diff)
download fatcat-9857b3347586608cf6d83dc096d5a2f1fc90ed62.tar.gz
fatcat-9857b3347586608cf6d83dc096d5a2f1fc90ed62.zip
hack to fix search results for now
-rw-r--r-- python/fatcat/search.py 8
1 files changed, 6 insertions, 2 deletions
diff --git a/python/fatcat/search.py b/python/fatcat/search.py
index 0b038859..c179b12e 100644
--- a/python/fatcat/search.py
+++ b/python/fatcat/search.py
@@ -41,8 +41,12 @@ def do_search(q, limit=20):
# Ensure 'contrib_names' is a list, not a single string
if type(h['contrib_names']) is not list:
h['contrib_names'] = [h['contrib_names'], ]
- # TODO: a total hack; why is elastic sending weird surrogate
- # characters?
+ # Handle surrogate strings that elasticsearch returns sometimes,
+ # probably due to mangled data processing in some pipeline.
+ # "Crimes against Unicode"; production workaround
+ for key in h:
+ if type(h[key]) is str:
+ h[key] = h[key].encode('utf8', 'ignore').decode('utf8')
h['contrib_names'] = [name.encode('utf8', 'ignore').decode('utf8') for name in h['contrib_names']]
found = content['hits']['total']