diff options
| -rw-r--r-- | python/fatcat/search.py | 8 | 
1 files changed, 6 insertions, 2 deletions
diff --git a/python/fatcat/search.py b/python/fatcat/search.py
index 0b038859..c179b12e 100644
--- a/python/fatcat/search.py
+++ b/python/fatcat/search.py
@@ -41,8 +41,12 @@ def do_search(q, limit=20):
         # Ensure 'contrib_names' is a list, not a single string
         if type(h['contrib_names']) is not list:
             h['contrib_names'] = [h['contrib_names'], ]
-        # TODO: a total hack; why is elastic sending weird surrogate
-        # characters?
+        # Handle surrogate strings that elasticsearch returns sometimes,
+        # probably due to mangled data processing in some pipeline.
+        # "Crimes against Unicode"; production workaround
+        for key in h:
+            if type(h[key]) is str:
+                h[key] = h[key].encode('utf8', 'ignore').decode('utf8')
         h['contrib_names'] = [name.encode('utf8', 'ignore').decode('utf8') for name in h['contrib_names']]
 
     found = content['hits']['total']
