From b888f2201f981a2e01026e53d5c6ba2d435e9506 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 16 May 2019 19:19:22 -0700 Subject: fix elastic file pdf check --- python/fatcat_tools/transforms/elasticsearch.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py index 91086a0c..645142de 100644 --- a/python/fatcat_tools/transforms/elasticsearch.py +++ b/python/fatcat_tools/transforms/elasticsearch.py @@ -137,8 +137,10 @@ def release_to_elasticsearch(entity, force_bool=True): if f.extra and f.extra.get('shadows'): # TODO: shadow check goes here in_shadows = True - is_pdf = 'pdf' in (f.mimetype or '') or 'pdf' in url.lower() + is_pdf = 'pdf' in (f.mimetype or '') for url in (f.urls or []): + if not f.mimetype and 'pdf' in url.lower(): + is_pdf = True if url.url.lower().startswith('http'): in_web = True if url.rel in ('dweb', 'p2p', 'ipfs', 'dat', 'torrent'): -- cgit v1.2.3