summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2019-05-16 19:19:22 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2019-05-16 19:23:07 -0700
commit b888f2201f981a2e01026e53d5c6ba2d435e9506 (patch)
tree a22d0a3fbf7f23a72dca2f500eb46dc93121d05b /python/fatcat_tools
parent 7ed25a124e91f005b3347924be3783b51b79d197 (diff)
download fatcat-b888f2201f981a2e01026e53d5c6ba2d435e9506.tar.gz
fatcat-b888f2201f981a2e01026e53d5c6ba2d435e9506.zip
fix elastic file pdf check
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/transforms/elasticsearch.py 4
1 files changed, 3 insertions, 1 deletions
diff --git a/python/fatcat_tools/transforms/elasticsearch.py b/python/fatcat_tools/transforms/elasticsearch.py
index 91086a0c..645142de 100644
--- a/python/fatcat_tools/transforms/elasticsearch.py
+++ b/python/fatcat_tools/transforms/elasticsearch.py
@@ -137,8 +137,10 @@ def release_to_elasticsearch(entity, force_bool=True):
if f.extra and f.extra.get('shadows'):
# TODO: shadow check goes here
in_shadows = True
- is_pdf = 'pdf' in (f.mimetype or '') or 'pdf' in url.lower()
+ is_pdf = 'pdf' in (f.mimetype or '')
for url in (f.urls or []):
+ if not f.mimetype and 'pdf' in url.lower():
+ is_pdf = True
if url.url.lower().startswith('http'):
in_web = True
if url.rel in ('dweb', 'p2p', 'ipfs', 'dat', 'torrent'):