summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2018-11-26 19:26:02 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2018-11-26 19:26:02 -0800
commit 767018eae6c628e0add27a0f187327b25d8569dc (patch)
tree ec080337a337dd64c7c532ee1bd616a8cb3f851b /python/fatcat_tools
parent 8b84e83b79de67996e61a3a830e8f395b68085e5 (diff)
download fatcat-767018eae6c628e0add27a0f187327b25d8569dc.tar.gz
fatcat-767018eae6c628e0add27a0f187327b25d8569dc.zip
fix file extraction (and transforms)
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/transforms.py 12
1 files changed, 6 insertions, 6 deletions
diff --git a/python/fatcat_tools/transforms.py b/python/fatcat_tools/transforms.py
index 87facd57..516b68ae 100644
--- a/python/fatcat_tools/transforms.py
+++ b/python/fatcat_tools/transforms.py
@@ -69,16 +69,16 @@ def release_to_elasticsearch(release):
in_ia = False
t['file_pdf_url'] = None
for f in files:
- is_pdf = 'pdf' in f.get('mimetype', '')
- for url in f.get('urls', []):
- if url.get('rel', '') == 'webarchive':
+ is_pdf = 'pdf' in (f.mimetype or '')
+ for url in (f.urls or []):
+ if url.rel == 'webarchive':
in_wa = True
- if '//web.archive.org/' in url['url'] or '//archive.org/' in url['url']:
+ if '//web.archive.org/' in (url.url or '') or '//archive.org/' in (url.url or ''):
in_ia = True
if is_pdf:
- t['file_pdf_url'] = url['url']
+ t['file_pdf_url'] = url.url
if not t['file_pdf_url'] and is_pdf:
- t['file_pdf_url'] = url['url']
+ t['file_pdf_url'] = url.url
t['file_in_webarchive'] = in_wa
t['file_in_ia'] = in_ia