diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2018-11-26 19:26:02 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-11-26 19:26:02 -0800 |
commit | 767018eae6c628e0add27a0f187327b25d8569dc (patch) | |
tree | ec080337a337dd64c7c532ee1bd616a8cb3f851b /python/fatcat_tools/transforms.py | |
parent | 8b84e83b79de67996e61a3a830e8f395b68085e5 (diff) | |
download | fatcat-767018eae6c628e0add27a0f187327b25d8569dc.tar.gz fatcat-767018eae6c628e0add27a0f187327b25d8569dc.zip |
fix file extraction (and transforms)
Diffstat (limited to 'python/fatcat_tools/transforms.py')
-rw-r--r-- | python/fatcat_tools/transforms.py | 12 |
1 files changed, 6 insertions, 6 deletions
diff --git a/python/fatcat_tools/transforms.py b/python/fatcat_tools/transforms.py
index 87facd57..516b68ae 100644
--- a/python/fatcat_tools/transforms.py
+++ b/python/fatcat_tools/transforms.py
@@ -69,16 +69,16 @@ def release_to_elasticsearch(release):
     in_ia = False
     t['file_pdf_url'] = None
     for f in files:
-        is_pdf = 'pdf' in f.get('mimetype', '')
-        for url in f.get('urls', []):
-            if url.get('rel', '') == 'webarchive':
+        is_pdf = 'pdf' in (f.mimetype or '')
+        for url in (f.urls or []):
+            if url.rel == 'webarchive':
                 in_wa = True
-            if '//web.archive.org/' in url['url'] or '//archive.org/' in url['url']:
+            if '//web.archive.org/' in (url.url or '') or '//archive.org/' in (url.url or ''):
                 in_ia = True
                 if is_pdf:
-                    t['file_pdf_url'] = url['url']
+                    t['file_pdf_url'] = url.url
             if not t['file_pdf_url'] and is_pdf:
-                t['file_pdf_url'] = url['url']
+                t['file_pdf_url'] = url.url
     t['file_in_webarchive'] = in_wa
     t['file_in_ia'] = in_ia