diff options
Diffstat (limited to 'fatcat_covid19/transform.py')
-rw-r--r-- | fatcat_covid19/transform.py | 4 |
1 files changed, 4 insertions, 0 deletions
diff --git a/fatcat_covid19/transform.py b/fatcat_covid19/transform.py
index 16774ab..3f942ba 100644
--- a/fatcat_covid19/transform.py
+++ b/fatcat_covid19/transform.py
@@ -152,6 +152,10 @@ def fulltext_to_elasticsearch(row, force_bool=True):
             t['fulltext']['thumbnail_url'] = "/" + full['thumbnail_path']
         if full.get('grobid_xml_path'):
             t['fulltext']['grobid_xml_url'] = "/" + full['grobid_xml_path']
+        for url in full.get('urls', []):
+            if url.get('rel') in ('webarchive', 'archive') and 'archive.org/' in url['url']:
+                t['fulltext']['ia_pdf_url'] = url['url']
+                break
 
     if 'fulltext_grobid' in row:
         grobid = row['fulltext_grobid']