about | summary | refs | log | tree | commit | diff | stats
path: root/fatcat_covid19/transform.py
diff options
context:
space:
mode:
Diffstat (limited to 'fatcat_covid19/transform.py')
-rw-r--r-- fatcat_covid19/transform.py | 4
1 files changed, 4 insertions, 0 deletions
diff --git a/fatcat_covid19/transform.py b/fatcat_covid19/transform.py
index 16774ab..3f942ba 100644
--- a/fatcat_covid19/transform.py
+++ b/fatcat_covid19/transform.py
@@ -152,6 +152,10 @@ def fulltext_to_elasticsearch(row, force_bool=True):
t['fulltext']['thumbnail_url'] = "/" + full['thumbnail_path']
if full.get('grobid_xml_path'):
t['fulltext']['grobid_xml_url'] = "/" + full['grobid_xml_path']
+ for url in full.get('urls', []):
+ if url.get('rel') in ('webarchive', 'archive') and 'archive.org/' in url['url']:
+ t['fulltext']['ia_pdf_url'] = url['url']
+ break
if 'fulltext_grobid' in row:
grobid = row['fulltext_grobid']