aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler/html.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/sandcrawler/html.py')
-rw-r--r-- python/sandcrawler/html.py 2
1 file changed, 1 insertion, 1 deletion
diff --git a/python/sandcrawler/html.py b/python/sandcrawler/html.py
index b924a17..8fbb0ba 100644
--- a/python/sandcrawler/html.py
+++ b/python/sandcrawler/html.py
@@ -78,7 +78,7 @@ def extract_fulltext_url(html_url, html_body):
# https://pubs.acs.org/doi/10.1021/acs.estlett.9b00379
# <a href="/doi/pdf/10.1021/acs.estlett.9b00379" title="PDF" target="_blank" class="button_primary"><i class="icon-file-pdf-o"></i><span>PDF (1 MB)</span></a>
href = soup.find('a', attrs={"title":"PDF"})
- if href:
+ if href and 'href' in href:
url = href['href'].strip()
if url.startswith('http'):
return dict(pdf_url=url, technique='href_title')