about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- python/sandcrawler/html.py 2
1 file changed, 2 insertions, 0 deletions
diff --git a/python/sandcrawler/html.py b/python/sandcrawler/html.py
index 1d24ca1..6236a3b 100644
--- a/python/sandcrawler/html.py
+++ b/python/sandcrawler/html.py
@@ -59,6 +59,8 @@ def extract_fulltext_url(html_url, html_body):
if not meta:
# researchgate does this; maybe others also?
meta = soup.find('meta', attrs={"property":"citation_pdf_url"})
+ if not meta:
+ meta = soup.find('meta', attrs={"name":"eprints.document_url"})
# if tag is only partially populated
if meta and not meta.get('content'):
meta = None