about summary refs log tree commit diff stats
path: root/python/sandcrawler/html.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/sandcrawler/html.py')
-rw-r--r-- python/sandcrawler/html.py 2
1 files changed, 1 insertions, 1 deletions
diff --git a/python/sandcrawler/html.py b/python/sandcrawler/html.py
index 6236a3b..acf1522 100644
--- a/python/sandcrawler/html.py
+++ b/python/sandcrawler/html.py
@@ -105,7 +105,7 @@ def extract_fulltext_url(html_url, html_body):
# http://www.jasstudies.com/DergiTamDetay.aspx?ID=3401
# <embed src="/files/jass_makaleler/1359848334_33-Okt.%20Yasemin%20KARADEM%C4%B0R.pdf" type="application/pdf" />
embed = soup.find('embed', attrs={"type": "application/pdf"})
- if embed:
+ if embed and embed.get('src'):
url = embed['src'].strip()
if url.startswith('/'):
url = host_prefix+url