diff options
Diffstat (limited to 'python/sandcrawler/html.py')
-rw-r--r-- | python/sandcrawler/html.py | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/python/sandcrawler/html.py b/python/sandcrawler/html.py index a44fc67..5b9742a 100644 --- a/python/sandcrawler/html.py +++ b/python/sandcrawler/html.py @@ -53,12 +53,12 @@ def extract_fulltext_url(html_url, html_body): print(f"\tdoi.org in citation_pdf_url (loop?): {url}", file=sys.stderr) elif url.startswith('/'): if host_prefix + url == html_url: - print(f"\tavoiding citation_pdf_url link-loop", file=sys.stderr) + print("\tavoiding citation_pdf_url link-loop", file=sys.stderr) else: return dict(pdf_url=host_prefix + url, technique='citation_pdf_url') elif url.startswith('http'): if url == html_url: - print(f"\tavoiding citation_pdf_url link-loop", file=sys.stderr) + print("\tavoiding citation_pdf_url link-loop", file=sys.stderr) else: return dict(pdf_url=url, technique='citation_pdf_url') else: |