about summary refs log tree commit diff stats
path: root/python/sandcrawler/ingest.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/sandcrawler/ingest.py')
-rw-r--r-- python/sandcrawler/ingest.py 1
1 files changed, 1 insertions, 0 deletions
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index c9a697c..4159e26 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -354,6 +354,7 @@ class IngestFileWorker(SandcrawlerWorker):
return result
next_url = fulltext_url.get('pdf_url') or fulltext_url.get('next_url')
assert next_url
+ next_url = clean_url(next_url)
print("[PARSE\t] {}\t{}".format(
fulltext_url.get('technique'),
next_url,