From 4f40ea7d0cb19ceac15c28b61c479a66895cea2d Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 12 Apr 2019 12:14:29 -0700 Subject: add loop detection in backward processing --- arabesque.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arabesque.py b/arabesque.py index 429042d..07c05b5 100755 --- a/arabesque.py +++ b/arabesque.py @@ -427,12 +427,17 @@ def backward(log_file, map_db, output_db, hit_mimetypes=FULLTEXT_MIMETYPES): counts['skip-map-scope'] += 1 continue row = final_row + loop_stack = [] while row and row.referrer_url != None: next_row = lookup_referrer_row(m, row.referrer_url) if next_row: row = next_row else: break + if row.referrer_url in loop_stack: + counts['map-url-redirect-loop'] += 1 + break + loop_stack.append(row.referrer_url) initial_domain = urllib3.util.parse_url(row.url).host final_domain = urllib3.util.parse_url(final_row.url).host -- cgit v1.2.3