about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2019-04-23 17:31:26 -0700
committer Bryan Newbold <bnewbold@archive.org> 2019-04-23 17:31:26 -0700
commit 39150d0fec3a444d9fa2786aa19e7c098c8247df (patch)
tree 7bf77af8831f12a6acf332db2031b1566266c8af
parent 46b93836752a6fb221df44111de43c435174926d (diff)
download arabesque-39150d0fec3a444d9fa2786aa19e7c098c8247df.tar.gz
arabesque-39150d0fec3a444d9fa2786aa19e7c098c8247df.zip
don't spam about missing URLs
-rwxr-xr-x arabesque.py 2
1 files changed, 1 insertions, 1 deletions
diff --git a/arabesque.py b/arabesque.py
index 1c5663b..9b12b68 100755
--- a/arabesque.py
+++ b/arabesque.py
@@ -518,7 +518,7 @@ def forward(seed_id_file, map_db, output_db):
# simple for redirect case (no branching); arbitrary for the fan-out case
first_row = lookup_referrer_row(m, seed_url)
if not first_row:
- print("MISSING url: {}".format(raw_line.strip()))
+ #print("MISSING url: {}".format(raw_line.strip()))
# need to insert *something* in this case...
initial_domain = urllib3.util.parse_url(seed_url).host
c.execute("INSERT INTO crawl_result VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)",