diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-04-23 17:31:26 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-04-23 17:31:26 -0700 |
commit | 39150d0fec3a444d9fa2786aa19e7c098c8247df (patch) | |
tree | 7bf77af8831f12a6acf332db2031b1566266c8af | |
parent | 46b93836752a6fb221df44111de43c435174926d (diff) | |
download | arabesque-39150d0fec3a444d9fa2786aa19e7c098c8247df.tar.gz arabesque-39150d0fec3a444d9fa2786aa19e7c098c8247df.zip |
don't spam about missing URLs
-rwxr-xr-x | arabesque.py | 2 |
1 file changed, 1 insertion, 1 deletion
```diff
diff --git a/arabesque.py b/arabesque.py
index 1c5663b..9b12b68 100755
--- a/arabesque.py
+++ b/arabesque.py
@@ -518,7 +518,7 @@ def forward(seed_id_file, map_db, output_db):
         # simple for redirect case (no branching); arbitrary for the fan-out case
         first_row = lookup_referrer_row(m, seed_url)
         if not first_row:
-            print("MISSING url: {}".format(raw_line.strip()))
+            #print("MISSING url: {}".format(raw_line.strip()))
             # need to insert *something* in this case...
             initial_domain = urllib3.util.parse_url(seed_url).host
             c.execute("INSERT INTO crawl_result VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)",
```