diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-04-18 11:14:39 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-04-18 11:14:39 -0700 |
commit | cd566e0b44cfa7cb110b60158aa029189e2d03ff (patch) | |
tree | cad8d48d192729852f333bf497c937c8168c3c3a | |
parent | 50a076b46842b32288c89d199d28e54032341f49 (diff) | |
download | arabesque-cd566e0b44cfa7cb110b60158aa029189e2d03ff.tar.gz arabesque-cd566e0b44cfa7cb110b60158aa029189e2d03ff.zip |
fix missing postproc bug
-rwxr-xr-x | arabesque.py | 4 |
1 files changed, 2 insertions, 2 deletions
```diff
diff --git a/arabesque.py b/arabesque.py
index b5b2224..cf8cf96 100755
--- a/arabesque.py
+++ b/arabesque.py
@@ -515,8 +515,8 @@ def forward(seed_id_file, map_db, output_db):
             print("MISSING url: {}".format(raw_line.strip()))
             # need to insert *something* in this case...
             initial_domain = urllib3.util.parse_url(seed_url).host
-            c.execute("INSERT INTO crawl_result VALUES (?,?,?,?,?,?,?,?,?,?,?,?)",
-                (seed_url, identifier, initial_domain, None, None, None, None, None, None, None, None, False))
+            c.execute("INSERT INTO crawl_result VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)",
+                (seed_url, identifier, initial_domain, None, None, None, None, None, None, None, None, False, None))
             counts['map-url-missing'] += 1
             continue
         row = first_row
```