about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2019-04-18 11:14:39 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2019-04-18 11:14:39 -0700
commit: cd566e0b44cfa7cb110b60158aa029189e2d03ff (patch)
tree: cad8d48d192729852f333bf497c937c8168c3c3a
parent: 50a076b46842b32288c89d199d28e54032341f49 (diff)
download: arabesque-cd566e0b44cfa7cb110b60158aa029189e2d03ff.tar.gz
download: arabesque-cd566e0b44cfa7cb110b60158aa029189e2d03ff.zip
fix missing postproc bug
-rwxr-xr-x arabesque.py 4
1 files changed, 2 insertions, 2 deletions
diff --git a/arabesque.py b/arabesque.py
index b5b2224..cf8cf96 100755
--- a/arabesque.py
+++ b/arabesque.py
@@ -515,8 +515,8 @@ def forward(seed_id_file, map_db, output_db):
print("MISSING url: {}".format(raw_line.strip()))
# need to insert *something* in this case...
initial_domain = urllib3.util.parse_url(seed_url).host
- c.execute("INSERT INTO crawl_result VALUES (?,?,?,?,?,?,?,?,?,?,?,?)",
- (seed_url, identifier, initial_domain, None, None, None, None, None, None, None, None, False))
+ c.execute("INSERT INTO crawl_result VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)",
+ (seed_url, identifier, initial_domain, None, None, None, None, None, None, None, None, False, None))
counts['map-url-missing'] += 1
continue
row = first_row