From c7de3e448a3aa197236f2ff8a254b207a103f95f Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Fri, 31 Jan 2020 16:28:29 -0800
Subject: fix IA datetime length (14 not 12)

---
 arabesque.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arabesque.py b/arabesque.py
index e67bc0b..721b74f 100755
--- a/arabesque.py
+++ b/arabesque.py
@@ -452,8 +452,8 @@ def backward(log_file, map_db, output_db, hit_mimetypes=FULLTEXT_MIMETYPES):
         # convert to IA CDX timestamp format
         #final_timestamp = dateutil.parser.parse(line.timestamp).strftime("%Y%m%d%H%M%S")
         final_timestamp = None
-        if len(line.timestamp) >= 12 and line.timestamp[4] != '-':
-            final_timestamp = line.timestamp[:12]
+        if len(line.timestamp) >= 14 and line.timestamp[4] != '-':
+            final_timestamp = line.timestamp[:14]
         c.execute("INSERT INTO crawl_result VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)",
             (row.url, None, initial_domain, final_row.breadcrumbs, final_row.url, final_domain, final_timestamp, final_row.status_code, line.sha1, final_row.mimetype, final_row.is_dedupe, True, None))
         #print(final_row.breadcrumbs)
-- 
cgit v1.2.3