From 6e7baa12619e8b51218bcefb2c6dd75525eb7387 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Thu, 15 Jul 2021 03:33:19 +0200 Subject: cdx reshape: only include hits --- extra/cdx/cdx_reshape.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'extra/cdx') diff --git a/extra/cdx/cdx_reshape.py b/extra/cdx/cdx_reshape.py index 6b3d6e5..28b6fc0 100644 --- a/extra/cdx/cdx_reshape.py +++ b/extra/cdx/cdx_reshape.py @@ -9,8 +9,7 @@ by_url = {} for line in fileinput.input(): line = line.strip() doc = json.loads(line) - last = doc.get("summary", {}).get("last", "") - if not last: + if doc.get("numRows") == 0: continue by_url[doc["line"]] = doc -- cgit v1.2.3