about | summary | refs | log | tree | commit | diff | stats
path: root/extra/cdx
diff options
context:
space:
mode:
author: Martin Czygan <martin.czygan@gmail.com> 2021-07-15 03:33:19 +0200
committer: Martin Czygan <martin.czygan@gmail.com> 2021-07-15 03:33:19 +0200
commit: 6e7baa12619e8b51218bcefb2c6dd75525eb7387 (patch)
tree: c3c6c09256eda3eda5f756c3f1c5bab7094e906a /extra/cdx
parent: 1e03f7f9b0dc4fe2bf856c542d011de72962ce10 (diff)
download: refcat-6e7baa12619e8b51218bcefb2c6dd75525eb7387.tar.gz
refcat-6e7baa12619e8b51218bcefb2c6dd75525eb7387.zip
cdx reshape: only include hits
Diffstat (limited to 'extra/cdx')
-rw-r--r-- extra/cdx/cdx_reshape.py 3
1 file changed, 1 insertion, 2 deletions
diff --git a/extra/cdx/cdx_reshape.py b/extra/cdx/cdx_reshape.py
index 6b3d6e5..28b6fc0 100644
--- a/extra/cdx/cdx_reshape.py
+++ b/extra/cdx/cdx_reshape.py
@@ -9,8 +9,7 @@ by_url = {}
for line in fileinput.input():
line = line.strip()
doc = json.loads(line)
- last = doc.get("summary", {}).get("last", "")
- if not last:
+ if doc.get("numRows") == 0:
continue
by_url[doc["line"]] = doc