From 9529cbb2660897ce3ffe3986f60eafbf3596495d Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 12 Nov 2019 13:22:43 -0800
Subject: add note to CDX backfill script that we should be filtering (oops)

---
 sql/backfill/backfill_cdx.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'sql')

diff --git a/sql/backfill/backfill_cdx.py b/sql/backfill/backfill_cdx.py
index 1c452ca..f929502 100755
--- a/sql/backfill/backfill_cdx.py
+++ b/sql/backfill/backfill_cdx.py
@@ -109,6 +109,7 @@ def stdin_to_pg():
         info = parse_cdx_line(l)
         if not info:
             continue
+        # XXX: filter to, eg, PDF or octet/stream (derp)
         batch.append(info)
         counts['total'] += 1
         if len(batch) >= 1000:
-- 
cgit v1.2.3