From 7cc0b6fd1a7b9e3b9bb2c8ee85e319a7aae9c5e8 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Fri, 10 Jan 2020 16:02:32 -0800
Subject: disable CDX best lookup 'collapse'; leave comment

---
 python/sandcrawler/ia.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'python')

diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 3c1d2f9..096d5d4 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -168,7 +168,9 @@ class CdxApiClient:
             'matchType': 'exact',
             'limit': -25,
             'output': 'json',
-            'collapse': 'timestamp:6',
+            # Collapsing seems efficient, but is complex; would need to include
+            # other filters and status code in filter
+            #'collapse': 'timestamp:6',
             'filter': '!mimetype:warc/revisit',
         }
         if max_age_days:
-- 
cgit v1.2.3