diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-01-10 16:02:32 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-01-10 16:02:32 -0800 |
commit | 7cc0b6fd1a7b9e3b9bb2c8ee85e319a7aae9c5e8 (patch) | |
tree | f5c762dc61917cb60b01810a2cef7779dc52a7f2 /python | |
parent | f31f6ba5917403b85892c7345a08b8de93dae501 (diff) | |
download | sandcrawler-7cc0b6fd1a7b9e3b9bb2c8ee85e319a7aae9c5e8.tar.gz sandcrawler-7cc0b6fd1a7b9e3b9bb2c8ee85e319a7aae9c5e8.zip |
disable CDX best lookup 'collapse'; leave comment
Diffstat (limited to 'python')
-rw-r--r-- | python/sandcrawler/ia.py | 4 |
1 file changed, 3 insertions, 1 deletion
```diff
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 3c1d2f9..096d5d4 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -168,7 +168,9 @@ class CdxApiClient:
             'matchType': 'exact',
             'limit': -25,
             'output': 'json',
-            'collapse': 'timestamp:6',
+            # Collapsing seems efficient, but is complex; would need to include
+            # other filters and status code in filter
+            #'collapse': 'timestamp:6',
             'filter': '!mimetype:warc/revisit',
         }
         if max_age_days:
```