aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-10-26 15:16:19 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-10-26 15:16:25 -0700
commit 8046e127c7de43f162bdc58f2a5c68c0848aef27 (patch)
tree 637767f6cdab3e4e9618e25548da119fee969cf1
parent 262990a3cb77c790fe62c325415edf2416b840f0 (diff)
download sandcrawler-8046e127c7de43f162bdc58f2a5c68c0848aef27.tar.gz
download sandcrawler-8046e127c7de43f162bdc58f2a5c68c0848aef27.zip
set CDX request params as str, not int or datetime
This might be a bugfix, changing CDX lookup behavior?
-rw-r--r-- python/sandcrawler/ia.py 9
1 files changed, 6 insertions, 3 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 45db393..ad685aa 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -223,7 +223,7 @@ class CdxApiClient:
'from': datetime,
'to': datetime,
'matchType': 'exact',
- 'limit': 1,
+ 'limit': "1",
'output': 'json',
}
if filter_status_code:
@@ -291,7 +291,7 @@ class CdxApiClient:
params: Dict[str, str] = {
'url': url,
'matchType': 'exact',
- 'limit': -25,
+ 'limit': "-25",
'output': 'json',
# Collapsing seems efficient, but is complex; would need to include
# other filters and status code in filter
@@ -304,7 +304,10 @@ class CdxApiClient:
since = datetime.date.today() - datetime.timedelta(days=max_age_days)
params['from'] = '%04d%02d%02d' % (since.year, since.month, since.day)
if closest:
- params['closest'] = closest
+ if isinstance(closest, datetime.datetime):
+ params['closest'] = '%04d%02d%02d' % (closest.year, closest.month, closest.day)
+ else:
+ params['closest'] = closest
params['sort'] = "closest"
#print(params, file=sys.stderr)
rows = self._query_api(params)