From 8046e127c7de43f162bdc58f2a5c68c0848aef27 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 26 Oct 2021 15:16:19 -0700 Subject: set CDX request params as str, not int or datetime This might be a bugfix, changing CDX lookup behavior? --- python/sandcrawler/ia.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'python') diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py index 45db393..ad685aa 100644 --- a/python/sandcrawler/ia.py +++ b/python/sandcrawler/ia.py @@ -223,7 +223,7 @@ class CdxApiClient: 'from': datetime, 'to': datetime, 'matchType': 'exact', - 'limit': 1, + 'limit': "1", 'output': 'json', } if filter_status_code: @@ -291,7 +291,7 @@ class CdxApiClient: params: Dict[str, str] = { 'url': url, 'matchType': 'exact', - 'limit': -25, + 'limit': "-25", 'output': 'json', # Collapsing seems efficient, but is complex; would need to include # other filters and status code in filter @@ -304,7 +304,10 @@ class CdxApiClient: since = datetime.date.today() - datetime.timedelta(days=max_age_days) params['from'] = '%04d%02d%02d' % (since.year, since.month, since.day) if closest: - params['closest'] = closest + if isinstance(closest, datetime.datetime): + params['closest'] = '%04d%02d%02d' % (closest.year, closest.month, closest.day) + else: + params['closest'] = closest params['sort'] = "closest" #print(params, file=sys.stderr) rows = self._query_api(params) -- cgit v1.2.3