diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 15:16:19 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 15:16:25 -0700 |
commit | 8046e127c7de43f162bdc58f2a5c68c0848aef27 (patch) | |
tree | 637767f6cdab3e4e9618e25548da119fee969cf1 | |
parent | 262990a3cb77c790fe62c325415edf2416b840f0 (diff) | |
download | sandcrawler-8046e127c7de43f162bdc58f2a5c68c0848aef27.tar.gz sandcrawler-8046e127c7de43f162bdc58f2a5c68c0848aef27.zip |
ensure CDX request params are str, not int or datetime
This might be a bugfix, changing CDX lookup behavior?
-rw-r--r-- | python/sandcrawler/ia.py | 9 |
1 file changed, 6 insertions, 3 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py index 45db393..ad685aa 100644 --- a/python/sandcrawler/ia.py +++ b/python/sandcrawler/ia.py @@ -223,7 +223,7 @@ class CdxApiClient: 'from': datetime, 'to': datetime, 'matchType': 'exact', - 'limit': 1, + 'limit': "1", 'output': 'json', } if filter_status_code: @@ -291,7 +291,7 @@ class CdxApiClient: params: Dict[str, str] = { 'url': url, 'matchType': 'exact', - 'limit': -25, + 'limit': "-25", 'output': 'json', # Collapsing seems efficient, but is complex; would need to include # other filters and status code in filter @@ -304,7 +304,10 @@ class CdxApiClient: since = datetime.date.today() - datetime.timedelta(days=max_age_days) params['from'] = '%04d%02d%02d' % (since.year, since.month, since.day) if closest: - params['closest'] = closest + if isinstance(closest, datetime.datetime): + params['closest'] = '%04d%02d%02d' % (closest.year, closest.month, closest.day) + else: + params['closest'] = closest params['sort'] = "closest" #print(params, file=sys.stderr) rows = self._query_api(params) |