aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler/ia.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-11-03 11:26:40 -0800
committer Bryan Newbold <bnewbold@archive.org> 2020-11-03 11:26:40 -0800
commit bc22da30e379221c31ce466f76e1e3f048d3bcc8 (patch)
tree b65af780c25661b7aef63f53fdb02c1bc7dbfb6e /python/sandcrawler/ia.py
parent 806967ca168bcdbf2e57699703904333c21d4a2f (diff)
download sandcrawler-bc22da30e379221c31ce466f76e1e3f048d3bcc8.tar.gz
sandcrawler-bc22da30e379221c31ce466f76e1e3f048d3bcc8.zip
cdx: fix 'closest' support
Diffstat (limited to 'python/sandcrawler/ia.py')
-rw-r--r-- python/sandcrawler/ia.py 5
1 files changed, 2 insertions, 3 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 664bd20..b1f90ea 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -273,10 +273,8 @@ class CdxApiClient:
if closest:
params['closest'] = closest
params['sort'] = "closest"
- print(params)
+ #print(params, file=sys.stderr)
rows = self._query_api(params)
- for r in rows:
- print(f" {r.datetime}")
if not rows:
return None
@@ -984,6 +982,7 @@ class SavePageNowClient:
elsevier_pdf_cdx = wayback_client.cdx_client.lookup_best(
spn_result.request_url,
best_mimetype="application/pdf",
+ closest=closest,
)
if elsevier_pdf_cdx and elsevier_pdf_cdx.mimetype == "application/pdf":
print("Trying pdf.sciencedirectassets.com hack!", file=sys.stderr)