diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-11-03 11:26:40 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-11-03 11:26:40 -0800 |
commit | bc22da30e379221c31ce466f76e1e3f048d3bcc8 (patch) | |
tree | b65af780c25661b7aef63f53fdb02c1bc7dbfb6e /python | |
parent | 806967ca168bcdbf2e57699703904333c21d4a2f (diff) | |
download | sandcrawler-bc22da30e379221c31ce466f76e1e3f048d3bcc8.tar.gz sandcrawler-bc22da30e379221c31ce466f76e1e3f048d3bcc8.zip |
cdx: fix 'closest' support
Diffstat (limited to 'python')
-rw-r--r-- | python/sandcrawler/ia.py | 5 |
1 files changed, 2 insertions, 3 deletions
```diff
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 664bd20..b1f90ea 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -273,10 +273,8 @@ class CdxApiClient:
         if closest:
             params['closest'] = closest
             params['sort'] = "closest"
-        print(params)
+        #print(params, file=sys.stderr)
         rows = self._query_api(params)
-        for r in rows:
-            print(f" {r.datetime}")
         if not rows:
             return None
 
@@ -984,6 +982,7 @@ class SavePageNowClient:
             elsevier_pdf_cdx = wayback_client.cdx_client.lookup_best(
                 spn_result.request_url,
                 best_mimetype="application/pdf",
+                closest=closest,
             )
             if elsevier_pdf_cdx and elsevier_pdf_cdx.mimetype == "application/pdf":
                 print("Trying pdf.sciencedirectassets.com hack!", file=sys.stderr)
```