From 24185837a47f305757a5c783b95ca25b709f66e3 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 9 Jan 2020 17:31:08 -0800 Subject: refactor ingest to a loop, allowing multiple hops --- python/tests/test_ingest.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'python/tests') diff --git a/python/tests/test_ingest.py b/python/tests/test_ingest.py index 8692b21..f5599e9 100644 --- a/python/tests/test_ingest.py +++ b/python/tests/test_ingest.py @@ -109,12 +109,19 @@ def test_ingest_landing(ingest_worker): headers={"X-Archive-Src": "liveweb-whatever.warc.gz"}, body=WARC_BODY) + # this is for second time around; don't want to fetch same landing page + # HTML again and result in a loop + responses.add(responses.GET, + 'https://web.archive.org/web/{}id_/{}'.format("20180326070330", TARGET + "/redirect"), + status=200, + headers={"X-Archive-Src": "liveweb-whatever.warc.gz"}, + body="") + resp = ingest_worker.process(request) print(resp) assert resp['hit'] == False - assert resp['status'] == "wrong-mimetype" + assert resp['status'] == "no-pdf-link" assert resp['request'] == request assert 'grobid' not in resp - assert resp['terminal'] -- cgit v1.2.3