From d06fd45e3c86cb080ad7724f3fc7575750a9cd69 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 15 Jan 2020 13:54:02 -0800 Subject: clarify ingest result schema and semantics --- python/tests/test_ingest.py | 22 +++++++++++++++++++--- python/tests/test_live_wayback.py | 2 ++ 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'python/tests') diff --git a/python/tests/test_ingest.py b/python/tests/test_ingest.py index 8f96a26..050e2ea 100644 --- a/python/tests/test_ingest.py +++ b/python/tests/test_ingest.py @@ -83,10 +83,22 @@ def test_ingest_success(ingest_worker_pdf): assert resp['hit'] == True assert resp['status'] == "success" assert resp['request'] == request - assert resp['file_meta']['size_bytes'] - assert resp['grobid'] + assert resp['terminal']['terminal_sha1hex'] == resp['file_meta']['sha1hex'] + assert type(resp['terminal']['terminal_dt']) == str + assert resp['terminal']['terminal_url'] == TARGET + "/redirect" + assert resp['terminal']['terminal_status_code'] + assert type(resp['file_meta']['size_bytes']) == int + assert resp['file_meta']['mimetype'] == "application/pdf" + assert resp['cdx']['url'] == TARGET + "/redirect" + assert 'warc_path' not in resp['cdx'] + assert 'revisit_cdx' not in resp + assert resp['grobid']['status'] == "success" + assert resp['grobid']['status_code'] == 200 + assert resp['grobid']['grobid_version'] + assert 'fatcat_release' in resp['grobid'] + assert 'grobid_version' not in resp['grobid']['metadata'] + assert 'fatcat_release' not in resp['grobid']['metadata'] assert not 'tei_xml' in resp['grobid'] - assert resp['terminal'] @responses.activate def test_ingest_landing(ingest_worker): @@ -131,5 +143,9 @@ def test_ingest_landing(ingest_worker): assert resp['hit'] == False assert resp['status'] == "no-pdf-link" assert resp['request'] == request + assert 'terminal' in resp + assert 'file_meta' not in resp + assert 'cdx' not in resp + assert 'revisit_cdx' not in resp assert 'grobid' not in resp diff --git a/python/tests/test_live_wayback.py b/python/tests/test_live_wayback.py index 4f7daef..429c6b0 100644 --- a/python/tests/test_live_wayback.py +++ b/python/tests/test_live_wayback.py @@ -132,6 +132,8 @@ def test_lookup_ftp(wayback_client): assert resp.terminal_url == url assert resp.terminal_status_code == 226 assert resp.cdx.url == url + assert resp.revisit_cdx + assert resp.revisit_cdx.url != url file_meta = gen_file_metadata(resp.body) assert file_meta['sha1hex'] == resp.cdx.sha1hex -- cgit v1.2.3