diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-01-15 13:54:02 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-01-15 13:54:02 -0800 |
commit | d06fd45e3c86cb080ad7724f3fc7575750a9cd69 (patch) | |
tree | 12862548aa14870af3c710076a6df8441f2ddb4e /python/tests | |
parent | 4d0224f3e73315ef4db39643e6d4851e4a466658 (diff) | |
download | sandcrawler-d06fd45e3c86cb080ad7724f3fc7575750a9cd69.tar.gz sandcrawler-d06fd45e3c86cb080ad7724f3fc7575750a9cd69.zip |
clarify ingest result schema and semantics
Diffstat (limited to 'python/tests')
-rw-r--r-- | python/tests/test_ingest.py | 22 | ||||
-rw-r--r-- | python/tests/test_live_wayback.py | 2 |
2 files changed, 21 insertions, 3 deletions
```diff
diff --git a/python/tests/test_ingest.py b/python/tests/test_ingest.py
index 8f96a26..050e2ea 100644
--- a/python/tests/test_ingest.py
+++ b/python/tests/test_ingest.py
@@ -83,10 +83,22 @@ def test_ingest_success(ingest_worker_pdf):
     assert resp['hit'] == True
     assert resp['status'] == "success"
     assert resp['request'] == request
-    assert resp['file_meta']['size_bytes']
-    assert resp['grobid']
+    assert resp['terminal']['terminal_sha1hex'] == resp['file_meta']['sha1hex']
+    assert type(resp['terminal']['terminal_dt']) == str
+    assert resp['terminal']['terminal_url'] == TARGET + "/redirect"
+    assert resp['terminal']['terminal_status_code']
+    assert type(resp['file_meta']['size_bytes']) == int
+    assert resp['file_meta']['mimetype'] == "application/pdf"
+    assert resp['cdx']['url'] == TARGET + "/redirect"
+    assert 'warc_path' not in resp['cdx']
+    assert 'revisit_cdx' not in resp
+    assert resp['grobid']['status'] == "success"
+    assert resp['grobid']['status_code'] == 200
+    assert resp['grobid']['grobid_version']
+    assert 'fatcat_release' in resp['grobid']
+    assert 'grobid_version' not in resp['grobid']['metadata']
+    assert 'fatcat_release' not in resp['grobid']['metadata']
     assert not 'tei_xml' in resp['grobid']
-    assert resp['terminal']
 
 @responses.activate
 def test_ingest_landing(ingest_worker):
@@ -131,5 +143,9 @@ def test_ingest_landing(ingest_worker):
     assert resp['hit'] == False
     assert resp['status'] == "no-pdf-link"
     assert resp['request'] == request
+    assert 'terminal' in resp
+    assert 'file_meta' not in resp
+    assert 'cdx' not in resp
+    assert 'revisit_cdx' not in resp
     assert 'grobid' not in resp
 
diff --git a/python/tests/test_live_wayback.py b/python/tests/test_live_wayback.py
index 4f7daef..429c6b0 100644
--- a/python/tests/test_live_wayback.py
+++ b/python/tests/test_live_wayback.py
@@ -132,6 +132,8 @@ def test_lookup_ftp(wayback_client):
     assert resp.terminal_url == url
     assert resp.terminal_status_code == 226
     assert resp.cdx.url == url
+    assert resp.revisit_cdx
+    assert resp.revisit_cdx.url != url
 
     file_meta = gen_file_metadata(resp.body)
     assert file_meta['sha1hex'] == resp.cdx.sha1hex
```