about summary refs log tree commit diff stats
path: root/python/tests
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-01-15 13:54:02 -0800
committer Bryan Newbold <bnewbold@archive.org> 2020-01-15 13:54:02 -0800
commit d06fd45e3c86cb080ad7724f3fc7575750a9cd69 (patch)
tree 12862548aa14870af3c710076a6df8441f2ddb4e /python/tests
parent 4d0224f3e73315ef4db39643e6d4851e4a466658 (diff)
download sandcrawler-d06fd45e3c86cb080ad7724f3fc7575750a9cd69.tar.gz
sandcrawler-d06fd45e3c86cb080ad7724f3fc7575750a9cd69.zip
clarify ingest result schema and semantics
Diffstat (limited to 'python/tests')
-rw-r--r-- python/tests/test_ingest.py 22
-rw-r--r-- python/tests/test_live_wayback.py 2
2 files changed, 21 insertions, 3 deletions
diff --git a/python/tests/test_ingest.py b/python/tests/test_ingest.py
index 8f96a26..050e2ea 100644
--- a/python/tests/test_ingest.py
+++ b/python/tests/test_ingest.py
@@ -83,10 +83,22 @@ def test_ingest_success(ingest_worker_pdf):
assert resp['hit'] == True
assert resp['status'] == "success"
assert resp['request'] == request
- assert resp['file_meta']['size_bytes']
- assert resp['grobid']
+ assert resp['terminal']['terminal_sha1hex'] == resp['file_meta']['sha1hex']
+ assert type(resp['terminal']['terminal_dt']) == str
+ assert resp['terminal']['terminal_url'] == TARGET + "/redirect"
+ assert resp['terminal']['terminal_status_code']
+ assert type(resp['file_meta']['size_bytes']) == int
+ assert resp['file_meta']['mimetype'] == "application/pdf"
+ assert resp['cdx']['url'] == TARGET + "/redirect"
+ assert 'warc_path' not in resp['cdx']
+ assert 'revisit_cdx' not in resp
+ assert resp['grobid']['status'] == "success"
+ assert resp['grobid']['status_code'] == 200
+ assert resp['grobid']['grobid_version']
+ assert 'fatcat_release' in resp['grobid']
+ assert 'grobid_version' not in resp['grobid']['metadata']
+ assert 'fatcat_release' not in resp['grobid']['metadata']
assert not 'tei_xml' in resp['grobid']
- assert resp['terminal']
@responses.activate
def test_ingest_landing(ingest_worker):
@@ -131,5 +143,9 @@ def test_ingest_landing(ingest_worker):
assert resp['hit'] == False
assert resp['status'] == "no-pdf-link"
assert resp['request'] == request
+ assert 'terminal' in resp
+ assert 'file_meta' not in resp
+ assert 'cdx' not in resp
+ assert 'revisit_cdx' not in resp
assert 'grobid' not in resp
diff --git a/python/tests/test_live_wayback.py b/python/tests/test_live_wayback.py
index 4f7daef..429c6b0 100644
--- a/python/tests/test_live_wayback.py
+++ b/python/tests/test_live_wayback.py
@@ -132,6 +132,8 @@ def test_lookup_ftp(wayback_client):
assert resp.terminal_url == url
assert resp.terminal_status_code == 226
assert resp.cdx.url == url
+ assert resp.revisit_cdx
+ assert resp.revisit_cdx.url != url
file_meta = gen_file_metadata(resp.body)
assert file_meta['sha1hex'] == resp.cdx.sha1hex