about | summary | refs | log | tree | commit | diff | stats
path: root/python/tests
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-06-25 18:18:19 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-06-25 18:18:19 -0700
commit: 6f382a4c07ccac68896f75d55835a8876981edbd (patch)
tree: a8c998a5183cb40e7f3a89c793db09731d8ac5db /python/tests
parent: 585e0ba76687ae2872faed88bd1edc5b051136ca (diff)
download: sandcrawler-6f382a4c07ccac68896f75d55835a8876981edbd.tar.gz
download: sandcrawler-6f382a4c07ccac68896f75d55835a8876981edbd.zip
pdfextract support in ingest worker
Diffstat (limited to 'python/tests')
-rw-r--r-- python/tests/test_ingest.py 7
1 files changed, 7 insertions, 0 deletions
diff --git a/python/tests/test_ingest.py b/python/tests/test_ingest.py
index 33de35d..c2d6266 100644
--- a/python/tests/test_ingest.py
+++ b/python/tests/test_ingest.py
@@ -73,6 +73,10 @@ def test_ingest_success(ingest_worker_pdf):
'http://dummy-postgrest/grobid?sha1hex=eq.{}'.format("90ffd2359008d82298821d16b21778c5c39aec36"),
status=200,
body=json.dumps([]))
+ responses.add(responses.GET,
+ 'http://dummy-postgrest/pdf_meta?sha1hex=eq.{}'.format("90ffd2359008d82298821d16b21778c5c39aec36"),
+ status=200,
+ body=json.dumps([]))
responses.add(responses.POST,
'http://dummy-grobid/api/processFulltextDocument', status=200,
body=REAL_TEI_XML, content_type='text/xml')
@@ -99,6 +103,9 @@ def test_ingest_success(ingest_worker_pdf):
assert 'grobid_version' not in resp['grobid']['metadata']
assert 'fatcat_release' not in resp['grobid']['metadata']
assert not 'tei_xml' in resp['grobid']
+ assert resp['pdf_meta']['status'] == "success"
+ assert resp['pdf_meta']['pdf_extra']['page_count'] == 1
+ assert resp['pdf_meta'].get('text') is None
@responses.activate
def test_ingest_landing(ingest_worker):