diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-25 18:18:19 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-25 18:18:19 -0700 |
commit | 6f382a4c07ccac68896f75d55835a8876981edbd (patch) | |
tree | a8c998a5183cb40e7f3a89c793db09731d8ac5db /python/tests | |
parent | 585e0ba76687ae2872faed88bd1edc5b051136ca (diff) | |
download | sandcrawler-6f382a4c07ccac68896f75d55835a8876981edbd.tar.gz sandcrawler-6f382a4c07ccac68896f75d55835a8876981edbd.zip |
pdfextract support in ingest worker
Diffstat (limited to 'python/tests')
-rw-r--r-- | python/tests/test_ingest.py | 7 |
1 files changed, 7 insertions, 0 deletions
```diff
diff --git a/python/tests/test_ingest.py b/python/tests/test_ingest.py
index 33de35d..c2d6266 100644
--- a/python/tests/test_ingest.py
+++ b/python/tests/test_ingest.py
@@ -73,6 +73,10 @@ def test_ingest_success(ingest_worker_pdf):
         'http://dummy-postgrest/grobid?sha1hex=eq.{}'.format("90ffd2359008d82298821d16b21778c5c39aec36"),
         status=200,
         body=json.dumps([]))
+    responses.add(responses.GET,
+        'http://dummy-postgrest/pdf_meta?sha1hex=eq.{}'.format("90ffd2359008d82298821d16b21778c5c39aec36"),
+        status=200,
+        body=json.dumps([]))
     responses.add(responses.POST,
         'http://dummy-grobid/api/processFulltextDocument', status=200,
         body=REAL_TEI_XML, content_type='text/xml')
@@ -99,6 +103,9 @@ def test_ingest_success(ingest_worker_pdf):
     assert 'grobid_version' not in resp['grobid']['metadata']
     assert 'fatcat_release' not in resp['grobid']['metadata']
     assert not 'tei_xml' in resp['grobid']
+    assert resp['pdf_meta']['status'] == "success"
+    assert resp['pdf_meta']['pdf_extra']['page_count'] == 1
+    assert resp['pdf_meta'].get('text') is None
 
 @responses.activate
 def test_ingest_landing(ingest_worker):
```