diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 12:54:37 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 12:54:37 -0700 |
commit | 05bd7cbcc62588e431c5efd533189e246b2a997e (patch) | |
tree | abcc707a451e77ea1e8c5ac9a5925b97a4bd139a /python/tests/test_grobid.py | |
parent | f3f424e42f2f4f383103cf80b30a00cfa6cfc179 (diff) | |
download | sandcrawler-05bd7cbcc62588e431c5efd533189e246b2a997e.tar.gz sandcrawler-05bd7cbcc62588e431c5efd533189e246b2a997e.zip |
make fmt
Diffstat (limited to 'python/tests/test_grobid.py')
-rw-r--r-- | python/tests/test_grobid.py | 27 |
1 file changed, 16 insertions, 11 deletions
diff --git a/python/tests/test_grobid.py b/python/tests/test_grobid.py index 7d950df..55636dc 100644 --- a/python/tests/test_grobid.py +++ b/python/tests/test_grobid.py @@ -1,4 +1,3 @@ - import struct import pytest @@ -12,20 +11,21 @@ FAKE_PDF_BYTES = b"%PDF SOME JUNK" + struct.pack("!q", 112853843) with open('tests/files/23b29ea36382680716be08fc71aa81bd226e8a85.xml', 'rb') as f: REAL_TEI_XML = f.read() + @pytest.fixture def grobid_client(): - client = GrobidClient( - host_url="http://dummy-grobid", - ) + client = GrobidClient(host_url="http://dummy-grobid", ) return client + @responses.activate def test_grobid_503(grobid_client): status = b'{"status": "done broke due to 503"}' responses.add(responses.POST, - 'http://dummy-grobid/api/processFulltextDocument', status=503, - body=status) + 'http://dummy-grobid/api/processFulltextDocument', + status=503, + body=status) resp = grobid_client.process_fulltext(FAKE_PDF_BYTES) @@ -35,12 +35,15 @@ def test_grobid_503(grobid_client): assert resp['status_code'] == 503 assert resp['status'] == "error" + @responses.activate def test_grobid_success(grobid_client): responses.add(responses.POST, - 'http://dummy-grobid/api/processFulltextDocument', status=200, - body=REAL_TEI_XML, content_type='text/xml') + 'http://dummy-grobid/api/processFulltextDocument', + status=200, + body=REAL_TEI_XML, + content_type='text/xml') resp = grobid_client.process_fulltext(FAKE_PDF_BYTES) @@ -53,6 +56,7 @@ def test_grobid_success(grobid_client): #print(type(REAL_TEI_XML)) assert resp['tei_xml'] == REAL_TEI_XML.decode('ISO-8859-1') + @responses.activate def test_grobid_worker_cdx(grobid_client, wayback_client): @@ -60,8 +64,10 @@ def test_grobid_worker_cdx(grobid_client, wayback_client): worker = GrobidWorker(grobid_client, wayback_client, sink=sink) responses.add(responses.POST, - 'http://dummy-grobid/api/processFulltextDocument', status=200, - body=REAL_TEI_XML, content_type='text/xml') + 'http://dummy-grobid/api/processFulltextDocument', + 
status=200, + body=REAL_TEI_XML, + content_type='text/xml') with open('tests/files/example.cdx', 'r') as cdx_file: pusher = CdxLinePusher( @@ -76,4 +82,3 @@ def test_grobid_worker_cdx(grobid_client, wayback_client): assert pusher_counts['pushed'] == worker.counts['total'] assert len(responses.calls) == worker.counts['total'] - |