From a6f2067e288fc235375af4fed12c5782a82856dc Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 8 Jan 2020 19:28:29 -0800 Subject: fix grobid test (ISO-8859-1 encoding) Also changes for wayback refactor --- python/tests/test_grobid.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'python/tests/test_grobid.py') diff --git a/python/tests/test_grobid.py b/python/tests/test_grobid.py index 330c384..8c5e080 100644 --- a/python/tests/test_grobid.py +++ b/python/tests/test_grobid.py @@ -4,7 +4,7 @@ import struct import responses from sandcrawler import GrobidClient, GrobidWorker, CdxLinePusher, BlackholeSink, WaybackClient -from test_wayback import * +from test_wayback import wayback_client, cdx_client FAKE_PDF_BYTES = b"%PDF SOME JUNK" + struct.pack("!q", 112853843) @@ -36,7 +36,6 @@ def test_grobid_503(grobid_client): assert resp['status'] == "error" @responses.activate -@pytest.mark.skip(reason="XXX: need to fix unicode/bytes something something") def test_grobid_success(grobid_client): responses.add(responses.POST, @@ -50,10 +49,9 @@ def test_grobid_success(grobid_client): assert resp['status_code'] == 200 assert resp['status'] == "success" - print(type(resp['tei_xml'])) - print(type(REAL_TEI_XML)) - assert resp['tei_xml'] == REAL_TEI_XML.decode('utf-8') - #assert resp['tei_xml'].split('\n')[:3] == REAL_TEI_XML.split('\n')[:3] + #print(type(resp['tei_xml'])) + #print(type(REAL_TEI_XML)) + assert resp['tei_xml'] == REAL_TEI_XML.decode('ISO-8859-1') @responses.activate def test_grobid_worker_cdx(grobid_client, wayback_client): -- cgit v1.2.3