diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-01-08 19:28:29 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-01-09 16:29:37 -0800 |
commit | a6f2067e288fc235375af4fed12c5782a82856dc (patch) | |
tree | 1946309d62751cc6cffb3d468533d2cf171206e9 /python/tests | |
parent | cc9c911e714bc29b1ccd9133d56bec3960752a07 (diff) | |
download | sandcrawler-a6f2067e288fc235375af4fed12c5782a82856dc.tar.gz sandcrawler-a6f2067e288fc235375af4fed12c5782a82856dc.zip |
fix grobid test (ISO-8859-1 encoding)
Also changes for wayback refactor
Diffstat (limited to 'python/tests')
-rw-r--r-- | python/tests/test_grobid.py | 10 |
1 file changed, 4 insertions, 6 deletions
```diff
diff --git a/python/tests/test_grobid.py b/python/tests/test_grobid.py
index 330c384..8c5e080 100644
--- a/python/tests/test_grobid.py
+++ b/python/tests/test_grobid.py
@@ -4,7 +4,7 @@ import struct
 import responses
 
 from sandcrawler import GrobidClient, GrobidWorker, CdxLinePusher, BlackholeSink, WaybackClient
-from test_wayback import *
+from test_wayback import wayback_client, cdx_client
 
 
 FAKE_PDF_BYTES = b"%PDF SOME JUNK" + struct.pack("!q", 112853843)
@@ -36,7 +36,6 @@ def test_grobid_503(grobid_client):
     assert resp['status'] == "error"
 
 @responses.activate
-@pytest.mark.skip(reason="XXX: need to fix unicode/bytes something something")
 def test_grobid_success(grobid_client):
 
     responses.add(responses.POST,
@@ -50,10 +49,9 @@ def test_grobid_success(grobid_client):
     assert resp['status_code'] == 200
     assert resp['status'] == "success"
 
-    print(type(resp['tei_xml']))
-    print(type(REAL_TEI_XML))
-    assert resp['tei_xml'] == REAL_TEI_XML.decode('utf-8')
-    #assert resp['tei_xml'].split('\n')[:3] == REAL_TEI_XML.split('\n')[:3]
+    #print(type(resp['tei_xml']))
+    #print(type(REAL_TEI_XML))
+    assert resp['tei_xml'] == REAL_TEI_XML.decode('ISO-8859-1')
 
 @responses.activate
 def test_grobid_worker_cdx(grobid_client, wayback_client):
```