aboutsummaryrefslogtreecommitdiffstats
path: root/python/tests/test_grobid.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-01-08 19:28:29 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2020-01-09 16:29:37 -0800
commit: a6f2067e288fc235375af4fed12c5782a82856dc (patch)
tree: 1946309d62751cc6cffb3d468533d2cf171206e9 /python/tests/test_grobid.py
parent: cc9c911e714bc29b1ccd9133d56bec3960752a07 (diff)
download: sandcrawler-a6f2067e288fc235375af4fed12c5782a82856dc.tar.gz
sandcrawler-a6f2067e288fc235375af4fed12c5782a82856dc.zip
fix grobid test (ISO-8859-1 encoding)
Also changes for wayback refactor
Diffstat (limited to 'python/tests/test_grobid.py')
-rw-r--r-- python/tests/test_grobid.py 10
1 files changed, 4 insertions, 6 deletions
diff --git a/python/tests/test_grobid.py b/python/tests/test_grobid.py
index 330c384..8c5e080 100644
--- a/python/tests/test_grobid.py
+++ b/python/tests/test_grobid.py
@@ -4,7 +4,7 @@ import struct
import responses
from sandcrawler import GrobidClient, GrobidWorker, CdxLinePusher, BlackholeSink, WaybackClient
-from test_wayback import *
+from test_wayback import wayback_client, cdx_client
FAKE_PDF_BYTES = b"%PDF SOME JUNK" + struct.pack("!q", 112853843)
@@ -36,7 +36,6 @@ def test_grobid_503(grobid_client):
assert resp['status'] == "error"
@responses.activate
-@pytest.mark.skip(reason="XXX: need to fix unicode/bytes something something")
def test_grobid_success(grobid_client):
responses.add(responses.POST,
@@ -50,10 +49,9 @@ def test_grobid_success(grobid_client):
assert resp['status_code'] == 200
assert resp['status'] == "success"
- print(type(resp['tei_xml']))
- print(type(REAL_TEI_XML))
- assert resp['tei_xml'] == REAL_TEI_XML.decode('utf-8')
- #assert resp['tei_xml'].split('\n')[:3] == REAL_TEI_XML.split('\n')[:3]
+ #print(type(resp['tei_xml']))
+ #print(type(REAL_TEI_XML))
+ assert resp['tei_xml'] == REAL_TEI_XML.decode('ISO-8859-1')
@responses.activate
def test_grobid_worker_cdx(grobid_client, wayback_client):