about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2018-11-22 00:03:25 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2018-11-22 00:03:25 -0800
commit: 5d86251aa32985c99c8e5941700728a796553c1f (patch)
tree: 243a5c49aa93eb6e1900ff0286d1ebe03bf85835
parent: 16f567d88cca7e79c36e4c06205861c7fe70bfa7 (diff)
download: sandcrawler-5d86251aa32985c99c8e5941700728a796553c1f.tar.gz
sandcrawler-5d86251aa32985c99c8e5941700728a796553c1f.zip
fix ungrobid extraction tests
-rw-r--r-- python/tests/test_extraction_ungrobided.py 6
1 files changed, 4 insertions, 2 deletions
diff --git a/python/tests/test_extraction_ungrobided.py b/python/tests/test_extraction_ungrobided.py
index 366d392..cb46d29 100644
--- a/python/tests/test_extraction_ungrobided.py
+++ b/python/tests/test_extraction_ungrobided.py
@@ -73,7 +73,8 @@ def test_mapper_single_line(mock_fetch, job):
row = job.hb_table.row(b'sha1:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ')
assert struct.unpack("!q", row[b'file:size'])[0] == len(FAKE_PDF_BYTES)
- assert row[b'file:mime'] == b"application/pdf"
+ # file:mime should actually not get clobbered by GROBID updater
+ #assert row[b'file:mime'] == b"application/pdf"
assert struct.unpack("!q", row[b'grobid0:status_code'])[0] == 200
# TODO: assert row[b'grobid0:quality'] == None
status = json.loads(row[b'grobid0:status'].decode('utf-8'))
@@ -124,7 +125,8 @@ sha1:23RJIHUIOYY5747CR6YYCTMACXDCFYTT\t{"c": 1, "d": "2014-06-07T18:00:56", "f":
row = job.hb_table.row(b'sha1:23RJIHUIOYY5747CR6YYCTMACXDCFYTT')
assert struct.unpack("!q", row[b'file:size'])[0] == len(FAKE_PDF_BYTES)
- assert row[b'file:mime'] == b"application/pdf"
+ # file:mime should actually not get clobbered by GROBID updater
+ #assert row[b'file:mime'] == b"application/pdf"
assert struct.unpack("!q", row[b'grobid0:status_code'])[0] == 200
status = json.loads(row[b'grobid0:status'].decode('utf-8'))
assert type(status) == type(dict())