diff options
author | Bryan Newbold <bnewbold@archive.org> | 2018-11-22 00:03:25 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2018-11-22 00:03:25 -0800 |
commit | 5d86251aa32985c99c8e5941700728a796553c1f (patch) | |
tree | 243a5c49aa93eb6e1900ff0286d1ebe03bf85835 | |
parent | 16f567d88cca7e79c36e4c06205861c7fe70bfa7 (diff) | |
download | sandcrawler-5d86251aa32985c99c8e5941700728a796553c1f.tar.gz sandcrawler-5d86251aa32985c99c8e5941700728a796553c1f.zip |
fix ungrobid extraction tests
-rw-r--r-- | python/tests/test_extraction_ungrobided.py | 6 |
1 files changed, 4 insertions, 2 deletions
diff --git a/python/tests/test_extraction_ungrobided.py b/python/tests/test_extraction_ungrobided.py
index 366d392..cb46d29 100644
--- a/python/tests/test_extraction_ungrobided.py
+++ b/python/tests/test_extraction_ungrobided.py
@@ -73,7 +73,8 @@ def test_mapper_single_line(mock_fetch, job):
     row = job.hb_table.row(b'sha1:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ')
     assert struct.unpack("!q", row[b'file:size'])[0] == len(FAKE_PDF_BYTES)
-    assert row[b'file:mime'] == b"application/pdf"
+    # file:mime should actually not get clobbered by GROBID updater
+    #assert row[b'file:mime'] == b"application/pdf"
     assert struct.unpack("!q", row[b'grobid0:status_code'])[0] == 200
     # TODO: assert row[b'grobid0:quality'] == None
     status = json.loads(row[b'grobid0:status'].decode('utf-8'))
@@ -124,7 +125,8 @@ sha1:23RJIHUIOYY5747CR6YYCTMACXDCFYTT\t{"c": 1, "d": "2014-06-07T18:00:56", "f":
     row = job.hb_table.row(b'sha1:23RJIHUIOYY5747CR6YYCTMACXDCFYTT')
     assert struct.unpack("!q", row[b'file:size'])[0] == len(FAKE_PDF_BYTES)
-    assert row[b'file:mime'] == b"application/pdf"
+    # file:mime should actually not get clobbered by GROBID updater
+    #assert row[b'file:mime'] == b"application/pdf"
     assert struct.unpack("!q", row[b'grobid0:status_code'])[0] == 200
     status = json.loads(row[b'grobid0:status'].decode('utf-8'))
     assert type(status) == type(dict()) |