aboutsummaryrefslogtreecommitdiffstats
path: root/python/tests
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-10-26 16:59:32 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-10-26 16:59:32 -0700
commit 4a46f166f8514b5620d2bcb13a5c5f3e6cee66c8 (patch)
tree a15635b11ca66d5cdbbc1c3f6eaa73fd5fe35801 /python/tests
parent f08bbeb7981fd692ffc9277d15d282883a408051 (diff)
download sandcrawler-4a46f166f8514b5620d2bcb13a5c5f3e6cee66c8.tar.gz
sandcrawler-4a46f166f8514b5620d2bcb13a5c5f3e6cee66c8.zip
more progress on type annotations and linting
Diffstat (limited to 'python/tests')
-rw-r--r-- python/tests/test_ingest.py 2
-rw-r--r-- python/tests/test_misc.py 2
2 files changed, 2 insertions, 2 deletions
diff --git a/python/tests/test_ingest.py b/python/tests/test_ingest.py
index f2318c2..617f2b4 100644
--- a/python/tests/test_ingest.py
+++ b/python/tests/test_ingest.py
@@ -105,7 +105,7 @@ def test_ingest_success(ingest_worker_pdf):
assert 'fatcat_release' in resp['grobid']
assert 'grobid_version' not in resp['grobid']['metadata']
assert 'fatcat_release' not in resp['grobid']['metadata']
- assert not 'tei_xml' in resp['grobid']
+ assert 'tei_xml' not in resp['grobid']
assert resp['pdf_meta']['status'] == "success"
assert resp['pdf_meta']['pdf_extra']['page_count'] == 1
assert resp['pdf_meta'].get('text') is None
diff --git a/python/tests/test_misc.py b/python/tests/test_misc.py
index 7d3e755..5830dc9 100644
--- a/python/tests/test_misc.py
+++ b/python/tests/test_misc.py
@@ -87,7 +87,7 @@ def test_invalid_cdx():
print("bad datetime")
raw = "edu,upenn,ldc)/sites/www.ldc.upenn.edu/files/medar2009-large-arabic-broadcast-collection.pdf 2070828233154 https://www.ldc.upenn.edu/sites/www.ldc.upenn.edu/files/medar2009-large-arabic-broadcast-collection.pdf application/pdf 200 WL3FEA62TEU4F52Y5DOVQ62VET4QJW7G - - 210251 931661233i SEMSCHOLAR-PDF-CRAWL-2017-08-04-20170828231135742-00000-00009-wbgrp-svc284/SEMSCHOLAR-PDF-CRAWL-2017-08-04-20170828232253025-00005-3480~wbgrp-svc284.us.archive.org~8443.warc.gz"
- assert parse_cdx_line(raw) == None
+ assert parse_cdx_line(raw) is None
def test_clean_url():