diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 16:59:32 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 16:59:32 -0700 |
commit | 4a46f166f8514b5620d2bcb13a5c5f3e6cee66c8 (patch) | |
tree | a15635b11ca66d5cdbbc1c3f6eaa73fd5fe35801 /python/tests | |
parent | f08bbeb7981fd692ffc9277d15d282883a408051 (diff) | |
download | sandcrawler-4a46f166f8514b5620d2bcb13a5c5f3e6cee66c8.tar.gz sandcrawler-4a46f166f8514b5620d2bcb13a5c5f3e6cee66c8.zip |
more progress on type annotations and linting
Diffstat (limited to 'python/tests')
-rw-r--r-- | python/tests/test_ingest.py | 2 | ||||
-rw-r--r-- | python/tests/test_misc.py | 2 |
2 files changed, 2 insertions, 2 deletions
```diff
diff --git a/python/tests/test_ingest.py b/python/tests/test_ingest.py
index f2318c2..617f2b4 100644
--- a/python/tests/test_ingest.py
+++ b/python/tests/test_ingest.py
@@ -105,7 +105,7 @@ def test_ingest_success(ingest_worker_pdf):
     assert 'fatcat_release' in resp['grobid']
     assert 'grobid_version' not in resp['grobid']['metadata']
     assert 'fatcat_release' not in resp['grobid']['metadata']
-    assert not 'tei_xml' in resp['grobid']
+    assert 'tei_xml' not in resp['grobid']
     assert resp['pdf_meta']['status'] == "success"
     assert resp['pdf_meta']['pdf_extra']['page_count'] == 1
     assert resp['pdf_meta'].get('text') is None
diff --git a/python/tests/test_misc.py b/python/tests/test_misc.py
index 7d3e755..5830dc9 100644
--- a/python/tests/test_misc.py
+++ b/python/tests/test_misc.py
@@ -87,7 +87,7 @@ def test_invalid_cdx():
 
     print("bad datetime")
     raw = "edu,upenn,ldc)/sites/www.ldc.upenn.edu/files/medar2009-large-arabic-broadcast-collection.pdf 2070828233154 https://www.ldc.upenn.edu/sites/www.ldc.upenn.edu/files/medar2009-large-arabic-broadcast-collection.pdf application/pdf 200 WL3FEA62TEU4F52Y5DOVQ62VET4QJW7G - - 210251 931661233i SEMSCHOLAR-PDF-CRAWL-2017-08-04-20170828231135742-00000-00009-wbgrp-svc284/SEMSCHOLAR-PDF-CRAWL-2017-08-04-20170828232253025-00005-3480~wbgrp-svc284.us.archive.org~8443.warc.gz"
-    assert parse_cdx_line(raw) == None
+    assert parse_cdx_line(raw) is None
 
 
 def test_clean_url():
```