aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler/ingest_file.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-10-26 16:59:32 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-10-26 16:59:32 -0700
commit 4a46f166f8514b5620d2bcb13a5c5f3e6cee66c8 (patch)
tree a15635b11ca66d5cdbbc1c3f6eaa73fd5fe35801 /python/sandcrawler/ingest_file.py
parent f08bbeb7981fd692ffc9277d15d282883a408051 (diff)
download sandcrawler-4a46f166f8514b5620d2bcb13a5c5f3e6cee66c8.tar.gz
sandcrawler-4a46f166f8514b5620d2bcb13a5c5f3e6cee66c8.zip
more progress on type annotations and linting
Diffstat (limited to 'python/sandcrawler/ingest_file.py')
-rw-r--r-- python/sandcrawler/ingest_file.py 2
1 files changed, 2 insertions, 0 deletions
diff --git a/python/sandcrawler/ingest_file.py b/python/sandcrawler/ingest_file.py
index bc8643b..281c6d3 100644
--- a/python/sandcrawler/ingest_file.py
+++ b/python/sandcrawler/ingest_file.py
@@ -364,6 +364,8 @@ class IngestFileWorker(SandcrawlerWorker):
# Need to actually processes
result = process_pdf(resource.body)
+ assert result.sha1hex == file_meta['sha1hex']
+ assert result.file_meta is not None
assert result.file_meta['sha1hex'] == file_meta['sha1hex']
if self.thumbnail_sink and result.page0_thumbnail is not None:
self.thumbnail_sink.push_record(result.page0_thumbnail, key=result.sha1hex)