about summary refs log tree commit diff stats
path: root/python
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-12-15 19:08:44 -0800
committer Bryan Newbold <bnewbold@archive.org> 2020-12-15 19:08:44 -0800
commit db5df5c7de7e691c5aab01b3a12bd7308df23b8d (patch)
tree f3134cc1afac467e409f1f480c3ba93090026edf /python
parent 327c71e8561e670e20bd8ad61afca522e4d9082b (diff)
download sandcrawler-db5df5c7de7e691c5aab01b3a12bd7308df23b8d.tar.gz
sandcrawler-db5df5c7de7e691c5aab01b3a12bd7308df23b8d.zip
persist: don't expect HTML TEI-XML in result object
Diffstat (limited to 'python')
-rw-r--r-- python/sandcrawler/persist.py 2
1 files changed, 1 insertions, 1 deletions
diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py
index f13b1f3..d840da2 100644
--- a/python/sandcrawler/persist.py
+++ b/python/sandcrawler/persist.py
@@ -168,7 +168,7 @@ class PersistIngestFileResultWorker(SandcrawlerWorker):
sha1hex=file_meta["sha1hex"],
status=record.get('status'),
scope=record.get('scope'),
- has_teixml=bool(html_body and html_body['status'] == 'success' and html_body.get('tei_xml')),
+ has_teixml=bool(html_body and html_body['status'] == 'success'),
has_thumbnail=False, # TODO
word_count=(html_body and html_body.get('word_count')) or None,
biblio=record.get('html_biblio'),