diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-12-15 19:08:44 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-12-15 19:08:44 -0800 |
commit | db5df5c7de7e691c5aab01b3a12bd7308df23b8d (patch) | |
tree | f3134cc1afac467e409f1f480c3ba93090026edf /python | |
parent | 327c71e8561e670e20bd8ad61afca522e4d9082b (diff) | |
download | sandcrawler-db5df5c7de7e691c5aab01b3a12bd7308df23b8d.tar.gz sandcrawler-db5df5c7de7e691c5aab01b3a12bd7308df23b8d.zip |
persist: don't expect HTML TEI-XML in result object
Diffstat (limited to 'python')
-rw-r--r-- | python/sandcrawler/persist.py | 2 |
1 files changed, 1 insertions, 1 deletions
```diff
diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py
index f13b1f3..d840da2 100644
--- a/python/sandcrawler/persist.py
+++ b/python/sandcrawler/persist.py
@@ -168,7 +168,7 @@ class PersistIngestFileResultWorker(SandcrawlerWorker):
             sha1hex=file_meta["sha1hex"],
             status=record.get('status'),
             scope=record.get('scope'),
-            has_teixml=bool(html_body and html_body['status'] == 'success' and html_body.get('tei_xml')),
+            has_teixml=bool(html_body and html_body['status'] == 'success'),
             has_thumbnail=False,  # TODO
             word_count=(html_body and html_body.get('word_count')) or None,
             biblio=record.get('html_biblio'),
```