diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-11-08 14:28:24 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-11-08 14:28:24 -0800 |
commit | 5d525e9744303bf5ddcf673623483d4a6a787326 (patch) | |
tree | 14421cb165977aeeb80d652d582a65af7a44e304 /python/sandcrawler/ingest.py | |
parent | 5a9e8d9441662c508cf583114b9edc85cc608587 (diff) | |
download | sandcrawler-5d525e9744303bf5ddcf673623483d4a6a787326.tar.gz sandcrawler-5d525e9744303bf5ddcf673623483d4a6a787326.zip |
html: start improving scope detection
Diffstat (limited to 'python/sandcrawler/ingest.py')
-rw-r--r-- | python/sandcrawler/ingest.py | 2 |
1 files changed, 1 insertions, 1 deletions
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 5d31d62..d95b8bf 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -353,7 +353,7 @@ class IngestFileWorker(SandcrawlerWorker):
         if html_scope not in ('article-fulltext', 'unknown'):
             html_body.pop("tei_xml", None)
             return dict(
-                status="html-body-wrong-scope",
+                status="wrong-scope",
                 html_biblio=html_biblio_dict,
                 html_scope=html_scope,
             )