about | summary | refs | log | tree | commit | diff | stats
path: root/python/sandcrawler/ingest.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-11-08 14:28:24 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2020-11-08 14:28:24 -0800
commit: 5d525e9744303bf5ddcf673623483d4a6a787326 (patch)
tree: 14421cb165977aeeb80d652d582a65af7a44e304 /python/sandcrawler/ingest.py
parent: 5a9e8d9441662c508cf583114b9edc85cc608587 (diff)
download: sandcrawler-5d525e9744303bf5ddcf673623483d4a6a787326.tar.gz
download: sandcrawler-5d525e9744303bf5ddcf673623483d4a6a787326.zip
html: start improving scope detection
Diffstat (limited to 'python/sandcrawler/ingest.py')
-rw-r--r-- python/sandcrawler/ingest.py | 2
1 file changed, 1 insertion, 1 deletion
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 5d31d62..d95b8bf 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -353,7 +353,7 @@ class IngestFileWorker(SandcrawlerWorker):
if html_scope not in ('article-fulltext', 'unknown'):
html_body.pop("tei_xml", None)
return dict(
- status="html-body-wrong-scope",
+ status="wrong-scope",
html_biblio=html_biblio_dict,
html_scope=html_scope,
)