diff options
author | Bryan Newbold <bnewbold@archive.org> | 2022-07-20 20:41:50 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2022-07-20 20:41:50 -0700 |
commit | 9fa29c80f1fdc14255ed53d0fd46291fe31f19ce (patch) | |
tree | 96816fa486a387ca0046c081943932b0a0ee9d72 | |
parent | 966df43c77581770df4d83d37afe8ead41d51abb (diff) | |
download | sandcrawler-9fa29c80f1fdc14255ed53d0fd46291fe31f19ce.tar.gz sandcrawler-9fa29c80f1fdc14255ed53d0fd46291fe31f19ce.zip |
ingest: bump max-hops from 6 to 8
-rw-r--r-- | python/sandcrawler/ingest_file.py | 2 |
1 file changed, 1 insertion, 1 deletion
```diff
diff --git a/python/sandcrawler/ingest_file.py b/python/sandcrawler/ingest_file.py
index cf87fff..3102ec2 100644
--- a/python/sandcrawler/ingest_file.py
+++ b/python/sandcrawler/ingest_file.py
@@ -104,7 +104,7 @@ class IngestFileWorker(SandcrawlerWorker):
         self.pdftext_sink = kwargs.get("pdftext_sink")
         self.xmldoc_sink = kwargs.get("xmldoc_sink")
         self.htmlteixml_sink = kwargs.get("htmlteixml_sink")
-        self.max_hops = 6
+        self.max_hops = 8
         self.try_existing_ingest = kwargs.get("try_existing_ingest", False)
         self.try_existing_grobid = kwargs.get("try_existing_grobid", True)
```