From 357b1b07d071cce7d4cd2289ee3965018c89646c Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 10 Nov 2021 15:33:46 -0800 Subject: ingest: start re-processing GROBID with newer version --- python/sandcrawler/ingest_file.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/sandcrawler/ingest_file.py b/python/sandcrawler/ingest_file.py index 4a5abbe..b6a5115 100644 --- a/python/sandcrawler/ingest_file.py +++ b/python/sandcrawler/ingest_file.py @@ -361,8 +361,12 @@ class IngestFileWorker(SandcrawlerWorker): if self.try_existing_grobid: existing = self.pgrest_client.get_grobid(file_meta["sha1hex"]) if existing: - print("found existing GROBID result", file=sys.stderr) - return existing + # grobid_timestamp = existing.get("grobid_timestamp") or None + # status + grobid_version = existing.get("grobid_version") or None + if grobid_version and grobid_version.startswith("0.7"): + print("found existing GROBID result", file=sys.stderr) + return existing # Need to actually processes result = self.grobid_client.process_fulltext(resource.body) -- cgit v1.2.3