about | summary | refs | log | tree | commit | diff | stats
path: root/python
diff options
context:
space:
mode:
Diffstat (limited to 'python')
-rw-r--r-- python/sandcrawler/ingest_file.py | 8
1 files changed, 6 insertions, 2 deletions
diff --git a/python/sandcrawler/ingest_file.py b/python/sandcrawler/ingest_file.py
index 4a5abbe..b6a5115 100644
--- a/python/sandcrawler/ingest_file.py
+++ b/python/sandcrawler/ingest_file.py
@@ -361,8 +361,12 @@ class IngestFileWorker(SandcrawlerWorker):
if self.try_existing_grobid:
existing = self.pgrest_client.get_grobid(file_meta["sha1hex"])
if existing:
- print("found existing GROBID result", file=sys.stderr)
- return existing
+ # grobid_timestamp = existing.get("grobid_timestamp") or None
+ # status
+ grobid_version = existing.get("grobid_version") or None
+ if grobid_version and grobid_version.startswith("0.7"):
+ print("found existing GROBID result", file=sys.stderr)
+ return existing
# Need to actually processes
result = self.grobid_client.process_fulltext(resource.body)