From a320544e6cf2f174558492d3ab09f152f65ac4d4 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 26 Jul 2021 09:41:31 -0700 Subject: ingest: fix postgrest lookup bug (double get of GROBID) --- python/sandcrawler/ingest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py index 290bebc..3fa34e3 100644 --- a/python/sandcrawler/ingest.py +++ b/python/sandcrawler/ingest.py @@ -225,7 +225,7 @@ class IngestFileWorker(SandcrawlerWorker): """ raise NotImplementedError("process_existing() not tested or safe yet") assert result_row['hit'] - existing_file_meta = self.pgrest_client.get_grobid(result_row['terminal_sha1hex']) + existing_file_meta = self.pgrest_client.get_file_meta(result_row['terminal_sha1hex']) existing_grobid = self.pgrest_client.get_grobid(result_row['terminal_sha1hex']) existing_cdx = self.pgrest_client.get_cdx(result_row['terminal_url'], result_row['terminal_dt']) if not (existing_file_meta and existing_grobid and existing_cdx): -- cgit v1.2.3