about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2021-07-26 09:41:31 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2021-07-26 09:41:31 -0700
commit: a320544e6cf2f174558492d3ab09f152f65ac4d4 (patch)
tree: cc7ff3bc67547b8de96d3c3fdea54730dab4c64e
parent: 04d91c8bca2e12f9fedd39383c8e390ae175e5d1 (diff)
download: sandcrawler-a320544e6cf2f174558492d3ab09f152f65ac4d4.tar.gz
sandcrawler-a320544e6cf2f174558492d3ab09f152f65ac4d4.zip
ingest: fix postgrest lookup bug (double get of GROBID)
-rw-r--r-- python/sandcrawler/ingest.py 2
1 file changed, 1 insertion, 1 deletion
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 290bebc..3fa34e3 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -225,7 +225,7 @@ class IngestFileWorker(SandcrawlerWorker):
"""
raise NotImplementedError("process_existing() not tested or safe yet")
assert result_row['hit']
- existing_file_meta = self.pgrest_client.get_grobid(result_row['terminal_sha1hex'])
+ existing_file_meta = self.pgrest_client.get_file_meta(result_row['terminal_sha1hex'])
existing_grobid = self.pgrest_client.get_grobid(result_row['terminal_sha1hex'])
existing_cdx = self.pgrest_client.get_cdx(result_row['terminal_url'], result_row['terminal_dt'])
if not (existing_file_meta and existing_grobid and existing_cdx):