diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-07-26 09:41:31 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-07-26 09:41:31 -0700 |
commit | a320544e6cf2f174558492d3ab09f152f65ac4d4 (patch) | |
tree | cc7ff3bc67547b8de96d3c3fdea54730dab4c64e | |
parent | 04d91c8bca2e12f9fedd39383c8e390ae175e5d1 (diff) | |
download | sandcrawler-a320544e6cf2f174558492d3ab09f152f65ac4d4.tar.gz sandcrawler-a320544e6cf2f174558492d3ab09f152f65ac4d4.zip |
ingest: fix postgrest lookup bug (double get of GROBID)
-rw-r--r-- | python/sandcrawler/ingest.py | 2 |
1 file changed, 1 insertion, 1 deletion
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 290bebc..3fa34e3 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -225,7 +225,7 @@ class IngestFileWorker(SandcrawlerWorker):
         """
         raise NotImplementedError("process_existing() not tested or safe yet")
         assert result_row['hit']
-        existing_file_meta = self.pgrest_client.get_grobid(result_row['terminal_sha1hex'])
+        existing_file_meta = self.pgrest_client.get_file_meta(result_row['terminal_sha1hex'])
         existing_grobid = self.pgrest_client.get_grobid(result_row['terminal_sha1hex'])
         existing_cdx = self.pgrest_client.get_cdx(result_row['terminal_url'], result_row['terminal_dt'])
         if not (existing_file_meta and existing_grobid and existing_cdx):