diff options
| author | Bryan Newbold <bnewbold@archive.org> | 2021-07-26 09:41:31 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@archive.org> | 2021-07-26 09:41:31 -0700 | 
| commit | a320544e6cf2f174558492d3ab09f152f65ac4d4 (patch) | |
| tree | cc7ff3bc67547b8de96d3c3fdea54730dab4c64e | |
| parent | 04d91c8bca2e12f9fedd39383c8e390ae175e5d1 (diff) | |
| download | sandcrawler-a320544e6cf2f174558492d3ab09f152f65ac4d4.tar.gz sandcrawler-a320544e6cf2f174558492d3ab09f152f65ac4d4.zip | |
ingest: fix postgrest lookup bug (double get of GROBID)
| -rw-r--r-- | python/sandcrawler/ingest.py | 2 | 
1 file changed, 1 insertion, 1 deletion
```diff
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 290bebc..3fa34e3 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -225,7 +225,7 @@ class IngestFileWorker(SandcrawlerWorker):
         """
         raise NotImplementedError("process_existing() not tested or safe yet")
         assert result_row['hit']
-        existing_file_meta = self.pgrest_client.get_grobid(result_row['terminal_sha1hex'])
+        existing_file_meta = self.pgrest_client.get_file_meta(result_row['terminal_sha1hex'])
         existing_grobid = self.pgrest_client.get_grobid(result_row['terminal_sha1hex'])
         existing_cdx = self.pgrest_client.get_cdx(result_row['terminal_url'], result_row['terminal_dt'])
         if not (existing_file_meta and existing_grobid and existing_cdx):
```
