From 4a9fba8005e0a65c03198c674d2c65f7440d71a6 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Wed, 21 Oct 2020 12:38:09 -0700
Subject: SQL: update weekly/quarterly ingest retry scripts

---
 sql/dump_unextracted_pdf.sql | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
 (limited to 'sql/dump_unextracted_pdf.sql')

diff --git a/sql/dump_unextracted_pdf.sql b/sql/dump_unextracted_pdf.sql
index 7b5e823..fb4b0af 100644
--- a/sql/dump_unextracted_pdf.sql
+++ b/sql/dump_unextracted_pdf.sql
@@ -9,12 +9,14 @@ COPY (
   FROM grobid
   LEFT JOIN cdx ON grobid.sha1hex = cdx.sha1hex
   --LEFT JOIN fatcat_file ON grobid.sha1hex = fatcat_file.sha1hex
+  LEFT JOIN ingest_file_result ON grobid.sha1hex = ingest_file_result.terminal_sha1hex
   LEFT JOIN pdf_meta ON grobid.sha1hex = pdf_meta.sha1hex
   WHERE cdx.sha1hex IS NOT NULL
     --AND fatcat_file.sha1hex IS NOT NULL
+    AND ingest_file_result.terminal_sha1hex IS NOT NULL
     AND pdf_meta.sha1hex IS NULL
 )
-TO '/grande/snapshots/dump_unextracted_pdf.fatcat.2020-07-22.json'
+TO '/grande/snapshots/dump_unextracted_pdf.ingest.2020-10-21.json'
 WITH NULL '';
 
 ROLLBACK;
-- 
cgit v1.2.3