diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-04 12:50:32 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-04 12:50:32 -0700 |
commit | 001ea86c59d10c9bd8f8bf6b8980ae23d52f9216 (patch) | |
tree | f3e25d94b0796c2db8c4496d2398bb7dcc3583cf /sql | |
parent | 28557119a7caa0699d5b44b3aa4bddf6d5cf7d3c (diff) | |
download | sandcrawler-001ea86c59d10c9bd8f8bf6b8980ae23d52f9216.tar.gz sandcrawler-001ea86c59d10c9bd8f8bf6b8980ae23d52f9216.zip |
new SQL recent SPN request monitoring query
Diffstat (limited to 'sql')
-rw-r--r-- | sql/monitoring_queries.md | 32 |
1 files changed, 32 insertions, 0 deletions
```diff
diff --git a/sql/monitoring_queries.md b/sql/monitoring_queries.md
index cf3b190..0859e79 100644
--- a/sql/monitoring_queries.md
+++ b/sql/monitoring_queries.md
@@ -168,3 +168,35 @@ Overall status, updated requests past 3 days:
     GROUP BY ingest_request.ingest_type, ingest_file_result.status
     ORDER BY COUNT(*) DESC;
 
+## savepapernow and fatcat-ingest recent status
+
+Specific recent ingests (for debugging):
+
+    -- for record layout: \x
+    SELECT
+        ingest_file_result.status as status,
+        ingest_request.ingest_type as ingest_type,
+        ingest_request.ingest_request_source as source,
+        ingest_request.link_source_id as source_id,
+        ingest_request.base_url as base_url,
+        ingest_file_result.terminal_dt as dt,
+        ingest_file_result.terminal_status_code as status_code,
+        ingest_file_result.terminal_sha1hex as sha1hex,
+        grobid.status as grobid_status
+    FROM ingest_file_result
+    LEFT JOIN ingest_request
+        ON ingest_file_result.ingest_type = ingest_request.ingest_type
+        AND ingest_file_result.base_url = ingest_request.base_url
+    LEFT JOIN grobid
+        ON ingest_file_result.terminal_sha1hex = grobid.sha1hex
+    WHERE
+        ingest_file_result.updated >= NOW() - '24 hour'::INTERVAL
+        -- AND ingest_request.ingest_type = 'pdf'
+        -- AND ingest_request.ingest_type = 'html'
+        AND (
+            ingest_request.ingest_request_source = 'savepapernow-web'
+            -- OR ingest_request.ingest_request_source = 'fatcat-ingest'
+        )
+    ORDER BY ingest_file_result.updated DESC
+    LIMIT 100;
+
```