diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-11-16 11:50:46 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-11-16 11:50:46 -0800 |
commit | 5537b666ad392fb13aa956ebff4e7aa0927b68ee (patch) | |
tree | fbed416cf7036bec06d8275a6030c7ee809c2ba0 /sql/monitoring_queries.md | |
parent | 54bb5b6b7fb7a19aac7093a170e5b062f51e5a47 (diff) | |
download | sandcrawler-5537b666ad392fb13aa956ebff4e7aa0927b68ee.tar.gz sandcrawler-5537b666ad392fb13aa956ebff4e7aa0927b68ee.zip |
SQL: more ingest monitoring
Diffstat (limited to 'sql/monitoring_queries.md')
-rw-r--r-- | sql/monitoring_queries.md | 28 |
1 files changed, 27 insertions, 1 deletions
diff --git a/sql/monitoring_queries.md b/sql/monitoring_queries.md index 1c872cc..cf3b190 100644 --- a/sql/monitoring_queries.md +++ b/sql/monitoring_queries.md @@ -61,9 +61,35 @@ Summary of significant domains and status, past 7 days: WHERE t1.domain != '' GROUP BY CUBE (domain, status) ) t2 - WHERE count > 500 + WHERE count > 200 ORDER BY domain ASC , count DESC; +Summary of DOI prefix and status, past 7 days: + + SELECT doi_prefix, status, count + FROM ( + SELECT doi_prefix, status, COUNT((doi_prefix, status)) as count + FROM ( + SELECT + ingest_file_result.ingest_type, + ingest_file_result.status, + substring(ingest_request.link_source_id FROM '(10\.[^/]*)/.*') AS doi_prefix + FROM ingest_file_result + LEFT JOIN ingest_request + ON ingest_file_result.ingest_type = ingest_request.ingest_type + AND ingest_file_result.base_url = ingest_request.base_url + WHERE + ingest_file_result.updated >= NOW() - '7 day'::INTERVAL + AND ingest_request.ingest_type = 'pdf' + AND ingest_request.ingest_request_source = 'fatcat-changelog' + AND ingest_request.link_source = 'doi' + ) t1 + WHERE t1.doi_prefix != '' + GROUP BY CUBE (doi_prefix, status) + ) t2 + WHERE count > 200 + ORDER BY doi_prefix ASC , count DESC; + Throughput per day, and success, for past 30 days: |