aboutsummaryrefslogtreecommitdiffstats
path: root/sql/random_queries.md
diff options
context:
space:
mode:
Diffstat (limited to 'sql/random_queries.md')
-rw-r--r--sql/random_queries.md62
1 files changed, 62 insertions, 0 deletions
diff --git a/sql/random_queries.md b/sql/random_queries.md
index 604088a..98b32f9 100644
--- a/sql/random_queries.md
+++ b/sql/random_queries.md
@@ -58,3 +58,65 @@ More stats (2019-12-27):
SELECT mimetype, COUNT(*) FROM file_meta GROUP BY mimetype ORDER BY COUNT(*) DESC LIMIT 20;
SELECT SUM(size_bytes) FROM file_meta;
+
+"Last 24 hour progress":
+
+ # "problem domains" and statuses
+ SELECT domain, status, COUNT((domain, status))
+ FROM (SELECT status, updated, substring(terminal_url FROM '[^/]+://([^/]*)') AS domain FROM ingest_file_result) t1
+ WHERE t1.domain != ''
+ AND t1.status != 'success'
+ AND t1.updated >= NOW() - '1 day'::INTERVAL
+ GROUP BY domain, status
+ ORDER BY COUNT DESC
+ LIMIT 10;
+
+ # "what type of errors"
+ SELECT ingest_type, status, COUNT(*)
+ FROM ingest_file_result
+ WHERE updated >= NOW() - '1 day'::INTERVAL
+ GROUP BY ingest_type, status
+ ORDER BY COUNT DESC
+ LIMIT 25;
+
+ # "throughput per day for last N days"
+ SELECT ingest_type,
+ date(updated),
+ COUNT(*) as total,
+ COUNT(CASE status WHEN 'success' THEN 1 ELSE null END) as success
+ FROM ingest_file_result
+ WHERE updated >= NOW() - '1 month'::INTERVAL
+ GROUP BY ingest_type, date(updated)
+ ORDER BY date(updated) DESC;
+
+## Parse URLs
+
+One approach is to do regexes, something like:
+
+ SELECT substring(column_name FROM '[^/]+://([^/]+)/') AS domain_name FROM table_name;
+
+Eg:
+
+ SELECT DISTINCT(domain), COUNT(domain)
+ FROM (select substring(base_url FROM '[^/]+://([^/]*)') as domain FROM ingest_file_result) t1
+ WHERE t1.domain != ''
+ GROUP BY domain
+ ORDER BY COUNT DESC
+ LIMIT 10;
+
+Or:
+
+ SELECT domain, status, COUNT((domain, status))
+ FROM (SELECT status, substring(terminal_url FROM '[^/]+://([^/]*)') AS domain FROM ingest_file_result) t1
+ WHERE t1.domain != ''
+ AND t1.status != 'success'
+ GROUP BY domain, status
+ ORDER BY COUNT DESC
+ LIMIT 10;
+
+Can also do some quick lookups for a specific domain and protocol like:
+
+ SELECT *
+ FROM ingest_file_result
+ WHERE terminal_url LIKE 'https://insights.ovid.com/%'
+ LIMIT 10;