aboutsummaryrefslogtreecommitdiffstats
path: root/sql/monitoring_queries.md
blob: fd0ce5141da3af2dcaa1970c95e2985d6f2f1c33 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118

## fatcat-changelog pipeline

Overall ingest status, past 3 days:

    SELECT ingest_file_result.ingest_type, ingest_file_result.status, COUNT(*)
    FROM ingest_file_result
    LEFT JOIN ingest_request
        ON ingest_file_result.ingest_type = ingest_request.ingest_type
        AND ingest_file_result.base_url = ingest_request.base_url
    WHERE ingest_request.created >= NOW() - '3 day'::INTERVAL
        AND ingest_request.ingest_type = 'pdf'
        AND ingest_request.ingest_request_source = 'fatcat-changelog'
    GROUP BY ingest_file_result.ingest_type, ingest_file_result.status
    ORDER BY COUNT DESC
    LIMIT 20;

Broken domains, past 3 days:

    SELECT domain, status, COUNT((domain, status))
    FROM (
        SELECT
            ingest_file_result.ingest_type,
            ingest_file_result.status,
            substring(ingest_file_result.terminal_url FROM '[^/]+://([^/]*)') AS domain
        FROM ingest_file_result
        LEFT JOIN ingest_request
            ON ingest_file_result.ingest_type = ingest_request.ingest_type
            AND ingest_file_result.base_url = ingest_request.base_url
        WHERE
            -- ingest_request.created >= NOW() - '3 day'::INTERVAL
            ingest_file_result.updated >= NOW() - '3 day'::INTERVAL
            AND ingest_request.ingest_type = 'pdf'
            AND ingest_request.ingest_request_source = 'fatcat-changelog'
    ) t1
    WHERE t1.domain != ''
        AND t1.status != 'success'
    GROUP BY domain, status
    ORDER BY COUNT DESC
    LIMIT 25;

Throughput per day, and success, for past month:

    SELECT ingest_request.ingest_type,
           date(ingest_request.created),
           COUNT(*) as total,
           COUNT(CASE ingest_file_result.status WHEN 'success' THEN 1 ELSE null END) as success
    FROM ingest_file_result
    LEFT JOIN ingest_request
        ON ingest_file_result.ingest_type = ingest_request.ingest_type
        AND ingest_file_result.base_url = ingest_request.base_url
    WHERE ingest_request.created >= NOW() - '1 month'::INTERVAL
        AND ingest_request.ingest_type = 'pdf'
        AND ingest_request.ingest_request_source = 'fatcat-changelog'
    GROUP BY ingest_request.ingest_type, ingest_file_result.ingest_type, date(ingest_request.created)
    ORDER BY date(ingest_request.created) DESC;

## fatcat-ingest

Broken domains, past 7 days:

    SELECT domain, status, COUNT((domain, status))
    FROM (
        SELECT
            ingest_file_result.ingest_type,
            ingest_file_result.status,
            substring(ingest_file_result.terminal_url FROM '[^/]+://([^/]*)') AS domain
        FROM ingest_file_result
        LEFT JOIN ingest_request
            ON ingest_file_result.ingest_type = ingest_request.ingest_type
            AND ingest_file_result.base_url = ingest_request.base_url
        WHERE
            -- ingest_request.created >= NOW() - '7 day'::INTERVAL
            ingest_file_result.updated >= NOW() - '24 hour'::INTERVAL
            AND ingest_request.ingest_type = 'pdf'
            AND ingest_request.ingest_request_source = 'fatcat-ingest'
    ) t1
    WHERE t1.domain != ''
        AND t1.status != 'success'
    GROUP BY domain, status
    ORDER BY COUNT DESC
    LIMIT 25;

Throughput per day, and success, for past 7 days:

    SELECT ingest_request.ingest_type,
           date(ingest_file_result.updated),
           COUNT(*) as total,
           COUNT(CASE ingest_file_result.status WHEN 'success' THEN 1 ELSE null END) as success
    FROM ingest_file_result
    LEFT JOIN ingest_request
        ON ingest_file_result.ingest_type = ingest_request.ingest_type
        AND ingest_file_result.base_url = ingest_request.base_url
    WHERE
        -- ingest_request.created >= NOW() - '7 day'::INTERVAL
        ingest_file_result.updated >= NOW() - '24 hour'::INTERVAL
        AND ingest_request.ingest_type = 'pdf'
        AND ingest_request.ingest_request_source = 'fatcat-ingest'
    GROUP BY ingest_request.ingest_type, ingest_file_result.ingest_type, date(ingest_file_result.updated)
    ORDER BY date(ingest_file_result.updated) DESC;

Overall status, updated requests past 3 days:

    SELECT ingest_request.ingest_type,
           ingest_file_result.status,
           COUNT(*)
    FROM ingest_file_result
    LEFT JOIN ingest_request
        ON ingest_file_result.ingest_type = ingest_request.ingest_type
        AND ingest_file_result.base_url = ingest_request.base_url
    WHERE
        -- ingest_file_result.updated >= NOW() - '3 day'::INTERVAL
        ingest_file_result.updated >= NOW() - '48 hour'::INTERVAL
        AND ingest_request.ingest_type = 'pdf'
        AND ingest_request.ingest_request_source = 'fatcat-ingest'
    GROUP BY ingest_request.ingest_type, ingest_file_result.status
    ORDER BY COUNT(*) DESC;