aboutsummaryrefslogtreecommitdiffstats
path: root/sql
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2020-11-16 11:50:46 -0800
committerBryan Newbold <bnewbold@archive.org>2020-11-16 11:50:46 -0800
commit5537b666ad392fb13aa956ebff4e7aa0927b68ee (patch)
treefbed416cf7036bec06d8275a6030c7ee809c2ba0 /sql
parent54bb5b6b7fb7a19aac7093a170e5b062f51e5a47 (diff)
downloadsandcrawler-5537b666ad392fb13aa956ebff4e7aa0927b68ee.tar.gz
sandcrawler-5537b666ad392fb13aa956ebff4e7aa0927b68ee.zip
SQL: more ingest monitoring
Diffstat (limited to 'sql')
-rw-r--r--sql/ingest_stats/2020-11-16_weekly_ingest_doi_prefix.txt326
-rw-r--r--sql/ingest_stats/2020-11-16_weekly_ingest_terminal_domain.txt307
-rw-r--r--sql/monitoring_queries.md28
3 files changed, 660 insertions, 1 deletions
diff --git a/sql/ingest_stats/2020-11-16_weekly_ingest_doi_prefix.txt b/sql/ingest_stats/2020-11-16_weekly_ingest_doi_prefix.txt
new file mode 100644
index 0000000..b684400
--- /dev/null
+++ b/sql/ingest_stats/2020-11-16_weekly_ingest_doi_prefix.txt
@@ -0,0 +1,326 @@
+ doi_prefix | status | count
+------------+-------------------------------+--------
+ 10.1001 | | 230
+ 10.1002 | | 3914
+ 10.1002 | terminal-bad-status | 1540
+ 10.1002 | forbidden | 1072
+ 10.1002 | redirect-loop | 995
+ 10.1002 | no-pdf-link | 210
+ 10.1016 | | 7976
+ 10.1016 | no-pdf-link | 4648
+ 10.1016 | terminal-bad-status | 1778
+ 10.1016 | forbidden | 622
+ 10.1016 | spn2-error:too-many-redirects | 344
+ 10.1016 | redirect-loop | 225
+ 10.1017 | | 2040
+ 10.1017 | no-pdf-link | 720
+ 10.1017 | success | 441
+ 10.1017 | link-loop | 371
+ 10.1017 | bad-redirect | 227
+ 10.1021 | | 1722
+ 10.1021 | blocked-cookie | 1552
+ 10.1029 | | 248
+ 10.1039 | | 1160
+ 10.1039 | redirect-loop | 486
+ 10.1039 | spn2-error:too-many-redirects | 395
+ 10.1039 | spn2-wayback-error | 213
+ 10.1051 | | 695
+ 10.1051 | success | 557
+ 10.1055 | | 541
+ 10.1055 | not-found | 295
+ 10.1055 | redirect-loop | 213
+ 10.1057 | | 2835
+ 10.1057 | redirect-loop | 2617
+ 10.1061 | | 550
+ 10.1061 | spn2-error:too-many-redirects | 425
+ 10.1063 | | 600
+ 10.1063 | spn2-error:too-many-redirects | 328
+ 10.1080 | | 3801
+ 10.1080 | blocked-cookie | 2431
+ 10.1080 | terminal-bad-status | 711
+ 10.1080 | forbidden | 341
+ 10.1081 | | 299
+ 10.1081 | link-loop | 222
+ 10.1089 | | 236
+ 10.1089 | blocked-cookie | 228
+ 10.1093 | | 12805
+ 10.1093 | link-loop | 8627
+ 10.1093 | redirect-loop | 1659
+ 10.1093 | no-pdf-link | 1475
+ 10.1093 | bad-redirect | 428
+ 10.1093 | success | 391
+ 10.1097 | | 1497
+ 10.1097 | no-pdf-link | 503
+ 10.1097 | link-loop | 346
+ 10.1097 | spn2-error:too-many-redirects | 259
+ 10.1097 | terminal-bad-status | 202
+ 10.1101 | | 1859
+ 10.1101 | redirect-loop | 993
+ 10.1101 | forbidden | 703
+ 10.1103 | | 597
+ 10.1103 | not-found | 534
+ 10.1108 | | 1055
+ 10.1108 | no-pdf-link | 945
+ 10.1109 | | 7067
+ 10.1109 | spn2-error:too-many-redirects | 6299
+ 10.1109 | success | 667
+ 10.1111 | | 2099
+ 10.1111 | redirect-loop | 1331
+ 10.1111 | terminal-bad-status | 313
+ 10.1111 | forbidden | 226
+ 10.1115 | | 1278
+ 10.1115 | bad-redirect | 707
+ 10.1117 | | 561
+ 10.1117 | spn2-error:too-many-redirects | 501
+ 10.1126 | | 214
+ 10.1136 | | 1989
+ 10.1136 | success | 1463
+ 10.1136 | link-loop | 294
+ 10.1142 | | 300
+ 10.1142 | blocked-cookie | 237
+ 10.1145 | | 440
+ 10.1145 | blocked-cookie | 354
+ 10.1155 | | 480
+ 10.1155 | success | 474
+ 10.11588 | | 506
+ 10.11588 | no-pdf-link | 264
+ 10.11588 | success | 236
+ 10.1159 | | 226
+ 10.11606 | | 304
+ 10.1161 | | 1142
+ 10.1161 | blocked-cookie | 1011
+ 10.1163 | | 2261
+ 10.1163 | link-loop | 1767
+ 10.1163 | success | 348
+ 10.11648 | | 405
+ 10.11648 | success | 404
+ 10.1182 | | 2125
+ 10.1182 | no-pdf-link | 2024
+ 10.1183 | | 987
+ 10.1183 | redirect-loop | 838
+ 10.1186 | | 1481
+ 10.1186 | success | 1412
+ 10.1201 | | 7649
+ 10.1201 | link-loop | 5383
+ 10.1201 | forbidden | 1504
+ 10.1201 | no-pdf-link | 312
+ 10.1299 | | 264
+ 10.1299 | no-pdf-link | 209
+ 10.13134 | | 201
+ 10.1353 | | 549
+ 10.1353 | terminal-bad-status | 443
+ 10.1371 | | 552
+ 10.1371 | success | 542
+ 10.14201 | | 656
+ 10.14201 | success | 366
+ 10.14361 | | 647
+ 10.14361 | link-loop | 585
+ 10.14746 | | 260
+ 10.14746 | success | 232
+ 10.1504 | | 527
+ 10.1504 | no-pdf-link | 501
+ 10.15122 | | 246
+ 10.15122 | success | 243
+ 10.1515 | | 16240
+ 10.1515 | link-loop | 12589
+ 10.1515 | success | 1941
+ 10.1515 | no-pdf-link | 1008
+ 10.1515 | not-found | 283
+ 10.15405 | | 229
+ 10.15405 | success | 218
+ 10.1553 | | 418
+ 10.1553 | no-pdf-link | 396
+ 10.1590 | | 655
+ 10.1590 | success | 623
+ 10.17104 | | 1202
+ 10.17104 | no-pdf-link | 953
+ 10.17104 | bad-redirect | 249
+ 10.17605 | | 368
+ 10.17605 | not-found | 337
+ 10.17615 | | 9401
+ 10.17615 | redirect-loop | 5720
+ 10.17615 | spn2-wayback-error | 3099
+ 10.17615 | spn2-cdx-lookup-failure | 201
+ 10.17863 | | 438
+ 10.18148 | | 465
+ 10.18148 | success | 462
+ 10.18720 | | 210
+ 10.18821 | | 476
+ 10.18821 | redirect-loop | 366
+ 10.20345 | | 222
+ 10.20345 | terminal-bad-status | 215
+ 10.20546 | | 244
+ 10.20546 | no-pdf-link | 241
+ 10.21037 | | 232
+ 10.2118 | | 903
+ 10.2118 | redirect-loop | 853
+ 10.21203 | | 1824
+ 10.21203 | success | 1545
+ 10.2139 | | 1493
+ 10.2139 | link-loop | 1145
+ 10.2147 | | 318
+ 10.2147 | success | 267
+ 10.2172 | | 282
+ 10.2174 | | 363
+ 10.2174 | no-pdf-link | 320
+ 10.2196 | | 265
+ 10.2208 | | 299
+ 10.22215 | | 218
+ 10.22215 | success | 217
+ 10.22323 | | 289
+ 10.22323 | success | 262
+ 10.22533 | | 395
+ 10.22533 | success | 393
+ 10.22541 | | 291
+ 10.22541 | success | 275
+ 10.23919 | | 426
+ 10.23919 | spn2-error:too-many-redirects | 403
+ 10.24034 | | 319
+ 10.24034 | spn2-error | 203
+ 10.24355 | | 15360
+ 10.24355 | no-pdf-link | 15228
+ 10.24411 | | 1506
+ 10.24411 | forbidden | 823
+ 10.24411 | redirect-loop | 647
+ 10.25335 | | 550
+ 10.25335 | no-pdf-link | 550
+ 10.25365 | | 429
+ 10.25365 | success | 424
+ 10.25384 | | 338
+ 10.25384 | success | 249
+ 10.25646 | | 239
+ 10.26197 | no-pdf-link | 303
+ 10.26197 | | 303
+ 10.26226 | | 272
+ 10.26278 | | 1291
+ 10.26278 | redirect-loop | 756
+ 10.26278 | spn2-error:too-many-redirects | 509
+ 10.29327 | | 232
+ 10.2991 | | 307
+ 10.2991 | spn2-wayback-error | 227
+ 10.30965 | | 722
+ 10.30965 | link-loop | 709
+ 10.3109 | | 801
+ 10.3109 | link-loop | 572
+ 10.3109 | forbidden | 228
+ 10.31219 | | 951
+ 10.31219 | redirect-loop | 518
+ 10.31219 | spn2-wayback-error | 356
+ 10.31274 | | 296
+ 10.31743 | | 403
+ 10.31743 | success | 294
+ 10.31857 | | 209
+ 10.3233 | | 471
+ 10.33448 | | 213
+ 10.33448 | success | 212
+ 10.3389 | | 1459
+ 10.3389 | success | 1417
+ 10.3390 | | 4511
+ 10.3390 | success | 3577
+ 10.3390 | terminal-bad-status | 485
+ 10.3390 | forbidden | 379
+ 10.3406 | | 243
+ 10.3406 | terminal-bad-status | 213
+ 10.34944 | | 527
+ 10.34944 | success | 459
+ 10.35016 | | 688
+ 10.35016 | no-pdf-link | 687
+ 10.36347 | success | 213
+ 10.36347 | | 213
+ 10.37747 | | 213
+ 10.37747 | no-pdf-link | 213
+ 10.37904 | | 227
+ 10.37904 | no-pdf-link | 226
+ 10.3917 | | 347
+ 10.3917 | redirect-loop | 208
+ 10.3923 | | 356
+ 10.3923 | redirect-loop | 254
+ 10.3929 | | 317
+ 10.3929 | terminal-bad-status | 310
+ 10.3931 | | 279
+ 10.3931 | no-pdf-link | 279
+ 10.4000 | | 7828
+ 10.4000 | success | 3485
+ 10.4000 | spn2-wayback-error | 2142
+ 10.4000 | redirect-loop | 2106
+ 10.4018 | | 249
+ 10.4018 | not-found | 240
+ 10.4103 | | 726
+ 10.4103 | remote-server-error | 343
+ 10.4103 | redirect-loop | 324
+ 10.4159 | | 286
+ 10.4159 | link-loop | 238
+ 10.4324 | | 19398
+ 10.4324 | link-loop | 12471
+ 10.4324 | forbidden | 3632
+ 10.4324 | not-found | 2283
+ 10.4324 | terminal-bad-status | 645
+ 10.4324 | success | 208
+ 10.47295 | | 456
+ 10.47295 | success | 449
+ 10.47513 | | 218
+ 10.47513 | no-pdf-link | 203
+ 10.48084 | success | 538
+ 10.48084 | | 538
+ 10.5040 | | 375
+ 10.5040 | no-pdf-link | 365
+ 10.5167 | | 290
+ 10.5167 | redirect-loop | 278
+ 10.5169 | | 360
+ 10.5169 | no-pdf-link | 355
+ 10.5194 | | 917
+ 10.5194 | success | 887
+ 10.5216 | | 213
+ 10.5220 | no-pdf-link | 397
+ 10.5220 | | 397
+ 10.5281 | | 22551
+ 10.5281 | terminal-bad-status | 12158
+ 10.5281 | success | 4901
+ 10.5281 | no-pdf-link | 4754
+ 10.5281 | spn2-error:unknown | 360
+ 10.5282 | | 228
+ 10.5451 | | 2068
+ 10.5451 | success | 1071
+ 10.5451 | terminal-bad-status | 817
+ 10.5753 | | 268
+ 10.5753 | success | 264
+ 10.5771 | | 941
+ 10.5771 | no-pdf-link | 397
+ 10.5771 | bad-redirect | 269
+ 10.5771 | link-loop | 238
+ 10.6068 | | 441
+ 10.6068 | no-pdf-link | 384
+ 10.6084 | | 917
+ 10.6084 | no-pdf-link | 520
+ 10.6084 | success | 368
+ 10.7287 | | 234
+ 10.7287 | no-pdf-link | 212
+ 10.7312 | | 382
+ 10.7312 | link-loop | 291
+ 10.7554 | | 205
+ 10.7891 | | 380
+ 10.7891 | no-pdf-link | 376
+ 10.7916 | | 331
+ 10.7916 | no-pdf-link | 201
+ 10.7939 | | 535
+ 10.7939 | no-pdf-link | 527
+ | | 272831
+ | success | 62298
+ | no-pdf-link | 60737
+ | link-loop | 48558
+ | redirect-loop | 26842
+ | terminal-bad-status | 22685
+ | spn2-error:too-many-redirects | 11174
+ | forbidden | 10900
+ | spn2-wayback-error | 7796
+ | blocked-cookie | 6961
+ | not-found | 5468
+ | bad-redirect | 2666
+ | spn2-error | 2398
+ | spn2-cdx-lookup-failure | 1374
+ | petabox-error | 678
+ | remote-server-error | 461
+ | wrong-mimetype | 443
+ | spn2-error:proxy-error | 420
+ | spn2-error:unknown | 360
+(323 rows)
diff --git a/sql/ingest_stats/2020-11-16_weekly_ingest_terminal_domain.txt b/sql/ingest_stats/2020-11-16_weekly_ingest_terminal_domain.txt
new file mode 100644
index 0000000..28dd0d0
--- /dev/null
+++ b/sql/ingest_stats/2020-11-16_weekly_ingest_terminal_domain.txt
@@ -0,0 +1,307 @@
+ domain | status | count
+-------------------------------------------------------------------+-------------------------------+--------
+ 202.148.31.178 | | 298
+ academic.oup.com | | 1624
+ academic.oup.com | no-pdf-link | 673
+ academic.oup.com | bad-redirect | 444
+ academic.oup.com | link-loop | 358
+ aip.scitation.org | | 257
+ apps.crossref.org | | 1414
+ apps.crossref.org | no-pdf-link | 1410
+ article.sciencepublishinggroup.com | | 404
+ article.sciencepublishinggroup.com | success | 404
+ arxiv.org | | 24340
+ arxiv.org | success | 22381
+ arxiv.org | terminal-bad-status | 1260
+ arxiv.org | no-pdf-link | 412
+ arxiv.org | no-capture | 262
+ ashpublications.org | | 2049
+ ashpublications.org | no-pdf-link | 2024
+ asmedigitalcollection.asme.org | | 1245
+ asmedigitalcollection.asme.org | bad-redirect | 707
+ assets.researchsquare.com | | 1549
+ assets.researchsquare.com | success | 1546
+ bioone.org | | 201
+ biorxiv.org | redirect-loop | 702
+ biorxiv.org | | 702
+ blogs.ethz.ch | | 687
+ blogs.ethz.ch | no-pdf-link | 686
+ books.openedition.org | | 446
+ books.openedition.org | redirect-loop | 382
+ brill.com | | 2203
+ brill.com | link-loop | 1779
+ brill.com | success | 359
+ catalog.paradisec.org.au | | 770
+ catalog.paradisec.org.au | redirect-loop | 756
+ cdr.lib.unc.edu | | 9432
+ cdr.lib.unc.edu | redirect-loop | 5720
+ cdr.lib.unc.edu | spn2-wayback-error | 3187
+ cdr.lib.unc.edu | spn2-cdx-lookup-failure | 201
+ classiques-garnier.com | | 246
+ classiques-garnier.com | success | 243
+ content.iospress.com | | 242
+ content.taylorfrancis.com | | 309
+ content.taylorfrancis.com | terminal-bad-status | 309
+ curve.carleton.ca | success | 201
+ curve.carleton.ca | | 201
+ cyberdoi.ru | redirect-loop | 647
+ cyberdoi.ru | | 647
+ czasopisma.kul.pl | | 402
+ czasopisma.kul.pl | success | 294
+ d.lib.msu.edu | | 550
+ d.lib.msu.edu | no-pdf-link | 550
+ d197for5662m48.cloudfront.net | success | 276
+ d197for5662m48.cloudfront.net | | 276
+ dergipark.org.tr | | 674
+ dergipark.org.tr | no-pdf-link | 255
+ dergipark.org.tr | success | 248
+ digi.ub.uni-heidelberg.de | no-pdf-link | 261
+ digi.ub.uni-heidelberg.de | | 261
+ dl.acm.org | | 441
+ dl.acm.org | blocked-cookie | 361
+ dlc.library.columbia.edu | | 201
+ dlc.library.columbia.edu | no-pdf-link | 201
+ doi.ala.org.au | | 308
+ doi.ala.org.au | no-pdf-link | 308
+ doi.org | | 474
+ doi.org | terminal-bad-status | 344
+ downloads.hindawi.com | | 479
+ downloads.hindawi.com | success | 478
+ edoc.rki.de | | 238
+ edoc.unibas.ch | | 2018
+ edoc.unibas.ch | success | 1067
+ edoc.unibas.ch | terminal-bad-status | 817
+ elib.spbstu.ru | | 205
+ elifesciences.org | | 204
+ era.library.ualberta.ca | | 531
+ era.library.ualberta.ca | no-pdf-link | 527
+ erj.ersjournals.com | | 951
+ erj.ersjournals.com | redirect-loop | 829
+ europepmc.org | | 289
+ europepmc.org | success | 283
+ figshare.com | | 233
+ figshare.com | no-pdf-link | 208
+ fjfsdata01prod.blob.core.windows.net | | 1430
+ fjfsdata01prod.blob.core.windows.net | success | 1418
+ hw.oeaw.ac.at | | 283
+ hw.oeaw.ac.at | no-pdf-link | 283
+ idb.ub.uni-tuebingen.de | | 216
+ idb.ub.uni-tuebingen.de | terminal-bad-status | 215
+ ieeexplore.ieee.org | | 7561
+ ieeexplore.ieee.org | spn2-error:too-many-redirects | 6732
+ ieeexplore.ieee.org | success | 683
+ ijgc.bmj.com | | 411
+ ijgc.bmj.com | success | 399
+ jamanetwork.com | | 229
+ jitc.bmj.com | | 849
+ jitc.bmj.com | success | 773
+ journals.aps.org | | 539
+ journals.aps.org | not-found | 534
+ journals.lww.com | | 1124
+ journals.lww.com | no-pdf-link | 547
+ journals.lww.com | link-loop | 399
+ journals.openedition.org | | 7366
+ journals.openedition.org | success | 3484
+ journals.openedition.org | spn2-wayback-error | 2120
+ journals.openedition.org | redirect-loop | 1720
+ journals.plos.org | | 552
+ journals.plos.org | success | 542
+ kiss.kstudy.com | | 306
+ kiss.kstudy.com | no-pdf-link | 292
+ lib.dr.iastate.edu | | 297
+ link.springer.com | | 2830
+ link.springer.com | redirect-loop | 2625
+ linkinghub.elsevier.com | | 970
+ linkinghub.elsevier.com | forbidden | 415
+ linkinghub.elsevier.com | spn2-error:too-many-redirects | 357
+ medrxiv.org | | 287
+ medrxiv.org | redirect-loop | 287
+ muse.jhu.edu | | 470
+ muse.jhu.edu | terminal-bad-status | 443
+ ojs.ub.uni-konstanz.de | | 463
+ ojs.ub.uni-konstanz.de | success | 462
+ onlinelibrary.wiley.com | | 2064
+ onlinelibrary.wiley.com | terminal-bad-status | 1973
+ osf.io | | 1394
+ osf.io | redirect-loop | 589
+ osf.io | spn2-wayback-error | 425
+ osf.io | not-found | 342
+ othes.univie.ac.at | | 424
+ othes.univie.ac.at | success | 424
+ oxford.universitypressscholarship.com | | 8999
+ oxford.universitypressscholarship.com | link-loop | 8282
+ oxford.universitypressscholarship.com | no-pdf-link | 695
+ oxfordhandbooks.com | redirect-loop | 460
+ oxfordhandbooks.com | | 460
+ papers.ssrn.com | | 1313
+ papers.ssrn.com | link-loop | 1145
+ peerj.com | | 313
+ peerj.com | no-pdf-link | 212
+ periodicos.urca.br | | 446
+ periodicos.urca.br | success | 439
+ pos.sissa.it | | 277
+ pos.sissa.it | success | 262
+ preprints.jmir.org | | 242
+ pressto.amu.edu.pl | | 260
+ pressto.amu.edu.pl | success | 232
+ publikationsserver.tu-braunschweig.de | | 15358
+ publikationsserver.tu-braunschweig.de | no-pdf-link | 15228
+ publons.com | | 2810
+ publons.com | redirect-loop | 2359
+ publons.com | no-pdf-link | 444
+ pubs.acs.org | | 1647
+ pubs.acs.org | blocked-cookie | 1553
+ pubs.rsc.org | | 765
+ pubs.rsc.org | redirect-loop | 486
+ pubs.rsc.org | spn2-wayback-error | 214
+ res.mdpi.com | | 3620
+ res.mdpi.com | success | 3591
+ revistas.usal.es | | 580
+ revistas.usal.es | success | 298
+ revues.imist.ma | | 229
+ rsdjournal.org | | 213
+ rsdjournal.org | success | 212
+ s3-eu-west-1.amazonaws.com | | 764
+ s3-eu-west-1.amazonaws.com | success | 763
+ s3-euw1-ap-pe-ws4-capi2-distribution-p.s3-eu-west-1.amazonaws.com | | 324
+ s3-euw1-ap-pe-ws4-capi2-distribution-p.s3-eu-west-1.amazonaws.com | success | 324
+ saspublishers.com | | 213
+ saspublishers.com | success | 213
+ scholarshare.temple.edu | | 524
+ scholarshare.temple.edu | success | 464
+ sol.sbc.org.br | | 268
+ sol.sbc.org.br | success | 264
+ statisticaldatasets.data-planet.com | | 442
+ statisticaldatasets.data-planet.com | no-pdf-link | 390
+ watermark.silverchair.com | | 521
+ watermark.silverchair.com | success | 514
+ www.ahajournals.org | | 1061
+ www.ahajournals.org | blocked-cookie | 1011
+ www.atlantis-press.com | | 308
+ www.atlantis-press.com | spn2-wayback-error | 228
+ www.beck-elibrary.de | | 1202
+ www.beck-elibrary.de | no-pdf-link | 953
+ www.beck-elibrary.de | bad-redirect | 249
+ www.cairn.info | | 255
+ www.cairn.info | redirect-loop | 208
+ www.cambridge.org | | 2061
+ www.cambridge.org | no-pdf-link | 727
+ www.cambridge.org | success | 485
+ www.cambridge.org | link-loop | 388
+ www.cambridge.org | bad-redirect | 252
+ www.confer.cz | | 227
+ www.confer.cz | no-pdf-link | 226
+ www.dbpia.co.kr | | 773
+ www.dbpia.co.kr | no-pdf-link | 679
+ www.degruyter.com | | 17046
+ www.degruyter.com | link-loop | 14202
+ www.degruyter.com | success | 2201
+ www.degruyter.com | not-found | 235
+ www.dovepress.com | | 316
+ www.dovepress.com | success | 267
+ www.e-manuscripta.ch | | 384
+ www.e-manuscripta.ch | no-pdf-link | 383
+ www.e-periodica.ch | | 358
+ www.e-periodica.ch | no-pdf-link | 355
+ www.e-rara.ch | no-pdf-link | 279
+ www.e-rara.ch | | 279
+ www.e3s-conferences.org | | 426
+ www.e3s-conferences.org | success | 419
+ www.elibrary.ru | | 303
+ www.elibrary.ru | no-pdf-link | 301
+ www.emerald.com | | 943
+ www.emerald.com | no-pdf-link | 933
+ www.etasr.com | | 466
+ www.etasr.com | success | 466
+ www.eurekaselect.com | | 345
+ www.eurekaselect.com | no-pdf-link | 321
+ www.europeanproceedings.com | | 218
+ www.europeanproceedings.com | success | 218
+ www.finersistemas.com | success | 397
+ www.finersistemas.com | | 397
+ www.humankineticslibrary.com | no-pdf-link | 321
+ www.humankineticslibrary.com | | 321
+ www.ijcmas.com | | 251
+ www.ijcmas.com | no-pdf-link | 248
+ www.inderscience.com | | 524
+ www.inderscience.com | no-pdf-link | 501
+ www.ingentaconnect.com | | 366
+ www.ingentaconnect.com | no-pdf-link | 349
+ www.jstage.jst.go.jp | | 1591
+ www.jstage.jst.go.jp | success | 862
+ www.jstage.jst.go.jp | no-pdf-link | 567
+ www.jstor.org | | 351
+ www.karger.com | | 224
+ www.liebertpub.com | | 236
+ www.liebertpub.com | blocked-cookie | 228
+ www.mdpi.com | | 694
+ www.mdpi.com | terminal-bad-status | 480
+ www.medlit.ru | | 458
+ www.medlit.ru | redirect-loop | 366
+ www.morressier.com | | 285
+ www.morressier.com | no-pdf-link | 253
+ www.njca.info | | 223
+ www.njca.info | remote-server-error | 222
+ www.nomos-elibrary.de | | 913
+ www.nomos-elibrary.de | no-pdf-link | 379
+ www.nomos-elibrary.de | bad-redirect | 265
+ www.nomos-elibrary.de | link-loop | 236
+ www.onepetro.org | | 895
+ www.onepetro.org | redirect-loop | 853
+ www.osti.gov | | 212
+ www.persee.fr | | 232
+ www.persee.fr | terminal-bad-status | 213
+ www.repository.cam.ac.uk | | 439
+ www.research-collection.ethz.ch | | 312
+ www.research-collection.ethz.ch | terminal-bad-status | 310
+ www.revistas.ufg.br | | 212
+ www.schoeningh.de | | 371
+ www.schoeningh.de | link-loop | 366
+ www.scialert.net | | 276
+ www.scialert.net | redirect-loop | 254
+ www.scielo.br | | 644
+ www.scielo.br | success | 624
+ www.sciencedirect.com | | 6523
+ www.sciencedirect.com | no-pdf-link | 4668
+ www.sciencedirect.com | terminal-bad-status | 1737
+ www.scitepress.org | no-pdf-link | 397
+ www.scitepress.org | | 397
+ www.tandfonline.com | | 3448
+ www.tandfonline.com | blocked-cookie | 2446
+ www.tandfonline.com | terminal-bad-status | 714
+ www.taylorfrancis.com | | 21292
+ www.taylorfrancis.com | link-loop | 18648
+ www.taylorfrancis.com | forbidden | 2022
+ www.taylorfrancis.com | terminal-bad-status | 518
+ www.thieme-connect.de | | 513
+ www.thieme-connect.de | not-found | 292
+ www.thieme-connect.de | redirect-loop | 213
+ www.whateveryoneneedstoknow.com | | 1174
+ www.whateveryoneneedstoknow.com | redirect-loop | 1163
+ www.worldscientific.com | | 293
+ www.worldscientific.com | blocked-cookie | 240
+ www.zora.uzh.ch | | 290
+ www.zora.uzh.ch | redirect-loop | 278
+ zenodo.org | | 22202
+ zenodo.org | terminal-bad-status | 12158
+ zenodo.org | success | 4923
+ zenodo.org | no-pdf-link | 4788
+ | | 280719
+ | success | 85143
+ | no-pdf-link | 61335
+ | link-loop | 48566
+ | redirect-loop | 26845
+ | terminal-bad-status | 23955
+ | spn2-wayback-error | 7920
+ | spn2-error:too-many-redirects | 7175
+ | blocked-cookie | 6980
+ | forbidden | 2912
+ | bad-redirect | 2666
+ | spn2-error | 1943
+ | not-found | 1762
+ | spn2-cdx-lookup-failure | 1376
+ | wrong-mimetype | 467
+ | remote-server-error | 388
+ | spn2-error:proxy-error | 295
+ | no-capture | 262
+(304 rows)
diff --git a/sql/monitoring_queries.md b/sql/monitoring_queries.md
index 1c872cc..cf3b190 100644
--- a/sql/monitoring_queries.md
+++ b/sql/monitoring_queries.md
@@ -61,9 +61,35 @@ Summary of significant domains and status, past 7 days:
WHERE t1.domain != ''
GROUP BY CUBE (domain, status)
) t2
- WHERE count > 500
+ WHERE count > 200
ORDER BY domain ASC , count DESC;
+Summary of DOI prefix and status, past 7 days:
+
+    -- Ingest result counts per DOI prefix and status for the past 7 days.
+    -- GROUP BY CUBE also emits subtotal rows; those show up with an empty
+    -- doi_prefix and/or status column in the output.
+    SELECT doi_prefix, status, count
+    FROM (
+        SELECT doi_prefix, status, COUNT(*) AS count
+        FROM (
+            SELECT
+                ingest_file_result.status,
+                -- captures the first '10.<non-slash chars>' segment that is followed by a '/'
+                substring(ingest_request.link_source_id FROM '(10\.[^/]*)/.*') AS doi_prefix
+            FROM ingest_file_result
+            -- INNER JOIN: the WHERE filters on ingest_request below discard
+            -- unmatched rows anyway, so an outer join would be misleading
+            INNER JOIN ingest_request
+                ON ingest_file_result.ingest_type = ingest_request.ingest_type
+                AND ingest_file_result.base_url = ingest_request.base_url
+            WHERE
+                ingest_file_result.updated >= NOW() - '7 day'::INTERVAL
+                AND ingest_request.ingest_type = 'pdf'
+                AND ingest_request.ingest_request_source = 'fatcat-changelog'
+                AND ingest_request.link_source = 'doi'
+        ) t1
+        -- also drops rows where no DOI prefix could be extracted (NULL)
+        WHERE t1.doi_prefix != ''
+        GROUP BY CUBE (doi_prefix, status)
+    ) t2
+    -- only report combinations seen more than 200 times
+    WHERE count > 200
+    -- status tie-breaker keeps the per-prefix subtotal ahead of a status row
+    -- with the same count, so repeated runs produce a stable ordering
+    ORDER BY doi_prefix ASC, count DESC, status ASC NULLS FIRST;
+
Throughput per day, and success, for past 30 days: