From 5537b666ad392fb13aa956ebff4e7aa0927b68ee Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 16 Nov 2020 11:50:46 -0800 Subject: SQL: more ingest monitoring --- .../2020-11-16_weekly_ingest_doi_prefix.txt | 326 +++++++++++++++++++++ .../2020-11-16_weekly_ingest_terminal_domain.txt | 307 +++++++++++++++++++ sql/monitoring_queries.md | 28 +- 3 files changed, 660 insertions(+), 1 deletion(-) create mode 100644 sql/ingest_stats/2020-11-16_weekly_ingest_doi_prefix.txt create mode 100644 sql/ingest_stats/2020-11-16_weekly_ingest_terminal_domain.txt (limited to 'sql') diff --git a/sql/ingest_stats/2020-11-16_weekly_ingest_doi_prefix.txt b/sql/ingest_stats/2020-11-16_weekly_ingest_doi_prefix.txt new file mode 100644 index 0000000..b684400 --- /dev/null +++ b/sql/ingest_stats/2020-11-16_weekly_ingest_doi_prefix.txt @@ -0,0 +1,326 @@ + doi_prefix | status | count +------------+-------------------------------+-------- + 10.1001 | | 230 + 10.1002 | | 3914 + 10.1002 | terminal-bad-status | 1540 + 10.1002 | forbidden | 1072 + 10.1002 | redirect-loop | 995 + 10.1002 | no-pdf-link | 210 + 10.1016 | | 7976 + 10.1016 | no-pdf-link | 4648 + 10.1016 | terminal-bad-status | 1778 + 10.1016 | forbidden | 622 + 10.1016 | spn2-error:too-many-redirects | 344 + 10.1016 | redirect-loop | 225 + 10.1017 | | 2040 + 10.1017 | no-pdf-link | 720 + 10.1017 | success | 441 + 10.1017 | link-loop | 371 + 10.1017 | bad-redirect | 227 + 10.1021 | | 1722 + 10.1021 | blocked-cookie | 1552 + 10.1029 | | 248 + 10.1039 | | 1160 + 10.1039 | redirect-loop | 486 + 10.1039 | spn2-error:too-many-redirects | 395 + 10.1039 | spn2-wayback-error | 213 + 10.1051 | | 695 + 10.1051 | success | 557 + 10.1055 | | 541 + 10.1055 | not-found | 295 + 10.1055 | redirect-loop | 213 + 10.1057 | | 2835 + 10.1057 | redirect-loop | 2617 + 10.1061 | | 550 + 10.1061 | spn2-error:too-many-redirects | 425 + 10.1063 | | 600 + 10.1063 | spn2-error:too-many-redirects | 328 + 10.1080 | | 3801 + 10.1080 | blocked-cookie | 2431 + 10.1080 | terminal-bad-status | 711 + 10.1080 | forbidden | 341 + 10.1081 | | 299 + 10.1081 | link-loop | 222 + 10.1089 | | 236 + 10.1089 | blocked-cookie | 228 + 10.1093 | | 12805 + 10.1093 | link-loop | 8627 + 10.1093 | redirect-loop | 1659 + 10.1093 | no-pdf-link | 1475 + 10.1093 | bad-redirect | 428 + 10.1093 | success | 391 + 10.1097 | | 1497 + 10.1097 | no-pdf-link | 503 + 10.1097 | link-loop | 346 + 10.1097 | spn2-error:too-many-redirects | 259 + 10.1097 | terminal-bad-status | 202 + 10.1101 | | 1859 + 10.1101 | redirect-loop | 993 + 10.1101 | forbidden | 703 + 10.1103 | | 597 + 10.1103 | not-found | 534 + 10.1108 | | 1055 + 10.1108 | no-pdf-link | 945 + 10.1109 | | 7067 + 10.1109 | spn2-error:too-many-redirects | 6299 + 10.1109 | success | 667 + 10.1111 | | 2099 + 10.1111 | redirect-loop | 1331 + 10.1111 | terminal-bad-status | 313 + 10.1111 | forbidden | 226 + 10.1115 | | 1278 + 10.1115 | bad-redirect | 707 + 10.1117 | | 561 + 10.1117 | spn2-error:too-many-redirects | 501 + 10.1126 | | 214 + 10.1136 | | 1989 + 10.1136 | success | 1463 + 10.1136 | link-loop | 294 + 10.1142 | | 300 + 10.1142 | blocked-cookie | 237 + 10.1145 | | 440 + 10.1145 | blocked-cookie | 354 + 10.1155 | | 480 + 10.1155 | success | 474 + 10.11588 | | 506 + 10.11588 | no-pdf-link | 264 + 10.11588 | success | 236 + 10.1159 | | 226 + 10.11606 | | 304 + 10.1161 | | 1142 + 10.1161 | blocked-cookie | 1011 + 10.1163 | | 2261 + 10.1163 | link-loop | 1767 + 10.1163 | success | 348 + 10.11648 | | 405 + 10.11648 | success | 404 + 10.1182 | | 2125 + 10.1182 | no-pdf-link | 2024 + 10.1183 | | 987 + 10.1183 | redirect-loop | 838 + 10.1186 | | 1481 + 10.1186 | success | 1412 + 10.1201 | | 7649 + 10.1201 | link-loop | 5383 + 10.1201 | forbidden | 1504 + 10.1201 | no-pdf-link | 312 + 10.1299 | | 264 + 10.1299 | no-pdf-link | 209 + 10.13134 | | 201 + 10.1353 | | 549 + 10.1353 | terminal-bad-status | 443 + 10.1371 | | 552 + 10.1371 | success | 542 + 10.14201 | | 656 + 10.14201 | success | 366 + 10.14361 | | 647 + 10.14361 | link-loop | 585 + 10.14746 | | 260 + 10.14746 | success | 232 + 10.1504 | | 527 + 10.1504 | no-pdf-link | 501 + 10.15122 | | 246 + 10.15122 | success | 243 + 10.1515 | | 16240 + 10.1515 | link-loop | 12589 + 10.1515 | success | 1941 + 10.1515 | no-pdf-link | 1008 + 10.1515 | not-found | 283 + 10.15405 | | 229 + 10.15405 | success | 218 + 10.1553 | | 418 + 10.1553 | no-pdf-link | 396 + 10.1590 | | 655 + 10.1590 | success | 623 + 10.17104 | | 1202 + 10.17104 | no-pdf-link | 953 + 10.17104 | bad-redirect | 249 + 10.17605 | | 368 + 10.17605 | not-found | 337 + 10.17615 | | 9401 + 10.17615 | redirect-loop | 5720 + 10.17615 | spn2-wayback-error | 3099 + 10.17615 | spn2-cdx-lookup-failure | 201 + 10.17863 | | 438 + 10.18148 | | 465 + 10.18148 | success | 462 + 10.18720 | | 210 + 10.18821 | | 476 + 10.18821 | redirect-loop | 366 + 10.20345 | | 222 + 10.20345 | terminal-bad-status | 215 + 10.20546 | | 244 + 10.20546 | no-pdf-link | 241 + 10.21037 | | 232 + 10.2118 | | 903 + 10.2118 | redirect-loop | 853 + 10.21203 | | 1824 + 10.21203 | success | 1545 + 10.2139 | | 1493 + 10.2139 | link-loop | 1145 + 10.2147 | | 318 + 10.2147 | success | 267 + 10.2172 | | 282 + 10.2174 | | 363 + 10.2174 | no-pdf-link | 320 + 10.2196 | | 265 + 10.2208 | | 299 + 10.22215 | | 218 + 10.22215 | success | 217 + 10.22323 | | 289 + 10.22323 | success | 262 + 10.22533 | | 395 + 10.22533 | success | 393 + 10.22541 | | 291 + 10.22541 | success | 275 + 10.23919 | | 426 + 10.23919 | spn2-error:too-many-redirects | 403 + 10.24034 | | 319 + 10.24034 | spn2-error | 203 + 10.24355 | | 15360 + 10.24355 | no-pdf-link | 15228 + 10.24411 | | 1506 + 10.24411 | forbidden | 823 + 10.24411 | redirect-loop | 647 + 10.25335 | | 550 + 10.25335 | no-pdf-link | 550 + 10.25365 | | 429 + 10.25365 | success | 424 + 10.25384 | | 338 + 10.25384 | success | 249 + 10.25646 | | 239 + 10.26197 | no-pdf-link | 303 + 10.26197 | | 303 + 10.26226 | | 272 + 10.26278 | | 1291 + 10.26278 | redirect-loop | 756 + 10.26278 | spn2-error:too-many-redirects | 509 + 10.29327 | | 232 + 10.2991 | | 307 + 10.2991 | spn2-wayback-error | 227 + 10.30965 | | 722 + 10.30965 | link-loop | 709 + 10.3109 | | 801 + 10.3109 | link-loop | 572 + 10.3109 | forbidden | 228 + 10.31219 | | 951 + 10.31219 | redirect-loop | 518 + 10.31219 | spn2-wayback-error | 356 + 10.31274 | | 296 + 10.31743 | | 403 + 10.31743 | success | 294 + 10.31857 | | 209 + 10.3233 | | 471 + 10.33448 | | 213 + 10.33448 | success | 212 + 10.3389 | | 1459 + 10.3389 | success | 1417 + 10.3390 | | 4511 + 10.3390 | success | 3577 + 10.3390 | terminal-bad-status | 485 + 10.3390 | forbidden | 379 + 10.3406 | | 243 + 10.3406 | terminal-bad-status | 213 + 10.34944 | | 527 + 10.34944 | success | 459 + 10.35016 | | 688 + 10.35016 | no-pdf-link | 687 + 10.36347 | success | 213 + 10.36347 | | 213 + 10.37747 | | 213 + 10.37747 | no-pdf-link | 213 + 10.37904 | | 227 + 10.37904 | no-pdf-link | 226 + 10.3917 | | 347 + 10.3917 | redirect-loop | 208 + 10.3923 | | 356 + 10.3923 | redirect-loop | 254 + 10.3929 | | 317 + 10.3929 | terminal-bad-status | 310 + 10.3931 | | 279 + 10.3931 | no-pdf-link | 279 + 10.4000 | | 7828 + 10.4000 | success | 3485 + 10.4000 | spn2-wayback-error | 2142 + 10.4000 | redirect-loop | 2106 + 10.4018 | | 249 + 10.4018 | not-found | 240 + 10.4103 | | 726 + 10.4103 | remote-server-error | 343 + 10.4103 | redirect-loop | 324 + 10.4159 | | 286 + 10.4159 | link-loop | 238 + 10.4324 | | 19398 + 10.4324 | link-loop | 12471 + 10.4324 | forbidden | 3632 + 10.4324 | not-found | 2283 + 10.4324 | terminal-bad-status | 645 + 10.4324 | success | 208 + 10.47295 | | 456 + 10.47295 | success | 449 + 10.47513 | | 218 + 10.47513 | no-pdf-link | 203 + 10.48084 | success | 538 + 10.48084 | | 538 + 10.5040 | | 375 + 10.5040 | no-pdf-link | 365 + 10.5167 | | 290 + 10.5167 | redirect-loop | 278 + 10.5169 | | 360 + 10.5169 | no-pdf-link | 355 + 10.5194 | | 917 + 10.5194 | success | 887 + 10.5216 | | 213 + 10.5220 | no-pdf-link | 397 + 10.5220 | | 397 + 10.5281 | | 22551 + 10.5281 | terminal-bad-status | 12158 + 10.5281 | success | 4901 + 10.5281 | no-pdf-link | 4754 + 10.5281 | spn2-error:unknown | 360 + 10.5282 | | 228 + 10.5451 | | 2068 + 10.5451 | success | 1071 + 10.5451 | terminal-bad-status | 817 + 10.5753 | | 268 + 10.5753 | success | 264 + 10.5771 | | 941 + 10.5771 | no-pdf-link | 397 + 10.5771 | bad-redirect | 269 + 10.5771 | link-loop | 238 + 10.6068 | | 441 + 10.6068 | no-pdf-link | 384 + 10.6084 | | 917 + 10.6084 | no-pdf-link | 520 + 10.6084 | success | 368 + 10.7287 | | 234 + 10.7287 | no-pdf-link | 212 + 10.7312 | | 382 + 10.7312 | link-loop | 291 + 10.7554 | | 205 + 10.7891 | | 380 + 10.7891 | no-pdf-link | 376 + 10.7916 | | 331 + 10.7916 | no-pdf-link | 201 + 10.7939 | | 535 + 10.7939 | no-pdf-link | 527 + | | 272831 + | success | 62298 + | no-pdf-link | 60737 + | link-loop | 48558 + | redirect-loop | 26842 + | terminal-bad-status | 22685 + | spn2-error:too-many-redirects | 11174 + | forbidden | 10900 + | spn2-wayback-error | 7796 + | blocked-cookie | 6961 + | not-found | 5468 + | bad-redirect | 2666 + | spn2-error | 2398 + | spn2-cdx-lookup-failure | 1374 + | petabox-error | 678 + | remote-server-error | 461 + | wrong-mimetype | 443 + | spn2-error:proxy-error | 420 + | spn2-error:unknown | 360 +(323 rows) diff --git a/sql/ingest_stats/2020-11-16_weekly_ingest_terminal_domain.txt b/sql/ingest_stats/2020-11-16_weekly_ingest_terminal_domain.txt new file mode 100644 index 0000000..28dd0d0 --- /dev/null +++ b/sql/ingest_stats/2020-11-16_weekly_ingest_terminal_domain.txt @@ -0,0 +1,307 @@ + domain | status | count +-------------------------------------------------------------------+-------------------------------+-------- + 202.148.31.178 | | 298 + academic.oup.com | | 1624 + academic.oup.com | no-pdf-link | 673 + academic.oup.com | bad-redirect | 444 + academic.oup.com | link-loop | 358 + aip.scitation.org | | 257 + apps.crossref.org | | 1414 + apps.crossref.org | no-pdf-link | 1410 + article.sciencepublishinggroup.com | | 404 + article.sciencepublishinggroup.com | success | 404 + arxiv.org | | 24340 + arxiv.org | success | 22381 + arxiv.org | terminal-bad-status | 1260 + arxiv.org | no-pdf-link | 412 + arxiv.org | no-capture | 262 + ashpublications.org | | 2049 + ashpublications.org | no-pdf-link | 2024 + asmedigitalcollection.asme.org | | 1245 + asmedigitalcollection.asme.org | bad-redirect | 707 + assets.researchsquare.com | | 1549 + assets.researchsquare.com | success | 1546 + bioone.org | | 201 + biorxiv.org | redirect-loop | 702 + biorxiv.org | | 702 + blogs.ethz.ch | | 687 + blogs.ethz.ch | no-pdf-link | 686 + books.openedition.org | | 446 + books.openedition.org | redirect-loop | 382 + brill.com | | 2203 + brill.com | link-loop | 1779 + brill.com | success | 359 + catalog.paradisec.org.au | | 770 + catalog.paradisec.org.au | redirect-loop | 756 + cdr.lib.unc.edu | | 9432 + cdr.lib.unc.edu | redirect-loop | 5720 + cdr.lib.unc.edu | spn2-wayback-error | 3187 + cdr.lib.unc.edu | spn2-cdx-lookup-failure | 201 + classiques-garnier.com | | 246 + classiques-garnier.com | success | 243 + content.iospress.com | | 242 + content.taylorfrancis.com | | 309 + content.taylorfrancis.com | terminal-bad-status | 309 + curve.carleton.ca | success | 201 + curve.carleton.ca | | 201 + cyberdoi.ru | redirect-loop | 647 + cyberdoi.ru | | 647 + czasopisma.kul.pl | | 402 + czasopisma.kul.pl | success | 294 + d.lib.msu.edu | | 550 + d.lib.msu.edu | no-pdf-link | 550 + d197for5662m48.cloudfront.net | success | 276 + d197for5662m48.cloudfront.net | | 276 + dergipark.org.tr | | 674 + dergipark.org.tr | no-pdf-link | 255 + dergipark.org.tr | success | 248 + digi.ub.uni-heidelberg.de | no-pdf-link | 261 + digi.ub.uni-heidelberg.de | | 261 + dl.acm.org | | 441 + dl.acm.org | blocked-cookie | 361 + dlc.library.columbia.edu | | 201 + dlc.library.columbia.edu | no-pdf-link | 201 + doi.ala.org.au | | 308 + doi.ala.org.au | no-pdf-link | 308 + doi.org | | 474 + doi.org | terminal-bad-status | 344 + downloads.hindawi.com | | 479 + downloads.hindawi.com | success | 478 + edoc.rki.de | | 238 + edoc.unibas.ch | | 2018 + edoc.unibas.ch | success | 1067 + edoc.unibas.ch | terminal-bad-status | 817 + elib.spbstu.ru | | 205 + elifesciences.org | | 204 + era.library.ualberta.ca | | 531 + era.library.ualberta.ca | no-pdf-link | 527 + erj.ersjournals.com | | 951 + erj.ersjournals.com | redirect-loop | 829 + europepmc.org | | 289 + europepmc.org | success | 283 + figshare.com | | 233 + figshare.com | no-pdf-link | 208 + fjfsdata01prod.blob.core.windows.net | | 1430 + fjfsdata01prod.blob.core.windows.net | success | 1418 + hw.oeaw.ac.at | | 283 + hw.oeaw.ac.at | no-pdf-link | 283 + idb.ub.uni-tuebingen.de | | 216 + idb.ub.uni-tuebingen.de | terminal-bad-status | 215 + ieeexplore.ieee.org | | 7561 + ieeexplore.ieee.org | spn2-error:too-many-redirects | 6732 + ieeexplore.ieee.org | success | 683 + ijgc.bmj.com | | 411 + ijgc.bmj.com | success | 399 + jamanetwork.com | | 229 + jitc.bmj.com | | 849 + jitc.bmj.com | success | 773 + journals.aps.org | | 539 + journals.aps.org | not-found | 534 + journals.lww.com | | 1124 + journals.lww.com | no-pdf-link | 547 + journals.lww.com | link-loop | 399 + journals.openedition.org | | 7366 + journals.openedition.org | success | 3484 + journals.openedition.org | spn2-wayback-error | 2120 + journals.openedition.org | redirect-loop | 1720 + journals.plos.org | | 552 + journals.plos.org | success | 542 + kiss.kstudy.com | | 306 + kiss.kstudy.com | no-pdf-link | 292 + lib.dr.iastate.edu | | 297 + link.springer.com | | 2830 + link.springer.com | redirect-loop | 2625 + linkinghub.elsevier.com | | 970 + linkinghub.elsevier.com | forbidden | 415 + linkinghub.elsevier.com | spn2-error:too-many-redirects | 357 + medrxiv.org | | 287 + medrxiv.org | redirect-loop | 287 + muse.jhu.edu | | 470 + muse.jhu.edu | terminal-bad-status | 443 + ojs.ub.uni-konstanz.de | | 463 + ojs.ub.uni-konstanz.de | success | 462 + onlinelibrary.wiley.com | | 2064 + onlinelibrary.wiley.com | terminal-bad-status | 1973 + osf.io | | 1394 + osf.io | redirect-loop | 589 + osf.io | spn2-wayback-error | 425 + osf.io | not-found | 342 + othes.univie.ac.at | | 424 + othes.univie.ac.at | success | 424 + oxford.universitypressscholarship.com | | 8999 + oxford.universitypressscholarship.com | link-loop | 8282 + oxford.universitypressscholarship.com | no-pdf-link | 695 + oxfordhandbooks.com | redirect-loop | 460 + oxfordhandbooks.com | | 460 + papers.ssrn.com | | 1313 + papers.ssrn.com | link-loop | 1145 + peerj.com | | 313 + peerj.com | no-pdf-link | 212 + periodicos.urca.br | | 446 + periodicos.urca.br | success | 439 + pos.sissa.it | | 277 + pos.sissa.it | success | 262 + preprints.jmir.org | | 242 + pressto.amu.edu.pl | | 260 + pressto.amu.edu.pl | success | 232 + publikationsserver.tu-braunschweig.de | | 15358 + publikationsserver.tu-braunschweig.de | no-pdf-link | 15228 + publons.com | | 2810 + publons.com | redirect-loop | 2359 + publons.com | no-pdf-link | 444 + pubs.acs.org | | 1647 + pubs.acs.org | blocked-cookie | 1553 + pubs.rsc.org | | 765 + pubs.rsc.org | redirect-loop | 486 + pubs.rsc.org | spn2-wayback-error | 214 + res.mdpi.com | | 3620 + res.mdpi.com | success | 3591 + revistas.usal.es | | 580 + revistas.usal.es | success | 298 + revues.imist.ma | | 229 + rsdjournal.org | | 213 + rsdjournal.org | success | 212 + s3-eu-west-1.amazonaws.com | | 764 + s3-eu-west-1.amazonaws.com | success | 763 + s3-euw1-ap-pe-ws4-capi2-distribution-p.s3-eu-west-1.amazonaws.com | | 324 + s3-euw1-ap-pe-ws4-capi2-distribution-p.s3-eu-west-1.amazonaws.com | success | 324 + saspublishers.com | | 213 + saspublishers.com | success | 213 + scholarshare.temple.edu | | 524 + scholarshare.temple.edu | success | 464 + sol.sbc.org.br | | 268 + sol.sbc.org.br | success | 264 + statisticaldatasets.data-planet.com | | 442 + statisticaldatasets.data-planet.com | no-pdf-link | 390 + watermark.silverchair.com | | 521 + watermark.silverchair.com | success | 514 + www.ahajournals.org | | 1061 + www.ahajournals.org | blocked-cookie | 1011 + www.atlantis-press.com | | 308 + www.atlantis-press.com | spn2-wayback-error | 228 + www.beck-elibrary.de | | 1202 + www.beck-elibrary.de | no-pdf-link | 953 + www.beck-elibrary.de | bad-redirect | 249 + www.cairn.info | | 255 + www.cairn.info | redirect-loop | 208 + www.cambridge.org | | 2061 + www.cambridge.org | no-pdf-link | 727 + www.cambridge.org | success | 485 + www.cambridge.org | link-loop | 388 + www.cambridge.org | bad-redirect | 252 + www.confer.cz | | 227 + www.confer.cz | no-pdf-link | 226 + www.dbpia.co.kr | | 773 + www.dbpia.co.kr | no-pdf-link | 679 + www.degruyter.com | | 17046 + www.degruyter.com | link-loop | 14202 + www.degruyter.com | success | 2201 + www.degruyter.com | not-found | 235 + www.dovepress.com | | 316 + www.dovepress.com | success | 267 + www.e-manuscripta.ch | | 384 + www.e-manuscripta.ch | no-pdf-link | 383 + www.e-periodica.ch | | 358 + www.e-periodica.ch | no-pdf-link | 355 + www.e-rara.ch | no-pdf-link | 279 + www.e-rara.ch | | 279 + www.e3s-conferences.org | | 426 + www.e3s-conferences.org | success | 419 + www.elibrary.ru | | 303 + www.elibrary.ru | no-pdf-link | 301 + www.emerald.com | | 943 + www.emerald.com | no-pdf-link | 933 + www.etasr.com | | 466 + www.etasr.com | success | 466 + www.eurekaselect.com | | 345 + www.eurekaselect.com | no-pdf-link | 321 + www.europeanproceedings.com | | 218 + www.europeanproceedings.com | success | 218 + www.finersistemas.com | success | 397 + www.finersistemas.com | | 397 + www.humankineticslibrary.com | no-pdf-link | 321 + www.humankineticslibrary.com | | 321 + www.ijcmas.com | | 251 + www.ijcmas.com | no-pdf-link | 248 + www.inderscience.com | | 524 + www.inderscience.com | no-pdf-link | 501 + www.ingentaconnect.com | | 366 + www.ingentaconnect.com | no-pdf-link | 349 + www.jstage.jst.go.jp | | 1591 + www.jstage.jst.go.jp | success | 862 + www.jstage.jst.go.jp | no-pdf-link | 567 + www.jstor.org | | 351 + www.karger.com | | 224 + www.liebertpub.com | | 236 + www.liebertpub.com | blocked-cookie | 228 + www.mdpi.com | | 694 + www.mdpi.com | terminal-bad-status | 480 + www.medlit.ru | | 458 + www.medlit.ru | redirect-loop | 366 + www.morressier.com | | 285 + www.morressier.com | no-pdf-link | 253 + www.njca.info | | 223 + www.njca.info | remote-server-error | 222 + www.nomos-elibrary.de | | 913 + www.nomos-elibrary.de | no-pdf-link | 379 + www.nomos-elibrary.de | bad-redirect | 265 + www.nomos-elibrary.de | link-loop | 236 + www.onepetro.org | | 895 + www.onepetro.org | redirect-loop | 853 + www.osti.gov | | 212 + www.persee.fr | | 232 + www.persee.fr | terminal-bad-status | 213 + www.repository.cam.ac.uk | | 439 + www.research-collection.ethz.ch | | 312 + www.research-collection.ethz.ch | terminal-bad-status | 310 + www.revistas.ufg.br | | 212 + www.schoeningh.de | | 371 + www.schoeningh.de | link-loop | 366 + www.scialert.net | | 276 + www.scialert.net | redirect-loop | 254 + www.scielo.br | | 644 + www.scielo.br | success | 624 + www.sciencedirect.com | | 6523 + www.sciencedirect.com | no-pdf-link | 4668 + www.sciencedirect.com | terminal-bad-status | 1737 + www.scitepress.org | no-pdf-link | 397 + www.scitepress.org | | 397 + www.tandfonline.com | | 3448 + www.tandfonline.com | blocked-cookie | 2446 + www.tandfonline.com | terminal-bad-status | 714 + www.taylorfrancis.com | | 21292 + www.taylorfrancis.com | link-loop | 18648 + www.taylorfrancis.com | forbidden | 2022 + www.taylorfrancis.com | terminal-bad-status | 518 + www.thieme-connect.de | | 513 + www.thieme-connect.de | not-found | 292 + www.thieme-connect.de | redirect-loop | 213 + www.whateveryoneneedstoknow.com | | 1174 + www.whateveryoneneedstoknow.com | redirect-loop | 1163 + www.worldscientific.com | | 293 + www.worldscientific.com | blocked-cookie | 240 + www.zora.uzh.ch | | 290 + www.zora.uzh.ch | redirect-loop | 278 + zenodo.org | | 22202 + zenodo.org | terminal-bad-status | 12158 + zenodo.org | success | 4923 + zenodo.org | no-pdf-link | 4788 + | | 280719 + | success | 85143 + | no-pdf-link | 61335 + | link-loop | 48566 + | redirect-loop | 26845 + | terminal-bad-status | 23955 + | spn2-wayback-error | 7920 + | spn2-error:too-many-redirects | 7175 + | blocked-cookie | 6980 + | forbidden | 2912 + | bad-redirect | 2666 + | spn2-error | 1943 + | not-found | 1762 + | spn2-cdx-lookup-failure | 1376 + | wrong-mimetype | 467 + | remote-server-error | 388 + | spn2-error:proxy-error | 295 + | no-capture | 262 +(304 rows) diff --git a/sql/monitoring_queries.md b/sql/monitoring_queries.md index 1c872cc..cf3b190 100644 --- a/sql/monitoring_queries.md +++ b/sql/monitoring_queries.md @@ -61,9 +61,35 @@ Summary of significant domains and status, past 7 days: WHERE t1.domain != '' GROUP BY CUBE (domain, status) ) t2 - WHERE count > 500 + WHERE count > 200 ORDER BY domain ASC , count DESC; +Summary of DOI prefix and status, past 7 days: + + SELECT doi_prefix, status, count + FROM ( + SELECT doi_prefix, status, COUNT((doi_prefix, status)) as count + FROM ( + SELECT + ingest_file_result.ingest_type, + ingest_file_result.status, + substring(ingest_request.link_source_id FROM '(10\.[^/]*)/.*') AS doi_prefix + FROM ingest_file_result + LEFT JOIN ingest_request + ON ingest_file_result.ingest_type = ingest_request.ingest_type + AND ingest_file_result.base_url = ingest_request.base_url + WHERE + ingest_file_result.updated >= NOW() - '7 day'::INTERVAL + AND ingest_request.ingest_type = 'pdf' + AND ingest_request.ingest_request_source = 'fatcat-changelog' + AND ingest_request.link_source = 'doi' + ) t1 + WHERE t1.doi_prefix != '' + GROUP BY CUBE (doi_prefix, status) + ) t2 + WHERE count > 200 + ORDER BY doi_prefix ASC , count DESC; + Throughput per day, and success, for past 30 days: -- cgit v1.2.3