diff options
| author | Bryan Newbold <bnewbold@archive.org> | 2020-11-16 11:50:46 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@archive.org> | 2020-11-16 11:50:46 -0800 | 
| commit | 5537b666ad392fb13aa956ebff4e7aa0927b68ee (patch) | |
| tree | fbed416cf7036bec06d8275a6030c7ee809c2ba0 | |
| parent | 54bb5b6b7fb7a19aac7093a170e5b062f51e5a47 (diff) | |
| download | sandcrawler-5537b666ad392fb13aa956ebff4e7aa0927b68ee.tar.gz sandcrawler-5537b666ad392fb13aa956ebff4e7aa0927b68ee.zip | |
SQL: more ingest monitoring
| -rw-r--r-- | sql/ingest_stats/2020-11-16_weekly_ingest_doi_prefix.txt | 326 | ||||
| -rw-r--r-- | sql/ingest_stats/2020-11-16_weekly_ingest_terminal_domain.txt | 307 | ||||
| -rw-r--r-- | sql/monitoring_queries.md | 28 | 
3 files changed, 660 insertions, 1 deletions
| diff --git a/sql/ingest_stats/2020-11-16_weekly_ingest_doi_prefix.txt b/sql/ingest_stats/2020-11-16_weekly_ingest_doi_prefix.txt new file mode 100644 index 0000000..b684400 --- /dev/null +++ b/sql/ingest_stats/2020-11-16_weekly_ingest_doi_prefix.txt @@ -0,0 +1,326 @@ + doi_prefix |            status             | count   +------------+-------------------------------+-------- + 10.1001    |                               |    230 + 10.1002    |                               |   3914 + 10.1002    | terminal-bad-status           |   1540 + 10.1002    | forbidden                     |   1072 + 10.1002    | redirect-loop                 |    995 + 10.1002    | no-pdf-link                   |    210 + 10.1016    |                               |   7976 + 10.1016    | no-pdf-link                   |   4648 + 10.1016    | terminal-bad-status           |   1778 + 10.1016    | forbidden                     |    622 + 10.1016    | spn2-error:too-many-redirects |    344 + 10.1016    | redirect-loop                 |    225 + 10.1017    |                               |   2040 + 10.1017    | no-pdf-link                   |    720 + 10.1017    | success                       |    441 + 10.1017    | link-loop                     |    371 + 10.1017    | bad-redirect                  |    227 + 10.1021    |                               |   1722 + 10.1021    | blocked-cookie                |   1552 + 10.1029    |                               |    248 + 10.1039    |                               |   1160 + 10.1039    | redirect-loop                 |    486 + 10.1039    | spn2-error:too-many-redirects |    395 + 10.1039    | spn2-wayback-error            |    213 + 10.1051    |                               |    695 + 10.1051    | success                       |    557 + 10.1055    |                               |    541 + 10.1055    | not-found                     |    295 + 10.1055    | redirect-loop                 |    213 + 10.1057    |                               |   2835 + 10.1057    | redirect-loop                 |   2617 + 10.1061    |                               |    550 + 10.1061    | spn2-error:too-many-redirects |    425 + 10.1063    |                               |    600 + 10.1063    | spn2-error:too-many-redirects |    328 + 10.1080    |                               |   3801 + 10.1080    | blocked-cookie                |   2431 + 10.1080    | terminal-bad-status           |    711 + 10.1080    | forbidden                     |    341 + 10.1081    |                               |    299 + 10.1081    | link-loop                     |    222 + 10.1089    |                               |    236 + 10.1089    | blocked-cookie                |    228 + 10.1093    |                               |  12805 + 10.1093    | link-loop                     |   8627 + 10.1093    | redirect-loop                 |   1659 + 10.1093    | no-pdf-link                   |   1475 + 10.1093    | bad-redirect                  |    428 + 10.1093    | success                       |    391 + 10.1097    |                               |   1497 + 10.1097    | no-pdf-link                   |    503 + 10.1097    | link-loop                     |    346 + 10.1097    | spn2-error:too-many-redirects |    259 + 10.1097    | terminal-bad-status           |    202 + 10.1101    |                               |   1859 + 10.1101    | redirect-loop                 |    993 + 10.1101    | forbidden                     |    703 + 10.1103    |                               |    597 + 10.1103    | not-found                     |    534 + 10.1108    |                               |   1055 + 10.1108    | no-pdf-link                   |    945 + 10.1109    |                               |   7067 + 10.1109    | spn2-error:too-many-redirects |   6299 + 10.1109    | success                       |    667 + 10.1111    |                               |   2099 + 10.1111    | redirect-loop                 |   1331 + 10.1111    | terminal-bad-status           |    313 + 10.1111    | forbidden                     |    226 + 10.1115    |                               |   1278 + 10.1115    | bad-redirect                  |    707 + 10.1117    |                               |    561 + 10.1117    | spn2-error:too-many-redirects |    501 + 10.1126    |                               |    214 + 10.1136    |                               |   1989 + 10.1136    | success                       |   1463 + 10.1136    | link-loop                     |    294 + 10.1142    |                               |    300 + 10.1142    | blocked-cookie                |    237 + 10.1145    |                               |    440 + 10.1145    | blocked-cookie                |    354 + 10.1155    |                               |    480 + 10.1155    | success                       |    474 + 10.11588   |                               |    506 + 10.11588   | no-pdf-link                   |    264 + 10.11588   | success                       |    236 + 10.1159    |                               |    226 + 10.11606   |                               |    304 + 10.1161    |                               |   1142 + 10.1161    | blocked-cookie                |   1011 + 10.1163    |                               |   2261 + 10.1163    | link-loop                     |   1767 + 10.1163    | success                       |    348 + 10.11648   |                               |    405 + 10.11648   | success                       |    404 + 10.1182    |                               |   2125 + 10.1182    | no-pdf-link                   |   2024 + 10.1183    |                               |    987 + 10.1183    | redirect-loop                 |    838 + 10.1186    |                               |   1481 + 10.1186    | success                       |   1412 + 10.1201    |                               |   7649 + 10.1201    | link-loop                     |   5383 + 10.1201    | forbidden                     |   1504 + 10.1201    | no-pdf-link                   |    312 + 10.1299    |                               |    264 + 10.1299    | no-pdf-link                   |    209 + 10.13134   |                               |    201 + 10.1353    |                               |    549 + 10.1353    | terminal-bad-status           |    443 + 10.1371    |                               |    552 + 10.1371    | success                       |    542 + 10.14201   |                               |    656 + 10.14201   | success                       |    366 + 10.14361   |                               |    647 + 10.14361   | link-loop                     |    585 + 10.14746   |                               |    260 + 10.14746   | success                       |    232 + 10.1504    |                               |    527 + 10.1504    | no-pdf-link                   |    501 + 10.15122   |                               |    246 + 10.15122   | success                       |    243 + 10.1515    |                               |  16240 + 10.1515    | link-loop                     |  12589 + 10.1515    | success                       |   1941 + 10.1515    | no-pdf-link                   |   1008 + 10.1515    | not-found                     |    283 + 10.15405   |                               |    229 + 10.15405   | success                       |    218 + 10.1553    |                               |    418 + 10.1553    | no-pdf-link                   |    396 + 10.1590    |                               |    655 + 10.1590    | success                       |    623 + 10.17104   |                               |   1202 + 10.17104   | no-pdf-link                   |    953 + 10.17104   | bad-redirect                  |    249 + 10.17605   |                               |    368 + 10.17605   | not-found                     |    337 + 10.17615   |                               |   9401 + 10.17615   | redirect-loop                 |   5720 + 10.17615   | spn2-wayback-error            |   3099 + 10.17615   | spn2-cdx-lookup-failure       |    201 + 10.17863   |                               |    438 + 10.18148   |                               |    465 + 10.18148   | success                       |    462 + 10.18720   |                               |    210 + 10.18821   |                               |    476 + 10.18821   | redirect-loop                 |    366 + 10.20345   |                               |    222 + 10.20345   | terminal-bad-status           |    215 + 10.20546   |                               |    244 + 10.20546   | no-pdf-link                   |    241 + 10.21037   |                               |    232 + 10.2118    |                               |    903 + 10.2118    | redirect-loop                 |    853 + 10.21203   |                               |   1824 + 10.21203   | success                       |   1545 + 10.2139    |                               |   1493 + 10.2139    | link-loop                     |   1145 + 10.2147    |                               |    318 + 10.2147    | success                       |    267 + 10.2172    |                               |    282 + 10.2174    |                               |    363 + 10.2174    | no-pdf-link                   |    320 + 10.2196    |                               |    265 + 10.2208    |                               |    299 + 10.22215   |                               |    218 + 10.22215   | success                       |    217 + 10.22323   |                               |    289 + 10.22323   | success                       |    262 + 10.22533   |                               |    395 + 10.22533   | success                       |    393 + 10.22541   |                               |    291 + 10.22541   | success                       |    275 + 10.23919   |                               |    426 + 10.23919   | spn2-error:too-many-redirects |    403 + 10.24034   |                               |    319 + 10.24034   | spn2-error                    |    203 + 10.24355   |                               |  15360 + 10.24355   | no-pdf-link                   |  15228 + 10.24411   |                               |   1506 + 10.24411   | forbidden                     |    823 + 10.24411   | redirect-loop                 |    647 + 10.25335   |                               |    550 + 10.25335   | no-pdf-link                   |    550 + 10.25365   |                               |    429 + 10.25365   | success                       |    424 + 10.25384   |                               |    338 + 10.25384   | success                       |    249 + 10.25646   |                               |    239 + 10.26197   | no-pdf-link                   |    303 + 10.26197   |                               |    303 + 10.26226   |                               |    272 + 10.26278   |                               |   1291 + 10.26278   | redirect-loop                 |    756 + 10.26278   | spn2-error:too-many-redirects |    509 + 10.29327   |                               |    232 + 10.2991    |                               |    307 + 10.2991    | spn2-wayback-error            |    227 + 10.30965   |                               |    722 + 10.30965   | link-loop                     |    709 + 10.3109    |                               |    801 + 10.3109    | link-loop                     |    572 + 10.3109    | forbidden                     |    228 + 10.31219   |                               |    951 + 10.31219   | redirect-loop                 |    518 + 10.31219   | spn2-wayback-error            |    356 + 10.31274   |                               |    296 + 10.31743   |                               |    403 + 10.31743   | success                       |    294 + 10.31857   |                               |    209 + 10.3233    |                               |    471 + 10.33448   |                               |    213 + 10.33448   | success                       |    212 + 10.3389    |                               |   1459 + 10.3389    | success                       |   1417 + 10.3390    |                               |   4511 + 10.3390    | success                       |   3577 + 10.3390    | terminal-bad-status           |    485 + 10.3390    | forbidden                     |    379 + 10.3406    |                               |    243 + 10.3406    | terminal-bad-status           |    213 + 10.34944   |                               |    527 + 10.34944   | success                       |    459 + 10.35016   |                               |    688 + 10.35016   | no-pdf-link                   |    687 + 10.36347   | success                       |    213 + 10.36347   |                               |    213 + 10.37747   |                               |    213 + 10.37747   | no-pdf-link                   |    213 + 10.37904   |                               |    227 + 10.37904   | no-pdf-link                   |    226 + 10.3917    |                               |    347 + 10.3917    | redirect-loop                 |    208 + 10.3923    |                               |    356 + 10.3923    | redirect-loop                 |    254 + 10.3929    |                               |    317 + 10.3929    | terminal-bad-status           |    310 + 10.3931    |                               |    279 + 10.3931    | no-pdf-link                   |    279 + 10.4000    |                               |   7828 + 10.4000    | success                       |   3485 + 10.4000    | spn2-wayback-error            |   2142 + 10.4000    | redirect-loop                 |   2106 + 10.4018    |                               |    249 + 10.4018    | not-found                     |    240 + 10.4103    |                               |    726 + 10.4103    | remote-server-error           |    343 + 10.4103    | redirect-loop                 |    324 + 10.4159    |                               |    286 + 10.4159    | link-loop                     |    238 + 10.4324    |                               |  19398 + 10.4324    | link-loop                     |  12471 + 10.4324    | forbidden                     |   3632 + 10.4324    | not-found                     |   2283 + 10.4324    | terminal-bad-status           |    645 + 10.4324    | success                       |    208 + 10.47295   |                               |    456 + 10.47295   | success                       |    449 + 10.47513   |                               |    218 + 10.47513   | no-pdf-link                   |    203 + 10.48084   | success                       |    538 + 10.48084   |                               |    538 + 10.5040    |                               |    375 + 10.5040    | no-pdf-link                   |    365 + 10.5167    |                               |    290 + 10.5167    | redirect-loop                 |    278 + 10.5169    |                               |    360 + 10.5169    | no-pdf-link                   |    355 + 10.5194    |                               |    917 + 10.5194    | success                       |    887 + 10.5216    |                               |    213 + 10.5220    | no-pdf-link                   |    397 + 10.5220    |                               |    397 + 10.5281    |                               |  22551 + 10.5281    | terminal-bad-status           |  12158 + 10.5281    | success                       |   4901 + 10.5281    | no-pdf-link                   |   4754 + 10.5281    | spn2-error:unknown            |    360 + 10.5282    |                               |    228 + 10.5451    |                               |   2068 + 10.5451    | success                       |   1071 + 10.5451    | terminal-bad-status           |    817 + 10.5753    |                               |    268 + 10.5753    | success                       |    264 + 10.5771    |                               |    941 + 10.5771    | no-pdf-link                   |    397 + 10.5771    | bad-redirect                  |    269 + 10.5771    | link-loop                     |    238 + 10.6068    |                               |    441 + 10.6068    | no-pdf-link                   |    384 + 10.6084    |                               |    917 + 10.6084    | no-pdf-link                   |    520 + 10.6084    | success                       |    368 + 10.7287    |                               |    234 + 10.7287    | no-pdf-link                   |    212 + 10.7312    |                               |    382 + 10.7312    | link-loop                     |    291 + 10.7554    |                               |    205 + 10.7891    |                               |    380 + 10.7891    | no-pdf-link                   |    376 + 10.7916    |                               |    331 + 10.7916    | no-pdf-link                   |    201 + 10.7939    |                               |    535 + 10.7939    | no-pdf-link                   |    527 +            |                               | 272831 +            | success                       |  62298 +            | no-pdf-link                   |  60737 +            | link-loop                     |  48558 +            | redirect-loop                 |  26842 +            | terminal-bad-status           |  22685 +            | spn2-error:too-many-redirects |  11174 +            | forbidden                     |  10900 +            | spn2-wayback-error            |   7796 +            | blocked-cookie                |   6961 +            | not-found                     |   5468 +            | bad-redirect                  |   2666 +            | spn2-error                    |   2398 +            | spn2-cdx-lookup-failure       |   1374 +            | petabox-error                 |    678 +            | remote-server-error           |    461 +            | wrong-mimetype                |    443 +            | spn2-error:proxy-error        |    420 +            | spn2-error:unknown            |    360 +(323 rows) diff --git a/sql/ingest_stats/2020-11-16_weekly_ingest_terminal_domain.txt b/sql/ingest_stats/2020-11-16_weekly_ingest_terminal_domain.txt new file mode 100644 index 0000000..28dd0d0 --- /dev/null +++ b/sql/ingest_stats/2020-11-16_weekly_ingest_terminal_domain.txt @@ -0,0 +1,307 @@ +                              domain                               |            status             | count   +-------------------------------------------------------------------+-------------------------------+-------- + 202.148.31.178                                                    |                               |    298 + academic.oup.com                                                  |                               |   1624 + academic.oup.com                                                  | no-pdf-link                   |    673 + academic.oup.com                                                  | bad-redirect                  |    444 + academic.oup.com                                                  | link-loop                     |    358 + aip.scitation.org                                                 |                               |    257 + apps.crossref.org                                                 |                               |   1414 + apps.crossref.org                                                 | no-pdf-link                   |   1410 + article.sciencepublishinggroup.com                                |                               |    404 + article.sciencepublishinggroup.com                                | success                       |    404 + arxiv.org                                                         |                               |  24340 + arxiv.org                                                         | success                       |  22381 + arxiv.org                                                         | terminal-bad-status           |   1260 + arxiv.org                                                         | no-pdf-link                   |    412 + arxiv.org                                                         | no-capture                    |    262 + ashpublications.org                                               |                               |   2049 + ashpublications.org                                               | no-pdf-link                   |   2024 + asmedigitalcollection.asme.org                                    |                               |   1245 + asmedigitalcollection.asme.org                                    | bad-redirect                  |    707 + assets.researchsquare.com                                         |                               |   1549 + assets.researchsquare.com                                         | success                       |   1546 + bioone.org                                                        |                               |    201 + biorxiv.org                                                       | redirect-loop                 |    702 + biorxiv.org                                                       |                               |    702 + blogs.ethz.ch                                                     |                               |    687 + blogs.ethz.ch                                                     | no-pdf-link                   |    686 + books.openedition.org                                             |                               |    446 + books.openedition.org                                             | redirect-loop                 |    382 + brill.com                                                         |                               |   2203 + brill.com                                                         | link-loop                     |   1779 + brill.com                                                         | success                       |    359 + catalog.paradisec.org.au                                          |                               |    770 + catalog.paradisec.org.au                                          | redirect-loop                 |    756 + cdr.lib.unc.edu                                                   |                               |   9432 + cdr.lib.unc.edu                                                   | redirect-loop                 |   5720 + cdr.lib.unc.edu                                                   | spn2-wayback-error            |   3187 + cdr.lib.unc.edu                                                   | spn2-cdx-lookup-failure       |    201 + classiques-garnier.com                                            |                               |    246 + classiques-garnier.com                                            | success                       |    243 + content.iospress.com                                              |                               |    242 + content.taylorfrancis.com                                         |                               |    309 + content.taylorfrancis.com                                         | terminal-bad-status           |    309 + curve.carleton.ca                                                 | success                       |    201 + curve.carleton.ca                                                 |                               |    201 + cyberdoi.ru                                                       | redirect-loop                 |    647 + cyberdoi.ru                                                       |                               |    647 + czasopisma.kul.pl                                                 |                               |    402 + czasopisma.kul.pl                                                 | success                       |    294 + d.lib.msu.edu                                                     |                               |    550 + d.lib.msu.edu                                                     | no-pdf-link                   |    550 + d197for5662m48.cloudfront.net                                     | success                       |    276 + d197for5662m48.cloudfront.net                                     |                               |    276 + dergipark.org.tr                                                  |                               |    674 + dergipark.org.tr                                                  | no-pdf-link                   |    255 + dergipark.org.tr                                                  | success                       |    248 + digi.ub.uni-heidelberg.de                                         | no-pdf-link                   |    261 + digi.ub.uni-heidelberg.de                                         |                               |    261 + dl.acm.org                                                        |                               |    441 + dl.acm.org                                                        | blocked-cookie                |    361 + dlc.library.columbia.edu                                          |                               |    201 + dlc.library.columbia.edu                                          | no-pdf-link                   |    201 + doi.ala.org.au                                                    |                               |    308 + doi.ala.org.au                                                    | no-pdf-link                   |    308 + doi.org                                                           |                               |    474 + doi.org                                                           | terminal-bad-status           |    344 + downloads.hindawi.com                                             |                               |    479 + downloads.hindawi.com                                             | success                       |    478 + edoc.rki.de                                                       |                               |    238 + edoc.unibas.ch                                                    |                               |   2018 + edoc.unibas.ch                                                    | success                       |   1067 + edoc.unibas.ch                                                    | terminal-bad-status           |    817 + elib.spbstu.ru                                                    |                               |    205 + elifesciences.org                                                 |                               |    204 + era.library.ualberta.ca                                           |                               |    531 + era.library.ualberta.ca                                           | no-pdf-link                   |    527 + erj.ersjournals.com                                               |                               |    951 + erj.ersjournals.com                                               | redirect-loop                 |    829 + europepmc.org                                                     |                               |    289 + europepmc.org                                                     | success                       |    283 + figshare.com                                                      |                               |    233 + figshare.com                                                      | no-pdf-link                   |    208 + fjfsdata01prod.blob.core.windows.net                              |                               |   1430 + fjfsdata01prod.blob.core.windows.net                              | success                       |   1418 + hw.oeaw.ac.at                                                     |                               |    283 + hw.oeaw.ac.at                                                     | no-pdf-link                   |    283 + idb.ub.uni-tuebingen.de                                           |                               |    216 + idb.ub.uni-tuebingen.de                                           | terminal-bad-status           |    215 + ieeexplore.ieee.org                                               |                               |   7561 + ieeexplore.ieee.org                                               | spn2-error:too-many-redirects |   6732 + ieeexplore.ieee.org                                               | success                       |    683 + ijgc.bmj.com                                                      |                               |    411 + ijgc.bmj.com                                                      | success                       |    399 + jamanetwork.com                                                   |                               |    229 + jitc.bmj.com                                                      |                               |    849 + jitc.bmj.com                                                      | success                       |    773 + journals.aps.org                                                  |                               |    539 + journals.aps.org                                                  | not-found                     |    534 + journals.lww.com                                                  |                               |   1124 + journals.lww.com                                                  | no-pdf-link                   |    547 + journals.lww.com                                                  | link-loop                     |    399 + journals.openedition.org                                          |                               |   7366 + journals.openedition.org                                          | success                       |   3484 + journals.openedition.org                                          | spn2-wayback-error            |   2120 + journals.openedition.org                                          | redirect-loop                 |   1720 + journals.plos.org                                                 |                               |    552 + journals.plos.org                                                 | success                       |    542 + kiss.kstudy.com                                                   |                               |    306 + kiss.kstudy.com                                                   | no-pdf-link                   |    292 + lib.dr.iastate.edu                                                |                               |    297 + link.springer.com                                                 |                               |   2830 + link.springer.com                                                 | redirect-loop                 |   2625 + linkinghub.elsevier.com                                           |                               |    970 + linkinghub.elsevier.com                                           | forbidden                     |    415 + linkinghub.elsevier.com                                           | spn2-error:too-many-redirects |    357 + medrxiv.org                                                       |                               |    287 + medrxiv.org                                                       | redirect-loop                 |    287 + muse.jhu.edu                                                      |                               |    470 + muse.jhu.edu                                                      | terminal-bad-status           |    443 + ojs.ub.uni-konstanz.de                                            |                               |    463 + ojs.ub.uni-konstanz.de                                            | success                       |    462 + onlinelibrary.wiley.com                                           |                               |   2064 + onlinelibrary.wiley.com                                           | terminal-bad-status           |   1973 + osf.io                                                            |                               |   1394 + osf.io                                                            | redirect-loop                 |    589 + osf.io                                                            | spn2-wayback-error            |    425 + osf.io                                                            | not-found                     |    342 + othes.univie.ac.at                                                |                               |    424 + othes.univie.ac.at                                                | success                       |    424 + oxford.universitypressscholarship.com                             |                               |   8999 + oxford.universitypressscholarship.com                             | link-loop                     |   8282 + oxford.universitypressscholarship.com                             | no-pdf-link                   |    695 + oxfordhandbooks.com                                               | redirect-loop                 |    460 + oxfordhandbooks.com                                               |                               |    460 + papers.ssrn.com                                                   |                               |   1313 + papers.ssrn.com                                                   | link-loop                     |   1145 + peerj.com                                                         |                               |    313 + peerj.com                                                         | no-pdf-link                   |    212 + periodicos.urca.br                                                |                               |    446 + periodicos.urca.br                                                | success                       |    439 + pos.sissa.it                                                      |                               |    277 + pos.sissa.it                                                      | success                       |    262 + preprints.jmir.org                                                |                               |    242 + pressto.amu.edu.pl                                                |                               |    260 + pressto.amu.edu.pl                                                | success                       |    232 + publikationsserver.tu-braunschweig.de                             |                               |  15358 + publikationsserver.tu-braunschweig.de                             | no-pdf-link                   |  15228 + publons.com                                                       |                               |   2810 + publons.com                                                       | redirect-loop                 |   2359 + publons.com                                                       | no-pdf-link                   |    444 + pubs.acs.org                                                      |                               |   1647 + pubs.acs.org                                                      | blocked-cookie                |   1553 + pubs.rsc.org                                                      |                               |    765 + pubs.rsc.org                                                      | redirect-loop                 |    486 + pubs.rsc.org                                                      | spn2-wayback-error            |    214 + res.mdpi.com                                                      |                               |   3620 + res.mdpi.com                                                      | success                       |   3591 + revistas.usal.es                                                  |                               |    580 + revistas.usal.es                                                  | success                       |    298 + revues.imist.ma                                                   |                               |    229 + rsdjournal.org                                                    |                               |    213 + rsdjournal.org                                                    | success                       |    212 + s3-eu-west-1.amazonaws.com                                        |                               |    764 + s3-eu-west-1.amazonaws.com                                        | success                       |    763 + s3-euw1-ap-pe-ws4-capi2-distribution-p.s3-eu-west-1.amazonaws.com |                               |    324 + s3-euw1-ap-pe-ws4-capi2-distribution-p.s3-eu-west-1.amazonaws.com | success                       |    324 + saspublishers.com                                                 |                               |    213 + saspublishers.com                                                 | success                       |    213 + scholarshare.temple.edu                                           |                               |    524 + scholarshare.temple.edu                                           | success                       |    464 + sol.sbc.org.br                                                    |                               |    268 + sol.sbc.org.br                                                    | success                       |    264 + statisticaldatasets.data-planet.com                               |                               |    442 + statisticaldatasets.data-planet.com                               | no-pdf-link                   |    390 + watermark.silverchair.com                                         |                               |    521 + watermark.silverchair.com                                         | success                       |    514 + www.ahajournals.org                                               |                               |   1061 + www.ahajournals.org                                               | blocked-cookie                |   1011 + www.atlantis-press.com                                            |                               |    308 + www.atlantis-press.com                                            | spn2-wayback-error            |    228 + www.beck-elibrary.de                                              |                               |   1202 + www.beck-elibrary.de                                              | no-pdf-link                   |    953 + www.beck-elibrary.de                                              | bad-redirect                  |    249 + www.cairn.info                                                    |                               |    255 + www.cairn.info                                                    | redirect-loop                 |    208 + www.cambridge.org                                                 |                               |   2061 + www.cambridge.org                                                 | no-pdf-link                   |    727 + www.cambridge.org                                                 | success                       |    485 + www.cambridge.org                                                 | link-loop                     |    388 + www.cambridge.org                                                 | bad-redirect                  |    252 + www.confer.cz                                                     |                               |    227 + www.confer.cz                                                     | no-pdf-link                   |    226 + www.dbpia.co.kr                                                   |                               |    773 + www.dbpia.co.kr                                                   | no-pdf-link                   |    679 + www.degruyter.com                                                 |                               |  17046 + www.degruyter.com                                                 | link-loop                     |  14202 + www.degruyter.com                                                 | success                       |   2201 + www.degruyter.com                                                 | not-found                     |    235 + www.dovepress.com                                                 |                               |    316 + www.dovepress.com                                                 | success                       |    267 + www.e-manuscripta.ch                                              |                               |    384 + www.e-manuscripta.ch                                              | no-pdf-link                   |    383 + www.e-periodica.ch                                                |                               |    358 + www.e-periodica.ch                                                | no-pdf-link                   |    355 + www.e-rara.ch                                                     | no-pdf-link                   |    279 + www.e-rara.ch                                                     |                               |    279 + www.e3s-conferences.org                                           |                               |    426 + www.e3s-conferences.org                                           | success                       |    419 + www.elibrary.ru                                                   |                               |    303 + www.elibrary.ru                                                   | no-pdf-link                   |    301 + www.emerald.com                                                   |                               |    943 + www.emerald.com                                                   | no-pdf-link                   |    933 + www.etasr.com                                                     |                               |    466 + www.etasr.com                                                     | success                       |    466 + www.eurekaselect.com                                              |                               |    345 + www.eurekaselect.com                                              | no-pdf-link                   |    321 + www.europeanproceedings.com                                       |                               |    218 + www.europeanproceedings.com                                       | success                       |    218 + www.finersistemas.com                                             | success                       |    397 + www.finersistemas.com                                             |                               |    397 + www.humankineticslibrary.com                                      | no-pdf-link                   |    321 + www.humankineticslibrary.com                                      |                               |    321 + www.ijcmas.com                                                    |                               |    251 + www.ijcmas.com                                                    | no-pdf-link                   |    248 + www.inderscience.com                                              |                               |    524 + www.inderscience.com                                              | no-pdf-link                   |    501 + www.ingentaconnect.com                                            |                               |    366 + www.ingentaconnect.com                                            | no-pdf-link                   |    349 + www.jstage.jst.go.jp                                              |                               |   1591 + www.jstage.jst.go.jp                                              | success                       |    862 + www.jstage.jst.go.jp                                              | no-pdf-link                   |    567 + www.jstor.org                                                     |                               |    351 + www.karger.com                                                    |                               |    224 + www.liebertpub.com                                                |                               |    236 + www.liebertpub.com                                                | blocked-cookie                |    228 + www.mdpi.com                                                      |                               |    694 + www.mdpi.com                                                      | terminal-bad-status           |    480 + www.medlit.ru                                                     |                               |    458 + www.medlit.ru                                                     | redirect-loop                 |    366 + www.morressier.com                                                |                               |    285 + www.morressier.com                                                | no-pdf-link                   |    253 + www.njca.info                                                     |                               |    223 + www.njca.info                                                     | remote-server-error           |    222 + www.nomos-elibrary.de                                             |                               |    913 + www.nomos-elibrary.de                                             | no-pdf-link                   |    379 + www.nomos-elibrary.de                                             | bad-redirect                  |    265 + www.nomos-elibrary.de                                             | link-loop                     |    236 + www.onepetro.org                                                  |                               |    895 + www.onepetro.org                                                  | redirect-loop                 |    853 + www.osti.gov                                                      |                               |    212 + www.persee.fr                                                     |                               |    232 + www.persee.fr                                                     | terminal-bad-status           |    213 + www.repository.cam.ac.uk                                          |                               |    439 + www.research-collection.ethz.ch                                   |                               |    312 + www.research-collection.ethz.ch                                   | terminal-bad-status           |    310 + www.revistas.ufg.br                                               |                               |    212 + www.schoeningh.de                                                 |                               |    371 + www.schoeningh.de                                                 | link-loop                     |    366 + www.scialert.net                                                  |                               |    276 + www.scialert.net                                                  | redirect-loop                 |    254 + www.scielo.br                                                     |                               |    644 + www.scielo.br                                                     | success                       |    624 + www.sciencedirect.com                                             |                               |   6523 + www.sciencedirect.com                                             | no-pdf-link                   |   4668 + www.sciencedirect.com                                             | terminal-bad-status           |   1737 + www.scitepress.org                                                | no-pdf-link                   |    397 + www.scitepress.org                                                |                               |    397 + www.tandfonline.com                                               |                               |   3448 + www.tandfonline.com                                               | blocked-cookie                |   2446 + www.tandfonline.com                                               | terminal-bad-status           |    714 + www.taylorfrancis.com                                             |                               |  21292 + www.taylorfrancis.com                                             | link-loop                     |  18648 + www.taylorfrancis.com                                             | forbidden                     |   2022 + www.taylorfrancis.com                                             | terminal-bad-status           |    518 + www.thieme-connect.de                                             |                               |    513 + www.thieme-connect.de                                             | not-found                     |    292 + www.thieme-connect.de                                             | redirect-loop                 |    213 + www.whateveryoneneedstoknow.com                                   |                               |   1174 + www.whateveryoneneedstoknow.com                                   | redirect-loop                 |   1163 + www.worldscientific.com                                           |                               |    293 + www.worldscientific.com                                           | blocked-cookie                |    240 + www.zora.uzh.ch                                                   |                               |    290 + www.zora.uzh.ch                                                   | redirect-loop                 |    278 + zenodo.org                                                        |                               |  22202 + zenodo.org                                                        | terminal-bad-status           |  12158 + zenodo.org                                                        | success                       |   4923 + zenodo.org                                                        | no-pdf-link                   |   4788 +                                                                   |                               | 280719 +                                                                   | success                       |  85143 +                                                                   | no-pdf-link                   |  61335 +                                                                   | link-loop                     |  48566 +                                                                   | redirect-loop                 |  26845 +                                                                   | terminal-bad-status           |  23955 +                                                                   | spn2-wayback-error            |   7920 +                                                                   | spn2-error:too-many-redirects |   7175 +                                                                   | blocked-cookie                |   6980 +                                                                   | forbidden                     |   2912 +                                                                   | bad-redirect                  |   2666 +                                                                   | spn2-error                    |   1943 +                                                                   | not-found                     |   1762 +                                                                   | spn2-cdx-lookup-failure       |   1376 +                                                                   | wrong-mimetype                |    467 +                                                                   | remote-server-error           |    388 +                                                                   | spn2-error:proxy-error        |    295 +                                                                   | no-capture                    |    262 +(304 rows) diff --git a/sql/monitoring_queries.md b/sql/monitoring_queries.md index 1c872cc..cf3b190 100644 --- a/sql/monitoring_queries.md +++ b/sql/monitoring_queries.md @@ -61,9 +61,35 @@ Summary of significant domains and status, past 7 days:          WHERE t1.domain != ''          GROUP BY CUBE (domain, status)      ) t2 -    WHERE count > 500 +    WHERE count > 200      ORDER BY domain ASC , count DESC; +Summary of DOI prefix and status, past 7 days: + +    SELECT doi_prefix, status, count +    FROM ( +        SELECT doi_prefix, status, COUNT((doi_prefix, status)) as count +        FROM ( +            SELECT +                ingest_file_result.ingest_type, +                ingest_file_result.status, +                substring(ingest_request.link_source_id FROM '(10\.[^/]*)/.*') AS doi_prefix +            FROM ingest_file_result +            LEFT JOIN ingest_request +                ON ingest_file_result.ingest_type = ingest_request.ingest_type +                AND ingest_file_result.base_url = ingest_request.base_url +            WHERE +                ingest_file_result.updated >= NOW() - '7 day'::INTERVAL +                AND ingest_request.ingest_type = 'pdf' +                AND ingest_request.ingest_request_source = 'fatcat-changelog' +                AND ingest_request.link_source = 'doi' +        ) t1 +        WHERE t1.doi_prefix != '' +        GROUP BY CUBE (doi_prefix, status) +    ) t2 +    WHERE count > 200 +    ORDER BY doi_prefix ASC , count DESC; +  Throughput per day, and success, for past 30 days: | 
