diff options
Diffstat (limited to 'extra')
| -rw-r--r-- | extra/elasticsearch/elastic_queries.txt | 224 | ||||
| -rw-r--r-- | extra/elasticsearch/kibana_dashboards.json | 247 | ||||
| -rw-r--r-- | extra/elasticsearch/stats.py | 174 | 
3 files changed, 645 insertions, 0 deletions
| diff --git a/extra/elasticsearch/elastic_queries.txt b/extra/elasticsearch/elastic_queries.txt new file mode 100644 index 00000000..91bff199 --- /dev/null +++ b/extra/elasticsearch/elastic_queries.txt @@ -0,0 +1,224 @@ + +GET /fatcat_release/_search?request_cache=true +{ +  "size": 0, +  "aggs": { +    "popular_journals": { +      "terms": { +        "field": "container_issnl" +      } +    } +  } +} + +GET /fatcat_release/_count +{ +  "query": { +    "term": {"container_issnl": "1932-6203"} +  } +} + +"quick counts" +
GET /fatcat_container/_count + +GET /fatcat_release/_count + +GET /fatcat_release/_count +{ +  "query": { +    "terms": { +      "release_type": ["article-journal", "chapter", "paper-conference", "thesis"] +    } +  } +} + +"in-scope works on web (fulltext)": +GET /fatcat_release/_count +{ +  "query": { +    "bool": { +      "filter": [ +        { +          "terms": { +            "release_type": [ +              "article-journal", +              "chapter", +              "paper-conference", +              "thesis" +            ] +          } +        }, +        { +          "term": { +            "in_web": "true" +          } +        } +      ] +    } +  } +} + +"in-scope OA" +GET /fatcat_release/_count +{ +  "query": { +    "bool": { +      "filter": [ +        { +          "terms": { +            "release_type": [ +              "article-journal", +              "chapter", +              "paper-conference", +              "thesis" +            ] +          } +        }, +        { +          "term": { +            "is_oa": "true" +          } +        } +      ] +    } +  } +} + +"not in KBART, in web" +GET /fatcat_release/_count +{ +  "query": { +    "bool": { +      "filter": [ +        { +          "terms": { +            "release_type": [ +              "article-journal", +              "chapter", +              "paper-conference", +              "thesis" +            ] +          } +        }, +        { +          "term": { +            "in_kbart": "false" +          } +        }, +        { +          "term": { +            "in_web": "true" +          } +        } +      ] +    } +  } +} + +GET /fatcat_release/_search?request_cache=true +{ +  "size": 0, +  "aggs": { +    "release_ref_count": { +      "sum": { +        "field": "ref_count" +      } +    } +  } +} + +##### + +GET /fatcat_release/_search?request_cache=true +{ +  "size": 0, +  "aggs": { +    "release_ref_count": { +      "sum": { +        "field": "ref_count" +      } +    } +  } +} + +GET 
/fatcat_release/_search?request_cache=true +{ +  "size": 0, +  "query": { +    "terms": { +      "release_type": [ +        "article-journal", +        "chapter", +        "paper-conference", +        "thesis" +      ] +    } +  }, +  "aggs": { +    "paper_like": { +      "filters": { +        "filters": { +          "in_web": { +            "term": { +              "in_web": "true" +            } +          }, +          "is_oa": { +            "term": { +              "is_oa": "true" +            } +          }, +          "in_kbart": { +            "term": { +              "in_kbart": "true" +            } +          }, +          "in_web_not_kbart": { +            "bool": { +              "filter": [ +                { +                  "term": { +                    "in_web": "true" +                  } +                }, +                { +                  "term": { +                    "in_kbart": "false" +                  } +                } +              ] +            } +          } +        } +      } +    } +  } +} + + +################# +"search inside a container": + +GET /fatcat_release/_search?request_cache=true +{ +    "query": { +      "bool": { +        "must": { +          "query_string": { +            "query": "blood", +            "default_operator": "AND", +            "analyze_wildcard": "true", +            "lenient": "true", +            "fields": ["title^5", "contrib_names^2"] +          } +        }, +        "filter": { +          "term": { +            "container_issnl": "1932-6203" +          } +        } +      } +    } +} + + diff --git a/extra/elasticsearch/kibana_dashboards.json b/extra/elasticsearch/kibana_dashboards.json new file mode 100644 index 00000000..654497e4 --- /dev/null +++ b/extra/elasticsearch/kibana_dashboards.json @@ -0,0 +1,247 @@ +[ +  { +    "_id": "cc1da1c0-c054-11e8-85cd-77149200218b", +    "_type": "dashboard", +    "_source": { +      "title": "Generic Demo-y Dashboard", +      "hits": 0, +      
"description": "Just for fun!", +      "panelsJSON": "[{\"embeddableConfig\":{\"vis\":{\"colors\":{\"false\":\"#E24D42\",\"true\":\"#629E51\"},\"legendOpen\":false}},\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":15,\"i\":\"3\"},\"id\":\"acd4e760-c054-11e8-85cd-77149200218b\",\"panelIndex\":\"3\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"embeddableConfig\":{},\"gridData\":{\"x\":0,\"y\":15,\"w\":24,\"h\":15,\"i\":\"6\"},\"id\":\"373d7a80-c13a-11e8-b25a-833364b3ed98\",\"panelIndex\":\"6\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"embeddableConfig\":{},\"gridData\":{\"x\":24,\"y\":21,\"w\":24,\"h\":7,\"i\":\"7\"},\"id\":\"e9388f30-c28f-11e8-971a-f175bb468f02\",\"panelIndex\":\"7\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"embeddableConfig\":{},\"gridData\":{\"x\":24,\"y\":0,\"w\":24,\"h\":14,\"i\":\"8\"},\"id\":\"9b21de00-c28f-11e8-971a-f175bb468f02\",\"panelIndex\":\"8\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"embeddableConfig\":{},\"gridData\":{\"x\":24,\"y\":28,\"w\":24,\"h\":15,\"i\":\"9\"},\"id\":\"b7486640-c28e-11e8-971a-f175bb468f02\",\"panelIndex\":\"9\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"gridData\":{\"x\":24,\"y\":14,\"w\":24,\"h\":7,\"i\":\"10\"},\"version\":\"6.4.1\",\"panelIndex\":\"10\",\"type\":\"visualization\",\"id\":\"c60dcdc0-c291-11e8-971a-f175bb468f02\",\"embeddableConfig\":{}},{\"gridData\":{\"x\":24,\"y\":43,\"w\":24,\"h\":15,\"i\":\"11\"},\"version\":\"6.4.1\",\"panelIndex\":\"11\",\"type\":\"visualization\",\"id\":\"9775f9f0-c053-11e8-85cd-77149200218b\",\"embeddableConfig\":{}}]", +      "optionsJSON": "{\"darkTheme\":false,\"hidePanelTitles\":false,\"useMargins\":true}", +      "version": 1, +      "timeRestore": false, +      "kibanaSavedObjectMeta": { +        "searchSourceJSON": "{\"query\":{\"language\":\"lucene\",\"query\":\"\"},\"filter\":[]}" +      } +    } +  }, +  { +    "_id": "76cffdd0-c135-11e8-b25a-833364b3ed98", +    "_type": "index-pattern", +    "_source": { +     
 "title": "fatcat", +      "fields": "[{\"name\":\"_id\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"any_abstract\",\"type\":\"boolean\",\"count\":1,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"author\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"container_is_longtail_oa\",\"type\":\"boolean\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"container_is_oa\",\"type\":\"boolean\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"container_issnl\",\"type\":\"string\",\"count\":2,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"container_name\",\"type\":\"string\",\"count\":2,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"contrib_count\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"contrib_names\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"core_id\",\"type\":\"st
ring\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"date\",\"type\":\"date\",\"count\":2,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"doi\",\"type\":\"string\",\"count\":2,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"file_count\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"file_in_ia\",\"type\":\"boolean\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"file_in_webarchive\",\"type\":\"boolean\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"file_pdf_url\",\"type\":\"string\",\"count\":1,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"ident\",\"type\":\"string\",\"count\":2,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"in_shadow\",\"type\":\"boolean\",\"count\":2,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"is_kept\",\"type\":\"boolean\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"isbn13\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"issn\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"journal\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"language\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"longt
ail\",\"type\":\"boolean\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"oa\",\"type\":\"boolean\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"pmcid\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"pmid\",\"type\":\"string\",\"count\":1,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"publisher\",\"type\":\"string\",\"count\":2,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"ref_count\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"release_date\",\"type\":\"date\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"release_status\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"release_type\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"revision\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"title\",\"type\":\"string\",\"count\":1,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"wikidata_qid\",\"type\":\"string\",\"count\":1,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]" +    } +  }, +  { +    "_id": "13b3e8f0-f40d-11e8-9c3f-6727f10acf9f", +    "_type": "search", +    "_source": { +      "title": "Example Wikicite Entities (bigger)", +      "description": "", +      "hits": 0, +      "columns": [ +        "file_pdf_url", +        
"wikidata_qid", +        "title", +        "doi", +        "date", +        "publisher", +        "ident", +        "container_name" +      ], +      "sort": [ +        "_score", +        "desc" +      ], +      "version": 1, +      "kibanaSavedObjectMeta": { +        "searchSourceJSON": "{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"highlightAll\":true,\"version\":true,\"query\":{\"language\":\"lucene\",\"query\":\"\"},\"filter\":[{\"$state\":{\"store\":\"appState\"},\"exists\":{\"field\":\"wikidata_qid\"},\"meta\":{\"alias\":null,\"disabled\":false,\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"key\":\"wikidata_qid\",\"negate\":false,\"type\":\"exists\",\"value\":\"exists\"}},{\"$state\":{\"store\":\"appState\"},\"meta\":{\"alias\":null,\"disabled\":false,\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"key\":\"file_in_webarchive\",\"negate\":false,\"params\":{\"query\":true,\"type\":\"phrase\"},\"type\":\"phrase\",\"value\":\"true\"},\"query\":{\"match\":{\"file_in_webarchive\":{\"query\":true,\"type\":\"phrase\"}}}},{\"$state\":{\"store\":\"appState\"},\"meta\":{\"alias\":null,\"disabled\":false,\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"key\":\"container_is_oa\",\"negate\":false,\"params\":{\"query\":false,\"type\":\"phrase\"},\"type\":\"phrase\",\"value\":\"false\"},\"query\":{\"match\":{\"container_is_oa\":{\"query\":false,\"type\":\"phrase\"}}}},{\"$state\":{\"store\":\"appState\"},\"exists\":{\"field\":\"pmcid\"},\"meta\":{\"alias\":null,\"disabled\":false,\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"key\":\"pmcid\",\"negate\":true,\"type\":\"exists\",\"value\":\"exists\"}},{\"$state\":{\"store\":\"appState\"},\"meta\":{\"alias\":null,\"disabled\":false,\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"key\":\"release_status\",\"negate\":false,\"params\":{\"query\":\"published\",\"type\":\"phrase\"},\"type\":\"phrase\",\"value\":\"published\"},\"query\":{\"match\":{\"release_status\":{\"query\":\"published\",\"type\":\
"phrase\"}}}}]}" +      } +    } +  }, +  { +    "_id": "b39dd390-f40c-11e8-9c3f-6727f10acf9f", +    "_type": "search", +    "_source": { +      "title": "Example Wikicite Entities (dance)", +      "description": "", +      "hits": 0, +      "columns": [ +        "file_pdf_url", +        "wikidata_qid", +        "title", +        "container_name", +        "date", +        "doi", +        "ident", +        "publisher" +      ], +      "sort": [ +        "_score", +        "desc" +      ], +      "version": 1, +      "kibanaSavedObjectMeta": { +        "searchSourceJSON": "{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"highlightAll\":true,\"version\":true,\"query\":{\"query\":\"dance\",\"language\":\"lucene\"},\"filter\":[{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"exists\",\"key\":\"wikidata_qid\",\"value\":\"exists\"},\"exists\":{\"field\":\"wikidata_qid\"},\"$state\":{\"store\":\"appState\"}},{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"file_in_webarchive\",\"value\":\"true\",\"params\":{\"query\":true,\"type\":\"phrase\"}},\"query\":{\"match\":{\"file_in_webarchive\":{\"query\":true,\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}},{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"container_is_oa\",\"value\":\"false\",\"params\":{\"query\":false,\"type\":\"phrase\"}},\"query\":{\"match\":{\"container_is_oa\":{\"query\":false,\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}},{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"negate\":true,\"disabled\":false,\"alias\":null,\"type\":\"exists\",\"key\":\"pmcid\",\"value\":\"exists\"},\"exists\":{\"field\":\"pmcid\"},\"$state\":{\"store\":\"appState\"}},{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\
",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"release_status\",\"value\":\"published\",\"params\":{\"query\":\"published\",\"type\":\"phrase\"}},\"query\":{\"match\":{\"release_status\":{\"query\":\"published\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" +      } +    } +  }, +  { +    "_id": "567f9f70-f422-11e8-9c3f-6727f10acf9f", +    "_type": "search", +    "_source": { +      "title": "Example Wikicite Entities", +      "description": "", +      "hits": 0, +      "columns": [ +        "file_pdf_url", +        "wikidata_qid", +        "title", +        "doi", +        "date", +        "publisher", +        "ident", +        "container_name" +      ], +      "sort": [ +        "_score", +        "desc" +      ], +      "version": 1, +      "kibanaSavedObjectMeta": { +        "searchSourceJSON": "{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"highlightAll\":true,\"version\":true,\"query\":{\"language\":\"lucene\",\"query\":\"zika\"},\"filter\":[{\"$state\":{\"store\":\"appState\"},\"exists\":{\"field\":\"wikidata_qid\"},\"meta\":{\"alias\":null,\"disabled\":false,\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"key\":\"wikidata_qid\",\"negate\":false,\"type\":\"exists\",\"value\":\"exists\"}},{\"$state\":{\"store\":\"appState\"},\"meta\":{\"alias\":null,\"disabled\":false,\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"key\":\"file_in_webarchive\",\"negate\":false,\"params\":{\"query\":true,\"type\":\"phrase\"},\"type\":\"phrase\",\"value\":\"true\"},\"query\":{\"match\":{\"file_in_webarchive\":{\"query\":true,\"type\":\"phrase\"}}}},{\"$state\":{\"store\":\"appState\"},\"meta\":{\"alias\":null,\"disabled\":false,\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"key\":\"container_is_oa\",\"negate\":false,\"params\":{\"query\":false,\"type\":\"phrase\"},\"type\":\"phrase\",\"value\":\"false\"},\"query\":{\"match\":{\"container_is_oa\":{\"query\":false,\"type\":\"phrase\"}}}},{\"$state\":{\"st
ore\":\"appState\"},\"exists\":{\"field\":\"pmcid\"},\"meta\":{\"alias\":null,\"disabled\":false,\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"key\":\"pmcid\",\"negate\":true,\"type\":\"exists\",\"value\":\"exists\"}},{\"$state\":{\"store\":\"appState\"},\"meta\":{\"alias\":null,\"disabled\":false,\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"key\":\"release_status\",\"negate\":false,\"params\":{\"query\":\"published\",\"type\":\"phrase\"},\"type\":\"phrase\",\"value\":\"published\"},\"query\":{\"match\":{\"release_status\":{\"query\":\"published\",\"type\":\"phrase\"}}}}]}" +      } +    } +  }, +  { +    "_id": "9775f9f0-c053-11e8-85cd-77149200218b", +    "_type": "visualization", +    "_source": { +      "title": "Metadata Ingest", +      "visState": "{\"title\":\"Metadata Ingest\",\"type\":\"gauge\",\"params\":{\"type\":\"gauge\",\"addTooltip\":true,\"addLegend\":true,\"isDisplayWarning\":false,\"gauge\":{\"verticalSplit\":false,\"extendRange\":true,\"percentageMode\":false,\"gaugeType\":\"Arc\",\"gaugeStyle\":\"Full\",\"backStyle\":\"Full\",\"orientation\":\"vertical\",\"colorSchema\":\"Green to Red\",\"gaugeColorMode\":\"Labels\",\"colorsRange\":[{\"from\":0,\"to\":60000000},{\"from\":60000000,\"to\":100000000},{\"from\":100000000,\"to\":110000000}],\"invertColors\":false,\"labels\":{\"show\":true,\"color\":\"black\"},\"scale\":{\"show\":true,\"labels\":false,\"color\":\"#333\"},\"type\":\"meter\",\"style\":{\"bgWidth\":0.9,\"width\":0.9,\"mask\":false,\"bgMask\":false,\"maskBars\":50,\"bgFill\":\"#eee\",\"bgColor\":false,\"subText\":\"of 110 million (estimate)\",\"fontSize\":60,\"labelColor\":true}}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"Total Metadata\"}}]}", +      "uiStateJSON": "{\"vis\":{\"defaultColors\":{\"0 - 60000000\":\"rgb(0,104,55)\",\"60000000 - 100000000\":\"rgb(255,255,190)\",\"100000000 - 110000000\":\"rgb(165,0,38)\"},\"colors\":{\"0 - 
70000000\":\"#F9934E\",\"70000000 - 100000000\":\"#E5AC0E\",\"100000000 - 120000000\":\"#629E51\",\"0 - 60000000\":\"#EA6460\",\"60000000 - 100000000\":\"#5195CE\",\"100000000 - 110000000\":\"#629E51\"}}}", +      "description": "", +      "version": 1, +      "kibanaSavedObjectMeta": { +        "searchSourceJSON": "{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[]}" +      } +    } +  }, +  { +    "_id": "be6e8f70-c055-11e8-85cd-77149200218b", +    "_type": "visualization", +    "_source": { +      "title": "Releases (articles, etc) with at least one identified file", +      "visState": "{\"title\":\"Releases (articles, etc) with at least one identified file\",\"type\":\"metric\",\"params\":{\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\",\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to Red\",\"metricColorMode\":\"None\",\"colorsRange\":[{\"from\":0,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\",\"fontSize\":60}}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}}]}", +      "uiStateJSON": "{}", +      "description": "", +      "version": 1, +      "kibanaSavedObjectMeta": { +        "searchSourceJSON": "{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[{\"meta\":{\"index\":\"7e335490-c050-11e8-85cd-77149200218b\",\"type\":\"phrases\",\"key\":\"release_type\",\"value\":\"journal-article, proceedings-article, 
book\",\"params\":[\"journal-article\",\"proceedings-article\",\"book\"],\"negate\":false,\"disabled\":false,\"alias\":null},\"query\":{\"bool\":{\"should\":[{\"match_phrase\":{\"release_type\":\"journal-article\"}},{\"match_phrase\":{\"release_type\":\"proceedings-article\"}},{\"match_phrase\":{\"release_type\":\"book\"}}],\"minimum_should_match\":1}},\"$state\":{\"store\":\"appState\"}},{\"meta\":{\"index\":\"7e335490-c050-11e8-85cd-77149200218b\",\"negate\":true,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"file_count\",\"value\":0,\"params\":{\"query\":0,\"type\":\"phrase\"}},\"query\":{\"match\":{\"file_count\":{\"query\":0,\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}},{\"meta\":{\"index\":\"7e335490-c050-11e8-85cd-77149200218b\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"exists\",\"key\":\"file_count\",\"value\":\"exists\"},\"exists\":{\"field\":\"file_count\"},\"$state\":{\"store\":\"appState\"}}]}" +      } +    } +  }, +  { +    "_id": "373d7a80-c13a-11e8-b25a-833364b3ed98", +    "_type": "visualization", +    "_source": { +      "title": "IA files by OA status", +      "visState": "{\"title\":\"IA files by OA status\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"isDonut\":true,\"labels\":{\"show\":false,\"values\":true,\"last_level\":true,\"truncate\":100}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"oa\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"Open 
Access\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"file_in_ia\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"IA Has Copy\"}}]}", +      "uiStateJSON": "{\"vis\":{\"colors\":{\"false\":\"#3F2B5B\",\"true\":\"#629E51\"},\"legendOpen\":false}}", +      "description": "", +      "version": 1, +      "kibanaSavedObjectMeta": { +        "searchSourceJSON": "{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"type\":\"phrases\",\"key\":\"release_type\",\"value\":\"journal-article, proceedings-article\",\"params\":[\"journal-article\",\"proceedings-article\"],\"negate\":false,\"disabled\":false,\"alias\":null},\"query\":{\"bool\":{\"should\":[{\"match_phrase\":{\"release_type\":\"journal-article\"}},{\"match_phrase\":{\"release_type\":\"proceedings-article\"}}],\"minimum_should_match\":1}},\"$state\":{\"store\":\"appState\"}}]}" +      } +    } +  }, +  { +    "_id": "b7486640-c28e-11e8-971a-f175bb468f02", +    "_type": "visualization", +    "_source": { +      "title": "Release Types (with/without fulltext)", +      "visState": "{\"title\":\"Release Types (with/without 
fulltext)\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"isDonut\":false,\"labels\":{\"show\":false,\"values\":true,\"last_level\":true,\"truncate\":100}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"Releases\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"split\",\"params\":{\"field\":\"file_in_ia\",\"size\":2,\"order\":\"desc\",\"orderBy\":\"1\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"Does IA Have Fulltext?\",\"row\":false}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"release_type\",\"size\":8,\"order\":\"desc\",\"orderBy\":\"1\",\"otherBucket\":true,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"Release Type\"}}]}", +      "uiStateJSON": "{}", +      "description": "", +      "version": 1, +      "kibanaSavedObjectMeta": { +        "searchSourceJSON": "{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[]}" +      } +    } +  }, +  { +    "_id": "e3720880-c053-11e8-85cd-77149200218b", +    "_type": "visualization", +    "_source": { +      "title": "Does IA Have a Copy (by year)", +      "visState": "{\"title\":\"Does IA Have a Copy (by 
year)\",\"type\":\"area\",\"params\":{\"type\":\"area\",\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"area\",\"mode\":\"stacked\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"interpolate\":\"linear\",\"valueAxis\":\"ValueAxis-1\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"release_date\",\"interval\":\"y\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"file_in_ia\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"otherBucket\":true,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\"}}]}", +      "uiStateJSON": "{}", +      "description": "", +      "version": 1, +      "kibanaSavedObjectMeta": { +        "searchSourceJSON": "{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[]}" +      } +    } +  }, +  { +    "_id": "9b21de00-c28f-11e8-971a-f175bb468f02", +    "_type": "visualization", +    "_source": { +      "title": "Fulltext Release Progress", +      "visState": "{\"title\":\"Fulltext Release 
Progress\",\"type\":\"goal\",\"params\":{\"addTooltip\":true,\"addLegend\":false,\"isDisplayWarning\":false,\"type\":\"gauge\",\"gauge\":{\"verticalSplit\":false,\"autoExtend\":false,\"percentageMode\":false,\"gaugeType\":\"Arc\",\"gaugeStyle\":\"Full\",\"backStyle\":\"Full\",\"orientation\":\"vertical\",\"useRanges\":false,\"colorSchema\":\"Green to Red\",\"gaugeColorMode\":\"None\",\"colorsRange\":[{\"from\":0,\"to\":5000000},{\"from\":5000000,\"to\":15000000},{\"from\":15000000,\"to\":30000000},{\"from\":30000000,\"to\":50000000}],\"invertColors\":false,\"labels\":{\"show\":true,\"color\":\"black\"},\"scale\":{\"show\":true,\"labels\":false,\"color\":\"#333\",\"width\":2},\"type\":\"meter\",\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"Goal is 50 Million\",\"fontSize\":60},\"extendRange\":true}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}}]}", +      "uiStateJSON": "{\"vis\":{\"defaultColors\":{\"0 - 5000000\":\"rgb(0,104,55)\",\"5000000 - 15000000\":\"rgb(183,224,117)\",\"15000000 - 30000000\":\"rgb(253,191,111)\",\"30000000 - 50000000\":\"rgb(165,0,38)\"},\"colors\":{\"0 - 5000000\":\"#BF1B00\",\"5000000 - 15000000\":\"#EF843C\",\"15000000 - 40000000\":\"#629E51\",\"15000000 - 30000000\":\"#9AC48A\",\"30000000 - 50000000\":\"#3F6833\"},\"legendOpen\":false}}", +      "description": "", +      "version": 1, +      "kibanaSavedObjectMeta": { +        "searchSourceJSON": "{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"type\":\"phrases\",\"key\":\"release_type\",\"value\":\"journal-article, 
proceedings-article\",\"params\":[\"journal-article\",\"proceedings-article\"],\"negate\":false,\"disabled\":false,\"alias\":null},\"query\":{\"bool\":{\"should\":[{\"match_phrase\":{\"release_type\":\"journal-article\"}},{\"match_phrase\":{\"release_type\":\"proceedings-article\"}}],\"minimum_should_match\":1}},\"$state\":{\"store\":\"appState\"}},{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"file_in_ia\",\"value\":\"true\",\"params\":{\"query\":true,\"type\":\"phrase\"}},\"query\":{\"match\":{\"file_in_ia\":{\"query\":true,\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" +      } +    } +  }, +  { +    "_id": "acd4e760-c054-11e8-85cd-77149200218b", +    "_type": "visualization", +    "_source": { +      "title": "Archival Status", +      "visState": "{\"title\":\"Archival Status\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"isDonut\":true,\"labels\":{\"show\":false,\"values\":true,\"last_level\":true,\"truncate\":100}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"is_kept\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"Formally Archived\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"file_in_webarchive\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"In Web Archive\"}}]}", +      "uiStateJSON": "{\"vis\":{\"colors\":{\"true\":\"#629E51\",\"false\":\"#E24D42\"},\"legendOpen\":false}}", +      "description": "", +      "version": 
1, +      "kibanaSavedObjectMeta": { +        "searchSourceJSON": "{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"type\":\"phrases\",\"key\":\"release_type\",\"value\":\"journal-article, proceedings-article\",\"params\":[\"journal-article\",\"proceedings-article\"],\"negate\":false,\"disabled\":false,\"alias\":null},\"query\":{\"bool\":{\"should\":[{\"match_phrase\":{\"release_type\":\"journal-article\"}},{\"match_phrase\":{\"release_type\":\"proceedings-article\"}}],\"minimum_should_match\":1}},\"$state\":{\"store\":\"appState\"}},{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"_type\",\"value\":\"release\",\"params\":{\"query\":\"release\",\"type\":\"phrase\"}},\"query\":{\"match\":{\"_type\":{\"query\":\"release\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" +      } +    } +  }, +  { +    "_id": "c60dcdc0-c291-11e8-971a-f175bb468f02", +    "_type": "visualization", +    "_source": { +      "title": "Long-Tail Fulltext Count", +      "visState": "{\"title\":\"Long-Tail Fulltext Count\",\"type\":\"metric\",\"params\":{\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\",\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to Red\",\"metricColorMode\":\"None\",\"colorsRange\":[{\"from\":0,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\",\"fontSize\":60}}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"Long-Tail Releases With Fulltext\"}}]}", +      "uiStateJSON": "{}", +      "description": "", +      "version": 1, +      "kibanaSavedObjectMeta": { +        "searchSourceJSON": 
"{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"longtail\",\"value\":\"true\",\"params\":{\"query\":true,\"type\":\"phrase\"}},\"query\":{\"match\":{\"longtail\":{\"query\":true,\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}},{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"file_in_ia\",\"value\":\"true\",\"params\":{\"query\":true,\"type\":\"phrase\"}},\"query\":{\"match\":{\"file_in_ia\":{\"query\":true,\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" +      } +    } +  }, +  { +    "_id": "e9388f30-c28f-11e8-971a-f175bb468f02", +    "_type": "visualization", +    "_source": { +      "title": "Release Count Total", +      "visState": "{\"title\":\"Release Count Total\",\"type\":\"metric\",\"params\":{\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\",\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to Red\",\"metricColorMode\":\"None\",\"colorsRange\":[{\"from\":0,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\",\"fontSize\":60}}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"Release Metadata Records (all types)\"}}]}", +      "uiStateJSON": "{}", +      "description": "", +      "version": 1, +      "kibanaSavedObjectMeta": { +        "searchSourceJSON": 
"{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"_type\",\"value\":\"release\",\"params\":{\"query\":\"release\",\"type\":\"phrase\"}},\"query\":{\"match\":{\"_type\":{\"query\":\"release\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" +      } +    } +  }, +  { +    "_id": "bfe7e6b0-c6b0-11e8-971a-f175bb468f02", +    "_type": "visualization", +    "_source": { +      "title": "Coverage by ISSN", +      "visState": "{\"title\":\"Coverage by ISSN\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"isDonut\":true,\"labels\":{\"show\":false,\"values\":true,\"last_level\":true,\"truncate\":100}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"file_in_ia\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\"}}]}", +      "uiStateJSON": "{}", +      "description": "", +      "version": 1, +      "kibanaSavedObjectMeta": { +        "searchSourceJSON": "{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"container_issnl\",\"value\":\"0002-8762\",\"params\":{\"query\":\"0002-8762\",\"type\":\"phrase\"}},\"query\":{\"match\":{\"container_issnl\":{\"query\":\"0002-8762\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" +      } +    } +  } +]
#!/usr/bin/env python3
"""
Print summary statistics about the fatcat catalog.

Combines the latest changelog entry (fetched through the fatcat API client)
with aggregate counts from the Elasticsearch ``fatcat_release`` and
``fatcat_container`` indices, and prints the result to stdout.
"""

import datetime
import sys

import requests

from fatcat_tools import public_api

#api_host_url = "http://localhost:9411/v0"
api_host_url = "https://api.fatcat.wiki/v0"

# NOTE: must not have trailing slash
elastic_host_url = "https://search.fatcat.wiki"

api = public_api(api_host_url)


def _elastic_search(index, query):
    """
    Run a ``_search`` request against one Elasticsearch index.

    Args:
        index: index name, eg "fatcat_release"
        query: request body (dict), sent as JSON

    Returns the decoded JSON response (dict).

    Raises requests.HTTPError on a non-2xx response. The status is checked
    *before* the body is decoded, so that an error page which is not valid
    JSON surfaces as an HTTP error and not as a JSON decoding error.
    """
    resp = requests.get(
        "{}/{}/_search".format(elastic_host_url, index),
        json=query,
        params=dict(request_cache="true"))
    # TODO: abort()
    resp.raise_for_status()
    return resp.json()


def get_changelog_stats():
    """
    Fetches the most recent changelog entry from the fatcat API.

    Returns dict:
        changelog: {latest: {index, timestamp}}
    """

    stats = {}

    # 1. latest changelog
    latest_changelog = api.get_changelog(limit=1)[0]
    stats['changelog'] = {"latest": {
        "index": latest_changelog.index,
        "timestamp": latest_changelog.timestamp.isoformat(),
    }}
    return stats


def get_elastic_entity_stats():
    """
    TODO: files, filesets, webcaptures (no schema yet)

    Returns dict:
        release: {total, refs_total}
        papers: {total, in_web, is_oa, in_kbart, in_web_not_kbart}
        container: {total}

    Raises requests.HTTPError if any of the Elasticsearch requests fails.
    """
    # NOTE(review): 'total' values are copied verbatim from hits.total; newer
    # Elasticsearch versions may return an object there instead of an integer
    # -- confirm against the server version in use.

    stats = {}

    # 2. releases
    #  x=> total count
    #  x=> total citation records
    #  x=> total (paper, chapter, proceeding)
    #  x=> with fulltext on web
    #  x=> open access
    #  x=> not in KBART, in IA
    #
    # Can probably do the above with two queries:
    #  - all releases, aggregate count and sum(ref_count)
    #  - in-scope works, aggregate count by (fulltext, OA, kbart/ia)

    # 2a. release totals
    query = {
        "size": 0,
        "aggs": {
            "release_ref_count": {"sum": {"field": "ref_count"}},
        },
    }
    resp = _elastic_search("fatcat_release", query)
    stats['release'] = {
        "total": resp['hits']['total'],
        # a "sum" aggregation comes back as {"value": <number>}; unwrap it
        # so callers get the count itself, not the wrapper object
        "refs_total": int(resp['aggregations']['release_ref_count']['value']),
    }

    # 2b. paper counts
    query = {
        "size": 0,
        "query": {
            "terms": {"release_type": [
                # "chapter", "thesis",
                "article-journal", "paper-conference",
            ]},
        },
        "aggs": {"paper_like": {"filters": {"filters": {
            "in_web": {"term": {"in_web": "true"}},
            "is_oa": {"term": {"is_oa": "true"}},
            "in_kbart": {"term": {"in_kbart": "true"}},
            "in_web_not_kbart": {"bool": {"filter": [
                {"term": {"in_web": "true"}},
                {"term": {"in_kbart": "false"}},
            ]}},
        }}}},
    }
    resp = _elastic_search("fatcat_release", query)
    # raw response is echoed to stdout (kept from the original script)
    print(resp)
    buckets = resp['aggregations']['paper_like']['buckets']
    stats['papers'] = {
        'total': resp['hits']['total'],
        'in_web': buckets['in_web']['doc_count'],
        'is_oa': buckets['is_oa']['doc_count'],
        'in_kbart': buckets['in_kbart']['doc_count'],
        'in_web_not_kbart': buckets['in_web_not_kbart']['doc_count'],
    }

    # 3. containers
    #   => total count
    query = {
        "size": 0,
    }
    resp = _elastic_search("fatcat_container", query)
    stats['container'] = {
        "total": resp['hits']['total'],
    }

    return stats


def print_stats(stats):
    """
    Prints a one-line summary of the latest changelog entry, followed by the
    complete stats dict.

    Expects stats['changelog']['latest'] to have 'index' and 'timestamp' keys.
    """
    latest_changelog = stats['changelog']['latest']
    print("Latest changelog: {} ({})".format(
        latest_changelog['index'],
        latest_changelog['timestamp']))
    print(stats)


def get_elastic_container_stats(issnl):
    """
    TODO: container_id, not issnl

    Counts releases whose container_issnl matches the given ISSN-L.

    Returns dict:
        issnl
        total
        in_web
        is_preserved

    Raises requests.HTTPError if the Elasticsearch request fails.
    """

    query = {
        "size": 0,
        "query": {
            "term": {"container_issnl": issnl},
        },
        "aggs": {"container_stats": {"filters": {"filters": {
            "in_web": {"term": {"in_web": "true"}},
            "is_preserved": {"term": {"is_preserved": "true"}},
        }}}},
    }
    # HTTP status is verified inside the helper before the body is decoded
    resp = _elastic_search("fatcat_release", query)
    # raw response is echoed to stdout (kept from the original script)
    print(resp)
    buckets = resp['aggregations']['container_stats']['buckets']
    stats = {
        'issnl': issnl,
        'total': resp['hits']['total'],
        'in_web': buckets['in_web']['doc_count'],
        'is_preserved': buckets['is_preserved']['doc_count'],
    }

    return stats


# Script body: runs at module level, same output order as before (overall
# stats first, then the stats for a single example container).
stats = {}
stats.update(get_changelog_stats())
stats.update(get_elastic_entity_stats())
print_stats(stats)

print(get_elastic_container_stats("0140-6736"))
