From d275c5c28c69af37b1bdf0b502d5201a234f828a Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 7 May 2019 17:30:45 -0700 Subject: WIP: more/extra elasticsearch files --- extra/elasticsearch/elastic_queries.txt | 224 ++++++++++++++++++++++++++ extra/elasticsearch/kibana_dashboards.json | 247 +++++++++++++++++++++++++++++ extra/elasticsearch/stats.py | 174 ++++++++++++++++++++ 3 files changed, 645 insertions(+) create mode 100644 extra/elasticsearch/elastic_queries.txt create mode 100644 extra/elasticsearch/kibana_dashboards.json create mode 100644 extra/elasticsearch/stats.py diff --git a/extra/elasticsearch/elastic_queries.txt b/extra/elasticsearch/elastic_queries.txt new file mode 100644 index 00000000..91bff199 --- /dev/null +++ b/extra/elasticsearch/elastic_queries.txt @@ -0,0 +1,224 @@ + +GET /fatcat_release/_search?request_cache=true +{ + "size": 0, + "aggs": { + "popular_journals": { + "terms": { + "field": "container_issnl" + } + } + } +} + +GET /fatcat_release/_count +{ + "query": { + "term": {"container_issnl": "1932-6203"} + } +} + +"quick counts" +
GET /fatcat_container/_count + +GET /fatcat_release/_count + +GET /fatcat_release/_count +{ + "query": { + "terms": { + "release_type": ["article-journal", "chapter", "paper-conference", "thesis"] + } + } +} + +"in-scope works on web (fulltext)": +GET /fatcat_release/_count +{ + "query": { + "bool": { + "filter": [ + { + "terms": { + "release_type": [ + "article-journal", + "chapter", + "paper-conference", + "thesis" + ] + } + }, + { + "term": { + "in_web": "true" + } + } + ] + } + } +} + +"in-scope OA" +GET /fatcat_release/_count +{ + "query": { + "bool": { + "filter": [ + { + "terms": { + "release_type": [ + "article-journal", + "chapter", + "paper-conference", + "thesis" + ] + } + }, + { + "term": { + "is_oa": "true" + } + } + ] + } + } +} + +"not in KBART, in web" +GET /fatcat_release/_count +{ + "query": { + "bool": { + "filter": [ + { + "terms": { + "release_type": [ + "article-journal", + "chapter", + "paper-conference", + "thesis" + ] + } + }, + { + "term": { + "in_kbart": "false" + } + }, + { + "term": { + "in_web": "true" + } + } + ] + } + } +} + +GET /fatcat_release/_search?request_cache=true +{ + "size": 0, + "aggs": { + "release_ref_count": { + "sum": { + "field": "ref_count" + } + } + } +} + +##### + +GET /fatcat_release/_search?request_cache=true +{ + "size": 0, + "aggs": { + "release_ref_count": { + "sum": { + "field": "ref_count" + } + } + } +} + +GET /fatcat_release/_search?request_cache=true +{ + "size": 0, + "query": { + "terms": { + "release_type": [ + "article-journal", + "chapter", + "paper-conference", + "thesis" + ] + } + }, + "aggs": { + "paper_like": { + "filters": { + "filters": { + "in_web": { + "term": { + "in_web": "true" + } + }, + "is_oa": { + "term": { + "is_oa": "true" + } + }, + "in_kbart": { + "term": { + "in_kbart": "true" + } + }, + "in_web_not_kbart": { + "bool": { + "filter": [ + { + "term": { + "in_web": "true" + } + }, + { + "term": { + "in_kbart": "false" + } + } + ] + } + } + } + } + } + } +} + + +################# 
+"search inside a container": + +GET /fatcat_release/_search?request_cache=true +{ + "query": { + "bool": { + "must": { + "query_string": { + "query": "blood", + "default_operator": "AND", + "analyze_wildcard": "true", + "lenient": "true", + "fields": ["title^5", "contrib_names^2"] + } + }, + "filter": { + "term": { + "container_issnl": "1932-6203" + } + } + } + } +} + + diff --git a/extra/elasticsearch/kibana_dashboards.json b/extra/elasticsearch/kibana_dashboards.json new file mode 100644 index 00000000..654497e4 --- /dev/null +++ b/extra/elasticsearch/kibana_dashboards.json @@ -0,0 +1,247 @@ +[ + { + "_id": "cc1da1c0-c054-11e8-85cd-77149200218b", + "_type": "dashboard", + "_source": { + "title": "Generic Demo-y Dashboard", + "hits": 0, + "description": "Just for fun!", + "panelsJSON": "[{\"embeddableConfig\":{\"vis\":{\"colors\":{\"false\":\"#E24D42\",\"true\":\"#629E51\"},\"legendOpen\":false}},\"gridData\":{\"x\":0,\"y\":0,\"w\":24,\"h\":15,\"i\":\"3\"},\"id\":\"acd4e760-c054-11e8-85cd-77149200218b\",\"panelIndex\":\"3\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"embeddableConfig\":{},\"gridData\":{\"x\":0,\"y\":15,\"w\":24,\"h\":15,\"i\":\"6\"},\"id\":\"373d7a80-c13a-11e8-b25a-833364b3ed98\",\"panelIndex\":\"6\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"embeddableConfig\":{},\"gridData\":{\"x\":24,\"y\":21,\"w\":24,\"h\":7,\"i\":\"7\"},\"id\":\"e9388f30-c28f-11e8-971a-f175bb468f02\",\"panelIndex\":\"7\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"embeddableConfig\":{},\"gridData\":{\"x\":24,\"y\":0,\"w\":24,\"h\":14,\"i\":\"8\"},\"id\":\"9b21de00-c28f-11e8-971a-f175bb468f02\",\"panelIndex\":\"8\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"embeddableConfig\":{},\"gridData\":{\"x\":24,\"y\":28,\"w\":24,\"h\":15,\"i\":\"9\"},\"id\":\"b7486640-c28e-11e8-971a-f175bb468f02\",\"panelIndex\":\"9\",\"type\":\"visualization\",\"version\":\"6.4.1\"},{\"gridData\":{\"x\":24,\"y\":14,\"w\":24,\"h\":7,\"i\":\"10\"},\"version\":\"6.4.1
\",\"panelIndex\":\"10\",\"type\":\"visualization\",\"id\":\"c60dcdc0-c291-11e8-971a-f175bb468f02\",\"embeddableConfig\":{}},{\"gridData\":{\"x\":24,\"y\":43,\"w\":24,\"h\":15,\"i\":\"11\"},\"version\":\"6.4.1\",\"panelIndex\":\"11\",\"type\":\"visualization\",\"id\":\"9775f9f0-c053-11e8-85cd-77149200218b\",\"embeddableConfig\":{}}]", + "optionsJSON": "{\"darkTheme\":false,\"hidePanelTitles\":false,\"useMargins\":true}", + "version": 1, + "timeRestore": false, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"query\":{\"language\":\"lucene\",\"query\":\"\"},\"filter\":[]}" + } + } + }, + { + "_id": "76cffdd0-c135-11e8-b25a-833364b3ed98", + "_type": "index-pattern", + "_source": { + "title": "fatcat", + "fields": "[{\"name\":\"_id\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_index\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"_score\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_source\",\"type\":\"_source\",\"count\":0,\"scripted\":false,\"searchable\":false,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"_type\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":false},{\"name\":\"any_abstract\",\"type\":\"boolean\",\"count\":1,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"author\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"container_is_longtail_oa\",\"type\":\"boolean\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"container_is_oa\",\"type\":\"boolean\",\"count\":0,\"scripted\":false,\"searchable\":tru
e,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"container_issnl\",\"type\":\"string\",\"count\":2,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"container_name\",\"type\":\"string\",\"count\":2,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"contrib_count\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"contrib_names\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"core_id\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"date\",\"type\":\"date\",\"count\":2,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"doi\",\"type\":\"string\",\"count\":2,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"file_count\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"file_in_ia\",\"type\":\"boolean\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"file_in_webarchive\",\"type\":\"boolean\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"file_pdf_url\",\"type\":\"string\",\"count\":1,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"ident\",\"type\":\"string\",\"count\":2,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"in_shadow\",\"type\":\"boolean\",\"count\":2,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"is_kept\",\"type\":\"boolean\"
,\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"isbn13\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"issn\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"journal\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"language\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"longtail\",\"type\":\"boolean\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"oa\",\"type\":\"boolean\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"pmcid\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"pmid\",\"type\":\"string\",\"count\":1,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"publisher\",\"type\":\"string\",\"count\":2,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"ref_count\",\"type\":\"number\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"release_date\",\"type\":\"date\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"release_status\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"release_type\",\"type\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"revision\",\"t
ype\":\"string\",\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true},{\"name\":\"title\",\"type\":\"string\",\"count\":1,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false},{\"name\":\"wikidata_qid\",\"type\":\"string\",\"count\":1,\"scripted\":false,\"searchable\":true,\"aggregatable\":true,\"readFromDocValues\":true}]" + } + }, + { + "_id": "13b3e8f0-f40d-11e8-9c3f-6727f10acf9f", + "_type": "search", + "_source": { + "title": "Example Wikicite Entities (bigger)", + "description": "", + "hits": 0, + "columns": [ + "file_pdf_url", + "wikidata_qid", + "title", + "doi", + "date", + "publisher", + "ident", + "container_name" + ], + "sort": [ + "_score", + "desc" + ], + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"highlightAll\":true,\"version\":true,\"query\":{\"language\":\"lucene\",\"query\":\"\"},\"filter\":[{\"$state\":{\"store\":\"appState\"},\"exists\":{\"field\":\"wikidata_qid\"},\"meta\":{\"alias\":null,\"disabled\":false,\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"key\":\"wikidata_qid\",\"negate\":false,\"type\":\"exists\",\"value\":\"exists\"}},{\"$state\":{\"store\":\"appState\"},\"meta\":{\"alias\":null,\"disabled\":false,\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"key\":\"file_in_webarchive\",\"negate\":false,\"params\":{\"query\":true,\"type\":\"phrase\"},\"type\":\"phrase\",\"value\":\"true\"},\"query\":{\"match\":{\"file_in_webarchive\":{\"query\":true,\"type\":\"phrase\"}}}},{\"$state\":{\"store\":\"appState\"},\"meta\":{\"alias\":null,\"disabled\":false,\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"key\":\"container_is_oa\",\"negate\":false,\"params\":{\"query\":false,\"type\":\"phrase\"},\"type\":\"phrase\",\"value\":\"false\"},\"query\":{\"match\":{\"container_is_oa\":{\"query\":false,\"type\":\"phrase\"}}}},{\"$state\":{\"store\":\"appState\"},\"exists\":{\"field\
":\"pmcid\"},\"meta\":{\"alias\":null,\"disabled\":false,\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"key\":\"pmcid\",\"negate\":true,\"type\":\"exists\",\"value\":\"exists\"}},{\"$state\":{\"store\":\"appState\"},\"meta\":{\"alias\":null,\"disabled\":false,\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"key\":\"release_status\",\"negate\":false,\"params\":{\"query\":\"published\",\"type\":\"phrase\"},\"type\":\"phrase\",\"value\":\"published\"},\"query\":{\"match\":{\"release_status\":{\"query\":\"published\",\"type\":\"phrase\"}}}}]}" + } + } + }, + { + "_id": "b39dd390-f40c-11e8-9c3f-6727f10acf9f", + "_type": "search", + "_source": { + "title": "Example Wikicite Entities (dance)", + "description": "", + "hits": 0, + "columns": [ + "file_pdf_url", + "wikidata_qid", + "title", + "container_name", + "date", + "doi", + "ident", + "publisher" + ], + "sort": [ + "_score", + "desc" + ], + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"highlightAll\":true,\"version\":true,\"query\":{\"query\":\"dance\",\"language\":\"lucene\"},\"filter\":[{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"exists\",\"key\":\"wikidata_qid\",\"value\":\"exists\"},\"exists\":{\"field\":\"wikidata_qid\"},\"$state\":{\"store\":\"appState\"}},{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"file_in_webarchive\",\"value\":\"true\",\"params\":{\"query\":true,\"type\":\"phrase\"}},\"query\":{\"match\":{\"file_in_webarchive\":{\"query\":true,\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}},{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"container_is_oa\",\"value\":\"false\",\"params\":{\"query\":false,\"type\":\"phrase\"}},\"query\":{\"match\":{\
"container_is_oa\":{\"query\":false,\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}},{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"negate\":true,\"disabled\":false,\"alias\":null,\"type\":\"exists\",\"key\":\"pmcid\",\"value\":\"exists\"},\"exists\":{\"field\":\"pmcid\"},\"$state\":{\"store\":\"appState\"}},{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"release_status\",\"value\":\"published\",\"params\":{\"query\":\"published\",\"type\":\"phrase\"}},\"query\":{\"match\":{\"release_status\":{\"query\":\"published\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" + } + } + }, + { + "_id": "567f9f70-f422-11e8-9c3f-6727f10acf9f", + "_type": "search", + "_source": { + "title": "Example Wikicite Entities", + "description": "", + "hits": 0, + "columns": [ + "file_pdf_url", + "wikidata_qid", + "title", + "doi", + "date", + "publisher", + "ident", + "container_name" + ], + "sort": [ + "_score", + "desc" + ], + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": 
"{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"highlightAll\":true,\"version\":true,\"query\":{\"language\":\"lucene\",\"query\":\"zika\"},\"filter\":[{\"$state\":{\"store\":\"appState\"},\"exists\":{\"field\":\"wikidata_qid\"},\"meta\":{\"alias\":null,\"disabled\":false,\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"key\":\"wikidata_qid\",\"negate\":false,\"type\":\"exists\",\"value\":\"exists\"}},{\"$state\":{\"store\":\"appState\"},\"meta\":{\"alias\":null,\"disabled\":false,\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"key\":\"file_in_webarchive\",\"negate\":false,\"params\":{\"query\":true,\"type\":\"phrase\"},\"type\":\"phrase\",\"value\":\"true\"},\"query\":{\"match\":{\"file_in_webarchive\":{\"query\":true,\"type\":\"phrase\"}}}},{\"$state\":{\"store\":\"appState\"},\"meta\":{\"alias\":null,\"disabled\":false,\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"key\":\"container_is_oa\",\"negate\":false,\"params\":{\"query\":false,\"type\":\"phrase\"},\"type\":\"phrase\",\"value\":\"false\"},\"query\":{\"match\":{\"container_is_oa\":{\"query\":false,\"type\":\"phrase\"}}}},{\"$state\":{\"store\":\"appState\"},\"exists\":{\"field\":\"pmcid\"},\"meta\":{\"alias\":null,\"disabled\":false,\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"key\":\"pmcid\",\"negate\":true,\"type\":\"exists\",\"value\":\"exists\"}},{\"$state\":{\"store\":\"appState\"},\"meta\":{\"alias\":null,\"disabled\":false,\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"key\":\"release_status\",\"negate\":false,\"params\":{\"query\":\"published\",\"type\":\"phrase\"},\"type\":\"phrase\",\"value\":\"published\"},\"query\":{\"match\":{\"release_status\":{\"query\":\"published\",\"type\":\"phrase\"}}}}]}" + } + } + }, + { + "_id": "9775f9f0-c053-11e8-85cd-77149200218b", + "_type": "visualization", + "_source": { + "title": "Metadata Ingest", + "visState": "{\"title\":\"Metadata 
Ingest\",\"type\":\"gauge\",\"params\":{\"type\":\"gauge\",\"addTooltip\":true,\"addLegend\":true,\"isDisplayWarning\":false,\"gauge\":{\"verticalSplit\":false,\"extendRange\":true,\"percentageMode\":false,\"gaugeType\":\"Arc\",\"gaugeStyle\":\"Full\",\"backStyle\":\"Full\",\"orientation\":\"vertical\",\"colorSchema\":\"Green to Red\",\"gaugeColorMode\":\"Labels\",\"colorsRange\":[{\"from\":0,\"to\":60000000},{\"from\":60000000,\"to\":100000000},{\"from\":100000000,\"to\":110000000}],\"invertColors\":false,\"labels\":{\"show\":true,\"color\":\"black\"},\"scale\":{\"show\":true,\"labels\":false,\"color\":\"#333\"},\"type\":\"meter\",\"style\":{\"bgWidth\":0.9,\"width\":0.9,\"mask\":false,\"bgMask\":false,\"maskBars\":50,\"bgFill\":\"#eee\",\"bgColor\":false,\"subText\":\"of 110 million (estimate)\",\"fontSize\":60,\"labelColor\":true}}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"Total Metadata\"}}]}", + "uiStateJSON": "{\"vis\":{\"defaultColors\":{\"0 - 60000000\":\"rgb(0,104,55)\",\"60000000 - 100000000\":\"rgb(255,255,190)\",\"100000000 - 110000000\":\"rgb(165,0,38)\"},\"colors\":{\"0 - 70000000\":\"#F9934E\",\"70000000 - 100000000\":\"#E5AC0E\",\"100000000 - 120000000\":\"#629E51\",\"0 - 60000000\":\"#EA6460\",\"60000000 - 100000000\":\"#5195CE\",\"100000000 - 110000000\":\"#629E51\"}}}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[]}" + } + } + }, + { + "_id": "be6e8f70-c055-11e8-85cd-77149200218b", + "_type": "visualization", + "_source": { + "title": "Releases (articles, etc) with at least one identified file", + "visState": "{\"title\":\"Releases (articles, etc) with at least one identified 
file\",\"type\":\"metric\",\"params\":{\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\",\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to Red\",\"metricColorMode\":\"None\",\"colorsRange\":[{\"from\":0,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\",\"fontSize\":60}}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}}]}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[{\"meta\":{\"index\":\"7e335490-c050-11e8-85cd-77149200218b\",\"type\":\"phrases\",\"key\":\"release_type\",\"value\":\"journal-article, proceedings-article, book\",\"params\":[\"journal-article\",\"proceedings-article\",\"book\"],\"negate\":false,\"disabled\":false,\"alias\":null},\"query\":{\"bool\":{\"should\":[{\"match_phrase\":{\"release_type\":\"journal-article\"}},{\"match_phrase\":{\"release_type\":\"proceedings-article\"}},{\"match_phrase\":{\"release_type\":\"book\"}}],\"minimum_should_match\":1}},\"$state\":{\"store\":\"appState\"}},{\"meta\":{\"index\":\"7e335490-c050-11e8-85cd-77149200218b\",\"negate\":true,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"file_count\",\"value\":0,\"params\":{\"query\":0,\"type\":\"phrase\"}},\"query\":{\"match\":{\"file_count\":{\"query\":0,\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}},{\"meta\":{\"index\":\"7e335490-c050-11e8-85cd-77149200218b\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"exists\",\"key\":\"file_count\",\"value\":\"exists\"},\"exists\":{\"field\":\"file_count\"},\"$state\":{\"store\":\"appState\"}}]}" + } + } + }, + { + "_id": "373d7a80-c13a-11e8-b25a-833364b3ed98", + "_type": "visualization", + "_source": { + "title": "IA 
files by OA status", + "visState": "{\"title\":\"IA files by OA status\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"isDonut\":true,\"labels\":{\"show\":false,\"values\":true,\"last_level\":true,\"truncate\":100}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"oa\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"Open Access\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"file_in_ia\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"IA Has Copy\"}}]}", + "uiStateJSON": "{\"vis\":{\"colors\":{\"false\":\"#3F2B5B\",\"true\":\"#629E51\"},\"legendOpen\":false}}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"type\":\"phrases\",\"key\":\"release_type\",\"value\":\"journal-article, proceedings-article\",\"params\":[\"journal-article\",\"proceedings-article\"],\"negate\":false,\"disabled\":false,\"alias\":null},\"query\":{\"bool\":{\"should\":[{\"match_phrase\":{\"release_type\":\"journal-article\"}},{\"match_phrase\":{\"release_type\":\"proceedings-article\"}}],\"minimum_should_match\":1}},\"$state\":{\"store\":\"appState\"}}]}" + } + } + }, + { + "_id": "b7486640-c28e-11e8-971a-f175bb468f02", + "_type": "visualization", + "_source": { + "title": "Release Types (with/without fulltext)", + "visState": "{\"title\":\"Release Types 
(with/without fulltext)\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"isDonut\":false,\"labels\":{\"show\":false,\"values\":true,\"last_level\":true,\"truncate\":100}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"Releases\"}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"split\",\"params\":{\"field\":\"file_in_ia\",\"size\":2,\"order\":\"desc\",\"orderBy\":\"1\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"Does IA Have Fulltext?\",\"row\":false}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"release_type\",\"size\":8,\"order\":\"desc\",\"orderBy\":\"1\",\"otherBucket\":true,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"Release Type\"}}]}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[]}" + } + } + }, + { + "_id": "e3720880-c053-11e8-85cd-77149200218b", + "_type": "visualization", + "_source": { + "title": "Does IA Have a Copy (by year)", + "visState": "{\"title\":\"Does IA Have a Copy (by 
year)\",\"type\":\"area\",\"params\":{\"type\":\"area\",\"grid\":{\"categoryLines\":false,\"style\":{\"color\":\"#eee\"}},\"categoryAxes\":[{\"id\":\"CategoryAxis-1\",\"type\":\"category\",\"position\":\"bottom\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\"},\"labels\":{\"show\":true,\"truncate\":100},\"title\":{}}],\"valueAxes\":[{\"id\":\"ValueAxis-1\",\"name\":\"LeftAxis-1\",\"type\":\"value\",\"position\":\"left\",\"show\":true,\"style\":{},\"scale\":{\"type\":\"linear\",\"mode\":\"normal\"},\"labels\":{\"show\":true,\"rotate\":0,\"filter\":false,\"truncate\":100},\"title\":{\"text\":\"Count\"}}],\"seriesParams\":[{\"show\":\"true\",\"type\":\"area\",\"mode\":\"stacked\",\"data\":{\"label\":\"Count\",\"id\":\"1\"},\"drawLinesBetweenPoints\":true,\"showCircles\":true,\"interpolate\":\"linear\",\"valueAxis\":\"ValueAxis-1\"}],\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"times\":[],\"addTimeMarker\":false},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"release_date\",\"interval\":\"y\",\"min_doc_count\":1,\"extended_bounds\":{}}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"group\",\"params\":{\"field\":\"file_in_ia\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"otherBucket\":true,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\"}}]}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[]}" + } + } + }, + { + "_id": "9b21de00-c28f-11e8-971a-f175bb468f02", + "_type": "visualization", + "_source": { + "title": "Fulltext Release Progress", + "visState": "{\"title\":\"Fulltext Release 
Progress\",\"type\":\"goal\",\"params\":{\"addTooltip\":true,\"addLegend\":false,\"isDisplayWarning\":false,\"type\":\"gauge\",\"gauge\":{\"verticalSplit\":false,\"autoExtend\":false,\"percentageMode\":false,\"gaugeType\":\"Arc\",\"gaugeStyle\":\"Full\",\"backStyle\":\"Full\",\"orientation\":\"vertical\",\"useRanges\":false,\"colorSchema\":\"Green to Red\",\"gaugeColorMode\":\"None\",\"colorsRange\":[{\"from\":0,\"to\":5000000},{\"from\":5000000,\"to\":15000000},{\"from\":15000000,\"to\":30000000},{\"from\":30000000,\"to\":50000000}],\"invertColors\":false,\"labels\":{\"show\":true,\"color\":\"black\"},\"scale\":{\"show\":true,\"labels\":false,\"color\":\"#333\",\"width\":2},\"type\":\"meter\",\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"Goal is 50 Million\",\"fontSize\":60},\"extendRange\":true}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}}]}", + "uiStateJSON": "{\"vis\":{\"defaultColors\":{\"0 - 5000000\":\"rgb(0,104,55)\",\"5000000 - 15000000\":\"rgb(183,224,117)\",\"15000000 - 30000000\":\"rgb(253,191,111)\",\"30000000 - 50000000\":\"rgb(165,0,38)\"},\"colors\":{\"0 - 5000000\":\"#BF1B00\",\"5000000 - 15000000\":\"#EF843C\",\"15000000 - 40000000\":\"#629E51\",\"15000000 - 30000000\":\"#9AC48A\",\"30000000 - 50000000\":\"#3F6833\"},\"legendOpen\":false}}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"type\":\"phrases\",\"key\":\"release_type\",\"value\":\"journal-article, 
proceedings-article\",\"params\":[\"journal-article\",\"proceedings-article\"],\"negate\":false,\"disabled\":false,\"alias\":null},\"query\":{\"bool\":{\"should\":[{\"match_phrase\":{\"release_type\":\"journal-article\"}},{\"match_phrase\":{\"release_type\":\"proceedings-article\"}}],\"minimum_should_match\":1}},\"$state\":{\"store\":\"appState\"}},{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"file_in_ia\",\"value\":\"true\",\"params\":{\"query\":true,\"type\":\"phrase\"}},\"query\":{\"match\":{\"file_in_ia\":{\"query\":true,\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" + } + } + }, + { + "_id": "acd4e760-c054-11e8-85cd-77149200218b", + "_type": "visualization", + "_source": { + "title": "Archival Status", + "visState": "{\"title\":\"Archival Status\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"isDonut\":true,\"labels\":{\"show\":false,\"values\":true,\"last_level\":true,\"truncate\":100}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"is_kept\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"Formally Archived\"}},{\"id\":\"3\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"file_in_webarchive\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\",\"customLabel\":\"In Web Archive\"}}]}", + "uiStateJSON": "{\"vis\":{\"colors\":{\"true\":\"#629E51\",\"false\":\"#E24D42\"},\"legendOpen\":false}}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + 
"searchSourceJSON": "{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"type\":\"phrases\",\"key\":\"release_type\",\"value\":\"journal-article, proceedings-article\",\"params\":[\"journal-article\",\"proceedings-article\"],\"negate\":false,\"disabled\":false,\"alias\":null},\"query\":{\"bool\":{\"should\":[{\"match_phrase\":{\"release_type\":\"journal-article\"}},{\"match_phrase\":{\"release_type\":\"proceedings-article\"}}],\"minimum_should_match\":1}},\"$state\":{\"store\":\"appState\"}},{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"_type\",\"value\":\"release\",\"params\":{\"query\":\"release\",\"type\":\"phrase\"}},\"query\":{\"match\":{\"_type\":{\"query\":\"release\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" + } + } + }, + { + "_id": "c60dcdc0-c291-11e8-971a-f175bb468f02", + "_type": "visualization", + "_source": { + "title": "Long-Tail Fulltext Count", + "visState": "{\"title\":\"Long-Tail Fulltext Count\",\"type\":\"metric\",\"params\":{\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\",\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to Red\",\"metricColorMode\":\"None\",\"colorsRange\":[{\"from\":0,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\",\"fontSize\":60}}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"Long-Tail Releases With Fulltext\"}}]}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": 
"{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"longtail\",\"value\":\"true\",\"params\":{\"query\":true,\"type\":\"phrase\"}},\"query\":{\"match\":{\"longtail\":{\"query\":true,\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}},{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"file_in_ia\",\"value\":\"true\",\"params\":{\"query\":true,\"type\":\"phrase\"}},\"query\":{\"match\":{\"file_in_ia\":{\"query\":true,\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" + } + } + }, + { + "_id": "e9388f30-c28f-11e8-971a-f175bb468f02", + "_type": "visualization", + "_source": { + "title": "Release Count Total", + "visState": "{\"title\":\"Release Count Total\",\"type\":\"metric\",\"params\":{\"addTooltip\":true,\"addLegend\":false,\"type\":\"metric\",\"metric\":{\"percentageMode\":false,\"useRanges\":false,\"colorSchema\":\"Green to Red\",\"metricColorMode\":\"None\",\"colorsRange\":[{\"from\":0,\"to\":10000}],\"labels\":{\"show\":true},\"invertColors\":false,\"style\":{\"bgFill\":\"#000\",\"bgColor\":false,\"labelColor\":false,\"subText\":\"\",\"fontSize\":60}}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{\"customLabel\":\"Release Metadata Records (all types)\"}}]}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": 
"{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"_type\",\"value\":\"release\",\"params\":{\"query\":\"release\",\"type\":\"phrase\"}},\"query\":{\"match\":{\"_type\":{\"query\":\"release\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" + } + } + }, + { + "_id": "bfe7e6b0-c6b0-11e8-971a-f175bb468f02", + "_type": "visualization", + "_source": { + "title": "Coverage by ISSN", + "visState": "{\"title\":\"Coverage by ISSN\",\"type\":\"pie\",\"params\":{\"type\":\"pie\",\"addTooltip\":true,\"addLegend\":true,\"legendPosition\":\"right\",\"isDonut\":true,\"labels\":{\"show\":false,\"values\":true,\"last_level\":true,\"truncate\":100}},\"aggs\":[{\"id\":\"1\",\"enabled\":true,\"type\":\"count\",\"schema\":\"metric\",\"params\":{}},{\"id\":\"2\",\"enabled\":true,\"type\":\"terms\",\"schema\":\"segment\",\"params\":{\"field\":\"file_in_ia\",\"size\":5,\"order\":\"desc\",\"orderBy\":\"1\",\"otherBucket\":false,\"otherBucketLabel\":\"Other\",\"missingBucket\":false,\"missingBucketLabel\":\"Missing\"}}]}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"query\":{\"query\":\"\",\"language\":\"lucene\"},\"filter\":[{\"meta\":{\"index\":\"76cffdd0-c135-11e8-b25a-833364b3ed98\",\"negate\":false,\"disabled\":false,\"alias\":null,\"type\":\"phrase\",\"key\":\"container_issnl\",\"value\":\"0002-8762\",\"params\":{\"query\":\"0002-8762\",\"type\":\"phrase\"}},\"query\":{\"match\":{\"container_issnl\":{\"query\":\"0002-8762\",\"type\":\"phrase\"}}},\"$state\":{\"store\":\"appState\"}}]}" + } + } + } +] \ No newline at end of file diff --git a/extra/elasticsearch/stats.py b/extra/elasticsearch/stats.py new file mode 100644 index 00000000..b6d1f8a5 
#!/usr/bin/env python3
"""
Prints summary statistics gathered from the fatcat API changelog and from the
fatcat elasticsearch indices.

Source path in the repository: extra/elasticsearch/stats.py

The queries are only issued when this file is executed as a script.
"""

import sys
import requests
import datetime
from fatcat_tools import public_api

#api_host_url = "http://localhost:9411/v0"
api_host_url = "https://api.fatcat.wiki/v0"

# NOTE: must not have trailing slash
elastic_host_url = "https://search.fatcat.wiki"

api = public_api(api_host_url)


def _elastic_search(index, query):
    """
    Sends `query` (a dict, serialized as the JSON request body) to the
    `_search` endpoint of elasticsearch index `index` on `elastic_host_url`,
    with `request_cache=true`, and returns the decoded JSON response.

    Raises requests.HTTPError on a 4xx/5xx response. The status is checked
    before the body is decoded, so an HTTP failure is not masked by a JSON
    decoding error.
    """
    resp = requests.get(
        "{}/{}/_search".format(elastic_host_url, index),
        json=query,
        params=dict(request_cache="true"))
    # TODO: abort()
    resp.raise_for_status()
    return resp.json()


def get_changelog_stats():
    """
    Fetches the most recent changelog entry from the API.

    Returns dict:
        changelog: {latest: {index, timestamp}}
    """

    stats = {}

    # 1. latest changelog
    latest_changelog = api.get_changelog(limit=1)[0]
    stats['changelog'] = {"latest": {
        "index": latest_changelog.index,
        "timestamp": latest_changelog.timestamp.isoformat(),
    }}
    return stats


def get_elastic_entity_stats():
    """
    TODO: files, filesets, webcaptures (no schema yet)

    Returns dict:
        release: {total, refs_total}
        papers: {total, in_web, is_oa, in_kbart, in_web_not_kbart}
        container: {total}
    """

    stats = {}

    # 2. releases
    #  x=> total count
    #  x=> total citation records
    #  x=> total (paper, chapter, proceeding)
    #  x=> with fulltext on web
    #  x=> open access
    #  x=> not in KBART, in IA
    #
    # Can probably do the above with two queries:
    #  - all releases, aggregate count and sum(ref_count)
    #  - in-scope works, aggregate count by (fulltext, OA, kbart/ia)

    # 2a. release totals
    query = {
        "size": 0,
        "aggs": {
            "release_ref_count": { "sum": { "field": "ref_count" } }
        }
    }
    resp = _elastic_search("fatcat_release", query)
    stats['release'] = {
        "total": resp['hits']['total'],
        # a "sum" aggregation comes back as {"value": <number>}; unwrap it so
        # refs_total is a plain integer rather than the wrapper object
        "refs_total": int(resp['aggregations']['release_ref_count']['value']),
    }

    # 2b. paper counts
    query = {
        "size": 0,
        "query": {
            "terms": { "release_type": [
                # "chapter", "thesis",
                "article-journal", "paper-conference",
            ] } },
        "aggs": { "paper_like": { "filters": { "filters": {
            "in_web": { "term": { "in_web": "true" } },
            "is_oa": { "term": { "is_oa": "true" } },
            "in_kbart": { "term": { "in_kbart": "true" } },
            "in_web_not_kbart": { "bool": { "filter": [
                { "term": { "in_web": "true" } },
                { "term": { "in_kbart": "false" } }
            ]}}
        }}}}
    }
    resp = _elastic_search("fatcat_release", query)
    # debug output of the raw response
    print(resp)
    buckets = resp['aggregations']['paper_like']['buckets']
    stats['papers'] = {
        'total': resp['hits']['total'],
        'in_web': buckets['in_web']['doc_count'],
        'is_oa': buckets['is_oa']['doc_count'],
        'in_kbart': buckets['in_kbart']['doc_count'],
        'in_web_not_kbart': buckets['in_web_not_kbart']['doc_count'],
    }

    # 3. containers
    #   => total count
    query = {
        "size": 0,
    }
    resp = _elastic_search("fatcat_container", query)
    stats['container'] = {
        "total": resp['hits']['total'],
    }

    return stats


def print_stats(stats):
    """
    Prints the latest changelog index/timestamp on one line, followed by the
    whole `stats` dict.

    `stats` must contain the 'changelog' key as returned by
    get_changelog_stats().
    """
    latest_changelog = stats['changelog']['latest']
    print("Latest changelog: {} ({})".format(
        latest_changelog['index'],
        latest_changelog['timestamp']))
    print(stats)


def get_elastic_container_stats(issnl):
    """
    TODO: container_id, not issnl

    Counts the releases whose `container_issnl` equals `issnl`.

    Returns dict:
        issnl
        total
        in_web
        is_preserved
    """

    query = {
        "size": 0,
        "query": {
            "term": { "container_issnl": issnl }
        },
        "aggs": { "container_stats": { "filters": { "filters": {
            "in_web": { "term": { "in_web": "true" } },
            "is_preserved": { "term": { "is_preserved": "true" } },
        }}}}
    }
    resp = _elastic_search("fatcat_release", query)
    # debug output of the raw response (only reached on a successful request)
    print(resp)
    buckets = resp['aggregations']['container_stats']['buckets']
    stats = {
        'issnl': issnl,
        'total': resp['hits']['total'],
        'in_web': buckets['in_web']['doc_count'],
        'is_preserved': buckets['is_preserved']['doc_count'],
    }

    return stats


def main():
    """
    Collects changelog and elasticsearch statistics and prints them, then
    prints per-container statistics for one hard-coded ISSN-L.
    """
    stats = {}
    stats.update(get_changelog_stats())
    stats.update(get_elastic_entity_stats())
    print_stats(stats)

    print(get_elastic_container_stats("0140-6736"))


if __name__ == '__main__':
    main()