From e1030e29bbd192953ab742f593dd8da43a7af684 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 6 Apr 2021 15:56:22 -0700 Subject: change health check from .exists(index) to .mapping(index) In cases where the cluster leader node is unavailable, the health check was returning false even when the local node had full shard replicas and could return requests. A refinement of this change would be to use the /{index}/_count API endpoint to ensure that the "failed" and "skipped" shard numbers are 0 (aka, "successful == total"). However, not sure where that endpoint is exposed in the elasticsearch-py API. The CatClient method doesn't seem right. --- fatcat_scholar/search.py | 17 +++++++++++++---- fatcat_scholar/web.py | 4 ++-- 2 files changed, 15 insertions(+), 6 deletions(-) (limited to 'fatcat_scholar') diff --git a/fatcat_scholar/search.py b/fatcat_scholar/search.py index 2ae0ba9..e9f2bc8 100644 --- a/fatcat_scholar/search.py +++ b/fatcat_scholar/search.py @@ -417,16 +417,25 @@ def do_fulltext_search( ) -def es_scholar_index_exists() -> bool: +def es_scholar_index_alive() -> bool: """ - Checks if the configured back-end elasticsearch index exists. - Intended to be used in health checks. + Checks if the configured back-end elasticsearch index exists and can + service queries. Intended to be used in health checks. + + Note that the regular client.indices.exists(index) function call will + return an error if the cluster leader can not be reached, even if the local + node could service queries in a read-only manner. + + The client.indices.get_mapping(index) API, or the client.cat.count(index) + API, both return quickly and indicate that queries can be run against the + index. 
""" try: - resp = es_client.indices.exists(settings.ELASTICSEARCH_QUERY_FULLTEXT_INDEX) + resp = es_client.indices.get_mapping(settings.ELASTICSEARCH_QUERY_FULLTEXT_INDEX) except elasticsearch.exceptions.RequestError as e_raw: if e_raw.status_code == 404: return False else: raise e_raw return resp + diff --git a/fatcat_scholar/web.py b/fatcat_scholar/web.py index 6cda5b7..895af18 100644 --- a/fatcat_scholar/web.py +++ b/fatcat_scholar/web.py @@ -25,7 +25,7 @@ from fatcat_scholar.search import ( process_query, FulltextQuery, FulltextHits, - es_scholar_index_exists, + es_scholar_index_alive, ) from fatcat_scholar.schema import ScholarDoc @@ -118,7 +118,7 @@ def health_get() -> Any: """ Checks that connection back to elasticsearch index is working. """ - if not es_scholar_index_exists(): + if not es_scholar_index_alive(): raise HTTPException(status_code=503) return Response() -- cgit v1.2.3