From 5e79faaa4e741ca15f9f12306a81496aaccc6705 Mon Sep 17 00:00:00 2001
From: Martin Czygan
Date: Wed, 14 Apr 2021 22:33:34 +0200
Subject: tests: run es tests against public search endpoint

---
 tests/test_matching.py | 39 +++++++++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 8 deletions(-)

(limited to 'tests')

diff --git a/tests/test_matching.py b/tests/test_matching.py
index 7d8b7af..48f7b3a 100644
--- a/tests/test_matching.py
+++ b/tests/test_matching.py
@@ -1,4 +1,5 @@
 from fuzzycat.matching import anything_to_entity, match_release_fuzzy
+from fuzzycat.entities import entity_from_dict
 from fatcat_openapi_client import ReleaseEntity
 import pytest
 import elasticsearch
@@ -17,14 +18,36 @@ def es_client():
     return elasticsearch.Elasticsearch([FATCAT_SEARCH_URL])
 
 
-@pytest.mark.skip(reason="we cannot use POST on es, which client uses: https://git.io/JLssk")
-def test_match_release_fuzzy(es_client):
-    cases = (("wtv64ahbdzgwnan7rllwr3nurm", 2), )
-    for case, count in cases:
-        entity = anything_to_entity(case, ReleaseEntity)
-        logger.info(entity.title)
+def test_match_release_fuzzy(es_client, caplog):
+    cases = (
+        ("wtv64ahbdzgwnan7rllwr3nurm", 1),
+        ("eqcgtpav3na5jh56o5vjsvb4ei", 1),
+    )
+    for i, (ident, count) in enumerate(cases):
+        entity = anything_to_entity(ident, ReleaseEntity)
 
         result = match_release_fuzzy(entity, es=es_client)
-        logger.info("given: {}".format(entity.title))
-        logger.info("found: {}".format(len(result)))
+        logger.info("[{}] given {}, found {}".format(i, entity.title, len(result)))
+        assert len(result) == count
+
+    # Partial data.
+    cases = (
+        ({
+            "title": "digital libraries",
+            "ext_ids": {}
+        }, 5),
+        ({
+            "title": "The Future of Digital Scholarship",
+            "contribs": [{
+                "raw_name": "Costantino Thanos"
+            }],
+            "ext_ids": {}
+        }, 5),
+    )
+    for i, (doc, count) in enumerate(cases):
+        entity = entity_from_dict(doc, ReleaseEntity)
+        result = match_release_fuzzy(entity, es=es_client)
+        with caplog.at_level(logging.INFO):
+            logging.info("[{}] given {}, found {}, {}".format(i, entity.title, len(result),
+                                                              [v.title for v in result]))
         assert len(result) == count
-- 
cgit v1.2.3