diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-04-14 22:33:34 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-04-14 22:33:34 +0200 |
commit | 5e79faaa4e741ca15f9f12306a81496aaccc6705 (patch) | |
tree | c76299624ce57a8171bac3a36650b25b3b20b16d | |
parent | b237584099e31dbf9e6998d90bf78a03b9c6093f (diff) | |
download | fuzzycat-5e79faaa4e741ca15f9f12306a81496aaccc6705.tar.gz fuzzycat-5e79faaa4e741ca15f9f12306a81496aaccc6705.zip |
tests: run es tests against public search endpoint
-rw-r--r-- | tests/test_matching.py | 39 |
1 file changed, 31 insertions, 8 deletions
```diff
diff --git a/tests/test_matching.py b/tests/test_matching.py
index 7d8b7af..48f7b3a 100644
--- a/tests/test_matching.py
+++ b/tests/test_matching.py
@@ -1,4 +1,5 @@
 from fuzzycat.matching import anything_to_entity, match_release_fuzzy
+from fuzzycat.entities import entity_from_dict
 from fatcat_openapi_client import ReleaseEntity
 import pytest
 import elasticsearch
@@ -17,14 +18,36 @@ def es_client():
     return elasticsearch.Elasticsearch([FATCAT_SEARCH_URL])
 
 
-@pytest.mark.skip(reason="we cannot use POST on es, which client uses: https://git.io/JLssk")
-def test_match_release_fuzzy(es_client):
-    cases = (("wtv64ahbdzgwnan7rllwr3nurm", 2), )
-    for case, count in cases:
-        entity = anything_to_entity(case, ReleaseEntity)
-        logger.info(entity.title)
+def test_match_release_fuzzy(es_client, caplog):
+    cases = (
+        ("wtv64ahbdzgwnan7rllwr3nurm", 1),
+        ("eqcgtpav3na5jh56o5vjsvb4ei", 1),
+    )
+    for i, (ident, count) in enumerate(cases):
+        entity = anything_to_entity(ident, ReleaseEntity)
         result = match_release_fuzzy(entity, es=es_client)
-        logger.info("given: {}".format(entity.title))
-        logger.info("found: {}".format(len(result)))
+        logger.info("[{}] given {}, found {}".format(i, entity.title, len(result)))
+        assert len(result) == count
+
+    # Partial data.
+    cases = (
+        ({
+            "title": "digital libraries",
+            "ext_ids": {}
+        }, 5),
+        ({
+            "title": "The Future of Digital Scholarship",
+            "contribs": [{
+                "raw_name": "Costantino Thanos"
+            }],
+            "ext_ids": {}
+        }, 5),
+    )
+    for i, (doc, count) in enumerate(cases):
+        entity = entity_from_dict(doc, ReleaseEntity)
+        result = match_release_fuzzy(entity, es=es_client)
+        with caplog.at_level(logging.INFO):
+            logging.info("[{}] given {}, found {}, {}".format(i, entity.title, len(result),
+                                                              [v.title for v in result]))
         assert len(result) == count
 
```