diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-08-17 18:07:48 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-08-17 18:07:48 +0200 |
commit | 3e18c3afc037d2ca59339e10aadc3ba508c7db49 (patch) | |
tree | 8f50ea9633974095bb97e671e87a92bb8bd0030d | |
parent | 9fc8976c274a33f316a2152209135b464802b2ad (diff) | |
download | fuzzycat-3e18c3afc037d2ca59339e10aadc3ba508c7db49.tar.gz fuzzycat-3e18c3afc037d2ca59339e10aadc3ba508c7db49.zip |
matching: verify release match stub
-rw-r--r-- | fuzzycat/fatcat/matching.py | 26 |
1 file changed, 24 insertions, 2 deletions
diff --git a/fuzzycat/fatcat/matching.py b/fuzzycat/fatcat/matching.py
index ba9b8a8..9b46935 100644
--- a/fuzzycat/fatcat/matching.py
+++ b/fuzzycat/fatcat/matching.py
@@ -22,7 +22,7 @@ from fatcat_openapi_client import (ApiException, ContainerEntity, DefaultApi, Re
                                    ReleaseExtIds, WorkEntity)
 from fatcat_openapi_client.api.default_api import DefaultApi
 
-from fuzzycat.fatcat.common import MatchStatus, response_to_entity_list
+from fuzzycat.fatcat.common import MatchStatus, response_to_entity_list, compare_ext_ids
 from fuzzycat.serials import serialsdb
 from fuzzycat import cleanups
 
@@ -246,4 +246,26 @@ def verify_container_match(a: ContainerEntity, b: ContainerEntity) -> MatchStatu
 
 
 def verify_release_match(a: ReleaseEntity, b: ReleaseEntity) -> MatchStatus:
-    pass
+    assert isinstance(a, ReleaseEntity)
+    assert isinstance(b, ReleaseEntity)
+
+    if a == b:
+        return MatchStatus.EXACT
+
+    a_ext_ids, b_ext_ids = a.ext_ids, b.ext_ids
+    # Compare ext ids, result is a counter, we are interested in "hits" and
+    # "misses", only.
+    cmp_result = compare_ext_ids(a_ext_ids, b_ext_ids)
+
+    # Assume that if more ids match than mismatch, it is a good signal, e.g. if
+    # only a DOI is defined and they match, it is an exact match.
+    if cmp_result["hits"] > 0 and cmp_result["misses"] == 0:
+        return MatchStatus.EXACT
+    if cmp_result["hits"] > cmp_result["misses"]:
+        return MatchStatus.STRONG
+    if cmp_result["hits"] == 0 and cmp_result["misses"] > 0:
+        return MatchStatus.DIFFERENT
+    if cmp_result["hits"] < cmp_result["misses"]:
+        return MatchStatus.AMBIGIOUS
+
+    # TODO: do title verification, apply string cleanups, etc.
\ No newline at end of file |