diff options
| author | Martin Czygan <martin.czygan@gmail.com> | 2020-08-17 18:07:48 +0200 | 
|---|---|---|
| committer | Martin Czygan <martin.czygan@gmail.com> | 2020-08-17 18:07:48 +0200 | 
| commit | 3e18c3afc037d2ca59339e10aadc3ba508c7db49 (patch) | |
| tree | 8f50ea9633974095bb97e671e87a92bb8bd0030d | |
| parent | 9fc8976c274a33f316a2152209135b464802b2ad (diff) | |
| download | fuzzycat-3e18c3afc037d2ca59339e10aadc3ba508c7db49.tar.gz fuzzycat-3e18c3afc037d2ca59339e10aadc3ba508c7db49.zip | |
matching: verify release match stub
| -rw-r--r-- | fuzzycat/fatcat/matching.py | 26 | 
1 files changed, 24 insertions, 2 deletions
```diff
diff --git a/fuzzycat/fatcat/matching.py b/fuzzycat/fatcat/matching.py
index ba9b8a8..9b46935 100644
--- a/fuzzycat/fatcat/matching.py
+++ b/fuzzycat/fatcat/matching.py
@@ -22,7 +22,7 @@ from fatcat_openapi_client import (ApiException, ContainerEntity, DefaultApi, Re
                                    ReleaseExtIds, WorkEntity)
 from fatcat_openapi_client.api.default_api import DefaultApi
 
-from fuzzycat.fatcat.common import MatchStatus, response_to_entity_list
+from fuzzycat.fatcat.common import MatchStatus, response_to_entity_list, compare_ext_ids
 from fuzzycat.serials import serialsdb
 from fuzzycat import cleanups
 
@@ -246,4 +246,26 @@ def verify_container_match(a: ContainerEntity, b: ContainerEntity) -> MatchStatu
 
 
 def verify_release_match(a: ReleaseEntity, b: ReleaseEntity) -> MatchStatus:
-    pass
+    assert isinstance(a, ReleaseEntity)
+    assert isinstance(b, ReleaseEntity)
+
+    if a == b:
+        return MatchStatus.EXACT
+
+    a_ext_ids, b_ext_ids = a.ext_ids, b.ext_ids
+    # Compare ext ids, result is a counter, we are interested in "hits" and
+    # "misses", only.
+    cmp_result = compare_ext_ids(a_ext_ids, b_ext_ids)
+
+    # Assume that if more ids match than mismatch, it is a good signal, e.g. if
+    # only a DOI is defined and they match, it is an exact match.
+    if cmp_result["hits"] > 0 and cmp_result["misses"] == 0:
+        return MatchStatus.EXACT
+    if cmp_result["hits"] > cmp_result["misses"]:
+        return MatchStatus.STRONG
+    if cmp_result["hits"] == 0 and cmp_result["misses"] > 0:
+        return MatchStatus.DIFFERENT
+    if cmp_result["hits"] < cmp_result["misses"]:
+        return MatchStatus.AMBIGIOUS
+    
+    # TODO: do title verification, apply string cleanups, etc.
\ No newline at end of file
```
