diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-01-08 23:55:54 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-01-08 23:55:54 +0100 |
commit | 53c1170a0cb5f258c5c880ed3cca8fd5960bff9d (patch) | |
tree | bed917cde646a18b5342b0f5a76a810e794dd638 /fuzzycat/verify.py | |
parent | ef10ddaf597bc122da530b5e66a5cca9b7363346 (diff) | |
download | fuzzycat-53c1170a0cb5f258c5c880ed3cca8fd5960bff9d.tar.gz fuzzycat-53c1170a0cb5f258c5c880ed3cca8fd5960bff9d.zip |
case: translation in title
Diffstat (limited to 'fuzzycat/verify.py')
-rw-r--r-- | fuzzycat/verify.py | 10 |
1 file changed, 10 insertions, 0 deletions
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py index ff4567b..a44154a 100644 --- a/fuzzycat/verify.py +++ b/fuzzycat/verify.py @@ -596,4 +596,14 @@ def verify(a: Dict, b: Dict, min_title_length=5) -> Tuple[str, str]: except (ValueError, PathAccessError): pass + # A variant of translated titles, e.g. https://fatcat.wiki/release/search?q=%22A+nova+classifica%C3%A7%C3%A3o+dos+tumores+da+mama+%22 + try: + a_container_id = glom(a, "container_id") + b_container_id = glom(b, "container_id") + if a_authors == b_authors and a_container_id == b_container_id and a_release_year == b_release_year and a_title != b_title and ( + a_title in b_title or b_title in a_title): + return Verify(Status.STRONG, Reason.TITLE_ARTIFACT) + except PathAccessError: + pass + return Verify(Status.AMBIGUOUS, Reason.UNKNOWN) |