diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-12-10 03:13:45 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-12-10 03:13:45 +0100 |
commit | 1cc2de7f7349b08d4e807cf5c022ab92c410fe2d (patch) | |
tree | 0aba48f77d8f8b35b214f74d94ce9d4daddc2416 /fuzzycat/verify.py | |
parent | 7ab933683f05a8de8ec416d520690d86b9a46a16 (diff) | |
download | fuzzycat-1cc2de7f7349b08d4e807cf5c022ab92c410fe2d.tar.gz fuzzycat-1cc2de7f7349b08d4e807cf5c022ab92c410fe2d.zip |
add cases
Diffstat (limited to 'fuzzycat/verify.py')
-rw-r--r-- | fuzzycat/verify.py | 22 |
1 file changed, 22 insertions, 0 deletions
```diff
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index 88e83d5..76571da 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -322,6 +322,8 @@ def compare(a, b):
         ])
         if len(types & ignore_release_types) == 0:
             return (Status.DIFFERENT, Miss.RELEASE_TYPE)
+        if "dataset" in types and ("article" in types or "article-journal" in types):
+            return (Status.DIFFERENT, Miss.RELEASE_TYPE)
     except PathAccessError:
         pass
 
@@ -543,4 +545,24 @@ def compare(a, b):
     except PathAccessError:
         pass
 
+    # If pages exists, but differ too much, bail out.
+    # https://fatcat.wiki/release/tm3gaiumkvb3xc7t3i6suna6u4
+    # https://fatcat.wiki/release/r6dj63wh3zcrrolisn6xuacnve
+    try:
+        a_pages = glom(a, "pages")
+        b_pages = glom(b, "pages")
+        page_pattern = re.compile("([0-9]{1,})-([0-9]{1,})")
+        a_match = page_pattern.match(a_pages)
+        b_match = page_pattern.match(b_pages)
+        if a_match and b_match:
+            a_start, a_end = a_match.groups()
+            b_start, b_end = b_match.groups()
+            a_num_pages = int(a_end) - int(a_start)
+            b_num_pages = int(b_end) - int(b_start)
+            if a_num_pages >= 0 and b_num_pages >= 0:
+                if abs(a_num_pages - b_num_pages) > 5:
+                    return (Status.DIFFERENT, Miss.PAGE_COUNT)
+    except PathAccessError:
+        pass
+
     return (Status.AMBIGUOUS, OK.DUMMY)
```