about summary refs log tree commit diff stats
path: root/fuzzycat/verify.py
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2020-12-10 03:13:45 +0100
committer Martin Czygan <martin.czygan@gmail.com> 2020-12-10 03:13:45 +0100
commit 1cc2de7f7349b08d4e807cf5c022ab92c410fe2d (patch)
tree 0aba48f77d8f8b35b214f74d94ce9d4daddc2416 /fuzzycat/verify.py
parent 7ab933683f05a8de8ec416d520690d86b9a46a16 (diff)
download fuzzycat-1cc2de7f7349b08d4e807cf5c022ab92c410fe2d.tar.gz
fuzzycat-1cc2de7f7349b08d4e807cf5c022ab92c410fe2d.zip
add cases
Diffstat (limited to 'fuzzycat/verify.py')
-rw-r--r-- fuzzycat/verify.py 22
1 files changed, 22 insertions, 0 deletions
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index 88e83d5..76571da 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -322,6 +322,8 @@ def compare(a, b):
])
if len(types & ignore_release_types) == 0:
return (Status.DIFFERENT, Miss.RELEASE_TYPE)
+ if "dataset" in types and ("article" in types or "article-journal" in types):
+ return (Status.DIFFERENT, Miss.RELEASE_TYPE)
except PathAccessError:
pass
@@ -543,4 +545,24 @@ def compare(a, b):
except PathAccessError:
pass
+ # If both releases have a page range, but their page counts differ too much, bail out.
+ # https://fatcat.wiki/release/tm3gaiumkvb3xc7t3i6suna6u4
+ # https://fatcat.wiki/release/r6dj63wh3zcrrolisn6xuacnve
+ try:
+ a_pages = glom(a, "pages")
+ b_pages = glom(b, "pages")
+ page_pattern = re.compile("([0-9]{1,})-([0-9]{1,})")
+ a_match = page_pattern.match(a_pages)
+ b_match = page_pattern.match(b_pages)
+ if a_match and b_match:
+ a_start, a_end = a_match.groups()
+ b_start, b_end = b_match.groups()
+ a_num_pages = int(a_end) - int(a_start)
+ b_num_pages = int(b_end) - int(b_start)
+ if a_num_pages >= 0 and b_num_pages >= 0:
+ if abs(a_num_pages - b_num_pages) > 5:
+ return (Status.DIFFERENT, Miss.PAGE_COUNT)
+ except PathAccessError:
+ pass
+
return (Status.AMBIGUOUS, OK.DUMMY)