about | summary | refs | log | tree | commit | diff | stats
path: root/fuzzycat/verify.py
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2020-12-02 18:59:59 +0100
committer Martin Czygan <martin.czygan@gmail.com> 2020-12-02 18:59:59 +0100
commit 03f65cfcb88451fecb106c2c750643931b9ecc77 (patch)
tree 5922ce17de6c79c6e4f46de85acbfedbb4bc23df /fuzzycat/verify.py
parent 50bf407e8f7e484eaef4a02dc44e59b3a8ceeef8 (diff)
download fuzzycat-03f65cfcb88451fecb106c2c750643931b9ecc77.tar.gz
fuzzycat-03f65cfcb88451fecb106c2c750643931b9ecc77.zip
add iop case
Diffstat (limited to 'fuzzycat/verify.py')
-rw-r--r-- fuzzycat/verify.py 14
1 file changed, 12 insertions, 2 deletions
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index f44d9db..993b7c9 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -159,12 +159,10 @@ def compare(a, b):
if fragment in a_title_lower:
return (Status.AMBIGUOUS, Miss.BLACKLISTED_FRAGMENT)
-
# https://fatcat.wiki/release/rnso2swxzvfonemgzrth3arumi,
# https://fatcat.wiki/release/caxa7qbfqvg3bkgz4nwvapgnvi
if "subject index" in a_title_lower and "subject index" in b_title_lower:
try:
- print(a, b)
if glom(a, "container_id") != glom(b, "container_id"):
return (Status.DIFFERENT, Miss.CONTAINER)
except PathAccessError:
@@ -198,6 +196,16 @@ def compare(a, b):
except PathAccessError:
pass
+ try:
+ a_doi = glom(a, "ext_ids.doi")
+ b_doi = glom(b, "ext_ids.doi")
+ if has_doi_prefix(a_doi, "10.1149") and has_doi_prefix(b_doi, "10.1149"):
+ if (a_doi.startswith("10.1149/ma") and not b_doi.startswith("10.1149/ma")
+ or b_doi.startswith("10.1149/ma") and not a_doi.startswith("10.1149/ma")):
+ return (Status.DIFFERENT, Miss.CUSTOM_IOP_MA_PATTERN)
+ except PathAccessError:
+ pass
+
if "Zweckverband Volkshochschule " in a_title and a_title != b_title:
return (Status.DIFFERENT, Miss.CUSTOM_VHS)
@@ -426,6 +434,8 @@ TITLE_FRAGMENT_BLACKLIST = set([
"nouvelles du corps médical",
"student government minutes:",
"untersuchung einzelner abdominaler regionen und organe",
+ "annual general meeting",
+ "records of meetings",
])
CONTAINER_NAME_BLACKLIST = set([