about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2020-11-17 14:29:31 +0100
committer Martin Czygan <martin.czygan@gmail.com> 2020-11-17 14:29:31 +0100
commit 5a34689c5ab6647f7fa01bdbffba7ae7173738a7 (patch)
tree 72392480c53dfbeea507d1a50f877357820b52a1
parent 1ea154018e45cb982d4c614d4df7fa0bd1245e3c (diff)
download fuzzycat-5a34689c5ab6647f7fa01bdbffba7ae7173738a7.tar.gz
fuzzycat-5a34689c5ab6647f7fa01bdbffba7ae7173738a7.zip
fix subtitle check
-rw-r--r-- fuzzycat/verify.py 13
1 files changed, 11 insertions, 2 deletions
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index e1ff4d8..2194cc8 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -1574,6 +1574,7 @@ class Miss(str, Enum):
RELEASE_TYPE = 'miss.release_type'
CHEM_FORMULA = 'miss.chem_formula'
SUBTITLE = 'miss.subtitle'
+ BOOK_CHAPTER = 'miss.book_chapter'
class GroupVerifier:
@@ -1651,6 +1652,12 @@ def compare(a, b):
and a.get("ext_ids", {}).get("doi") != b.get("ext_ids", {}).get("doi")):
return (Status.DIFFERENT, Miss.DATASET_DOI)
+ if (a.get("release_type") == "chapter" and b.get("release_type") == "chapter"
+ and a.get("extra", {}).get("container_name")
+ and b.get("extra", {}).get("container_name") and
+ a.get("extra", {}).get("container_name") != b.get("extra", {}).get("container_name")):
+ return (Status.DIFFERENT, Miss.BOOK_CHAPTER)
+
arxiv_id_a = a.get("ext_ids", {}).get("arxiv")
arxiv_id_b = b.get("ext_ids", {}).get("arxiv")
@@ -1677,8 +1684,10 @@ def compare(a, b):
b_slug_title = slugify_string(b.get("title", "")).replace("\n", " ")
if a_slug_title == b_slug_title:
- for a_sub in a.get("subtitle", []):
- for b_sub in a.get("subtitle", []):
+ a_subtitles = a.get("extra", {}).get("subtitle", []) or []
+ b_subtitles = b.get("extra", {}).get("subtitle", []) or []
+ for a_sub in a_subtitles:
+ for b_sub in b_subtitles:
if slugify_string(a_sub) != slugify_string(b_sub):
return (Status.DIFFERENT, Miss.SUBTITLE)