aboutsummaryrefslogtreecommitdiffstats
path: root/fuzzycat
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2020-12-03 21:58:11 +0100
committer Martin Czygan <martin.czygan@gmail.com> 2020-12-03 21:58:11 +0100
commit f32c435c207f439abb66de5dbb6a1b67a75d0405 (patch)
tree 8604255831074571437f7c346cf2f17c39458992 /fuzzycat
parent 560091e02fd90a98cb79c808d8bb2000c60b9e67 (diff)
download fuzzycat-f32c435c207f439abb66de5dbb6a1b67a75d0405.tar.gz
fuzzycat-f32c435c207f439abb66de5dbb6a1b67a75d0405.zip
add case
Diffstat (limited to 'fuzzycat')
-rw-r--r-- fuzzycat/common.py 1
-rw-r--r-- fuzzycat/verify.py 12
2 files changed, 13 insertions, 0 deletions
diff --git a/fuzzycat/common.py b/fuzzycat/common.py
index 60f42ab..07f62df 100644
--- a/fuzzycat/common.py
+++ b/fuzzycat/common.py
@@ -45,6 +45,7 @@ class Miss(str, Enum):
CONTRIB_INTERSECTION_EMPTY = 'miss.contrib_intersection_empty'
CUSTOM_IOP_MA_PATTERN = 'miss.custom_iop_ma_pattern'
CUSTOM_PREFIX_10_14288 = 'miss.custom_prefix_10_14288'
+ CUSTOM_PREFIX_10_7916 = 'miss.custom_prefix_10_7916'
CUSTOM_VHS = 'miss.vhs' # https://fatcat.wiki/release/44gk5ben5vghljq6twm7lwmxla
DATASET_DOI = 'miss.dataset_doi'
NUM_DIFF = 'miss.num_diff'
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index 993b7c9..6d824d4 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -212,6 +212,7 @@ def compare(a, b):
if re.match(r"appendix ?[^ ]*$", a_title_lower):
return (Status.AMBIGUOUS, Miss.APPENDIX)
+
try:
# TODO: figshare versions, "xxx.v1"
FIGSHARE_PREFIX = "10.6084/"
@@ -332,6 +333,17 @@ def compare(a, b):
if a_slug_title == b_slug_title:
try:
+ # https://dlc.library.columbia.edu/lcaaj/cul:p5hqbzkhxb,
+ # https://dlc.library.columbia.edu/lcaaj/cul:5tb2rbp0nj
+ a_doi = glom(a, "ext_ids.doi")
+ b_doi = glom(b, "ext_ids.doi")
+ if has_doi_prefix(a_doi, "10.7916") and has_doi_prefix(b_doi, "10.7916"):
+ return (Status.AMBIGUOUS, Miss.CUSTOM_PREFIX_10_7916)
+ except PathAccessError:
+ pass
+
+ if a_slug_title == b_slug_title:
+ try:
a_subtitles = glom(a, "extra.subtitle") or []
b_subtitles = glom(b, "extra.subtitle") or []
for a_sub in a_subtitles: