diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-12-03 21:58:11 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-12-03 21:58:11 +0100 |
commit | f32c435c207f439abb66de5dbb6a1b67a75d0405 (patch) | |
tree | 8604255831074571437f7c346cf2f17c39458992 /fuzzycat | |
parent | 560091e02fd90a98cb79c808d8bb2000c60b9e67 (diff) | |
download | fuzzycat-f32c435c207f439abb66de5dbb6a1b67a75d0405.tar.gz fuzzycat-f32c435c207f439abb66de5dbb6a1b67a75d0405.zip |
add case
Diffstat (limited to 'fuzzycat')
-rw-r--r-- | fuzzycat/common.py | 1 | ||||
-rw-r--r-- | fuzzycat/verify.py | 12 |
2 files changed, 13 insertions, 0 deletions
```diff
diff --git a/fuzzycat/common.py b/fuzzycat/common.py
index 60f42ab..07f62df 100644
--- a/fuzzycat/common.py
+++ b/fuzzycat/common.py
@@ -45,6 +45,7 @@ class Miss(str, Enum):
     CONTRIB_INTERSECTION_EMPTY = 'miss.contrib_intersection_empty'
     CUSTOM_IOP_MA_PATTERN = 'miss.custom_iop_ma_pattern'
     CUSTOM_PREFIX_10_14288 = 'miss.custom_prefix_10_14288'
+    CUSTOM_PREFIX_10_7916 = 'miss.custom_prefix_10_7916'
     CUSTOM_VHS = 'miss.vhs'  # https://fatcat.wiki/release/44gk5ben5vghljq6twm7lwmxla
     DATASET_DOI = 'miss.dataset_doi'
     NUM_DIFF = 'miss.num_diff'
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index 993b7c9..6d824d4 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -212,6 +212,7 @@ def compare(a, b):
     if re.match(r"appendix ?[^ ]*$", a_title_lower):
         return (Status.AMBIGUOUS, Miss.APPENDIX)
 
+
     try:
         # TODO: figshare versions, "xxx.v1"
         FIGSHARE_PREFIX = "10.6084/"
@@ -332,6 +333,17 @@ def compare(a, b):
 
     if a_slug_title == b_slug_title:
         try:
+            # https://dlc.library.columbia.edu/lcaaj/cul:p5hqbzkhxb,
+            # https://dlc.library.columbia.edu/lcaaj/cul:5tb2rbp0nj
+            a_doi = glom(a, "ext_ids.doi")
+            b_doi = glom(b, "ext_ids.doi")
+            if has_doi_prefix(a_doi, "10.7916") and has_doi_prefix(b_doi, "10.7916"):
+                return (Status.AMBIGUOUS, Miss.CUSTOM_PREFIX_10_7916)
+        except PathAccessError:
+            pass
+
+    if a_slug_title == b_slug_title:
+        try:
             a_subtitles = glom(a, "extra.subtitle") or []
             b_subtitles = glom(b, "extra.subtitle") or []
             for a_sub in a_subtitles:
```