diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-12-02 02:23:40 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-12-02 02:23:40 +0100 |
commit | 50bf407e8f7e484eaef4a02dc44e59b3a8ceeef8 (patch) | |
tree | 5cda74051221e36c081f8d2cd050231f5b3059b5 /fuzzycat | |
parent | ba68fa4d91fbcd1dda3363b78bc24ca64ca2546b (diff) | |
download | fuzzycat-50bf407e8f7e484eaef4a02dc44e59b3a8ceeef8.tar.gz fuzzycat-50bf407e8f7e484eaef4a02dc44e59b3a8ceeef8.zip |
add cases
Diffstat (limited to 'fuzzycat')
-rw-r--r-- | fuzzycat/common.py | 1 | ||||
-rw-r--r-- | fuzzycat/verify.py | 12 |
2 files changed, 12 insertions, 1 deletions
```diff
diff --git a/fuzzycat/common.py b/fuzzycat/common.py
index 5cef684..8ebc43e 100644
--- a/fuzzycat/common.py
+++ b/fuzzycat/common.py
@@ -41,6 +41,7 @@ class Miss(str, Enum):
     BOOK_CHAPTER = 'miss.book_chapter'
     CHEM_FORMULA = 'miss.chem_formula'
     COMPONENT = 'miss.component'
+    CONTAINER = 'miss.container'
     CONTRIB_INTERSECTION_EMPTY = 'miss.contrib_intersection_empty'
     CUSTOM_VHS = 'miss.vhs' # https://fatcat.wiki/release/44gk5ben5vghljq6twm7lwmxla
     DATASET_DOI = 'miss.dataset_doi'
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index 5977f8e..f44d9db 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -159,6 +159,17 @@ def compare(a, b):
         if fragment in a_title_lower:
             return (Status.AMBIGUOUS, Miss.BLACKLISTED_FRAGMENT)
 
+
+    # https://fatcat.wiki/release/rnso2swxzvfonemgzrth3arumi,
+    # https://fatcat.wiki/release/caxa7qbfqvg3bkgz4nwvapgnvi
+    if "subject index" in a_title_lower and "subject index" in b_title_lower:
+        try:
+            print(a, b)
+            if glom(a, "container_id") != glom(b, "container_id"):
+                return (Status.DIFFERENT, Miss.CONTAINER)
+        except PathAccessError:
+            pass
+
     try:
         if a_title and a_title == b_title and glom(a, "extra.datacite.metadataVersion") != glom(
                 b, "extra.datacite.metadataVersion"):
@@ -296,7 +307,6 @@ def compare(a, b):
     # https://fatcat.wiki/release/tur236mqljdfdnlzbbnks2sily
     def ieee_arxiv_pair_check(a, b):
         try:
-            print(a_slug_title, glom(a, "ext_ids.doi"))
             if (glom(a, "ext_ids.doi").split("/")[0] == "10.1109"
                     and glom(b, "ext_ids.arxiv") != ""):
                 return (Status.STRONG, OK.CUSTOM_IEEE_ARXIV)
```