diff options
Diffstat (limited to 'fuzzycat')
-rw-r--r-- | fuzzycat/common.py | 1 | ||||
-rw-r--r-- | fuzzycat/verify.py | 10 |
2 files changed, 10 insertions, 1 deletion
diff --git a/fuzzycat/common.py b/fuzzycat/common.py index c50962b..2257e9d 100644 --- a/fuzzycat/common.py +++ b/fuzzycat/common.py @@ -17,6 +17,7 @@ class OK(str, Enum): Reason for assuming we have a match. """ ARXIV_VERSION = 'ok.arxiv_version' + CUSTOM_BSI_SUBDOC = 'ok.custom_bsi_subdoc' CUSTOM_BSI_UNDATED = 'ok.custom_bsi_undated' CUSTOM_IEEE_ARXIV = 'ok.custom_ieee_arxiv' DATACITE_RELATED_ID = 'ok.datacite_related_id' diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py index 6fa4100..4daed97 100644 --- a/fuzzycat/verify.py +++ b/fuzzycat/verify.py @@ -76,7 +76,7 @@ from glom import PathAccessError, glom from fuzzycat.common import OK, Miss, Status from fuzzycat.utils import (author_similarity_score, contains_chemical_formula, has_doi_prefix, - num_project, slugify_string, jaccard) + jaccard, num_project, slugify_string) # The result of clustering are documents that have a key k and a list of values # (of the cluster) v. @@ -211,6 +211,14 @@ def compare(a, b): if has_doi_prefix(a_doi, "10.3403") and has_doi_prefix(b_doi, "10.3403"): if a_doi + "u" == b_doi or b_doi + "u" == a_doi: return (Status.STRONG, OK.CUSTOM_BSI_UNDATED) + # Reference to subdocument. + # https://api.fatcat.wiki/v0/release/tcro5wr6brhqnf5wettyiauw34 + # https://api.fatcat.wiki/v0/release/s7a4o5v5gfg4tbzna6poyg7nzy + if a_title == b_title and ((dict_key_exists(a, "extra.subtitle") + and not dict_key_exists(b, "extra.subtitle")) or + (dict_key_exists(b, "extra.subtitle") + and not dict_key_exists(a, "extra.subtitle"))): + return (Status.STRONG, OK.CUSTOM_BSI_SUBDOC) except PathAccessError: pass |