diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-12-09 22:23:52 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-12-09 22:23:52 +0100 |
commit | 5c9210b99f8db9686e3f2cefe34df3d512f692de (patch) | |
tree | 15c6d5ae820adb766cd5688d4bb2d08231eaed8e /fuzzycat/verify.py | |
parent | 0604a8e01f8e18ece68c903201ed79c1433d88d6 (diff) | |
download | fuzzycat-5c9210b99f8db9686e3f2cefe34df3d512f692de.tar.gz fuzzycat-5c9210b99f8db9686e3f2cefe34df3d512f692de.zip |
add subdoc case
Diffstat (limited to 'fuzzycat/verify.py')
-rw-r--r-- | fuzzycat/verify.py | 10 |
1 file changed, 9 insertions, 1 deletion
```diff
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index 6fa4100..4daed97 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -76,7 +76,7 @@ from glom import PathAccessError, glom
 
 from fuzzycat.common import OK, Miss, Status
 from fuzzycat.utils import (author_similarity_score, contains_chemical_formula, has_doi_prefix,
-                            num_project, slugify_string, jaccard)
+                            jaccard, num_project, slugify_string)
 
 # The result of clustering are documents that have a key k and a list of values
 # (of the cluster) v.
@@ -211,6 +211,14 @@ def compare(a, b):
         if has_doi_prefix(a_doi, "10.3403") and has_doi_prefix(b_doi, "10.3403"):
             if a_doi + "u" == b_doi or b_doi + "u" == a_doi:
                 return (Status.STRONG, OK.CUSTOM_BSI_UNDATED)
+            # Reference to subdocument.
+            # https://api.fatcat.wiki/v0/release/tcro5wr6brhqnf5wettyiauw34
+            # https://api.fatcat.wiki/v0/release/s7a4o5v5gfg4tbzna6poyg7nzy
+            if a_title == b_title and ((dict_key_exists(a, "extra.subtitle")
+                                        and not dict_key_exists(b, "extra.subtitle")) or
+                                       (dict_key_exists(b, "extra.subtitle")
+                                        and not dict_key_exists(a, "extra.subtitle"))):
+                return (Status.STRONG, OK.CUSTOM_BSI_SUBDOC)
     except PathAccessError:
         pass
 
```