aboutsummaryrefslogtreecommitdiffstats
path: root/fuzzycat
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2020-12-09 22:23:52 +0100
committer Martin Czygan <martin.czygan@gmail.com> 2020-12-09 22:23:52 +0100
commit 5c9210b99f8db9686e3f2cefe34df3d512f692de (patch)
tree 15c6d5ae820adb766cd5688d4bb2d08231eaed8e /fuzzycat
parent 0604a8e01f8e18ece68c903201ed79c1433d88d6 (diff)
download fuzzycat-5c9210b99f8db9686e3f2cefe34df3d512f692de.tar.gz
fuzzycat-5c9210b99f8db9686e3f2cefe34df3d512f692de.zip
add subdoc case
Diffstat (limited to 'fuzzycat')
-rw-r--r-- fuzzycat/common.py 1
-rw-r--r-- fuzzycat/verify.py 10
2 files changed, 10 insertions, 1 deletions
diff --git a/fuzzycat/common.py b/fuzzycat/common.py
index c50962b..2257e9d 100644
--- a/fuzzycat/common.py
+++ b/fuzzycat/common.py
@@ -17,6 +17,7 @@ class OK(str, Enum):
Reason for assuming we have a match.
"""
ARXIV_VERSION = 'ok.arxiv_version'
+ CUSTOM_BSI_SUBDOC = 'ok.custom_bsi_subdoc'
CUSTOM_BSI_UNDATED = 'ok.custom_bsi_undated'
CUSTOM_IEEE_ARXIV = 'ok.custom_ieee_arxiv'
DATACITE_RELATED_ID = 'ok.datacite_related_id'
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index 6fa4100..4daed97 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -76,7 +76,7 @@ from glom import PathAccessError, glom
from fuzzycat.common import OK, Miss, Status
from fuzzycat.utils import (author_similarity_score, contains_chemical_formula, has_doi_prefix,
- num_project, slugify_string, jaccard)
+ jaccard, num_project, slugify_string)
# The result of clustering are documents that have a key k and a list of values
# (of the cluster) v.
@@ -211,6 +211,14 @@ def compare(a, b):
if has_doi_prefix(a_doi, "10.3403") and has_doi_prefix(b_doi, "10.3403"):
if a_doi + "u" == b_doi or b_doi + "u" == a_doi:
return (Status.STRONG, OK.CUSTOM_BSI_UNDATED)
+ # Reference to subdocument.
+ # https://api.fatcat.wiki/v0/release/tcro5wr6brhqnf5wettyiauw34
+ # https://api.fatcat.wiki/v0/release/s7a4o5v5gfg4tbzna6poyg7nzy
+ if a_title == b_title and ((dict_key_exists(a, "extra.subtitle")
+ and not dict_key_exists(b, "extra.subtitle")) or
+ (dict_key_exists(b, "extra.subtitle")
+ and not dict_key_exists(a, "extra.subtitle"))):
+ return (Status.STRONG, OK.CUSTOM_BSI_SUBDOC)
except PathAccessError:
pass