about | summary | refs | log | tree | commit | diff | stats
path: root/fuzzycat
diff options
context:
space:
mode:
author: Martin Czygan <martin.czygan@gmail.com> 2020-12-09 01:11:19 +0100
committer: Martin Czygan <martin.czygan@gmail.com> 2020-12-09 01:11:19 +0100
commit: 26c71fe52d4cabc3cca8b29f6fa3d99c66e5f912 (patch)
tree: b8952a9234e6396a140b2d2a63b703ef9a911526 /fuzzycat
parent: da27c5b093af923b0cb462903b273af798f39403 (diff)
download: fuzzycat-26c71fe52d4cabc3cca8b29f6fa3d99c66e5f912.tar.gz
fuzzycat-26c71fe52d4cabc3cca8b29f6fa3d99c66e5f912.zip
another case
Diffstat (limited to 'fuzzycat')
-rw-r--r-- fuzzycat/common.py 1
-rw-r--r-- fuzzycat/verify.py 18
2 files changed, 19 insertions, 0 deletions
diff --git a/fuzzycat/common.py b/fuzzycat/common.py
index f03b7cc..d87bef4 100644
--- a/fuzzycat/common.py
+++ b/fuzzycat/common.py
@@ -24,6 +24,7 @@ class OK(str, Enum):
DOI = 'ok.doi'
DUMMY = 'ok.dummy'
FIGSHARE_VERSION = 'ok.figshare_version'
+ PMID_DOI_PAIR = 'ok.pmid_doi_pair'
PREPRINT_PUBLISHED = 'ok.preprint_published'
SLUG_TITLE_AUTHOR_MATCH = 'ok.slug_title_author_match'
TITLE_AUTHOR_MATCH = 'ok.title_author_match'
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index a223e48..e945a15 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -135,6 +135,18 @@ class GroupVerifier:
self.counter["total"] = sum(v for _, v in self.counter.items())
+def dict_key_exists(doc, path):
+ """
+ Return True if glom can resolve `path` in `doc`, False if the lookup raises PathAccessError. XXX: probably in glom, too.
+ """
+ try:
+ _ = glom(doc, path)  # result is discarded; only whether the lookup raises matters
+ except PathAccessError:
+ return False
+ else:
+ return True  # NOTE(review): a path whose value is None presumably still counts as existing -- confirm
+
+
def compare(a, b):
"""
Compare two entities, return match status and reason.
@@ -419,6 +431,12 @@ def compare(a, b):
if len(a_slug_authors & b_slug_authors) > 0:
return (Status.STRONG, OK.SLUG_TITLE_AUTHOR_MATCH)
+ if any([a_authors, b_authors]) and not (a_authors and b_authors):
+ if a_release_year == b_release_year and a_title_lower == b_title_lower:
+ if ((dict_key_exists(a, "ext_ids.pmid") and not dict_key_exists(a, "ext_ids.doi")) or
+ (dict_key_exists(b, "ext_ids.pmid") and not dict_key_exists(b, "ext_ids.doi"))):
+ return (Status.STRONG, OK.PMID_DOI_PAIR)
+
if a_authors and len(a_slug_authors & b_slug_authors) == 0:
# Before we bail out, run an authors similarity check. TODO: This is
# not the right place, but lives here now, since these cases popped up