diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-12-09 01:11:19 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-12-09 01:11:19 +0100 |
commit | 26c71fe52d4cabc3cca8b29f6fa3d99c66e5f912 (patch) | |
tree | b8952a9234e6396a140b2d2a63b703ef9a911526 /fuzzycat | |
parent | da27c5b093af923b0cb462903b273af798f39403 (diff) | |
download | fuzzycat-26c71fe52d4cabc3cca8b29f6fa3d99c66e5f912.tar.gz fuzzycat-26c71fe52d4cabc3cca8b29f6fa3d99c66e5f912.zip |
another case
Diffstat (limited to 'fuzzycat')
-rw-r--r-- | fuzzycat/common.py | 1 | ||||
-rw-r--r-- | fuzzycat/verify.py | 18 |
2 files changed, 19 insertions, 0 deletions
```diff
diff --git a/fuzzycat/common.py b/fuzzycat/common.py
index f03b7cc..d87bef4 100644
--- a/fuzzycat/common.py
+++ b/fuzzycat/common.py
@@ -24,6 +24,7 @@ class OK(str, Enum):
     DOI = 'ok.doi'
     DUMMY = 'ok.dummy'
     FIGSHARE_VERSION = 'ok.figshare_version'
+    PMID_DOI_PAIR = 'ok.pmid_doi_pair'
     PREPRINT_PUBLISHED = 'ok.preprint_published'
     SLUG_TITLE_AUTHOR_MATCH = 'ok.slug_title_author_match'
     TITLE_AUTHOR_MATCH = 'ok.title_author_match'
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index a223e48..e945a15 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -135,6 +135,18 @@ class GroupVerifier:
         self.counter["total"] = sum(v for _, v in self.counter.items())
 
 
+def dict_key_exists(doc, path):
+    """
+    Return true, if a value at a given path exists. XXX: probably in glom, too.
+    """
+    try:
+        _ = glom(doc, path)
+    except PathAccessError:
+        return False
+    else:
+        return True
+
+
 def compare(a, b):
     """
     Compare two entities, return match status and reason.
@@ -419,6 +431,12 @@ def compare(a, b):
         if len(a_slug_authors & b_slug_authors) > 0:
             return (Status.STRONG, OK.SLUG_TITLE_AUTHOR_MATCH)
 
+    if any([a_authors, b_authors]) and not (a_authors and b_authors):
+        if a_release_year == b_release_year and a_title_lower == b_title_lower:
+            if ((dict_key_exists(a, "ext_ids.pmid") and not dict_key_exists(a, "ext_ids.doi")) or
+                (dict_key_exists(b, "ext_ids.pmid") and not dict_key_exists(b, "ext_ids.doi"))):
+                return (Status.STRONG, OK.PMID_DOI_PAIR)
+
     if a_authors and len(a_slug_authors & b_slug_authors) == 0:
         # Before we bail out, run an authors similarity check. TODO: This is
         # not the right place, but lives here now, since these cases popped up
```