diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-12-10 01:02:41 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-12-10 01:02:41 +0100 |
commit | 7ab933683f05a8de8ec416d520690d86b9a46a16 (patch) | |
tree | 8f8fb8b47ddd1cfb356ca72949e65a6394160ee4 /fuzzycat | |
parent | 4a74839914527d191cc7610657c1c25e92a6e19e (diff) | |
download | fuzzycat-7ab933683f05a8de8ec416d520690d86b9a46a16.tar.gz fuzzycat-7ab933683f05a8de8ec416d520690d86b9a46a16.zip |
add versioned doi pattern
Diffstat (limited to 'fuzzycat')
-rw-r--r-- | fuzzycat/common.py | 1 | ||||
-rw-r--r-- | fuzzycat/verify.py | 13 |
2 files changed, 13 insertions, 1 deletion
```diff
diff --git a/fuzzycat/common.py b/fuzzycat/common.py
index b31a233..5bf033c 100644
--- a/fuzzycat/common.py
+++ b/fuzzycat/common.py
@@ -31,6 +31,7 @@ class OK(str, Enum):
     SLUG_TITLE_AUTHOR_MATCH = 'ok.slug_title_author_match'
     TITLE_AUTHOR_MATCH = 'ok.title_author_match'
     TOKENIZED_AUTHORS = 'ok.tokenized_authors'
+    VERSIONED_DOI = 'ok.versioned_doi'
     WORK_ID = 'ok.work_id'
 
 
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index bf59ed8..88e83d5 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -252,6 +252,17 @@ def compare(a, b):
     except PathAccessError:
         pass
 
+    try:
+        # https://fatcat.wiki/release/cwqujxztefdghhssb7ysxj7b5m
+        # https://fatcat.wiki/release/hwnqyz7n65eabhlivvkipkytji
+        a_doi = glom(a, "ext_ids.doi")
+        b_doi = glom(b, "ext_ids.doi")
+        versioned_doi_pattern = '10[.].*/v[0-9]{1,}$'
+        if re.match(versioned_doi_pattern, a_doi) and re.match(versioned_doi_pattern, b_doi):
+            return (Status.STRONG, OK.VERSIONED_DOI)
+    except PathAccessError:
+        pass
+
     # TODO: datacite specific vocabulary
     # extra.datacite.relations[].{relationType=IsNewerVersionOf,relatedIdentifier=10...}
     # beware: we have versions and "isPartOf", e.g. https://api.fatcat.wiki/v0/release/ybxygpeypbaq5pfrztu3z2itw4
@@ -335,7 +346,7 @@ def compare(a, b):
         pass
 
     try:
-        if glom(a, "extra.crossref.type") == "component" and glom(b, "extra.crossref.type"):
+        if glom(a, "release_type") == "component" and glom(b, "release_type") == "component":
             a_doi = glom(a, "ext_ids.doi")
             b_doi = glom(b, "ext_ids.doi")
             if a_doi != b_doi:
```