From 7ab933683f05a8de8ec416d520690d86b9a46a16 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Thu, 10 Dec 2020 01:02:41 +0100 Subject: add versioned doi pattern --- fuzzycat/common.py | 1 + fuzzycat/verify.py | 13 ++++- notes/2020_11_testruns.md | 9 ++++ tests/data/release/cd5aik2whrd5jlvleyvdq6iwja | 20 ++++++++ tests/data/release/cwqujxztefdghhssb7ysxj7b5m | 70 +++++++++++++++++++++++++++ tests/data/release/hwnqyz7n65eabhlivvkipkytji | 31 ++++++++++++ tests/data/release/kfttghqcsbddvofqd7l4bhtavy | 20 ++++++++ tests/data/verify.csv | 2 + 8 files changed, 165 insertions(+), 1 deletion(-) create mode 100644 tests/data/release/cd5aik2whrd5jlvleyvdq6iwja create mode 100644 tests/data/release/cwqujxztefdghhssb7ysxj7b5m create mode 100644 tests/data/release/hwnqyz7n65eabhlivvkipkytji create mode 100644 tests/data/release/kfttghqcsbddvofqd7l4bhtavy diff --git a/fuzzycat/common.py b/fuzzycat/common.py index b31a233..5bf033c 100644 --- a/fuzzycat/common.py +++ b/fuzzycat/common.py @@ -31,6 +31,7 @@ class OK(str, Enum): SLUG_TITLE_AUTHOR_MATCH = 'ok.slug_title_author_match' TITLE_AUTHOR_MATCH = 'ok.title_author_match' TOKENIZED_AUTHORS = 'ok.tokenized_authors' + VERSIONED_DOI = 'ok.versioned_doi' WORK_ID = 'ok.work_id' diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py index bf59ed8..88e83d5 100644 --- a/fuzzycat/verify.py +++ b/fuzzycat/verify.py @@ -252,6 +252,17 @@ def compare(a, b): except PathAccessError: pass + try: + # https://fatcat.wiki/release/cwqujxztefdghhssb7ysxj7b5m + # https://fatcat.wiki/release/hwnqyz7n65eabhlivvkipkytji + a_doi = glom(a, "ext_ids.doi") + b_doi = glom(b, "ext_ids.doi") + versioned_doi_pattern = '10[.].*/v[0-9]{1,}$' + if re.match(versioned_doi_pattern, a_doi) and re.match(versioned_doi_pattern, b_doi): + return (Status.STRONG, OK.VERSIONED_DOI) + except PathAccessError: + pass + # TODO: datacite specific vocabulary # extra.datacite.relations[].{relationType=IsNewerVersionOf,relatedIdentifier=10...} # beware: we have versions and 
"isPartOf", e.g. https://api.fatcat.wiki/v0/release/ybxygpeypbaq5pfrztu3z2itw4 @@ -335,7 +346,7 @@ def compare(a, b): pass try: - if glom(a, "extra.crossref.type") == "component" and glom(b, "extra.crossref.type"): + if glom(a, "release_type") == "component" and glom(b, "release_type") == "component": a_doi = glom(a, "ext_ids.doi") b_doi = glom(b, "ext_ids.doi") if a_doi != b_doi: diff --git a/notes/2020_11_testruns.md b/notes/2020_11_testruns.md index 11655bc..cfa6c6f 100644 --- a/notes/2020_11_testruns.md +++ b/notes/2020_11_testruns.md @@ -214,10 +214,19 @@ Component. * [ ] https://fatcat.wiki/release/5rcu6myqx5ezjjytzpvsauyut4 https://fatcat.wiki/release/zvsffdeufjb5dbchww7ydqdq3a Status.AMBIGUOUS OK.DUMMY +> pmid * [ ] https://fatcat.wiki/release/f5ebjc63j5dzpct5hsme5j3ote https://fatcat.wiki/release/zeoquc2f4nbmdbmbcbkmkxmtzi Status.AMBIGUOUS OK.DUMMY + +Hard to say (but seems to be a rerun of an article in a "similar" journal). + * [ ] https://fatcat.wiki/release/cd5aik2whrd5jlvleyvdq6iwja https://fatcat.wiki/release/kfttghqcsbddvofqd7l4bhtavy Status.AMBIGUOUS OK.DUMMY + +Ok. 
+ * [ ] https://fatcat.wiki/release/hwnqyz7n65eabhlivvkipkytji https://fatcat.wiki/release/cwqujxztefdghhssb7ysxj7b5m Status.AMBIGUOUS OK.DUMMY + + * [ ] https://fatcat.wiki/release/yespzqkm2zed7n4vhjpkddap5e https://fatcat.wiki/release/5yixxzyl3vh4xd56lwcraowgty Status.AMBIGUOUS OK.DUMMY * [ ] https://fatcat.wiki/release/47opwjqugjecjmiqgukahw6p2m https://fatcat.wiki/release/real7tmfxjan7j3fgkilt7fze4 Status.AMBIGUOUS OK.DUMMY * [ ] https://fatcat.wiki/release/gaf7gjwetrbpzntrp4bt4nxaiy https://fatcat.wiki/release/htsa3mrirndbdjtdangr4mzrdu Status.AMBIGUOUS OK.DUMMY diff --git a/tests/data/release/cd5aik2whrd5jlvleyvdq6iwja b/tests/data/release/cd5aik2whrd5jlvleyvdq6iwja new file mode 100644 index 0000000..b91f1b4 --- /dev/null +++ b/tests/data/release/cd5aik2whrd5jlvleyvdq6iwja @@ -0,0 +1,20 @@ +{ + "abstracts": [], + "contribs": [], + "ext_ids": { + "doi": "10.1021/acs.jmedchem.7b00422.s002" + }, + "extra": { + "crossref": { + "type": "component" + } + }, + "ident": "cd5aik2whrd5jlvleyvdq6iwja", + "publisher": "American Chemical Society (ACS)", + "refs": [], + "release_type": "component", + "revision": "8beac216-ae3c-4ac7-b66f-eb14b6a97326", + "state": "active", + "title": "Tolvaptan-Type Vasopressin Receptor Ligands: Important Role of Axial Chirality in the Active Form", + "work_id": "6wmoa47fynf77olr3unq7trhja" +} diff --git a/tests/data/release/cwqujxztefdghhssb7ysxj7b5m b/tests/data/release/cwqujxztefdghhssb7ysxj7b5m new file mode 100644 index 0000000..050e2b5 --- /dev/null +++ b/tests/data/release/cwqujxztefdghhssb7ysxj7b5m @@ -0,0 +1,70 @@ +{ + "abstracts": [ + { + "content": "Abstract\n

\n Background: Measurement of quality of health care has been largely overlooked and continues to be a major health system bottleneck in monitoring performance and quality to evaluate progress against defined targets for better decision making. Hence, metrics of maternity care are needed to advance from health service contact alone to content of care. We assessed the accuracy of indicators that describe the quality of basic care for childbirth functions both at the individual level as well as at the population level in Northern Ethiopia. Methods: A validation study was conducted by comparing women's self-reported coverage of maternal and newborn health interventions during intra-partum and immediate postpartum care received in primary level care facilities of Northern Ethiopia against a gold standard of direct observation by a trained third party (n=478). Sensitivity, specificity and individual-level reporting accuracy via the area under the receiver operating curve (AUC) and inflation factor (IF) to estimate population-level accuracy for each indicator was applied for validity analysis. Findings: 455(97.5%) of women completed the survey describing health interventions. Thirty-two (43.2%) of the 93-basic quality child birth care indicators that were assessed could be accurately measure at the facility and population level (AUC>0.60 and 0.75\n

", + "mimetype": "application/xml+jats", + "sha1": "6d69e892d6cb1bd11222b6a9e41b03dc6a77bed5" + } + ], + "contribs": [ + { + "creator_id": "htz2j4a44fch5juh5c7dj5eody", + "extra": { + "seq": "first" + }, + "given_name": "Haftom", + "index": 0, + "raw_name": "Haftom Weldearegay", + "role": "author", + "surname": "Weldearegay" + }, + { + "given_name": "Araya", + "index": 1, + "raw_name": "Araya Medhanyie", + "role": "author", + "surname": "Medhanyie" + }, + { + "given_name": "Hagos", + "index": 2, + "raw_name": "Hagos Godefay", + "role": "author", + "surname": "Godefay" + }, + { + "given_name": "Alemayehu", + "index": 3, + "raw_name": "Alemayehu Kahsay", + "role": "author", + "surname": "Kahsay" + } + ], + "ext_ids": { + "doi": "10.21203/rs.2.18406/v2" + }, + "extra": { + "crossref": { + "license": [ + { + "URL": "https://creativecommons.org/licenses/by/4.0/", + "content-version": "unspecified", + "delay-in-days": 0, + "start": "2020-03-26T00:00:00Z" + } + ], + "type": "posted-content" + } + }, + "ident": "cwqujxztefdghhssb7ysxj7b5m", + "license_slug": "CC-BY", + "publisher": "Research Square", + "refs": [], + "release_date": "2020-03-26", + "release_type": "post", + "release_year": 2020, + "revision": "9b5ced4c-5988-483c-857a-44d1c206f9bf", + "state": "active", + "title": "Beyond Health System Contact: Measuring and Validating Quality of Childbirth Care Indicators in Primary Level Facilities of Northern Ethiopia", + "work_id": "4lqckr4udfamneczsq7mogy5yq" +} diff --git a/tests/data/release/hwnqyz7n65eabhlivvkipkytji b/tests/data/release/hwnqyz7n65eabhlivvkipkytji new file mode 100644 index 0000000..8d49edb --- /dev/null +++ b/tests/data/release/hwnqyz7n65eabhlivvkipkytji @@ -0,0 +1,31 @@ +{ + "abstracts": [], + "contribs": [], + "ext_ids": { + "doi": "10.21203/rs.2.18406/v1" + }, + "extra": { + "crossref": { + "license": [ + { + "URL": "https://creativecommons.org/licenses/by/4.0/", + "content-version": "unspecified", + "delay-in-days": 0, + "start": 
"2019-12-10T00:00:00Z" + } + ], + "type": "posted-content" + } + }, + "ident": "hwnqyz7n65eabhlivvkipkytji", + "license_slug": "CC-BY", + "publisher": "Research Square", + "refs": [], + "release_date": "2019-12-10", + "release_type": "post", + "release_year": 2019, + "revision": "fafa138b-cad0-447d-a3e4-5d666d63943b", + "state": "active", + "title": "Beyond Health System Contact: Measuring and Validating Quality of Childbirth Care Indicators in Primary Level Facilities of Northern Ethiopia", + "work_id": "hlaan73lp5hezd4w25ec4ohnhq" +} diff --git a/tests/data/release/kfttghqcsbddvofqd7l4bhtavy b/tests/data/release/kfttghqcsbddvofqd7l4bhtavy new file mode 100644 index 0000000..04965a8 --- /dev/null +++ b/tests/data/release/kfttghqcsbddvofqd7l4bhtavy @@ -0,0 +1,20 @@ +{ + "abstracts": [], + "contribs": [], + "ext_ids": { + "doi": "10.1021/acs.jmedchem.7b00422.s003" + }, + "extra": { + "crossref": { + "type": "component" + } + }, + "ident": "kfttghqcsbddvofqd7l4bhtavy", + "publisher": "American Chemical Society (ACS)", + "refs": [], + "release_type": "component", + "revision": "d72eaec1-8cf5-40fc-a322-e671c640f125", + "state": "active", + "title": "Tolvaptan-Type Vasopressin Receptor Ligands: Important Role of Axial Chirality in the Active Form", + "work_id": "o5migolsxrhrpbaelya3m4orx4" +} diff --git a/tests/data/verify.csv b/tests/data/verify.csv index 1b6d7f0..37f4b6e 100644 --- a/tests/data/verify.csv +++ b/tests/data/verify.csv @@ -141,3 +141,5 @@ s7a4o5v5gfg4tbzna6poyg7nzy,tcro5wr6brhqnf5wettyiauw34,Status.STRONG,OK.CUSTOM_BS eomug3y4afbynhcyzrtcvki55u,zcu2pugta5fttnqltgersan52q,Status.DIFFERENT,Miss.COMPONENT f5ebjc63j5dzpct5hsme5j3ote,zeoquc2f4nbmdbmbcbkmkxmtzi,Status.AMBIGUOUS, zvsffdeufjb5dbchww7ydqdq3a,5rcu6myqx5ezjjytzpvsauyut4,Status.STRONG,OK.PMID_DOI_PAIR +cd5aik2whrd5jlvleyvdq6iwja,kfttghqcsbddvofqd7l4bhtavy,Status.DIFFERENT,Miss.COMPONENT +hwnqyz7n65eabhlivvkipkytji,cwqujxztefdghhssb7ysxj7b5m,Status.STRONG,OK.VERSIONED_DOI -- cgit v1.2.3