about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2020-12-10 01:02:41 +0100
committer Martin Czygan <martin.czygan@gmail.com> 2020-12-10 01:02:41 +0100
commit 7ab933683f05a8de8ec416d520690d86b9a46a16 (patch)
tree 8f8fb8b47ddd1cfb356ca72949e65a6394160ee4
parent 4a74839914527d191cc7610657c1c25e92a6e19e (diff)
download fuzzycat-7ab933683f05a8de8ec416d520690d86b9a46a16.tar.gz
fuzzycat-7ab933683f05a8de8ec416d520690d86b9a46a16.zip
add versioned doi pattern
-rw-r--r-- fuzzycat/common.py 1
-rw-r--r-- fuzzycat/verify.py 13
-rw-r--r-- notes/2020_11_testruns.md 9
-rw-r--r-- tests/data/release/cd5aik2whrd5jlvleyvdq6iwja 20
-rw-r--r-- tests/data/release/cwqujxztefdghhssb7ysxj7b5m 70
-rw-r--r-- tests/data/release/hwnqyz7n65eabhlivvkipkytji 31
-rw-r--r-- tests/data/release/kfttghqcsbddvofqd7l4bhtavy 20
-rw-r--r-- tests/data/verify.csv 2
8 files changed, 165 insertions, 1 deletions
diff --git a/fuzzycat/common.py b/fuzzycat/common.py
index b31a233..5bf033c 100644
--- a/fuzzycat/common.py
+++ b/fuzzycat/common.py
@@ -31,6 +31,7 @@ class OK(str, Enum):
SLUG_TITLE_AUTHOR_MATCH = 'ok.slug_title_author_match'
TITLE_AUTHOR_MATCH = 'ok.title_author_match'
TOKENIZED_AUTHORS = 'ok.tokenized_authors'
+ VERSIONED_DOI = 'ok.versioned_doi'
WORK_ID = 'ok.work_id'
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index bf59ed8..88e83d5 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -252,6 +252,17 @@ def compare(a, b):
except PathAccessError:
pass
+ try:
+ # https://fatcat.wiki/release/cwqujxztefdghhssb7ysxj7b5m
+ # https://fatcat.wiki/release/hwnqyz7n65eabhlivvkipkytji
+ a_doi = glom(a, "ext_ids.doi")
+ b_doi = glom(b, "ext_ids.doi")
+ versioned_doi_pattern = '10[.].*/v[0-9]{1,}$'
+ if re.match(versioned_doi_pattern, a_doi) and re.match(versioned_doi_pattern, b_doi):
+ return (Status.STRONG, OK.VERSIONED_DOI)
+ except PathAccessError:
+ pass
+
# TODO: datacite specific vocabulary
# extra.datacite.relations[].{relationType=IsNewerVersionOf,relatedIdentifier=10...}
# beware: we have versions and "isPartOf", e.g. https://api.fatcat.wiki/v0/release/ybxygpeypbaq5pfrztu3z2itw4
@@ -335,7 +346,7 @@ def compare(a, b):
pass
try:
- if glom(a, "extra.crossref.type") == "component" and glom(b, "extra.crossref.type"):
+ if glom(a, "release_type") == "component" and glom(b, "release_type") == "component":
a_doi = glom(a, "ext_ids.doi")
b_doi = glom(b, "ext_ids.doi")
if a_doi != b_doi:
diff --git a/notes/2020_11_testruns.md b/notes/2020_11_testruns.md
index 11655bc..cfa6c6f 100644
--- a/notes/2020_11_testruns.md
+++ b/notes/2020_11_testruns.md
@@ -214,10 +214,19 @@ Component.
* [ ] https://fatcat.wiki/release/5rcu6myqx5ezjjytzpvsauyut4 https://fatcat.wiki/release/zvsffdeufjb5dbchww7ydqdq3a Status.AMBIGUOUS OK.DUMMY
+> pmid
* [ ] https://fatcat.wiki/release/f5ebjc63j5dzpct5hsme5j3ote https://fatcat.wiki/release/zeoquc2f4nbmdbmbcbkmkxmtzi Status.AMBIGUOUS OK.DUMMY
+
+Hard to say (but this seems to be a rerun of an article in a "similar" journal).
+
* [ ] https://fatcat.wiki/release/cd5aik2whrd5jlvleyvdq6iwja https://fatcat.wiki/release/kfttghqcsbddvofqd7l4bhtavy Status.AMBIGUOUS OK.DUMMY
+
+Ok.
+
* [ ] https://fatcat.wiki/release/hwnqyz7n65eabhlivvkipkytji https://fatcat.wiki/release/cwqujxztefdghhssb7ysxj7b5m Status.AMBIGUOUS OK.DUMMY
+
+
* [ ] https://fatcat.wiki/release/yespzqkm2zed7n4vhjpkddap5e https://fatcat.wiki/release/5yixxzyl3vh4xd56lwcraowgty Status.AMBIGUOUS OK.DUMMY
* [ ] https://fatcat.wiki/release/47opwjqugjecjmiqgukahw6p2m https://fatcat.wiki/release/real7tmfxjan7j3fgkilt7fze4 Status.AMBIGUOUS OK.DUMMY
* [ ] https://fatcat.wiki/release/gaf7gjwetrbpzntrp4bt4nxaiy https://fatcat.wiki/release/htsa3mrirndbdjtdangr4mzrdu Status.AMBIGUOUS OK.DUMMY
diff --git a/tests/data/release/cd5aik2whrd5jlvleyvdq6iwja b/tests/data/release/cd5aik2whrd5jlvleyvdq6iwja
new file mode 100644
index 0000000..b91f1b4
--- /dev/null
+++ b/tests/data/release/cd5aik2whrd5jlvleyvdq6iwja
@@ -0,0 +1,20 @@
+{
+ "abstracts": [],
+ "contribs": [],
+ "ext_ids": {
+ "doi": "10.1021/acs.jmedchem.7b00422.s002"
+ },
+ "extra": {
+ "crossref": {
+ "type": "component"
+ }
+ },
+ "ident": "cd5aik2whrd5jlvleyvdq6iwja",
+ "publisher": "American Chemical Society (ACS)",
+ "refs": [],
+ "release_type": "component",
+ "revision": "8beac216-ae3c-4ac7-b66f-eb14b6a97326",
+ "state": "active",
+ "title": "Tolvaptan-Type Vasopressin Receptor Ligands: Important Role of Axial Chirality in the Active Form",
+ "work_id": "6wmoa47fynf77olr3unq7trhja"
+}
diff --git a/tests/data/release/cwqujxztefdghhssb7ysxj7b5m b/tests/data/release/cwqujxztefdghhssb7ysxj7b5m
new file mode 100644
index 0000000..050e2b5
--- /dev/null
+++ b/tests/data/release/cwqujxztefdghhssb7ysxj7b5m
@@ -0,0 +1,70 @@
+{
+ "abstracts": [
+ {
+ "content": "<title>Abstract</title>\n <p>\n Background: Measurement of quality of health care has been largely overlooked and continues to be a major health system bottleneck in monitoring performance and quality to evaluate progress against defined targets for better decision making. Hence, metrics of maternity care are needed to advance from health service contact alone to content of care. We assessed the accuracy of indicators that describe the quality of basic care for childbirth functions both at the individual level as well as at the population level in Northern Ethiopia. Methods: A validation study was conducted by comparing women's self-reported coverage of maternal and newborn health interventions during intra-partum and immediate postpartum care received in primary level care facilities of Northern Ethiopia against a gold standard of direct observation by a trained third party (n=478). Sensitivity, specificity and individual-level reporting accuracy via the area under the receiver operating curve (AUC) and inflation factor (IF) to estimate population-level accuracy for each indicator was applied for validity analysis. Findings: 455(97.5%) of women completed the survey describing health interventions. Thirty-two (43.2%) of the 93-basic quality child birth care indicators that were assessed could be accurately measure at the facility and population level (AUC&gt;0.60 and 0.75\n </p>",
+ "mimetype": "application/xml+jats",
+ "sha1": "6d69e892d6cb1bd11222b6a9e41b03dc6a77bed5"
+ }
+ ],
+ "contribs": [
+ {
+ "creator_id": "htz2j4a44fch5juh5c7dj5eody",
+ "extra": {
+ "seq": "first"
+ },
+ "given_name": "Haftom",
+ "index": 0,
+ "raw_name": "Haftom Weldearegay",
+ "role": "author",
+ "surname": "Weldearegay"
+ },
+ {
+ "given_name": "Araya",
+ "index": 1,
+ "raw_name": "Araya Medhanyie",
+ "role": "author",
+ "surname": "Medhanyie"
+ },
+ {
+ "given_name": "Hagos",
+ "index": 2,
+ "raw_name": "Hagos Godefay",
+ "role": "author",
+ "surname": "Godefay"
+ },
+ {
+ "given_name": "Alemayehu",
+ "index": 3,
+ "raw_name": "Alemayehu Kahsay",
+ "role": "author",
+ "surname": "Kahsay"
+ }
+ ],
+ "ext_ids": {
+ "doi": "10.21203/rs.2.18406/v2"
+ },
+ "extra": {
+ "crossref": {
+ "license": [
+ {
+ "URL": "https://creativecommons.org/licenses/by/4.0/",
+ "content-version": "unspecified",
+ "delay-in-days": 0,
+ "start": "2020-03-26T00:00:00Z"
+ }
+ ],
+ "type": "posted-content"
+ }
+ },
+ "ident": "cwqujxztefdghhssb7ysxj7b5m",
+ "license_slug": "CC-BY",
+ "publisher": "Research Square",
+ "refs": [],
+ "release_date": "2020-03-26",
+ "release_type": "post",
+ "release_year": 2020,
+ "revision": "9b5ced4c-5988-483c-857a-44d1c206f9bf",
+ "state": "active",
+ "title": "Beyond Health System Contact: Measuring and Validating Quality of Childbirth Care Indicators in Primary Level Facilities of Northern Ethiopia",
+ "work_id": "4lqckr4udfamneczsq7mogy5yq"
+}
diff --git a/tests/data/release/hwnqyz7n65eabhlivvkipkytji b/tests/data/release/hwnqyz7n65eabhlivvkipkytji
new file mode 100644
index 0000000..8d49edb
--- /dev/null
+++ b/tests/data/release/hwnqyz7n65eabhlivvkipkytji
@@ -0,0 +1,31 @@
+{
+ "abstracts": [],
+ "contribs": [],
+ "ext_ids": {
+ "doi": "10.21203/rs.2.18406/v1"
+ },
+ "extra": {
+ "crossref": {
+ "license": [
+ {
+ "URL": "https://creativecommons.org/licenses/by/4.0/",
+ "content-version": "unspecified",
+ "delay-in-days": 0,
+ "start": "2019-12-10T00:00:00Z"
+ }
+ ],
+ "type": "posted-content"
+ }
+ },
+ "ident": "hwnqyz7n65eabhlivvkipkytji",
+ "license_slug": "CC-BY",
+ "publisher": "Research Square",
+ "refs": [],
+ "release_date": "2019-12-10",
+ "release_type": "post",
+ "release_year": 2019,
+ "revision": "fafa138b-cad0-447d-a3e4-5d666d63943b",
+ "state": "active",
+ "title": "Beyond Health System Contact: Measuring and Validating Quality of Childbirth Care Indicators in Primary Level Facilities of Northern Ethiopia",
+ "work_id": "hlaan73lp5hezd4w25ec4ohnhq"
+}
diff --git a/tests/data/release/kfttghqcsbddvofqd7l4bhtavy b/tests/data/release/kfttghqcsbddvofqd7l4bhtavy
new file mode 100644
index 0000000..04965a8
--- /dev/null
+++ b/tests/data/release/kfttghqcsbddvofqd7l4bhtavy
@@ -0,0 +1,20 @@
+{
+ "abstracts": [],
+ "contribs": [],
+ "ext_ids": {
+ "doi": "10.1021/acs.jmedchem.7b00422.s003"
+ },
+ "extra": {
+ "crossref": {
+ "type": "component"
+ }
+ },
+ "ident": "kfttghqcsbddvofqd7l4bhtavy",
+ "publisher": "American Chemical Society (ACS)",
+ "refs": [],
+ "release_type": "component",
+ "revision": "d72eaec1-8cf5-40fc-a322-e671c640f125",
+ "state": "active",
+ "title": "Tolvaptan-Type Vasopressin Receptor Ligands: Important Role of Axial Chirality in the Active Form",
+ "work_id": "o5migolsxrhrpbaelya3m4orx4"
+}
diff --git a/tests/data/verify.csv b/tests/data/verify.csv
index 1b6d7f0..37f4b6e 100644
--- a/tests/data/verify.csv
+++ b/tests/data/verify.csv
@@ -141,3 +141,5 @@ s7a4o5v5gfg4tbzna6poyg7nzy,tcro5wr6brhqnf5wettyiauw34,Status.STRONG,OK.CUSTOM_BS
eomug3y4afbynhcyzrtcvki55u,zcu2pugta5fttnqltgersan52q,Status.DIFFERENT,Miss.COMPONENT
f5ebjc63j5dzpct5hsme5j3ote,zeoquc2f4nbmdbmbcbkmkxmtzi,Status.AMBIGUOUS,
zvsffdeufjb5dbchww7ydqdq3a,5rcu6myqx5ezjjytzpvsauyut4,Status.STRONG,OK.PMID_DOI_PAIR
+cd5aik2whrd5jlvleyvdq6iwja,kfttghqcsbddvofqd7l4bhtavy,Status.DIFFERENT,Miss.COMPONENT
+hwnqyz7n65eabhlivvkipkytji,cwqujxztefdghhssb7ysxj7b5m,Status.STRONG,OK.VERSIONED_DOI