aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Czygan <martin.czygan@gmail.com>2020-12-09 01:42:16 +0100
committerMartin Czygan <martin.czygan@gmail.com>2020-12-09 01:42:16 +0100
commit4b5d71b8ca085c102fff1cf6629fa58582873d27 (patch)
tree982b306b9a4a1230f51c11d27b3a18a37716937b
parent26c71fe52d4cabc3cca8b29f6fa3d99c66e5f912 (diff)
downloadfuzzycat-4b5d71b8ca085c102fff1cf6629fa58582873d27.tar.gz
fuzzycat-4b5d71b8ca085c102fff1cf6629fa58582873d27.zip
add another case
-rw-r--r--Makefile2
-rw-r--r--fuzzycat/common.py1
-rw-r--r--fuzzycat/verify.py14
-rw-r--r--notes/2020_11_testruns.md6
-rw-r--r--tests/data/release/idpgijvcsnbqrgs2dg36vzzdzm29
-rw-r--r--tests/data/release/wm2p5fznwffknjx56lvmr7hn4q29
-rw-r--r--tests/data/verify.csv3
7 files changed, 80 insertions, 4 deletions
diff --git a/Makefile b/Makefile
index a0e2946..9354f76 100644
--- a/Makefile
+++ b/Makefile
@@ -31,7 +31,7 @@ cov: ## Run coverage report
.PHONY: test
test: ## Run coverage report
- pytest -o log_cli=true -v fuzzycat/*.py tests/*.py
+ pytest -o log_cli=true -s -vvv fuzzycat/*.py tests/*.py
.PHONY: lint
lint: $(PY_FILES)
diff --git a/fuzzycat/common.py b/fuzzycat/common.py
index d87bef4..977db2c 100644
--- a/fuzzycat/common.py
+++ b/fuzzycat/common.py
@@ -57,3 +57,4 @@ class Miss(str, Enum):
SUBTITLE = 'miss.subtitle'
TITLE_FILENAME = 'miss.title_filename'
YEAR = 'miss.year'
+ SHARED_DOI_PREFIX = 'miss.shared_doi_prefix'
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index e945a15..dd2ced8 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -437,6 +437,20 @@ def compare(a, b):
(dict_key_exists(b, "ext_ids.pmid") and not dict_key_exists(b, "ext_ids.doi"))):
return (Status.STRONG, OK.PMID_DOI_PAIR)
+ # Publications from the same container with a different DOI or year are
+ # probably different.
+ try:
+ a_container_id = glom(a, "container_id")
+ b_container_id = glom(b, "container_id")
+ a_doi = glom(a, "ext_ids.doi")
+ b_doi = glom(b, "ext_ids.doi")
+
+ if a_container_id == b_container_id and a_doi != b_doi and not has_doi_prefix(
+ a_doi, "10.1126"):
+ return (Status.DIFFERENT, Miss.SHARED_DOI_PREFIX)
+ except PathAccessError:
+ pass
+
if a_authors and len(a_slug_authors & b_slug_authors) == 0:
# Before we bail out, run an authors similarity check. TODO: This is
# not the right place, but lives here now, since these cases popped up
diff --git a/notes/2020_11_testruns.md b/notes/2020_11_testruns.md
index edc39d0..6dd2ec8 100644
--- a/notes/2020_11_testruns.md
+++ b/notes/2020_11_testruns.md
@@ -133,10 +133,12 @@ Defer.
* [ ] https://fatcat.wiki/release/7x7tszf54zggvp4xkrhakp667u https://fatcat.wiki/release/eqcgtpav3na5jh56o5vjsvb4ei Status.AMBIGUOUS OK.DUMMY
-Same, pubmed id only and oxfordjournals doi, same year.
-
+Same, pubmed id only and oxfordjournals doi, same year. New: `PMID_DOI_PAIR`
* [ ] https://fatcat.wiki/release/idpgijvcsnbqrgs2dg36vzzdzm https://fatcat.wiki/release/wm2p5fznwffknjx56lvmr7hn4q Status.AMBIGUOUS OK.DUMMY
+
+
+
* [ ] https://fatcat.wiki/release/nqcfu4il45aixekvk3rwflahdm https://fatcat.wiki/release/72uzveph65ce7kfdct2wpgh5j4 Status.AMBIGUOUS OK.DUMMY
* [ ] https://fatcat.wiki/release/zizw6bgxu5cnxfx5h3v7q7gute https://fatcat.wiki/release/jwh6xci4m5dktmea6bphhc3mjy Status.AMBIGUOUS OK.DUMMY
* [ ] https://fatcat.wiki/release/b7bbygyawzdsthai7j7rmztrxe https://fatcat.wiki/release/mvvbim7kdffvtosuldtv5m3uy4 Status.AMBIGUOUS OK.DUMMY
diff --git a/tests/data/release/idpgijvcsnbqrgs2dg36vzzdzm b/tests/data/release/idpgijvcsnbqrgs2dg36vzzdzm
new file mode 100644
index 0000000..d3be763
--- /dev/null
+++ b/tests/data/release/idpgijvcsnbqrgs2dg36vzzdzm
@@ -0,0 +1,29 @@
+{
+ "abstracts": [],
+ "container_id": "ytowgmaklbbvlgtjcokeyrawb4",
+ "contribs": [],
+ "ext_ids": {
+ "doi": "10.1016/s0026-0576(00)83224-9"
+ },
+ "extra": {
+ "crossref": {
+ "alternative-id": [
+ "S0026057600832249"
+ ],
+ "type": "journal-article"
+ }
+ },
+ "ident": "idpgijvcsnbqrgs2dg36vzzdzm",
+ "language": "en",
+ "pages": "33",
+ "publisher": "Elsevier BV",
+ "refs": [],
+ "release_stage": "published",
+ "release_type": "article-journal",
+ "release_year": 2000,
+ "revision": "150ceb07-2033-4488-8144-d14bf2241c84",
+ "state": "active",
+ "title": "Maxi-Blast Inc.",
+ "volume": "98",
+ "work_id": "xj5uedooxza3xjftmf5rurkvoi"
+}
diff --git a/tests/data/release/wm2p5fznwffknjx56lvmr7hn4q b/tests/data/release/wm2p5fznwffknjx56lvmr7hn4q
new file mode 100644
index 0000000..f06160e
--- /dev/null
+++ b/tests/data/release/wm2p5fznwffknjx56lvmr7hn4q
@@ -0,0 +1,29 @@
+{
+ "abstracts": [],
+ "container_id": "ytowgmaklbbvlgtjcokeyrawb4",
+ "contribs": [],
+ "ext_ids": {
+ "doi": "10.1016/s0026-0576(01)80334-2"
+ },
+ "extra": {
+ "crossref": {
+ "alternative-id": [
+ "S0026057601803342"
+ ],
+ "type": "journal-article"
+ }
+ },
+ "ident": "wm2p5fznwffknjx56lvmr7hn4q",
+ "language": "en",
+ "pages": "36",
+ "publisher": "Elsevier BV",
+ "refs": [],
+ "release_stage": "published",
+ "release_type": "article-journal",
+ "release_year": 1999,
+ "revision": "805d5835-bcb3-439a-916f-0e9996fbad87",
+ "state": "active",
+ "title": "Maxi-Blast Inc.",
+ "volume": "97",
+ "work_id": "frfzf2g6srcs3b7wcdp6wgamzi"
+}
diff --git a/tests/data/verify.csv b/tests/data/verify.csv
index e03eec4..f631cf4 100644
--- a/tests/data/verify.csv
+++ b/tests/data/verify.csv
@@ -94,7 +94,7 @@ fuaz2iolhjegfpdmob3i3efvgm,uxzn4nznrfbttivwzdc7noptku,Status.EXACT,OK.TITLE_AUTH
7j2dsplr45bhvdtrhqa7hykwka,pxzy4k45xjhgfgw6znf5xjayfa,,
7j2dsplr45bhvdtrhqa7hykwka,ud3tzdfacncvnkj232lkvvg34q,Status.EXACT,OK.DOI
pxzy4k45xjhgfgw6znf5xjayfa,ud3tzdfacncvnkj232lkvvg34q,,
-b5p5i7phjfejhiecjaz4arkp3m,rzicki3gcjayxaic7ckyx6bcmq,Status.DIFFERENT,Miss.CONTRIB_INTERSECTION_EMPTY
+b5p5i7phjfejhiecjaz4arkp3m,rzicki3gcjayxaic7ckyx6bcmq,Status.DIFFERENT,Miss.SHARED_DOI_PREFIX
bkonm3q2dbegde3i4sl6h6lkvq,hhyknlu55vdezg3xsqxa4as2eu,Status.EXACT,OK.TITLE_AUTHOR_MATCH
bkonm3q2dbegde3i4sl6h6lkvq,s7mpon5havhx3eil563hcz6anu,Status.EXACT,OK.DOI
hhyknlu55vdezg3xsqxa4as2eu,s7mpon5havhx3eil563hcz6anu,Status.EXACT,OK.TITLE_AUTHOR_MATCH
@@ -128,3 +128,4 @@ mz6a32xbp5f67i2cnbco2hmzj4,fo5dsqeocfekfhqdzgqyng3z6q,Status.AMBIGUOUS,Miss.CUST
g2swo5fewnhv3ihmlpl32sojr4,ab2q56gokfdmzpccrmwfcdljgy,Status.AMBIGUOUS,Miss.CUSTOM_PREFIX_10_5860_CHOICE_REVIEW
tmlg73royrdwdhl6nijf6m7vzy,3w4tibll4rdernjrn4hkkyqsem,Status.DIFFERENT,Miss.CONTRIB_INTERSECTION_EMPTY
eqcgtpav3na5jh56o5vjsvb4ei,7x7tszf54zggvp4xkrhakp667u,Status.STRONG,OK.PMID_DOI_PAIR
+wm2p5fznwffknjx56lvmr7hn4q,idpgijvcsnbqrgs2dg36vzzdzm,Status.DIFFERENT,Miss.SHARED_DOI_PREFIX