aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2020-12-02 00:58:20 +0100
committer Martin Czygan <martin.czygan@gmail.com> 2020-12-02 00:58:20 +0100
commit ba68fa4d91fbcd1dda3363b78bc24ca64ca2546b (patch)
tree b49b02f5c148d6bd7045580a7f9919e7ec364935
parent 34c441b7828694492904108ac307d4aa640ac64d (diff)
download fuzzycat-ba68fa4d91fbcd1dda3363b78bc24ca64ca2546b.tar.gz
fuzzycat-ba68fa4d91fbcd1dda3363b78bc24ca64ca2546b.zip
add case
-rw-r--r-- fuzzycat/common.py 5
-rw-r--r-- fuzzycat/verify.py 7
-rw-r--r-- tests/data/release/he334wpbobegxhptpkvvrufioq 24
-rw-r--r-- tests/data/release/td3ouhgtzbbe7ctevfnldqkoba 24
-rw-r--r-- tests/data/verify.csv 3
5 files changed, 60 insertions, 3 deletions
diff --git a/fuzzycat/common.py b/fuzzycat/common.py
index 34508b7..5cef684 100644
--- a/fuzzycat/common.py
+++ b/fuzzycat/common.py
@@ -17,7 +17,10 @@ class OK(str, Enum):
Reason for assuming we have a match.
"""
ARXIV_VERSION = 'ok.arxiv_version'
+ CUSTOM_BSI_UNDATED = 'ok.custom_bsi_undated'
+ CUSTOM_IEEE_ARXIV = 'ok.custom_ieee_arxiv'
DATACITE_RELATED_ID = 'ok.datacite_related_id'
+ DATACITE_VERSION = 'ok.datacite_version'
DOI = 'ok.doi'
DUMMY = 'ok.dummy'
FIGSHARE_VERSION = 'ok.figshare_version'
@@ -25,8 +28,6 @@ class OK(str, Enum):
SLUG_TITLE_AUTHOR_MATCH = 'ok.slug_title_author_match'
TITLE_AUTHOR_MATCH = 'ok.title_author_match'
TOKENIZED_AUTHORS = 'ok.tokenized_authors'
- CUSTOM_IEEE_ARXIV = 'ok.custom_ieee_arxiv'
- CUSTOM_BSI_UNDATED = 'ok.custom_bsi_undated'
class Miss(str, Enum):
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index e688d49..5977f8e 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -160,6 +160,13 @@ def compare(a, b):
return (Status.AMBIGUOUS, Miss.BLACKLISTED_FRAGMENT)
try:
+ if a_title and a_title == b_title and glom(a, "extra.datacite.metadataVersion") != glom(
+ b, "extra.datacite.metadataVersion"):
+ return (Status.EXACT, OK.DATACITE_VERSION)
+ except PathAccessError:
+ pass
+
+ try:
a_doi = glom(a, "ext_ids.doi")
b_doi = glom(b, "ext_ids.doi")
if a_doi.startswith("10.14288/") and b_doi.startswith("10.14288/") and a_doi != b_doi:
diff --git a/tests/data/release/he334wpbobegxhptpkvvrufioq b/tests/data/release/he334wpbobegxhptpkvvrufioq
new file mode 100644
index 0000000..2c72e00
--- /dev/null
+++ b/tests/data/release/he334wpbobegxhptpkvvrufioq
@@ -0,0 +1,24 @@
+{
+ "abstracts": [],
+ "contribs": [],
+ "ext_ids": {
+ "doi": "10.7916/d88d0n1z"
+ },
+ "extra": {
+ "datacite": {
+ "metadataVersion": 4
+ },
+ "release_month": 8
+ },
+ "ident": "he334wpbobegxhptpkvvrufioq",
+ "publisher": "Columbia University",
+ "refs": [],
+ "release_date": "2017-08-07",
+ "release_stage": "published",
+ "release_type": "article",
+ "release_year": 2017,
+ "revision": "143a4dcf-eab2-4293-b079-d32769e4c550",
+ "state": "active",
+ "title": "Eastern questionnaire, answer sheet for Interviewee 51207, page 048",
+ "work_id": "r5gbob7wuzexdarwje6fiyefsu"
+}
diff --git a/tests/data/release/td3ouhgtzbbe7ctevfnldqkoba b/tests/data/release/td3ouhgtzbbe7ctevfnldqkoba
new file mode 100644
index 0000000..42bebbf
--- /dev/null
+++ b/tests/data/release/td3ouhgtzbbe7ctevfnldqkoba
@@ -0,0 +1,24 @@
+{
+ "abstracts": [],
+ "contribs": [],
+ "ext_ids": {
+ "doi": "10.7916/d8hx24f7"
+ },
+ "extra": {
+ "datacite": {
+ "metadataVersion": 3
+ },
+ "release_month": 8
+ },
+ "ident": "td3ouhgtzbbe7ctevfnldqkoba",
+ "publisher": "Columbia University",
+ "refs": [],
+ "release_date": "2017-08-08",
+ "release_stage": "published",
+ "release_type": "article",
+ "release_year": 2017,
+ "revision": "8eab4949-e0e8-4339-9bb7-5d339e3d5639",
+ "state": "active",
+ "title": "Eastern questionnaire, answer sheet for Interviewee 51207, page 048",
+ "work_id": "ktmybzspenavxevth7artcbx7q"
+}
diff --git a/tests/data/verify.csv b/tests/data/verify.csv
index 741cf15..874baa0 100644
--- a/tests/data/verify.csv
+++ b/tests/data/verify.csv
@@ -7,7 +7,7 @@ s46mfwvb4rdyhlforb6yxg3abi,5hvdhbszafhw5fbu4jnrmesdmu,Status.DIFFERENT,Miss.BOOK
mn26hwbmqvh23jhsecoder3ixq,544v67u75fazfp5qssqzmh6fta,Status.DIFFERENT,Miss.YEAR
4srjsirjhvhvtenz23lg6bqnqu,3czbwace7bh4hkfehzntnddt2i,Status.STRONG,OK.ARXIV_VERSION
vokr6qxyqrc55kyn45dyavr2lq,b5helm53ljdxjpxdnn5zjqpjve,Status.EXACT,OK.TITLE_AUTHOR_MATCH
-kgeynply6vcxdeiluu6es6w72m,cm536ige6bfdfhhesp26ibfdva,Status.EXACT,OK.TITLE_AUTHOR_MATCH
+kgeynply6vcxdeiluu6es6w72m,cm536ige6bfdfhhesp26ibfdva,Status.EXACT,OK.DATACITE_VERSION
knwc764q25f33ib6qnwo7pyaui,n74tqiqi5jcx5d6vl5f7lpokaa,Status.DIFFERENT,Miss.CONTRIB_INTERSECTION_EMPTY
eo4qptzoqrholjslj7nemlne2y,zisq3tsezjcejinlpf7qgk6z2i,Status.DIFFERENT,Miss.YEAR
crsd5c2fhvd7hodbd4trne3lgi,4547ybo5hvf4xhlh5triaccxai,Status.DIFFERENT,Miss.YEAR
@@ -104,3 +104,4 @@ omjj75lv4rayvcqmgmicnzf5ye,xxfujnvafrazbjw7kvh7bhmuvy,,
63g4ukdxajcqhdytqla6du3t3u,rz72bzfevzeofdeb342c6z45qu,Status.DIFFERENT,Miss.CUSTOM_PREFIX_10_14288
ij3yuoh6lrh3tkrv5o7gfk6yyi,tur236mqljdfdnlzbbnks2sily,Status.STRONG,OK.CUSTOM_IEEE_ARXIV
neznj5fb4nf3tdqnotnbe34b6e,gcqdvvjiq5bphl7lpc4invi4vy,Status.STRONG,OK.CUSTOM_BSI_UNDATED
+he334wpbobegxhptpkvvrufioq,td3ouhgtzbbe7ctevfnldqkoba,Status.EXACT,OK.DATACITE_VERSION