about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- fuzzycat/common.py 1
-rw-r--r-- fuzzycat/verify.py 12
-rw-r--r-- notes/todo.md 1
-rw-r--r-- tests/data/release/phqelg6oc5hs5dehhgmodcnh5u 24
-rw-r--r-- tests/data/release/zlywxoy7cfexvaatziqp4ip5m4 24
-rw-r--r-- tests/data/verify.csv 1
-rw-r--r-- tests/test_verify.py 4
7 files changed, 65 insertions, 2 deletions
diff --git a/fuzzycat/common.py b/fuzzycat/common.py
index 60f42ab..07f62df 100644
--- a/fuzzycat/common.py
+++ b/fuzzycat/common.py
@@ -45,6 +45,7 @@ class Miss(str, Enum):
CONTRIB_INTERSECTION_EMPTY = 'miss.contrib_intersection_empty'
CUSTOM_IOP_MA_PATTERN = 'miss.custom_iop_ma_pattern'
CUSTOM_PREFIX_10_14288 = 'miss.custom_prefix_10_14288'
+ CUSTOM_PREFIX_10_7916 = 'miss.custom_prefix_10_7916'
CUSTOM_VHS = 'miss.vhs' # https://fatcat.wiki/release/44gk5ben5vghljq6twm7lwmxla
DATASET_DOI = 'miss.dataset_doi'
NUM_DIFF = 'miss.num_diff'
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index 993b7c9..6d824d4 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -212,6 +212,7 @@ def compare(a, b):
if re.match(r"appendix ?[^ ]*$", a_title_lower):
return (Status.AMBIGUOUS, Miss.APPENDIX)
+
try:
# TODO: figshare versions, "xxx.v1"
FIGSHARE_PREFIX = "10.6084/"
@@ -332,6 +333,17 @@ def compare(a, b):
if a_slug_title == b_slug_title:
try:
+ # https://dlc.library.columbia.edu/lcaaj/cul:p5hqbzkhxb,
+ # https://dlc.library.columbia.edu/lcaaj/cul:5tb2rbp0nj
+ a_doi = glom(a, "ext_ids.doi")
+ b_doi = glom(b, "ext_ids.doi")
+ if has_doi_prefix(a_doi, "10.7916") and has_doi_prefix(b_doi, "10.7916"):
+ return (Status.AMBIGUOUS, Miss.CUSTOM_PREFIX_10_7916)
+ except PathAccessError:
+ pass
+
+ if a_slug_title == b_slug_title:
+ try:
a_subtitles = glom(a, "extra.subtitle") or []
b_subtitles = glom(b, "extra.subtitle") or []
for a_sub in a_subtitles:
diff --git a/notes/todo.md b/notes/todo.md
index 97313ce..b3474f8 100644
--- a/notes/todo.md
+++ b/notes/todo.md
@@ -91,3 +91,4 @@ Blacklist fragment.
* [ ] https://fatcat.wiki/release/zlywxoy7cfexvaatziqp4ip5m4 https://fatcat.wiki/release/phqelg6oc5hs5dehhgmodcnh5u Status.AMBIGUOUS OK.DUMMY
+> one item contains more metadata, but the physical entity seems to be the same; 0058904_001 vs 0058904
diff --git a/tests/data/release/phqelg6oc5hs5dehhgmodcnh5u b/tests/data/release/phqelg6oc5hs5dehhgmodcnh5u
new file mode 100644
index 0000000..2386a7f
--- /dev/null
+++ b/tests/data/release/phqelg6oc5hs5dehhgmodcnh5u
@@ -0,0 +1,24 @@
+{
+ "abstracts": [],
+ "contribs": [],
+ "ext_ids": {
+ "doi": "10.7916/d8rr2t04"
+ },
+ "extra": {
+ "datacite": {
+ "metadataVersion": 3
+ },
+ "release_month": 8
+ },
+ "ident": "phqelg6oc5hs5dehhgmodcnh5u",
+ "publisher": "Columbia University",
+ "refs": [],
+ "release_date": "2017-08-17",
+ "release_stage": "published",
+ "release_type": "article",
+ "release_year": 2017,
+ "revision": "7fa57bde-8630-47a2-9f33-f87257e77679",
+ "state": "active",
+ "title": "Eastern questionnaire, answer sheet for Interviewee 52223, page 194",
+ "work_id": "sqm5fkxxnjhf7fo2ln4lzgevja"
+}
diff --git a/tests/data/release/zlywxoy7cfexvaatziqp4ip5m4 b/tests/data/release/zlywxoy7cfexvaatziqp4ip5m4
new file mode 100644
index 0000000..4ed1fe3
--- /dev/null
+++ b/tests/data/release/zlywxoy7cfexvaatziqp4ip5m4
@@ -0,0 +1,24 @@
+{
+ "abstracts": [],
+ "contribs": [],
+ "ext_ids": {
+ "doi": "10.7916/d8n0229j"
+ },
+ "extra": {
+ "datacite": {
+ "metadataVersion": 2
+ },
+ "release_month": 8
+ },
+ "ident": "zlywxoy7cfexvaatziqp4ip5m4",
+ "publisher": "Columbia University",
+ "refs": [],
+ "release_date": "2017-08-17",
+ "release_stage": "published",
+ "release_type": "article",
+ "release_year": 2017,
+ "revision": "766922dd-17c4-4d2c-bdf5-ece79413d300",
+ "state": "active",
+ "title": "Eastern questionnaire, answer sheet for Interviewee 52223, page 194",
+ "work_id": "rnhzlq2y5fbe5dnynfh7qnnlf4"
+}
diff --git a/tests/data/verify.csv b/tests/data/verify.csv
index 058a471..16ee457 100644
--- a/tests/data/verify.csv
+++ b/tests/data/verify.csv
@@ -112,3 +112,4 @@ zkqujozrx5cnjitmglclt6heqq,urr2gs4dsbbwdl7asgyqnwwtxy,Status.AMBIGUOUS,Miss.BLAC
iwtrxnov2repzlgoi2at2md6tm,s5hm65waingwjmgf3plu76hzu4,Status.AMBIGUOUS,
iwtrxnov2repzlgoi2at2md6tm,t6k5mec4xjdebcs3iv3uzs3yvu,Status.AMBIGUOUS,
s5hm65waingwjmgf3plu76hzu4,t6k5mec4xjdebcs3iv3uzs3yvu,Status.AMBIGUOUS,
+zlywxoy7cfexvaatziqp4ip5m4,phqelg6oc5hs5dehhgmodcnh5u,Status.EXACT,OK.DATACITE_VERSION
diff --git a/tests/test_verify.py b/tests/test_verify.py
index a80dcda..533a5ba 100644
--- a/tests/test_verify.py
+++ b/tests/test_verify.py
@@ -46,8 +46,8 @@ def test_compare():
.format(a=a, b=b, base=FATCAT_BASE_URL, status=status, reason=reason))
continue
assert status_mapping[
- expected_status] == status, "status: want {}, got {} for {} {}".format(
- expected_status, status, a, b)
+ expected_status] == status, "status: want {}, got {} {} for {} {}".format(
+ expected_status, status, reason, a, b)
if expected_reason:
assert expected_reason.lower() == reason.lower(
), "reason [{base}release/{a} {base}release/{b}]: want {reason}, got {expected_reason}".format(