about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- fuzzycat/common.py 1
-rw-r--r-- fuzzycat/verify.py 12
-rw-r--r-- notes/todo.md 12
-rw-r--r-- tests/data/release/63g4ukdxajcqhdytqla6du3t3u 21
-rw-r--r-- tests/data/release/rz72bzfevzeofdeb342c6z45qu 21
-rw-r--r-- tests/data/verify.csv 1
6 files changed, 68 insertions, 0 deletions
diff --git a/fuzzycat/common.py b/fuzzycat/common.py
index 2f3d1fd..3973b1e 100644
--- a/fuzzycat/common.py
+++ b/fuzzycat/common.py
@@ -47,3 +47,4 @@ class Miss(str, Enum):
SUBTITLE = 'miss.subtitle'
TITLE_FILENAME = 'miss.title_filename'
YEAR = 'miss.year'
+ CUSTOM_PREFIX_10_14288 = 'miss.custom_prefix_10_14288'
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index 9848735..7543065 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -159,6 +159,18 @@ def compare(a, b):
if fragment in a_title_lower:
return (Status.AMBIGUOUS, Miss.BLACKLISTED_FRAGMENT)
+ try:
+ a_doi = glom(a, "ext_ids.doi")
+ b_doi = glom(b, "ext_ids.doi")
+ if a_doi.startswith("10.14288/") and b_doi.startswith("10.14288/") and a_doi != b_doi:
+ # UBC metadata slightly off;
+ # https://fatcat.wiki/release/63g4ukdxajcqhdytqla6du3t3u,
+ # https://fatcat.wiki/release/rz72bzfevzeofdeb342c6z45qu;
+ # https://api.datacite.org/application/vnd.datacite.datacite+json/10.14288/1.0011045
+ return (Status.DIFFERENT, Miss.CUSTOM_PREFIX_10_14288)
+ except PathAccessError:
+ pass
+
if "Zweckverband Volkshochschule " in a_title and a_title != b_title:
return (Status.DIFFERENT, Miss.CUSTOM_VHS)
diff --git a/notes/todo.md b/notes/todo.md
index 08c863c..b3b47ee 100644
--- a/notes/todo.md
+++ b/notes/todo.md
@@ -11,6 +11,18 @@ Examples.
> "reference-entry", "entry" - vs other type, e.g. article
* https://fatcat.wiki/release/63g4ukdxajcqhdytqla6du3t3u https://fatcat.wiki/release/rz72bzfevzeofdeb342c6z45qu
+
+This example comes from DataCite; the original metadata:
+
+* [https://api.datacite.org/dois/10.14288/1.0151581](https://api.datacite.org/dois/10.14288/1.0151581)
+
+The metadata is similarly off for:
+
+* [https://commons.datacite.org/doi.org/10.14288/1.0011045?query=%2210.14288%22](https://commons.datacite.org/doi.org/10.14288/1.0011045?query=%2210.14288%22)
+* [https://api.datacite.org/application/vnd.datacite.datacite+json/10.14288/1.0011045](https://api.datacite.org/application/vnd.datacite.datacite+json/10.14288/1.0011045)
+
+In both cases a picture is miscategorized as an article.
+
* https://fatcat.wiki/release/fwghjz4q7bdulismftuvagmgfu https://fatcat.wiki/release/jwbn7qohu5ggtc5okm4m7s5vja
* https://fatcat.wiki/release/hhyyhosajjflpkufecx26gncwe https://fatcat.wiki/release/yxqwe4ns5vbntjzcse5igkgxk4
* https://fatcat.wiki/release/ij3yuoh6lrh3tkrv5o7gfk6yyi https://fatcat.wiki/release/tur236mqljdfdnlzbbnks2sily
diff --git a/tests/data/release/63g4ukdxajcqhdytqla6du3t3u b/tests/data/release/63g4ukdxajcqhdytqla6du3t3u
new file mode 100644
index 0000000..76a4533
--- /dev/null
+++ b/tests/data/release/63g4ukdxajcqhdytqla6du3t3u
@@ -0,0 +1,21 @@
+{
+ "abstracts": [],
+ "contribs": [],
+ "ext_ids": {
+ "doi": "10.14288/1.0151574"
+ },
+ "extra": {
+ "datacite": {}
+ },
+ "ident": "63g4ukdxajcqhdytqla6du3t3u",
+ "language": "en",
+ "publisher": "The University of British Columbia",
+ "refs": [],
+ "release_stage": "published",
+ "release_type": "article",
+ "release_year": 1975,
+ "revision": "ebf304ea-50f8-4813-89b7-c3c6d82d7379",
+ "state": "active",
+ "title": "Museum of Anthropology construction",
+ "work_id": "2hsro5lmtjd3vaqofqngau2qca"
+}
diff --git a/tests/data/release/rz72bzfevzeofdeb342c6z45qu b/tests/data/release/rz72bzfevzeofdeb342c6z45qu
new file mode 100644
index 0000000..000297b
--- /dev/null
+++ b/tests/data/release/rz72bzfevzeofdeb342c6z45qu
@@ -0,0 +1,21 @@
+{
+ "abstracts": [],
+ "contribs": [],
+ "ext_ids": {
+ "doi": "10.14288/1.0151581"
+ },
+ "extra": {
+ "datacite": {}
+ },
+ "ident": "rz72bzfevzeofdeb342c6z45qu",
+ "language": "en",
+ "publisher": "The University of British Columbia",
+ "refs": [],
+ "release_stage": "published",
+ "release_type": "article",
+ "release_year": 1975,
+ "revision": "4f512bb4-bc11-414e-bd3b-879642093be1",
+ "state": "active",
+ "title": "Museum of Anthropology construction",
+ "work_id": "oi7du7s72bhhve54ot5xnjl2tq"
+}
diff --git a/tests/data/verify.csv b/tests/data/verify.csv
index eb5a473..2f2bf9b 100644
--- a/tests/data/verify.csv
+++ b/tests/data/verify.csv
@@ -101,3 +101,4 @@ hhyknlu55vdezg3xsqxa4as2eu,s7mpon5havhx3eil563hcz6anu,Status.EXACT,OK.TITLE_AUTH
r63fa4mqkfb3leafypdwnnj7jq,usifdrmhdbfhlodgaqgzwzi4da,,
bg4gzikycnfvtkfwl5qnxeywwa,fbdg4rdgw5halgkhr4qcsex25y,Status.EXACT,
omjj75lv4rayvcqmgmicnzf5ye,xxfujnvafrazbjw7kvh7bhmuvy,,
+63g4ukdxajcqhdytqla6du3t3u,rz72bzfevzeofdeb342c6z45qu,Status.DIFFERENT,Miss.CUSTOM_PREFIX_10_14288