diff options
-rw-r--r-- | fuzzycat/common.py | 1 | ||||
-rw-r--r-- | fuzzycat/verify.py | 12 | ||||
-rw-r--r-- | notes/todo.md | 12 | ||||
-rw-r--r-- | tests/data/release/63g4ukdxajcqhdytqla6du3t3u | 21 | ||||
-rw-r--r-- | tests/data/release/rz72bzfevzeofdeb342c6z45qu | 21 | ||||
-rw-r--r-- | tests/data/verify.csv | 1 |
6 files changed, 68 insertions, 0 deletions
diff --git a/fuzzycat/common.py b/fuzzycat/common.py index 2f3d1fd..3973b1e 100644 --- a/fuzzycat/common.py +++ b/fuzzycat/common.py @@ -47,3 +47,4 @@ class Miss(str, Enum): SUBTITLE = 'miss.subtitle' TITLE_FILENAME = 'miss.title_filename' YEAR = 'miss.year' + CUSTOM_PREFIX_10_14288 = 'miss.custom_prefix_10_14288' diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py index 9848735..7543065 100644 --- a/fuzzycat/verify.py +++ b/fuzzycat/verify.py @@ -159,6 +159,18 @@ def compare(a, b): if fragment in a_title_lower: return (Status.AMBIGUOUS, Miss.BLACKLISTED_FRAGMENT) + try: + a_doi = glom(a, "ext_ids.doi") + b_doi = glom(b, "ext_ids.doi") + if a_doi.startswith("10.14288/") and b_doi.startswith("10.14288/") and a_doi != b_doi: + # UBC metadata slightly off; + # https://fatcat.wiki/release/63g4ukdxajcqhdytqla6du3t3u, + # https://fatcat.wiki/release/rz72bzfevzeofdeb342c6z45qu; + # https://api.datacite.org/application/vnd.datacite.datacite+json/10.14288/1.0011045 + return (Status.DIFFERENT, Miss.CUSTOM_PREFIX_10_14288) + except PathAccessError: + pass + if "Zweckverband Volkshochschule " in a_title and a_title != b_title: return (Status.DIFFERENT, Miss.CUSTOM_VHS) diff --git a/notes/todo.md b/notes/todo.md index 08c863c..b3b47ee 100644 --- a/notes/todo.md +++ b/notes/todo.md @@ -11,6 +11,18 @@ Examples. > "reference-entry", "entry" - vs other type, e.g. article * https://fatcat.wiki/release/63g4ukdxajcqhdytqla6du3t3u https://fatcat.wiki/release/rz72bzfevzeofdeb342c6z45qu + +This example comes from datacite, the original md: + +* [https://api.datacite.org/dois/10.14288/1.0151581](https://api.datacite.org/dois/10.14288/1.0151581) + +Metadata similarly off on: + +* [https://commons.datacite.org/doi.org/10.14288/1.0011045?query=%2210.14288%22](https://commons.datacite.org/doi.org/10.14288/1.0011045?query=%2210.14288%22) +* [https://api.datacite.org/application/vnd.datacite.datacite+json/10.14288/1.0011045](https://api.datacite.org/application/vnd.datacite.datacite+json/10.14288/1.0011045) + +Picture categorized as article. + * https://fatcat.wiki/release/fwghjz4q7bdulismftuvagmgfu https://fatcat.wiki/release/jwbn7qohu5ggtc5okm4m7s5vja * https://fatcat.wiki/release/hhyyhosajjflpkufecx26gncwe https://fatcat.wiki/release/yxqwe4ns5vbntjzcse5igkgxk4 * https://fatcat.wiki/release/ij3yuoh6lrh3tkrv5o7gfk6yyi https://fatcat.wiki/release/tur236mqljdfdnlzbbnks2sily diff --git a/tests/data/release/63g4ukdxajcqhdytqla6du3t3u b/tests/data/release/63g4ukdxajcqhdytqla6du3t3u new file mode 100644 index 0000000..76a4533 --- /dev/null +++ b/tests/data/release/63g4ukdxajcqhdytqla6du3t3u @@ -0,0 +1,21 @@ +{ + "abstracts": [], + "contribs": [], + "ext_ids": { + "doi": "10.14288/1.0151574" + }, + "extra": { + "datacite": {} + }, + "ident": "63g4ukdxajcqhdytqla6du3t3u", + "language": "en", + "publisher": "The University of British Columbia", + "refs": [], + "release_stage": "published", + "release_type": "article", + "release_year": 1975, + "revision": "ebf304ea-50f8-4813-89b7-c3c6d82d7379", + "state": "active", + "title": "Museum of Anthropology construction", + "work_id": "2hsro5lmtjd3vaqofqngau2qca" +} diff --git a/tests/data/release/rz72bzfevzeofdeb342c6z45qu b/tests/data/release/rz72bzfevzeofdeb342c6z45qu new file mode 100644 index 0000000..000297b --- /dev/null +++ b/tests/data/release/rz72bzfevzeofdeb342c6z45qu @@ -0,0 +1,21 @@ +{ + "abstracts": [], + "contribs": [], + "ext_ids": { + "doi": "10.14288/1.0151581" + }, + "extra": { + "datacite": {} + }, + "ident": "rz72bzfevzeofdeb342c6z45qu", + "language": "en", + "publisher": "The University of British Columbia", + "refs": [], + "release_stage": "published", + "release_type": "article", + "release_year": 1975, + "revision": "4f512bb4-bc11-414e-bd3b-879642093be1", + "state": "active", + "title": "Museum of Anthropology construction", + "work_id": "oi7du7s72bhhve54ot5xnjl2tq" +} diff --git a/tests/data/verify.csv b/tests/data/verify.csv index eb5a473..2f2bf9b 100644 --- a/tests/data/verify.csv +++ b/tests/data/verify.csv @@ -101,3 +101,4 @@ hhyknlu55vdezg3xsqxa4as2eu,s7mpon5havhx3eil563hcz6anu,Status.EXACT,OK.TITLE_AUTH r63fa4mqkfb3leafypdwnnj7jq,usifdrmhdbfhlodgaqgzwzi4da,, bg4gzikycnfvtkfwl5qnxeywwa,fbdg4rdgw5halgkhr4qcsex25y,Status.EXACT, omjj75lv4rayvcqmgmicnzf5ye,xxfujnvafrazbjw7kvh7bhmuvy,, +63g4ukdxajcqhdytqla6du3t3u,rz72bzfevzeofdeb342c6z45qu,Status.DIFFERENT,Miss.CUSTOM_PREFIX_10_14288 |