From 7c6febf20c84dd4f5778e1fb02369456f7dad344 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Mon, 20 Apr 2020 20:52:10 +0200 Subject: datacite: fix a raw name constraint violation It was possible that contribs got added which had no raw name. One example would be a name consisting of whitespace only. This fix adds a final check for this case. --- python/fatcat_tools/importers/datacite.py | 8 ++++ python/tests/files/datacite/datacite_doc_31.json | 53 ++++++++++++++++++++++ .../tests/files/datacite/datacite_result_31.json | 24 ++++++++++ python/tests/import_datacite.py | 2 +- 4 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 python/tests/files/datacite/datacite_doc_31.json create mode 100644 python/tests/files/datacite/datacite_result_31.json diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index 81f00876..244984f5 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -758,6 +758,14 @@ class DataciteImporter(EntityImporter): given_name = clean(given_name) if surname: surname = clean(surname) + + # Perform a final check that the name does not reduce to an empty + # string (e.g. a whitespace-only name); such contribs are skipped. 
+ if name: + name = name.strip() + if not name: + continue + if raw_affiliation == '': continue diff --git a/python/tests/files/datacite/datacite_doc_31.json b/python/tests/files/datacite/datacite_doc_31.json new file mode 100644 index 00000000..83af3e4d --- /dev/null +++ b/python/tests/files/datacite/datacite_doc_31.json @@ -0,0 +1,53 @@ +{ + "id": "10.17912/micropub.biology.000143", + "type": "dois", + "attributes": { + "doi": "10.17912/micropub.biology.000143", + "identifiers": null, + "creators": [ + { + "raw_name": " ", + "givenName": "", + "familyName": "", + "affiliation": [], + "role": "author" + } + ], + "titles": [ + { + "title": "Sample" + } + ], + "publisher": "microPublication Biology", + "publicationYear": 2019, + "types": { + "resourceTypeGeneral": "DataPaper" + }, + "relatedIdentifiers": [], + "sizes": [], + "formats": [], + "version": null, + "rightsList": [], + "descriptions": [ + { + "description": "Biological liquid-liquid phase separation", + "descriptionType": "Abstract" + } + ], + "geoLocations": [], + "fundingReferences": [], + "url": "https://www.micropublication.org/journals/biology/micropub.biology.000143", + "created": "2019-08-19T14:43:08.000Z", + "registered": "2019-08-19T14:43:09.000Z", + "published": "2019", + "updated": "2019-11-09T12:32:02.000Z" + }, + "relationships": { + "client": { + "data": { + "id": "caltech.micropub", + "type": "clients" + } + } + } +} diff --git a/python/tests/files/datacite/datacite_result_31.json b/python/tests/files/datacite/datacite_result_31.json new file mode 100644 index 00000000..193104b0 --- /dev/null +++ b/python/tests/files/datacite/datacite_result_31.json @@ -0,0 +1,24 @@ +{ + "abstracts": [ + { + "content": "Biological liquid-liquid phase separation", + "lang": "fr", + "mimetype": "text/plain" + } + ], + "contribs": [], + "ext_ids": { + "doi": "10.17912/micropub.biology.000143" + }, + "extra": { + "datacite": { + "resourceTypeGeneral": "DataPaper" + }, + "container_name": "microPublication 
Biology" + }, + "refs": [], + "release_stage": "published", + "release_year": 2019, + "publisher": "microPublication Biology", + "title": "Sample" +} diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py index 15650375..7fdd8230 100644 --- a/python/tests/import_datacite.py +++ b/python/tests/import_datacite.py @@ -287,7 +287,7 @@ def test_datacite_conversions(datacite_importer): for now. """ datacite_importer.debug = True - for i in range(31): + for i in range(32): src = 'tests/files/datacite/datacite_doc_{0:02d}.json'.format(i) dst = 'tests/files/datacite/datacite_result_{0:02d}.json'.format(i) with open(src, 'r') as f: -- cgit v1.2.3