From 811a18ef7609d49d97aba3d61d359da979100246 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Fri, 11 Jun 2021 22:26:28 +0200 Subject: datacite: more careful title string access; fixes sentry #88350 Caused by a partial "title entry without title" coming *first* (e.g. one holding just a language, like: {'lang': 'da'}). --- python/fatcat_tools/importers/datacite.py | 2 +- python/tests/files/datacite/datacite_doc_35.json | 65 ++++++++++++++++++++++ .../tests/files/datacite/datacite_result_35.json | 30 ++++++++++ python/tests/import_datacite.py | 2 +- 4 files changed, 97 insertions(+), 2 deletions(-) create mode 100644 python/tests/files/datacite/datacite_doc_35.json create mode 100644 python/tests/files/datacite/datacite_result_35.json diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index 221ac8f5..703dbc27 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -1088,7 +1088,7 @@ def parse_datacite_titles(titles): for entry in titles: if not title and ('titleType' not in entry or not entry.get('titleType')): - title = entry.get('title').strip() + title = (entry.get('title') or '').strip() if not subtitle and entry.get('titleType') == 'Subtitle': subtitle = entry.get('title', '').strip() if not original_language_title: diff --git a/python/tests/files/datacite/datacite_doc_35.json b/python/tests/files/datacite/datacite_doc_35.json new file mode 100644 index 00000000..e2b65e13 --- /dev/null +++ b/python/tests/files/datacite/datacite_doc_35.json @@ -0,0 +1,65 @@ +{ + "id": "10.17912/micropub.biology.000143", + "type": "dois", + "attributes": { + "doi": "10.17912/micropub.biology.000143", + "identifiers": null, + "creators": [ + { + "name": "Paul Katz", + "givenName": "", + "familyName": "", + "affiliation": [], + "role": "author" + } + ], + "titles": [ + { + "lang": "da" + }, + { + "title": "Sample" + } + ], + "publisher": "microPublication Biology", +
"publicationYear": 2019, + "types": { + "resourceTypeGeneral": "DataPaper" + }, + "relatedIdentifiers": [], + "sizes": [], + "formats": [], + "version": null, + "rightsList": [], + "descriptions": [ + { + "description": 1234567890, + "descriptionType": "Abstract" + } + ], + "geoLocations": [], + "fundingReferences": [], + "url": "https://www.micropublication.org/journals/biology/micropub.biology.000143", + "created": "2019-08-19T14:43:08.000Z", + "registered": "2019-08-19T14:43:09.000Z", + "published": "2019", + "updated": "2019-11-09T12:32:02.000Z", + "contributors": [ + { + "name": "Paul Katz", + "givenName": "", + "familyName": "", + "affiliation": [], + "role": "illustrator" + } + ] + }, + "relationships": { + "client": { + "data": { + "id": "caltech.micropub", + "type": "clients" + } + } + } +} diff --git a/python/tests/files/datacite/datacite_result_35.json b/python/tests/files/datacite/datacite_result_35.json new file mode 100644 index 00000000..85641157 --- /dev/null +++ b/python/tests/files/datacite/datacite_result_35.json @@ -0,0 +1,30 @@ +{ + "abstracts": [ + { + "content": "1234567890", + "mimetype": "text/plain" + } + ], + "contribs": [ + { + "given_name": "", + "index": 0, + "raw_name": "Paul Katz", + "role": "author" + } + ], + "ext_ids": { + "doi": "10.17912/micropub.biology.000143" + }, + "extra": { + "datacite": { + "resourceTypeGeneral": "DataPaper" + }, + "container_name": "microPublication Biology" + }, + "refs": [], + "release_stage": "published", + "release_year": 2019, + "publisher": "microPublication Biology", + "title": "Sample" +} diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py index 6bc0e7b8..8b6797ef 100644 --- a/python/tests/import_datacite.py +++ b/python/tests/import_datacite.py @@ -400,7 +400,7 @@ def test_datacite_conversions(datacite_importer): for now. 
""" datacite_importer.debug = True - for i in range(35): + for i in range(36): src = "tests/files/datacite/datacite_doc_{0:02d}.json".format(i) dst = "tests/files/datacite/datacite_result_{0:02d}.json".format(i) with open(src, "r") as f: -- cgit v1.2.3