summary refs log tree commit diff stats
path: root/python
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-06-11 22:26:28 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-06-11 22:30:12 +0200
commit 811a18ef7609d49d97aba3d61d359da979100246 (patch)
tree a72f3fd2065b423b0b356f68b0c14036a939441e /python
parent b67cac61d815634969e91999d4bc2954b99bf2b2 (diff)
download fatcat-811a18ef7609d49d97aba3d61d359da979100246.tar.gz
fatcat-811a18ef7609d49d97aba3d61d359da979100246.zip
datacite: more careful title string access; fixes sentry #88350
Caused by a partial title entry (one without a "title" key) coming *first*, e.g. an entry holding only a language, like: {'lang': 'da'}.
Diffstat (limited to 'python')
-rw-r--r-- python/fatcat_tools/importers/datacite.py 2
-rw-r--r-- python/tests/files/datacite/datacite_doc_35.json 65
-rw-r--r-- python/tests/files/datacite/datacite_result_35.json 30
-rw-r--r-- python/tests/import_datacite.py 2
4 files changed, 97 insertions, 2 deletions
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 221ac8f5..703dbc27 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -1088,7 +1088,7 @@ def parse_datacite_titles(titles):
for entry in titles:
if not title and ('titleType' not in entry
or not entry.get('titleType')):
- title = entry.get('title').strip()
+ title = (entry.get('title') or '').strip()
if not subtitle and entry.get('titleType') == 'Subtitle':
subtitle = entry.get('title', '').strip()
if not original_language_title:
diff --git a/python/tests/files/datacite/datacite_doc_35.json b/python/tests/files/datacite/datacite_doc_35.json
new file mode 100644
index 00000000..e2b65e13
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_35.json
@@ -0,0 +1,65 @@
+{
+ "id": "10.17912/micropub.biology.000143",
+ "type": "dois",
+ "attributes": {
+ "doi": "10.17912/micropub.biology.000143",
+ "identifiers": null,
+ "creators": [
+ {
+ "name": "Paul Katz",
+ "givenName": "",
+ "familyName": "",
+ "affiliation": [],
+ "role": "author"
+ }
+ ],
+ "titles": [
+ {
+ "lang": "da"
+ },
+ {
+ "title": "Sample"
+ }
+ ],
+ "publisher": "microPublication Biology",
+ "publicationYear": 2019,
+ "types": {
+ "resourceTypeGeneral": "DataPaper"
+ },
+ "relatedIdentifiers": [],
+ "sizes": [],
+ "formats": [],
+ "version": null,
+ "rightsList": [],
+ "descriptions": [
+ {
+ "description": 1234567890,
+ "descriptionType": "Abstract"
+ }
+ ],
+ "geoLocations": [],
+ "fundingReferences": [],
+ "url": "https://www.micropublication.org/journals/biology/micropub.biology.000143",
+ "created": "2019-08-19T14:43:08.000Z",
+ "registered": "2019-08-19T14:43:09.000Z",
+ "published": "2019",
+ "updated": "2019-11-09T12:32:02.000Z",
+ "contributors": [
+ {
+ "name": "Paul Katz",
+ "givenName": "",
+ "familyName": "",
+ "affiliation": [],
+ "role": "illustrator"
+ }
+ ]
+ },
+ "relationships": {
+ "client": {
+ "data": {
+ "id": "caltech.micropub",
+ "type": "clients"
+ }
+ }
+ }
+}
diff --git a/python/tests/files/datacite/datacite_result_35.json b/python/tests/files/datacite/datacite_result_35.json
new file mode 100644
index 00000000..85641157
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_35.json
@@ -0,0 +1,30 @@
+{
+ "abstracts": [
+ {
+ "content": "1234567890",
+ "mimetype": "text/plain"
+ }
+ ],
+ "contribs": [
+ {
+ "given_name": "",
+ "index": 0,
+ "raw_name": "Paul Katz",
+ "role": "author"
+ }
+ ],
+ "ext_ids": {
+ "doi": "10.17912/micropub.biology.000143"
+ },
+ "extra": {
+ "datacite": {
+ "resourceTypeGeneral": "DataPaper"
+ },
+ "container_name": "microPublication Biology"
+ },
+ "refs": [],
+ "release_stage": "published",
+ "release_year": 2019,
+ "publisher": "microPublication Biology",
+ "title": "Sample"
+}
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index 6bc0e7b8..8b6797ef 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -400,7 +400,7 @@ def test_datacite_conversions(datacite_importer):
for now.
"""
datacite_importer.debug = True
- for i in range(35):
+ for i in range(36):
src = "tests/files/datacite/datacite_doc_{0:02d}.json".format(i)
dst = "tests/files/datacite/datacite_result_{0:02d}.json".format(i)
with open(src, "r") as f: