From da1daa8568be65c39265c1819d28e8796785e600 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Thu, 9 Jan 2020 16:16:20 +0100 Subject: datacite: ignore known unknown values in resourceType* --- python/fatcat_tools/importers/datacite.py | 4 +- python/tests/files/datacite/datacite_doc_29.json | 61 ++++++++++++++++++++++ .../tests/files/datacite/datacite_result_29.json | 33 ++++++++++++ python/tests/import_datacite.py | 2 +- 4 files changed, 97 insertions(+), 3 deletions(-) create mode 100644 python/tests/files/datacite/datacite_doc_29.json create mode 100644 python/tests/files/datacite/datacite_result_29.json (limited to 'python') diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index 4128b3ca..12e0beba 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -550,9 +550,9 @@ class DataciteImporter(EntityImporter): resource_type = types.get('resourceType', '') or '' resource_type_general = types.get('resourceTypeGeneral', '') or '' - if resource_type: + if resource_type and resource_type.lower() not in UNKNOWN_MARKERS_LOWER: extra_datacite['resourceType'] = resource_type - if resource_type_general: + if resource_type_general and resource_type_general.lower() not in UNKNOWN_MARKERS_LOWER: extra_datacite['resourceTypeGeneral'] = resource_type_general # Include certain relations from relatedIdentifiers. Keeping the diff --git a/python/tests/files/datacite/datacite_doc_29.json b/python/tests/files/datacite/datacite_doc_29.json new file mode 100644 index 00000000..6646325f --- /dev/null +++ b/python/tests/files/datacite/datacite_doc_29.json @@ -0,0 +1,61 @@ +{ + "attributes": { + "contributors": [ + { + "affiliation": [], + "contributorType": "Editor", + "familyName": "Wemmer", + "givenName": "David", + "name": "Wemmer, David", + "nameType": "Personal" + } + ], + "creators": [ + { + "affiliation": [ + "Department of pataphysics" + ], + "name": "Anton Welch", + "nameIdentifiers": [] + } + ], + "dates": [ + { + "date": "2017-08-24", + "dateType": "Created" + }, + { + "date": "2019-08-04", + "dateType": "Updated" + }, + { + "date": "2017", + "dateType": "Issued" + } + ], + "descriptions": [ + {"description": ["Hello", "World"], "descriptionType": "Abstract"} + ], + "doi": "10.7916/d86x0cg1", + "isActive": true, + "language": "DE-CH", + "publicationYear": 2016, + "state": "findable", + "titles": [ + { + "title": "Additional file 123: ABC" + }, + { + "title": "DEF", + "titleType": "Subtitle" + } + ], + "types": { + "bibtex": "misc", + "citeproc": "article", + "ris": "GEN", + "schemaOrg": "CreativeWork", + "resourceType": "(:unav)" + } + } +} diff --git a/python/tests/files/datacite/datacite_result_29.json b/python/tests/files/datacite/datacite_result_29.json new file mode 100644 index 00000000..84bed9c8 --- /dev/null +++ b/python/tests/files/datacite/datacite_result_29.json @@ -0,0 +1,33 @@ +{ + "abstracts": [{"content": "Hello\nWorld", "lang": "en", "mimetype": "text/plain"}], + "contribs": [ + { + "index": 0, + "raw_affiliation": "Department of pataphysics", + "raw_name": "Anton Welch", + "role": "author" + }, + { + "extra": { + "type": "Editor" + }, + "given_name": "David", + "raw_name": "David Wemmer", + "surname": "Wemmer" + } + ], + "ext_ids": { + "doi": "10.7916/d86x0cg1" + }, + "extra": { + "datacite": {}, + "release_month": 8 + }, + "refs": [], + "release_date": "2017-08-24", + "release_stage": "published", + "release_type": "stub", + "release_year": 2017, + "subtitle": "DEF", + "title": "Additional file 123: ABC" +} diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py index a7d514ea..d0d94b10 100644 --- a/python/tests/import_datacite.py +++ b/python/tests/import_datacite.py @@ -287,7 +287,7 @@ def test_datacite_conversions(datacite_importer): for now. """ datacite_importer.debug = True - for i in range(29): + for i in range(30): src = 'tests/files/datacite/datacite_doc_{0:02d}.json'.format(i) dst = 'tests/files/datacite/datacite_result_{0:02d}.json'.format(i) print('testing mapping from {} => {}'.format(src, dst)) -- cgit v1.2.3