From a7e5460d6355dd0e99b08e480d4e50755fda3b16 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Wed, 8 Jan 2020 03:47:10 +0100 Subject: datacite: mark additional files as stub --- python/fatcat_tools/importers/datacite.py | 4 ++ python/tests/files/datacite/datacite_doc_25.json | 47 ++++++++++++++++++++++ .../tests/files/datacite/datacite_result_25.json | 25 ++++++++++++ python/tests/import_datacite.py | 2 +- 4 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 python/tests/files/datacite/datacite_doc_25.json create mode 100644 python/tests/files/datacite/datacite_result_25.json diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index 587a65aa..90bc3db7 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -547,6 +547,10 @@ class DataciteImporter(EntityImporter): if publisher == 'Cambridge Crystallographic Data Centre': release_type = 'entry' + # Supplement files, e.g. "Additional file 1: ASE constructs in questionnaire." + if title.lower().startswith('additional file'): + release_type = 'stub' + # Language values are varied ("ger", "es", "English", "ENG", "en-us", # "other", ...). Try to crush it with langcodes: "It may sound to you # like langcodes solves a pretty boring problem. At one level, that's diff --git a/python/tests/files/datacite/datacite_doc_25.json b/python/tests/files/datacite/datacite_doc_25.json new file mode 100644 index 00000000..60cd0ab7 --- /dev/null +++ b/python/tests/files/datacite/datacite_doc_25.json @@ -0,0 +1,47 @@ +{ + "attributes": { + "doi": "10.7916/d86x0cg1", + "creators": [ + { + "name": "Anton Welch", + "affiliation": [ + "Department of pataphysics" + ], + "nameIdentifiers": [] + } + ], + "titles": [ + { + "title": "Additional file 123: ABC" + }, + { + "title": "DEF", + "titleType": "Subtitle" + } + ], + "publicationYear": 2016, + "language": "DE-CH", + "types": { + "ris": "GEN", + "bibtex": "misc", + "citeproc": "article", + "schemaOrg": "CreativeWork" + }, + "dates": [ + { + "date": "2017-08-24", + "dateType": "Created" + }, + { + "date": "2019-08-04", + "dateType": "Updated" + }, + { + "date": "2017", + "dateType": "Issued" + } + ], + "isActive": true, + "state": "findable" + } +} diff --git a/python/tests/files/datacite/datacite_result_25.json b/python/tests/files/datacite/datacite_result_25.json new file mode 100644 index 00000000..8a370bbb --- /dev/null +++ b/python/tests/files/datacite/datacite_result_25.json @@ -0,0 +1,25 @@ +{ + "extra": { + "datacite": {}, + "month": 8 + }, + "title": "Additional file 123: ABC", + "subtitle": "DEF", + "release_type": "stub", + "release_stage": "published", + "release_date": "2017-08-24", + "release_year": 2017, + "ext_ids": { + "doi": "10.7916/d86x0cg1" + }, + "contribs": [ + { + "index": 0, + "raw_name": "Anton Welch", + "role": "author", + "raw_affiliation": "Department of pataphysics" + } + ], + "refs": [], + "abstracts": [] +} diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py index 9ee479e8..7293ecac 100644 --- a/python/tests/import_datacite.py +++ b/python/tests/import_datacite.py @@ -287,7 +287,7 @@ def test_datacite_conversions(datacite_importer): for now. """ datacite_importer.debug = True - for i in range(25): + for i in range(26): src = 'tests/files/datacite/datacite_doc_{0:02d}.json'.format(i) dst = 'tests/files/datacite/datacite_result_{0:02d}.json'.format(i) print('testing mapping from {} => {}'.format(src, dst)) -- cgit v1.2.3