diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-01-08 03:47:10 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-01-08 03:47:10 +0100 |
commit | a7e5460d6355dd0e99b08e480d4e50755fda3b16 (patch) | |
tree | 8653d28948c0c1c374b695ebce533a244bcad950 | |
parent | a23f73e37cd88de5467c47aa5f84b96448c5713d (diff) | |
download | fatcat-a7e5460d6355dd0e99b08e480d4e50755fda3b16.tar.gz fatcat-a7e5460d6355dd0e99b08e480d4e50755fda3b16.zip |
datacite: mark additional files as stub
-rw-r--r-- | python/fatcat_tools/importers/datacite.py | 4 | ||||
-rw-r--r-- | python/tests/files/datacite/datacite_doc_25.json | 47 | ||||
-rw-r--r-- | python/tests/files/datacite/datacite_result_25.json | 25 | ||||
-rw-r--r-- | python/tests/import_datacite.py | 2 |
4 files changed, 77 insertions, 1 deletion
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index 587a65aa..90bc3db7 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -547,6 +547,10 @@ class DataciteImporter(EntityImporter): if publisher == 'Cambridge Crystallographic Data Centre': release_type = 'entry' + # Supplement files, e.g. "Additional file 1: ASE constructs in questionnaire." + if title.lower().startswith('additional file'): + release_type = 'stub' + # Language values are varied ("ger", "es", "English", "ENG", "en-us", # "other", ...). Try to crush it with langcodes: "It may sound to you # like langcodes solves a pretty boring problem. At one level, that's diff --git a/python/tests/files/datacite/datacite_doc_25.json b/python/tests/files/datacite/datacite_doc_25.json new file mode 100644 index 00000000..60cd0ab7 --- /dev/null +++ b/python/tests/files/datacite/datacite_doc_25.json @@ -0,0 +1,47 @@ +{ + "attributes": { + "doi": "10.7916/d86x0cg1", + "creators": [ + { + "name": "Anton Welch", + "affiliation": [ + "Department of pataphysics" + ], + "nameIdentifiers": [] + } + ], + "titles": [ + { + "title": "Additional file 123: ABC" + }, + { + "title": "DEF", + "titleType": "Subtitle" + } + ], + "publicationYear": 2016, + "language": "DE-CH", + "types": { + "ris": "GEN", + "bibtex": "misc", + "citeproc": "article", + "schemaOrg": "CreativeWork" + }, + "dates": [ + { + "date": "2017-08-24", + "dateType": "Created" + }, + { + "date": "2019-08-04", + "dateType": "Updated" + }, + { + "date": "2017", + "dateType": "Issued" + } + ], + "isActive": true, + "state": "findable" + } +} diff --git a/python/tests/files/datacite/datacite_result_25.json b/python/tests/files/datacite/datacite_result_25.json new file mode 100644 index 00000000..8a370bbb --- /dev/null +++ b/python/tests/files/datacite/datacite_result_25.json @@ -0,0 +1,25 @@ +{ + "extra": { + "datacite": {}, + "month": 8 + }, + "title": "Additional 
file 123: ABC", + "subtitle": "DEF", + "release_type": "stub", + "release_stage": "published", + "release_date": "2017-08-24", + "release_year": 2017, + "ext_ids": { + "doi": "10.7916/d86x0cg1" + }, + "contribs": [ + { + "index": 0, + "raw_name": "Anton Welch", + "role": "author", + "raw_affiliation": "Department of pataphysics" + } + ], + "refs": [], + "abstracts": [] +} diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py index 9ee479e8..7293ecac 100644 --- a/python/tests/import_datacite.py +++ b/python/tests/import_datacite.py @@ -287,7 +287,7 @@ def test_datacite_conversions(datacite_importer): for now. """ datacite_importer.debug = True - for i in range(25): + for i in range(26): src = 'tests/files/datacite/datacite_doc_{0:02d}.json'.format(i) dst = 'tests/files/datacite/datacite_result_{0:02d}.json'.format(i) print('testing mapping from {} => {}'.format(src, dst)) |