summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2020-01-09 16:16:20 +0100
committer Martin Czygan <martin.czygan@gmail.com> 2020-01-09 16:16:20 +0100
commit da1daa8568be65c39265c1819d28e8796785e600 (patch)
tree 15d223dd2d092498a5578a31991ef2332207f41c
parent 6a7591103c7b7d985ad22199138af9378de697f4 (diff)
download fatcat-da1daa8568be65c39265c1819d28e8796785e600.tar.gz
fatcat-da1daa8568be65c39265c1819d28e8796785e600.zip
datacite: ignore known unknown values in resourceType*
-rw-r--r-- python/fatcat_tools/importers/datacite.py 4
-rw-r--r-- python/tests/files/datacite/datacite_doc_29.json 61
-rw-r--r-- python/tests/files/datacite/datacite_result_29.json 33
-rw-r--r-- python/tests/import_datacite.py 2
4 files changed, 97 insertions, 3 deletions
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 4128b3ca..12e0beba 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -550,9 +550,9 @@ class DataciteImporter(EntityImporter):
resource_type = types.get('resourceType', '') or ''
resource_type_general = types.get('resourceTypeGeneral', '') or ''
- if resource_type:
+ if resource_type and resource_type.lower() not in UNKNOWN_MARKERS_LOWER:
extra_datacite['resourceType'] = resource_type
- if resource_type_general:
+ if resource_type_general and resource_type_general.lower() not in UNKNOWN_MARKERS_LOWER:
extra_datacite['resourceTypeGeneral'] = resource_type_general
# Include certain relations from relatedIdentifiers. Keeping the
diff --git a/python/tests/files/datacite/datacite_doc_29.json b/python/tests/files/datacite/datacite_doc_29.json
new file mode 100644
index 00000000..6646325f
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_29.json
@@ -0,0 +1,61 @@
+{
+ "attributes": {
+ "contributors": [
+ {
+ "affiliation": [],
+ "contributorType": "Editor",
+ "familyName": "Wemmer",
+ "givenName": "David",
+ "name": "Wemmer, David",
+ "nameType": "Personal"
+ }
+ ],
+ "creators": [
+ {
+ "affiliation": [
+ "Department of pataphysics"
+ ],
+ "name": "Anton Welch",
+ "nameIdentifiers": []
+ }
+ ],
+ "dates": [
+ {
+ "date": "2017-08-24",
+ "dateType": "Created"
+ },
+ {
+ "date": "2019-08-04",
+ "dateType": "Updated"
+ },
+ {
+ "date": "2017",
+ "dateType": "Issued"
+ }
+ ],
+ "descriptions": [
+ {"description": ["Hello", "World"], "descriptionType": "Abstract"}
+ ],
+ "doi": "10.7916/d86x0cg1",
+ "isActive": true,
+ "language": "DE-CH",
+ "publicationYear": 2016,
+ "state": "findable",
+ "titles": [
+ {
+ "title": "Additional file 123: ABC"
+ },
+ {
+ "title": "DEF",
+ "titleType": "Subtitle"
+ }
+ ],
+ "types": {
+ "bibtex": "misc",
+ "citeproc": "article",
+ "ris": "GEN",
+ "schemaOrg": "CreativeWork",
+ "resourceType": "(:unav)"
+ }
+ }
+}
diff --git a/python/tests/files/datacite/datacite_result_29.json b/python/tests/files/datacite/datacite_result_29.json
new file mode 100644
index 00000000..84bed9c8
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_29.json
@@ -0,0 +1,33 @@
+{
+ "abstracts": [{"content": "Hello\nWorld", "lang": "en", "mimetype": "text/plain"}],
+ "contribs": [
+ {
+ "index": 0,
+ "raw_affiliation": "Department of pataphysics",
+ "raw_name": "Anton Welch",
+ "role": "author"
+ },
+ {
+ "extra": {
+ "type": "Editor"
+ },
+ "given_name": "David",
+ "raw_name": "David Wemmer",
+ "surname": "Wemmer"
+ }
+ ],
+ "ext_ids": {
+ "doi": "10.7916/d86x0cg1"
+ },
+ "extra": {
+ "datacite": {},
+ "release_month": 8
+ },
+ "refs": [],
+ "release_date": "2017-08-24",
+ "release_stage": "published",
+ "release_type": "stub",
+ "release_year": 2017,
+ "subtitle": "DEF",
+ "title": "Additional file 123: ABC"
+}
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index a7d514ea..d0d94b10 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -287,7 +287,7 @@ def test_datacite_conversions(datacite_importer):
for now.
"""
datacite_importer.debug = True
- for i in range(29):
+ for i in range(30):
src = 'tests/files/datacite/datacite_doc_{0:02d}.json'.format(i)
dst = 'tests/files/datacite/datacite_result_{0:02d}.json'.format(i)
print('testing mapping from {} => {}'.format(src, dst))