aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2020-01-08 03:47:10 +0100
committer Martin Czygan <martin.czygan@gmail.com> 2020-01-08 03:47:10 +0100
commit a7e5460d6355dd0e99b08e480d4e50755fda3b16 (patch)
tree 8653d28948c0c1c374b695ebce533a244bcad950
parent a23f73e37cd88de5467c47aa5f84b96448c5713d (diff)
download fatcat-a7e5460d6355dd0e99b08e480d4e50755fda3b16.tar.gz
download fatcat-a7e5460d6355dd0e99b08e480d4e50755fda3b16.zip
datacite: mark additional files as stub
-rw-r--r-- python/fatcat_tools/importers/datacite.py 4
-rw-r--r-- python/tests/files/datacite/datacite_doc_25.json 47
-rw-r--r-- python/tests/files/datacite/datacite_result_25.json 25
-rw-r--r-- python/tests/import_datacite.py 2
4 files changed, 77 insertions, 1 deletion
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 587a65aa..90bc3db7 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -547,6 +547,10 @@ class DataciteImporter(EntityImporter):
if publisher == 'Cambridge Crystallographic Data Centre':
release_type = 'entry'
+ # Supplement files, e.g. "Additional file 1: ASE constructs in questionnaire."
+ if title.lower().startswith('additional file'):
+ release_type = 'stub'
+
# Language values are varied ("ger", "es", "English", "ENG", "en-us",
# "other", ...). Try to crush it with langcodes: "It may sound to you
# like langcodes solves a pretty boring problem. At one level, that's
diff --git a/python/tests/files/datacite/datacite_doc_25.json b/python/tests/files/datacite/datacite_doc_25.json
new file mode 100644
index 00000000..60cd0ab7
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_25.json
@@ -0,0 +1,47 @@
+{
+ "attributes": {
+ "doi": "10.7916/d86x0cg1",
+ "creators": [
+ {
+ "name": "Anton Welch",
+ "affiliation": [
+ "Department of pataphysics"
+ ],
+ "nameIdentifiers": []
+ }
+ ],
+ "titles": [
+ {
+ "title": "Additional file 123: ABC"
+ },
+ {
+ "title": "DEF",
+ "titleType": "Subtitle"
+ }
+ ],
+ "publicationYear": 2016,
+ "language": "DE-CH",
+ "types": {
+ "ris": "GEN",
+ "bibtex": "misc",
+ "citeproc": "article",
+ "schemaOrg": "CreativeWork"
+ },
+ "dates": [
+ {
+ "date": "2017-08-24",
+ "dateType": "Created"
+ },
+ {
+ "date": "2019-08-04",
+ "dateType": "Updated"
+ },
+ {
+ "date": "2017",
+ "dateType": "Issued"
+ }
+ ],
+ "isActive": true,
+ "state": "findable"
+ }
+}
diff --git a/python/tests/files/datacite/datacite_result_25.json b/python/tests/files/datacite/datacite_result_25.json
new file mode 100644
index 00000000..8a370bbb
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_25.json
@@ -0,0 +1,25 @@
+{
+ "extra": {
+ "datacite": {},
+ "month": 8
+ },
+ "title": "Additional file 123: ABC",
+ "subtitle": "DEF",
+ "release_type": "stub",
+ "release_stage": "published",
+ "release_date": "2017-08-24",
+ "release_year": 2017,
+ "ext_ids": {
+ "doi": "10.7916/d86x0cg1"
+ },
+ "contribs": [
+ {
+ "index": 0,
+ "raw_name": "Anton Welch",
+ "role": "author",
+ "raw_affiliation": "Department of pataphysics"
+ }
+ ],
+ "refs": [],
+ "abstracts": []
+}
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index 9ee479e8..7293ecac 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -287,7 +287,7 @@ def test_datacite_conversions(datacite_importer):
for now.
"""
datacite_importer.debug = True
- for i in range(25):
+ for i in range(26):
src = 'tests/files/datacite/datacite_doc_{0:02d}.json'.format(i)
dst = 'tests/files/datacite/datacite_result_{0:02d}.json'.format(i)
print('testing mapping from {} => {}'.format(src, dst))