From 4a82a0763bf927248f22e47ab5187af4beff83ee Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Mon, 9 Dec 2019 01:03:43 +0100
Subject: datacite: importer skeleton

* contributors, title, date, publisher, container, license

Field and value analysis via https://github.com/miku/indigo.
---
 python/tests/import_datacite.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 python/tests/import_datacite.py

(limited to 'python/tests')

diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
new file mode 100644
index 00000000..0bbaba2e
--- /dev/null
+++ b/python/tests/import_datacite.py
@@ -0,0 +1,25 @@
+"""
+Test datacite importer.
+
+Datacite is a aggregator, hence inputs are quite varied.
+
+Here is small sample of ID types taken from a sample:
+
+    497344 "DOI"
+     65013 "URL"
+     22210 "CCDC"
+     17853 "GBIF"
+     17635 "Other"
+     11474 "uri"
+      9170 "Publisher ID"
+      7775 "URN"
+      6196 "DUCHAS"
+      5624 "Handle"
+      5056 "publisherId"
+
+A nice tool, not yet existing tool (maybe named indigo) would do the following:
+
+    $ shuf -n 100000 datacite.ndjson | indigo -t md > data.md
+
+TODO(martin): Write tests.
+"""
-- 
cgit v1.2.3


From 403b1a2d4591d878145a021a7c1e15e2d60c47d8 Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Wed, 18 Dec 2019 20:21:49 +0100
Subject: improve datacite field mapping and import

The current version successfully imported a random sample of 100000
records (0.5%) from datacite.

The --debug (write JSON to stdout) and --insert-log-file (log batch
before committing to db) flags are temporarily added to help debugging.

Add a few unit tests.

Some edge cases:

a) Existing keys without a value require a slightly awkward idiom:

```
titles = attributes.get('titles', []) or []
```

b) There can be 0, 1, or more (first one wins) titles.

c) Date handling is probably not ideal. Datacite has a potentially
fine-grained list of dates.

The test case (tests/files/datacite_sample.jsonl) refers to
https://ssl.fao.org/glis/doi/10.18730/8DYM9, which has date (main
descriptor) 1986. The datacite record contains: 2017 (publicationYear,
probably the year of record creation with reference system), 1978-06-03
(collected, e.g. experimental sample), 1986 ("Accepted"). The online
version of the resource knows even one more date (2019-06-05 10:14:43 by
WIEWS update).
---
 python/fatcat_import.py                         |  15 +-
 python/fatcat_tools/importers/datacite.py       | 180 ++++++++++++++++++------
 python/tests/files/datacite_1k_records.jsonl.gz | Bin 0 -> 684605 bytes
 python/tests/files/datacite_sample.jsonl        |   1 +
 python/tests/import_datacite.py                 | 108 +++++++++++---
 5 files changed, 245 insertions(+), 59 deletions(-)
 create mode 100644 python/tests/files/datacite_1k_records.jsonl.gz
 create mode 100644 python/tests/files/datacite_sample.jsonl

(limited to 'python/tests')

diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index d7651792..90bb01a1 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -170,7 +170,10 @@ def run_datacite(args):
     dci = DataciteImporter(args.api,
         args.issn_map_file,
         edit_batch_size=args.batch_size,
-        bezerk_mode=args.bezerk_mode)
+        bezerk_mode=args.bezerk_mode,
+        debug=args.debug,
+        lang_detect=args.lang_detect,
+        insert_log_file=args.insert_log_file)
     if args.kafka_mode:
         KafkaJsonPusher(fci, args.kafka_hosts, args.kafka_env, "api-datacite",
             "fatcat-import", consume_batch_size=args.batch_size).run()
@@ -464,6 +467,16 @@ def main():
     sub_datacite.add_argument('--bezerk-mode',
         action='store_true',
         help="don't lookup existing DOIs, just insert (clobbers; only for fast bootstrap)")
+    sub_datacite.add_argument('--debug',
+        action='store_true',
+        help="write converted JSON to stdout")
+    sub_datacite.add_argument('--lang-detect',
+        action='store_true',
+        help="try to detect language (slow)")
+    sub_datacite.add_argument('--insert-log-file',
+        default='',
+        type=str,
+        help="write inserted documents into file (for debugging)")
     sub_datacite.set_defaults(
         func=run_datacite,
         auth_var="FATCAT_API_AUTH_TOKEN",
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 4e117dde..9774e334 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -6,13 +6,14 @@ Example doc at: https://gist.github.com/miku/5610a2d64e3fee82d16f5d3f3a295fc8
 
 from .common import EntityImporter
 import dateparser
-import langcodes
 import datetime
-import langdetect
 import fatcat_openapi_client
+import hashlib
 import json
+import langcodes
+import langdetect
+import sqlite3
 import sys
-import hashlib
 
 # https://guide.fatcat.wiki/entity_container.html#container_type-vocabulary
 CONTAINER_TYPE_MAP = {
@@ -147,10 +148,11 @@ LICENSE_SLUG_MAP = {
 
 class DataciteImporter(EntityImporter):
     """
-    Importer for datacite records. TODO(martin): Do we need issn_map_file?
+    Importer for datacite records.
     """
 
-    def __init__(self, api, issn_map_file, **kwargs):
+    def __init__(self, api, issn_map_file, debug=False, lang_detect=False,
+                 insert_log_file=None, **kwargs):
 
         eg_desc = kwargs.get('editgroup_description',
             "Automated import of Datacite DOI metadata, harvested from REST API")
@@ -163,7 +165,42 @@ class DataciteImporter(EntityImporter):
             **kwargs)
 
         self.create_containers = kwargs.get('create_containers', True)
+        extid_map_file = kwargs.get('extid_map_file')
+        self.extid_map_db = None
+        if extid_map_file:
+            db_uri = "file:{}?mode=ro".format(extid_map_file)
+            print("Using external ID map: {}".format(db_uri), file=sys.stderr)
+            self.extid_map_db = sqlite3.connect(db_uri, uri=True)
+        else:
+            print("Not using external ID map", file=sys.stderr)
+
         self.read_issn_map_file(issn_map_file)
+        self.debug = debug
+        self.lang_detect = lang_detect
+        self.insert_log_file = insert_log_file
+
+        print('datacite with debug={}, lang_detect={}'.format(self.debug, self.lang_detect), file=sys.stderr)
+
+    def lookup_ext_ids(self, doi):
+        """
+        Return dictionary of identifiers refering to the same things as the given DOI.
+        """
+        if self.extid_map_db is None:
+            return dict(core_id=None, pmid=None, pmcid=None, wikidata_qid=None, arxiv_id=None, jstor_id=None)
+        row = self.extid_map_db.execute("SELECT core, pmid, pmcid, wikidata FROM ids WHERE doi=? LIMIT 1",
+            [doi.lower()]).fetchone()
+        if row is None:
+            return dict(core_id=None, pmid=None, pmcid=None, wikidata_qid=None, arxiv_id=None, jstor_id=None)
+        row = [str(cell or '') or None for cell in row]
+        return dict(
+            core_id=row[0],
+            pmid=row[1],
+            pmcid=row[2],
+            wikidata_qid=row[3],
+            # TODO:
+            arxiv_id=None,
+            jstor_id=None,
+        )
 
     def parse_record(self, obj):
         """
@@ -174,14 +211,14 @@ class DataciteImporter(EntityImporter):
 
         attributes = obj['attributes']
 
-        # Contributors. Many nameIdentifierSchemes, we do not use yet:
-        # "attributes.creators[].nameIdentifiers[].nameIdentifierScheme": [
-        # "LCNA", "GND", "email", "NAF", "OSF", "RRID", "ORCID", "SCOPUS",
-        # "NRCPID", "schema.org", "GRID", "MGDS", "VIAF", "JACoW-ID" ],
+        # Contributors. Many nameIdentifierSchemes, we do not use (yet):
+        # "attributes.creators[].nameIdentifiers[].nameIdentifierScheme":
+        # ["LCNA", "GND", "email", "NAF", "OSF", "RRID", "ORCID",
+        # "SCOPUS", "NRCPID", "schema.org", "GRID", "MGDS", "VIAF", "JACoW-ID"].
         contribs = []
 
         for i, c in enumerate(attributes['creators']):
-            if not c.get('nameType') == 'Personal':
+            if 'nameType' in c and not c.get('nameType') == 'Personal':
                 continue
             creator_id = None
             for nid in c.get('nameIdentifiers', []):
@@ -191,7 +228,7 @@ class DataciteImporter(EntityImporter):
                 if not orcid:
                     continue
                 creator_id = self.lookup_orcid(orcid)
-                # If creator_id is None, should we create creators?
+                # TODO(martin): If creator_id is None, should we create creators?
             contribs.append(fatcat_openapi_client.ReleaseContrib(
                 creator_id=creator_id,
                 index=i,
@@ -204,11 +241,27 @@ class DataciteImporter(EntityImporter):
         # "AlternativeTitle", "Other", "Subtitle", "TranslatedTitle"
         title, subtitle = None, None
 
-        for entry in attributes.get('titles', []):
-            if not title and 'titleType' not in entry:
-                title = entry.get('title').strip()
-            if entry.get('titleType') == 'Subtitle':
-                subtitle = entry.get('title').strip()
+        titles = attributes.get('titles', []) or []
+        if len(titles) == 0:
+            print('skipping record w/o title: {}'.format(obj), file=sys.stderr)
+            return False
+        elif len(titles) == 1:
+            # We do not care about the type then.
+            title = titles[0].get('title', '') or ''
+            title = title.strip()
+        else:
+            for entry in titles:
+                if not title and ('titleType' not in entry or not entry.get('titleType')):
+                    title = entry.get('title').strip()
+                if entry.get('titleType') == 'Subtitle':
+                    subtitle = entry.get('title', '').strip()
+
+        if not title:
+            print('skipping record w/o title: {}'.format(obj), file=sys.stderr)
+            return False
+
+        if not subtitle:
+            subtitle = None
 
         # Dates. A few internal dates (registered, created, updated) and
         # published (0..2554). We try to work with typed date list, in
@@ -217,14 +270,13 @@ class DataciteImporter(EntityImporter):
         # "Updated", "Valid".
         release_year, release_date = None, None
 
+        # Ignore: Collected, Issued.
         date_type_prio = (
             'Valid',
-            'Issued',
             'Available',
             'Accepted',
             'Submitted',
             'Copyrighted',
-            'Collected',
             'Created',
             'Updated',
         )
@@ -233,15 +285,36 @@ class DataciteImporter(EntityImporter):
             for item in dates:
                 if not item.get('dateType') == prio:
                     continue
-                try:
-                    result = dateparser.parse(item.get('date'))
-                except TypeError as err:
-                    print("{} failed with: {}".format(item.get('date'), err), file=sys.stderr)
-                    continue
+
+                # Parse out date, use common patterns first, fallback to dateparser.
+                result, value, year_only = None, item.get('date', ''), False
+
+                # Before using (expensive) dateparser, try a few common patterns.
+                common_patterns = ('%Y-%m-%d', '%Y', '%Y-%m', '%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S')
+
+                for pattern in common_patterns:
+                    try:
+                        result = datetime.datetime.strptime(value, pattern)
+                    except ValueError:
+                        continue
+                    else:
+                        if pattern == '%Y':
+                            year_only = True
+                        break
+
+                if result is None:
+                    print('fallback for {}'.format(value), file=sys.stderr)
+                    try:
+                        result = dateparser.parse(value)
+                    except TypeError as err:
+                        print("{} date parsing failed with: {}".format(value, err), file=sys.stderr)
+                        continue
+
                 if result is None:
                     # Unparsable date.
                     continue
-                release_date = result
+                if not year_only:
+                    release_date = result.date()
                 release_year = result.year
                 if 1000 < release_year < datetime.date.today().year + 5:
                     # Skip possibly bogus dates.
@@ -280,10 +353,16 @@ class DataciteImporter(EntityImporter):
                     container_id = self.lookup_issnl(issnl)
 
                     if container_id is None and container.get('title'):
+                        container_title = container.get('title')
+                        if isinstance(container_title, list):
+                            if len(container_title) > 0:
+                                print('too many container titles: {}'.format(len(container_title)))
+                                container_title = container_title[0]
+                        assert isinstance(container_title, str)
                         ce = fatcat_openapi_client.ContainerEntity(
                             issnl=issnl,
                             container_type=container_type,
-                            name=container.get('title'),
+                            name=container_title,
                         )
                         ce_edit = self.create_container(ce)
                         container_id = ce_edit.ident
@@ -326,12 +405,12 @@ class DataciteImporter(EntityImporter):
         # closest, but not always supplied.
         for typeType in ('citeproc', 'resourceTypeGeneral', 'schemaOrg', 'bibtex', 'ris'):
             value = attributes.get('types', {}).get(typeType)
-            release_type = DATACITE_TYPE_MAP.get(value)
+            release_type = DATACITE_TYPE_MAP.get(typeType, {}).get(value)
             if release_type is not None:
                 break
 
         if release_type is None:
-            print("datacite unmapped type: {}".format(release_type), file=sys.stderr)
+            print("no mapped type: {}".format(value), file=sys.stderr)
 
         # Language values are varied ("ger", "es", "English", "ENG", "en-us",
         # "other", ...). Try to crush it with langcodes: "It may sound to you
@@ -347,7 +426,7 @@ class DataciteImporter(EntityImporter):
             try:
                 language = langcodes.get(value).language
             except langcodes.tag_parser.LanguageTagError:
-                print('could not determine language: {}'.format(value), file=sys.stderr)
+                pass
 
         # Abstracts appear in "attributes.descriptions[].descriptionType", some
         # of the observed values: "Methods", "TechnicalInfo",
@@ -355,8 +434,8 @@ class DataciteImporter(EntityImporter):
         # "Other" fields might contain references or related articles (with
         # DOI). TODO(martin): maybe try to parse out some of those refs.
         abstracts = []
-
-        for desc in attributes.get('descriptions', []):
+        descs = attributes.get('descriptions', []) or []
+        for desc in descs:
             if not desc.get('descriptionType') == 'Abstract':
                 continue
             if len(desc.get('description', '')) < 10:
@@ -364,10 +443,11 @@ class DataciteImporter(EntityImporter):
             text = desc.get('description')
             sha1 = hashlib.sha1(text.encode('utf-8')).hexdigest()
             lang = None
-            try:
-                lang = langdetect.detect(text)
-            except langdetect.lang_detect_exception.LangDetectException:
-                pass
+            if self.lang_detect:
+                try:
+                    lang = langdetect.detect(text)
+                except langdetect.lang_detect_exception.LangDetectException as err:
+                    print('language detection failed: {}'.format(err), file=sys.stderr)
             abstracts.append(fatcat_openapi_client.ReleaseAbstract(
                 mimetype="text/plain",
                 content=text,
@@ -386,7 +466,8 @@ class DataciteImporter(EntityImporter):
         # For the moment, we only care about References.
         refs, ref_index = [], 0
 
-        for rel in attributes.get('relatedIdentifiers', []):
+        relIds = attributes.get('relatedIdentifiers', []) or []
+        for rel in relIds:
             if not rel.get('relationType') == 'References':
                 continue
             ref_extra = dict()
@@ -422,6 +503,9 @@ class DataciteImporter(EntityImporter):
         if extra_datacite:
             extra['datacite'] = extra_datacite
 
+        doi = attributes.get('doi', '').lower()
+        extids = self.lookup_ext_ids(doi=doi)
+
         # Assemble release.
         re = fatcat_openapi_client.ReleaseEntity(
             work_id=None,
@@ -435,7 +519,13 @@ class DataciteImporter(EntityImporter):
             release_date=release_date,
             publisher=publisher,
             ext_ids=fatcat_openapi_client.ReleaseExtIds(
-                doi=attributes.get('doi'),
+                doi=doi,
+                pmid=extids['pmid'],
+                pmcid=extids['pmcid'],
+                wikidata_qid=extids['wikidata_qid'],
+                core=extids['core_id'],
+                arxiv=extids['arxiv_id'],
+                jstor=extids['jstor_id'],
             ),
             contribs=contribs,
             volume=volume,
@@ -449,11 +539,12 @@ class DataciteImporter(EntityImporter):
         )
         return re
 
-    def try_update(self, re, debug=True):
+    def try_update(self, re):
         """
-        When debug is true, write the RE to stdout.
+        When debug is true, write the RE to stdout, not to the database. Might
+        hide schema mismatch bugs.
         """
-        if debug is True:
+        if self.debug is True:
             print(json.dumps(re.to_dict(), default=extended_json_encoder))
             return False
 
@@ -476,10 +567,16 @@ class DataciteImporter(EntityImporter):
         return True
 
     def insert_batch(self, batch):
+        print('inserting batch ({})'.format(len(batch)), file=sys.stderr)
+        if self.insert_log_file:
+            with open(self.insert_log_file, 'a') as f:
+                for doc in batch:
+                    json.dump(doc.to_dict(), f, default=extended_json_encoder)
+                    f.write('\n')
         self.api.create_release_auto_batch(fatcat_openapi_client.ReleaseAutoBatch(
             editgroup=fatcat_openapi_client.Editgroup(
-                description=self.editgroup_description,
-                extra=self.editgroup_extra),
+            description=self.editgroup_description,
+            extra=self.editgroup_extra),
             entity_list=batch))
 
 def extended_json_encoder(value):
@@ -491,6 +588,7 @@ def extended_json_encoder(value):
         return value.isoformat()
     if isinstance(value, set):
         return list(value)
+    raise TypeError('cannot encode type: {}'.format(type(value)))
 
 def lookup_license_slug(raw):
     """
diff --git a/python/tests/files/datacite_1k_records.jsonl.gz b/python/tests/files/datacite_1k_records.jsonl.gz
new file mode 100644
index 00000000..28ea6e37
Binary files /dev/null and b/python/tests/files/datacite_1k_records.jsonl.gz differ
diff --git a/python/tests/files/datacite_sample.jsonl b/python/tests/files/datacite_sample.jsonl
new file mode 100644
index 00000000..dba3e267
--- /dev/null
+++ b/python/tests/files/datacite_sample.jsonl
@@ -0,0 +1 @@
+{"id":"10.18730/8dym9","type":"dois","attributes":{"doi":"10.18730/8dym9","identifiers":[{"identifier":"https://doi.org/10.18730/8dym9","identifierType":"DOI"},{"identifier":"ICDW 20791","identifierType":"Other"}],"creators":[{"name":"GLIS Of The ITPGRFA","affiliation":[]}],"titles":[{"title":"Triticum turgidum L. subsp. durum (Desf.) Husn. 97090"}],"publisher":"International Centre for Agricultural Research in Dry Areas","container":{},"publicationYear":2017,"subjects":[{"subject":"Plant Genetic Resource for Food and Agriculture"}],"contributors":[{"name":"International Centre For Agricultural Research In Dry Areas","affiliation":[]}],"dates":[{"date":"1986","dateType":"Accepted"},{"date":"1978-06-03","dateType":"Collected"},{"date":"2017","dateType":"Issued"}],"language":"en","types":{"ris":"GEN","bibtex":"misc","citeproc":"article","schemaOrg":"CreativeWork","resourceType":"PGRFA Material","resourceTypeGeneral":"PhysicalObject"},"relatedIdentifiers":[{"schemeUri":"http://www.fao.org/plant-treaty/areas-of-work/global-information-system/descriptors","schemeType":"XML","relationType":"HasMetadata","relatedIdentifier":"https://ssl.fao.org/glisapi/v1/pgrfas?doi=10.18730/8DYM9","relatedIdentifierType":"URL","relatedMetadataScheme":"GLIS Descriptors"},{"schemeUri":"http://rs.tdwg.org/dwc/terms/guides/text/index.htm","schemeType":"DwC-A","relationType":"HasMetadata","relatedIdentifier":"https://ssl.fao.org/glisapi/v1/pgrfas?_format=dwc&doi=10.18730/8DYM9","relatedIdentifierType":"URL","relatedMetadataScheme":"Darwin Core Archive"}],"sizes":[],"formats":[],"version":null,"rightsList":[],"descriptions":[{"description":"Plant Genetic Resource.<br>Taxonomy: Triticum turgidum L. subsp. durum (Desf.) 
Husn.<br>Common name(s): Wheat<br>Conserved by: International Centre for Agricultural Research in Dry Areas (ICARDA), Lebanon<br>Local sample unique identifier: 97090<br>Method of creation: Acquisition<br>Date: 1986<br>Biological status: Traditional cultivar/landrace<br>Other identifiers: ICDW 20791<br>MLS status: Included<br>Historical: No","descriptionType":"Abstract"}],"geoLocations":[{"geoLocationPlace":"Collecting site","geoLocationPoint":{"pointLatitude":"35.5","pointLongitude":"23.7333"}}],"fundingReferences":[],"url":"https://ssl.fao.org/glis/doi/10.18730/8DYM9","contentUrl":null,"metadataVersion":3,"schemaVersion":"http://datacite.org/schema/kernel-4","source":"mds","isActive":true,"state":"findable","reason":null,"created":"2017-11-11T12:26:01.000Z","registered":"2017-11-11T12:26:02.000Z","published":"2017","updated":"2019-08-02T16:34:56.000Z"},"relationships":{"client":{"data":{"id":"fao.itpgrfa","type":"clients"}}}}
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index 0bbaba2e..9c542fc6 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -1,25 +1,99 @@
 """
 Test datacite importer.
+"""
 
-Datacite is a aggregator, hence inputs are quite varied.
+import datetime
+import pytest
+import gzip
+from fatcat_tools.importers import DataciteImporter, JsonLinePusher
+from fixtures import api
+import json
 
-Here is small sample of ID types taken from a sample:
 
-    497344 "DOI"
-     65013 "URL"
-     22210 "CCDC"
-     17853 "GBIF"
-     17635 "Other"
-     11474 "uri"
-      9170 "Publisher ID"
-      7775 "URN"
-      6196 "DUCHAS"
-      5624 "Handle"
-      5056 "publisherId"
+@pytest.fixture(scope="function")
+def datacite_importer(api):
+    with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file:
+        yield DataciteImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3',
+                               bezerk_mode=True)
 
-A nice tool, not yet existing tool (maybe named indigo) would do the following:
+@pytest.fixture(scope="function")
+def datacite_importer_existing(api):
+    with open('tests/files/ISSN-to-ISSN-L.snip.txt', 'r') as issn_file:
+        yield DataciteImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3',
+                               bezerk_mode=False)
 
-    $ shuf -n 100000 datacite.ndjson | indigo -t md > data.md
 
-TODO(martin): Write tests.
-"""
+@pytest.mark.skip(reason="larger datacite import slows tests down")
+def test_datacite_importer_huge(datacite_importer):
+    last_index = datacite_importer.api.get_changelog(limit=1)[0].index
+    with gzip.open('tests/files/datacite_1k_records.jsonl.gz', 'rt') as f:
+        datacite_importer.bezerk_mode = True
+        counts = JsonLinePusher(datacite_importer, f).run()
+    assert counts['insert'] == 998
+    change = datacite_importer.api.get_changelog_entry(index=last_index+1)
+    release = datacite_importer.api.get_release(change.editgroup.edits.releases[0].ident)
+    assert len(release.contribs) == 3
+
+
+def test_datacite_importer(datacite_importer):
+    last_index = datacite_importer.api.get_changelog(limit=1)[0].index
+    with open('tests/files/datacite_sample.jsonl', 'r') as f:
+        datacite_importer.bezerk_mode = True
+        counts = JsonLinePusher(datacite_importer, f).run()
+    assert counts['insert'] == 1
+    assert counts['exists'] == 0
+    assert counts['skip'] == 0
+
+    # fetch most recent editgroup
+    change = datacite_importer.api.get_changelog_entry(index=last_index+1)
+    eg = change.editgroup
+    assert eg.description
+    assert "datacite" in eg.description.lower()
+    assert eg.extra['git_rev']
+    assert "fatcat_tools.DataciteImporter" in eg.extra['agent']
+
+    last_index = datacite_importer.api.get_changelog(limit=1)[0].index
+    with open('tests/files/datacite_sample.jsonl', 'r') as f:
+        datacite_importer.bezerk_mode = False
+        datacite_importer.reset()
+        counts = JsonLinePusher(datacite_importer, f).run()
+    assert counts['insert'] == 0
+    assert counts['exists'] == 1
+    assert counts['skip'] == 0
+    assert last_index == datacite_importer.api.get_changelog(limit=1)[0].index
+
+def test_datacite_dict_parse(datacite_importer):
+    with open('tests/files/datacite_sample.jsonl', 'r') as f:
+        raw = json.load(f)
+        r = datacite_importer.parse_record(raw)
+        # ensure the API server is ok with format
+        JsonLinePusher(datacite_importer, [json.dumps(raw)]).run()
+
+        print(r.extra)
+        assert r.title == "Triticum turgidum L. subsp. durum (Desf.) Husn. 97090"
+        assert r.publisher == "International Centre for Agricultural Research in Dry Areas"
+        assert r.release_type == "article"
+        assert r.release_stage == "published"
+        assert r.license_slug == None
+        assert r.original_title == "Triticum turgidum L. subsp. durum (Desf.) Husn. 97090"
+        assert r.ext_ids.doi == "10.18730/8dym9"
+        assert r.ext_ids.isbn13 == None
+        assert r.language == "enc"
+        assert r.subtitle == None
+        assert r.release_date == None
+        assert r.release_year == 1986
+        assert 'subtitle' not in r.extra
+        assert 'subtitle' not in r.extra['datacite']
+        assert 'funder' not in r.extra
+        assert 'funder' not in r.extra['datacite']
+        # matched by ISSN, so shouldn't be in there
+        #assert extra['container_name'] == "International Journal of Quantum Chemistry"
+        assert r.extra['datacite']['url'] == 'https://ssl.fao.org/glis/doi/10.18730/8DYM9'
+        assert r.extra['datacite']['subjects'] == [{'subject': 'Plant Genetic Resource for Food and Agriculture'}]
+        assert len(r.abstracts) == 1
+        assert len(r.abstracts[0].content) == 421
+        assert len(r.contribs) == 1
+        assert r.contribs[0].raw_name == "GLIS Of The ITPGRFA"
+        assert r.contribs[0].given_name == None
+        assert r.contribs[0].surname == None
+        assert len(r.refs) == 0
-- 
cgit v1.2.3


From a196435a0e88f85785742cdd089344f97401b43a Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Sat, 21 Dec 2019 23:30:56 +0100
Subject: address first round of MR14 comments

* add missing langdetect
* use entity_to_dict for json debug output
* factor out code for fields in function and add table driven tests
* update citeproc types
* add author as default role
* add raw_affiliation
* include relations from datacite
* remove url (covered by doi already)

Using yapf for python formatting.
---
 python/Pipfile                            |   1 +
 python/Pipfile.lock                       |   7 +
 python/fatcat_tools/importers/datacite.py | 467 ++++++++++++++++++++----------
 python/tests/import_datacite.py           | 178 +++++++++++-
 4 files changed, 503 insertions(+), 150 deletions(-)

(limited to 'python/tests')

diff --git a/python/Pipfile b/python/Pipfile
index dfb87514..6325c180 100644
--- a/python/Pipfile
+++ b/python/Pipfile
@@ -49,6 +49,7 @@ elasticsearch-dsl = ">=6.0.0,<7.0.0"
 elasticsearch = ">=6.0.0,<7.0.0"
 langcodes = ">=1.4"
 dateparser = ">=0.7"
+langdetect = "*"
 
 [requires]
 # Python 3.5 is the bundled (system) version of python for Ubuntu 16.04
diff --git a/python/Pipfile.lock b/python/Pipfile.lock
index b6e066b5..f0f60aa8 100644
--- a/python/Pipfile.lock
+++ b/python/Pipfile.lock
@@ -306,6 +306,13 @@
             "index": "pypi",
             "version": "==1.4.1"
         },
+        "langdetect": {
+            "hashes": [
+                "sha256:91a170d5f0ade380db809b3ba67f08e95fe6c6c8641f96d67a51ff7e98a9bf30"
+            ],
+            "index": "pypi",
+            "version": "==1.0.7"
+        },
         "loginpass": {
             "hashes": [
                 "sha256:717c87c1870a7e00547fd9d989aea9b22232b2f48826f552d79c34a47f9618c9",
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 77ce1012..19b89edf 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -14,6 +14,7 @@ import langcodes
 import langdetect
 import sqlite3
 import sys
+from fatcat_tools.transforms import entity_to_dict
 
 # https://guide.fatcat.wiki/entity_container.html#container_type-vocabulary
 CONTAINER_TYPE_MAP = {
@@ -55,16 +56,42 @@ DATACITE_TYPE_MAP = {
         'Thesis': 'thesis',
     },
     'citeproc': {
-        'dataset': 'dataset',
-        'chapter': 'chapter',
-        'article-journal': 'article-journal',
-        'song': 'song',
         'article': 'article',
-        'report': 'report',
+        'article-journal': 'article-journal',
+        'article-magazine': 'article-magazine',
+        'article-newspaper': 'article-newspaper',
+        'bill': 'bill',
+        'book': 'book',
+        'broadcast': 'broadcast',
+        'chapter': 'chapter',
+        'dataset': 'dataset',
+        'entry-dictionary': 'entry-dictionary',
+        'entry-encyclopedia': 'entry-encyclopedia',
+        'entry': 'entry',
+        'figure': 'figure',
         'graphic': 'graphic',
+        'interview': 'interview',
+        'legal_case': 'legal_case',
+        'legislation': 'legislation',
+        'manuscript': 'manuscript',
+        'map': 'map',
+        'motion_picture': 'motion_picture',
+        'musical_score': 'musical_score',
+        'pamphlet': 'pamphlet',
+        'paper-conference': 'paper-conference',
+        'patent': 'patent',
+        'personal_communication': 'personal_communication',
+        'post': 'post',
+        'post-weblog': 'post-weblog',
+        'report': 'report',
+        'review-book': 'review-book',
+        'review': 'review',
+        'song': 'song',
+        'speech': 'speech',
         'thesis': 'thesis',
-        'book': 'book',
-    },
+        'treaty': 'treaty',
+        'webpage': 'webpage',
+    },  # https://docs.citationstyles.org/en/master/specification.html#appendix-iii-types
     'bibtex': {
         'phdthesis': 'thesis',
         'inbook': 'chapter',
@@ -88,7 +115,6 @@ DATACITE_TYPE_MAP = {
     }
 }
 
-
 # TODO(martin): merge this with other maps, maybe.
 LICENSE_SLUG_MAP = {
     "//creativecommons.org/licenses/by/2.0/": "CC-BY",
@@ -124,7 +150,8 @@ LICENSE_SLUG_MAP = {
     "//www.karger.com/Services/SiteLicenses": "KARGER",
     "//www.opensource.org/licenses/Apache-2.0": "Apache-2.0",
     "//www.opensource.org/licenses/BSD-3-Clause": "BSD-3-Clause",
-    "//www.opensource.org/licenses/EUPL-1.1": "EUPL-1.1", # redirects to EUPL-1.2
+    "//www.opensource.org/licenses/EUPL-1.1":
+    "EUPL-1.1",  # redirects to EUPL-1.2
     "//www.opensource.org/licenses/MIT": "MIT",
     # "http://royalsocietypublishing.org/licence": "", # OA and "normal", https://royalsociety.org/journals/authors/licence-to-publish/
     # "http://rsc.li/journals-terms-of-use": "RSC",
@@ -146,23 +173,31 @@ LICENSE_SLUG_MAP = {
     # Note: Some URLs pointing to licensing terms are not in WB yet (but would be nice).
 }
 
+
 class DataciteImporter(EntityImporter):
     """
     Importer for datacite records.
     """
-
-    def __init__(self, api, issn_map_file, debug=False, lang_detect=False,
-                 insert_log_file=None, **kwargs):
-
-        eg_desc = kwargs.get('editgroup_description',
-            "Automated import of Datacite DOI metadata, harvested from REST API")
+    def __init__(self,
+                 api,
+                 issn_map_file,
+                 debug=False,
+                 lang_detect=False,
+                 insert_log_file=None,
+                 **kwargs):
+
+        eg_desc = kwargs.get(
+            'editgroup_description',
+            "Automated import of Datacite DOI metadata, harvested from REST API"
+        )
         eg_extra = kwargs.get('editgroup_extra', dict())
-        eg_extra['agent'] = eg_extra.get('agent', 'fatcat_tools.DataciteImporter')
+        eg_extra['agent'] = eg_extra.get('agent',
+                                         'fatcat_tools.DataciteImporter')
         super().__init__(api,
-            issn_map_file=issn_map_file,
-            editgroup_description=eg_desc,
-            editgroup_extra=eg_extra,
-            **kwargs)
+                         issn_map_file=issn_map_file,
+                         editgroup_description=eg_desc,
+                         editgroup_extra=eg_extra,
+                         **kwargs)
 
         self.create_containers = kwargs.get('create_containers', True)
         extid_map_file = kwargs.get('extid_map_file')
@@ -179,18 +214,31 @@ class DataciteImporter(EntityImporter):
         self.lang_detect = lang_detect
         self.insert_log_file = insert_log_file
 
-        print('datacite with debug={}, lang_detect={}'.format(self.debug, self.lang_detect), file=sys.stderr)
+        print('datacite with debug={}, lang_detect={}'.format(
+            self.debug, self.lang_detect),
+              file=sys.stderr)
 
     def lookup_ext_ids(self, doi):
         """
         Return dictionary of identifiers refering to the same things as the given DOI.
         """
         if self.extid_map_db is None:
-            return dict(core_id=None, pmid=None, pmcid=None, wikidata_qid=None, arxiv_id=None, jstor_id=None)
-        row = self.extid_map_db.execute("SELECT core, pmid, pmcid, wikidata FROM ids WHERE doi=? LIMIT 1",
+            return dict(core_id=None,
+                        pmid=None,
+                        pmcid=None,
+                        wikidata_qid=None,
+                        arxiv_id=None,
+                        jstor_id=None)
+        row = self.extid_map_db.execute(
+            "SELECT core, pmid, pmcid, wikidata FROM ids WHERE doi=? LIMIT 1",
             [doi.lower()]).fetchone()
         if row is None:
-            return dict(core_id=None, pmid=None, pmcid=None, wikidata_qid=None, arxiv_id=None, jstor_id=None)
+            return dict(core_id=None,
+                        pmid=None,
+                        pmcid=None,
+                        wikidata_qid=None,
+                        arxiv_id=None,
+                        jstor_id=None)
         row = [str(cell or '') or None for cell in row]
         return dict(
             core_id=row[0],
@@ -206,6 +254,8 @@ class DataciteImporter(EntityImporter):
         """
         Mapping datacite JSON to ReleaseEntity.
         """
+        if not obj or not isinstance(obj, dict):
+            return None
         if 'attributes' not in obj:
             return None
 
@@ -218,43 +268,54 @@ class DataciteImporter(EntityImporter):
         contribs = []
 
         for i, c in enumerate(attributes['creators']):
-            if 'nameType' in c and not c.get('nameType') == 'Personal':
-                continue
-            creator_id = None
-            for nid in c.get('nameIdentifiers', []):
-                if not nid.get('nameIdentifierScheme').lower() == "orcid":
+            nameType = c.get('nameType', '') or ''
+            if nameType == 'Personal':
+                creator_id = None
+                for nid in c.get('nameIdentifiers', []):
+                    if not nid.get('nameIdentifierScheme').lower() == "orcid":
+                        continue
+                    orcid = nid.get('nameIdentifier',
+                                    '').replace('https://orcid.org/', '')
+                    if not orcid:
+                        continue
+                    creator_id = self.lookup_orcid(orcid)
+                    # TODO(martin): If creator_id is None, should we create creators?
+
+                # If there are multiple affiliation strings, use the first one.
+                affiliations = c.get('affiliation', []) or []
+                raw_affiliation = None
+                if len(affiliations) == 0:
+                    raw_affiliation = None
+                else:
+                    raw_affiliation = affiliations[0]
+
+                contribs.append(
+                    fatcat_openapi_client.ReleaseContrib(
+                        creator_id=creator_id,
+                        index=i,
+                        raw_name=c.get('name'),
+                        given_name=c.get('givenName'),
+                        surname=c.get('familyName'),
+                        role='author',
+                        raw_affiliation=raw_affiliation,
+                    ))
+            elif nameType == 'Organizational':
+                name = c.get('name', '') or ''
+                if name == 'NN':
                     continue
-                orcid = nid.get('nameIdentifier', '').replace('https://orcid.org/', '')
-                if not orcid:
+                if len(name) < 3:
                     continue
-                creator_id = self.lookup_orcid(orcid)
-                # TODO(martin): If creator_id is None, should we create creators?
-            contribs.append(fatcat_openapi_client.ReleaseContrib(
-                creator_id=creator_id,
-                index=i,
-                raw_name=c.get('name'),
-                given_name=c.get('givenName'),
-                surname=c.get('familyName'),
-            ))
+                extra = {'organization': name}
+                contribs.append(fatcat_openapi_client.ReleaseContrib(
+                    index=i, extra=extra))
+            else:
+                print('unknown name type: {}'.format(nameType), file=sys.stderr)
 
         # Title, may come with "attributes.titles[].titleType", like
         # "AlternativeTitle", "Other", "Subtitle", "TranslatedTitle"
-        title, subtitle = None, None
-
         titles = attributes.get('titles', []) or []
-        if len(titles) == 0:
-            print('skipping record w/o title: {}'.format(obj), file=sys.stderr)
-            return False
-        elif len(titles) == 1:
-            # We do not care about the type then.
-            title = titles[0].get('title', '') or ''
-            title = title.strip()
-        else:
-            for entry in titles:
-                if not title and ('titleType' not in entry or not entry.get('titleType')):
-                    title = entry.get('title').strip()
-                if entry.get('titleType') == 'Subtitle':
-                    subtitle = entry.get('title', '').strip()
+        title, original_language_title, subtitle = parse_datacite_titles(
+            titles)
 
         if not title:
             print('skipping record w/o title: {}'.format(obj), file=sys.stderr)
@@ -268,67 +329,14 @@ class DataciteImporter(EntityImporter):
         # "attributes.dates[].dateType", values: "Accepted", "Available"
         # "Collected", "Copyrighted", "Created", "Issued", "Submitted",
         # "Updated", "Valid".
-        release_year, release_date = None, None
-
-        # Ignore: Collected, Issued.
-        date_type_prio = (
-            'Valid',
-            'Available',
-            'Accepted',
-            'Submitted',
-            'Copyrighted',
-            'Created',
-            'Updated',
-        )
-
-        # Before using (expensive) dateparser, try a few common patterns.
-        common_patterns = ('%Y-%m-%d', '%Y-%m', '%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S', '%Y')
-
-        for prio in date_type_prio:
-            dates = attributes.get('dates', []) or [] # Never be None.
-            for item in dates:
-                if not item.get('dateType') == prio:
-                    continue
-
-                # Parse out date, use common patterns first, fallback to dateparser.
-                result, value, year_only = None, item.get('date', ''), False
-
-                for pattern in common_patterns:
-                    try:
-                        result = datetime.datetime.strptime(value, pattern)
-                    except ValueError:
-                        continue
-                    else:
-                        if pattern == '%Y':
-                            year_only = True
-                        break
-
-                if result is None:
-                    print('fallback for {}'.format(value), file=sys.stderr)
-                    try:
-                        result = dateparser.parse(value)
-                    except TypeError as err:
-                        print("{} date parsing failed with: {}".format(value, err), file=sys.stderr)
-                        continue
-
-                if result is None:
-                    # Unparsable date.
-                    continue
-                if not year_only:
-                    release_date = result.date()
-                release_year = result.year
-                if 1000 < release_year < datetime.date.today().year + 5:
-                    # Skip possibly bogus dates.
-                    continue
-                break
-            else:
-                continue
-            break
+        release_date, release_year = parse_datacite_dates(
+            attributes.get('dates', []))
 
         # Publisher. A few NA values. A few bogus values.
         publisher = attributes.get('publisher')
 
-        if publisher in ('(:unav)', 'Unknown', 'n.a.', '[s.n.]', '(:unap)', '(:none)'):
+        if publisher in ('(:unav)', 'Unknown', 'n.a.', '[s.n.]', '(:unap)',
+                         '(:none)'):
             publisher = None
         if publisher is not None and len(publisher) > 80:
             # Arbitrary magic value max length. TODO(martin): better heuristic,
@@ -345,7 +353,8 @@ class DataciteImporter(EntityImporter):
         container = attributes.get('container', {}) or {}
         if container.get('type') in CONTAINER_TYPE_MAP.keys():
             container_type = CONTAINER_TYPE_MAP.get(container['type'])
-            if container.get('identifier') and container.get('identifierType') == 'ISSN':
+            if container.get('identifier') and container.get(
+                    'identifierType') == 'ISSN':
                 issn = container.get('identifier')
                 if len(issn) == 8:
                     issn = issn[:4] + "-" + issn[4:]
@@ -357,7 +366,8 @@ class DataciteImporter(EntityImporter):
                         container_title = container.get('title')
                         if isinstance(container_title, list):
                             if len(container_title) > 0:
-                                print('too many container titles: {}'.format(len(container_title)))
+                                print('too many container titles: {}'.format(
+                                    len(container_title)))
                                 container_title = container_title[0]
                         assert isinstance(container_title, str)
                         ce = fatcat_openapi_client.ContainerEntity(
@@ -404,7 +414,8 @@ class DataciteImporter(EntityImporter):
         # types supplied in datacite. The "attributes.types.resourceType"
         # contains too many (176 in sample) things for now; citeproc may be the
         # closest, but not always supplied.
-        for typeType in ('citeproc', 'resourceTypeGeneral', 'schemaOrg', 'bibtex', 'ris'):
+        for typeType in ('citeproc', 'resourceTypeGeneral', 'schemaOrg',
+                         'bibtex', 'ris'):
             value = attributes.get('types', {}).get(typeType)
             release_type = DATACITE_TYPE_MAP.get(typeType, {}).get(value)
             if release_type is not None:
@@ -442,19 +453,19 @@ class DataciteImporter(EntityImporter):
             if len(desc.get('description', '')) < 10:
                 continue
             text = desc.get('description')
-            sha1 = hashlib.sha1(text.encode('utf-8')).hexdigest()
             lang = None
             if self.lang_detect:
                 try:
                     lang = langdetect.detect(text)
                 except langdetect.lang_detect_exception.LangDetectException as err:
-                    print('language detection failed: {}'.format(err), file=sys.stderr)
-            abstracts.append(fatcat_openapi_client.ReleaseAbstract(
-                mimetype="text/plain",
-                content=text,
-                sha1=sha1,
-                lang=lang,
-            ))
+                    print('language detection failed: {}'.format(err),
+                          file=sys.stderr)
+            abstracts.append(
+                fatcat_openapi_client.ReleaseAbstract(
+                    mimetype="text/plain",
+                    content=text,
+                    lang=lang,
+                ))
 
         # References and relations. Datacite include many relation types in
         # "attributes.relatedIdentifiers[].relationType", e.g.
@@ -476,17 +487,19 @@ class DataciteImporter(EntityImporter):
                 ref_extra['doi'] = rel.get('relatedIdentifier')
             if not ref_extra:
                 ref_extra = None
-            refs.append(fatcat_openapi_client.ReleaseRef(
-                index=ref_index,
-                extra=ref_extra,
-            ))
+            refs.append(
+                fatcat_openapi_client.ReleaseRef(
+                    index=ref_index,
+                    extra=ref_extra,
+                ))
             ref_index += 1
 
         # Start with clear stages, e.g. published. TODO(martin): we could
         # probably infer a bit more from the relations, e.g.
         # "IsPreviousVersionOf" or "IsNewVersionOf".
         release_stage = None
-        if attributes.get('state') == 'findable' or attributes.get('isActive') is True:
+        if attributes.get(
+                'state') == 'findable' or attributes.get('isActive') is True:
             release_stage = 'published'
 
         # Extra information.
@@ -496,8 +509,22 @@ class DataciteImporter(EntityImporter):
             extra_datacite['license'] = license_extra
         if attributes.get('subjects'):
             extra_datacite['subjects'] = attributes['subjects']
-        if attributes.get('url'):
-            extra_datacite['url'] = attributes['url']
+
+        # Include certain relations from relatedIdentifiers. Keeping the
+        # original structure of data here, which is a list of dicts, with
+        # relation type, identifier and identifier type (mostly).
+        relations = []
+        for rel in relIds:
+            if rel.get('relationType') in ('IsPartOf', 'Reviews', 'Continues',
+                                           'IsVariantFormOf', 'IsSupplementTo',
+                                           'HasVersion', 'IsMetadataFor',
+                                           'IsNewVersionOf', 'IsIdenticalTo',
+                                           'IsVersionOf', 'IsDerivedFrom',
+                                           'IsSourceOf'):
+                relations.append(rel)
+
+        if relations:
+            extra_datacite['relations'] = relations
 
         extra = dict()
 
@@ -515,7 +542,7 @@ class DataciteImporter(EntityImporter):
             release_stage=release_stage,
             title=title,
             subtitle=subtitle,
-            original_title=title,
+            original_title=original_language_title,
             release_year=release_year,
             release_date=release_date,
             publisher=publisher,
@@ -546,7 +573,7 @@ class DataciteImporter(EntityImporter):
         hide schema mismatch bugs.
         """
         if self.debug is True:
-            print(json.dumps(re.to_dict(), default=extended_json_encoder))
+            print(json.dumps(entity_to_dict(re, api_client=None)))
             return False
 
         # lookup existing DOI (don't need to try other ext idents for crossref)
@@ -572,24 +599,15 @@ class DataciteImporter(EntityImporter):
         if self.insert_log_file:
             with open(self.insert_log_file, 'a') as f:
                 for doc in batch:
-                    json.dump(doc.to_dict(), f, default=extended_json_encoder)
+                    json.dump(entity_to_dict(re, api_client=None), f)
                     f.write('\n')
-        self.api.create_release_auto_batch(fatcat_openapi_client.ReleaseAutoBatch(
-            editgroup=fatcat_openapi_client.Editgroup(
-            description=self.editgroup_description,
-            extra=self.editgroup_extra),
-            entity_list=batch))
+        self.api.create_release_auto_batch(
+            fatcat_openapi_client.ReleaseAutoBatch(
+                editgroup=fatcat_openapi_client.Editgroup(
+                    description=self.editgroup_description,
+                    extra=self.editgroup_extra),
+                entity_list=batch))
 
-def extended_json_encoder(value):
-    """
-    Can be used with json.dumps(value, default=extended_json_encoder) to serialize
-    value not serializable by default. https://docs.python.org/3/library/json.html#basic-usage
-    """
-    if isinstance(value, (datetime.datetime, datetime.date)):
-        return value.isoformat()
-    if isinstance(value, set):
-        return list(value)
-    raise TypeError('cannot encode type: {}'.format(type(value)))
 
 def lookup_license_slug(raw):
     """
@@ -604,3 +622,156 @@ def lookup_license_slug(raw):
         if not raw.endswith('/'):
             raw = raw + '/'
     return LICENSE_SLUG_MAP.get(raw)
+
+
+def find_original_language_title(item, min_length=4, max_questionmarks=3):
+    """
+    Perform a few checks before returning a potential original language title.
+
+    The value under "original_language_title" may be a string or a dict
+    carrying the text under "__content__". Returns None, if there is no
+    title to compare against, or the candidate is not a string, equals the
+    title, is shorter than min_length or contains more than
+    max_questionmarks question marks.
+    """
+    title = item.get('title')
+    if not title:
+        return None
+    candidate = item.get('original_language_title')
+    if isinstance(candidate, dict):
+        candidate = candidate.get('__content__')
+    if not isinstance(candidate, str) or candidate == title:
+        return None
+    # Same sanity checks for both input shapes.
+    too_short = len(candidate) < min_length
+    if too_short or candidate.count('?') > max_questionmarks:
+        return None
+    return candidate
+
+
+def parse_datacite_titles(titles):
+    """
+    Given a list of title items from datacite, return 3-tuple (title,
+    original_language_title, subtitle).
+
+    A single item is used as title regardless of its titleType. With more
+    items, the first non-empty title without a titleType and the first
+    non-empty "Subtitle" win. Missing or blank values result in None.
+
+    Example input:
+
+        [{"title": "Meeting Heterogeneity in Consumer Demand"}]
+    """
+    title, original_language_title, subtitle = None, None, None
+
+    if not titles:
+        return title, original_language_title, subtitle
+
+    def clean(entry):
+        # Key may be absent or null, so do not call strip on the raw value.
+        return (entry.get('title') or '').strip() or None
+
+    if len(titles) == 1:
+        original_language_title = find_original_language_title(titles[0])
+        return clean(titles[0]), original_language_title, subtitle
+
+    # Multiple items: the first match per kind wins.
+    for entry in titles:
+        title_type = entry.get('titleType')
+        if not title and not title_type:
+            title = clean(entry)
+        if not subtitle and title_type == 'Subtitle':
+            subtitle = clean(entry)
+        if not original_language_title:
+            original_language_title = find_original_language_title(entry)
+
+    return title, original_language_title, subtitle
+
+
+def parse_datacite_dates(dates):
+    """
+    Given a list of date fields (under .dates), return tuple, (release_date,
+    release_year).
+
+    Items are dicts like {'date': '2019-12-01', 'dateType': 'Valid'}. Items
+    with a known dateType are tried in priority order first; if none yields
+    a value, all items are tried in list order. A year-only value yields
+    (None, year). Years before 1000 or more than five years ahead are
+    considered bogus and skipped. Returns (None, None) if nothing usable is
+    found. Raises ValueError, if dates is neither empty nor a list.
+    """
+    if not dates:
+        return None, None
+
+    if not isinstance(dates, list):
+        raise ValueError('expected a list of date items')
+
+    # Ignored: Collected, Issued.
+    date_type_prio = (
+        'Valid',
+        'Available',
+        'Accepted',
+        'Submitted',
+        'Copyrighted',
+        'Created',
+        'Updated',
+    )
+
+    # Before using (expensive) dateparser, try a few common patterns.
+    common_patterns = ('%Y-%m-%d', '%Y-%m', '%Y-%m-%dT%H:%M:%SZ',
+                       '%Y-%m-%dT%H:%M:%S', '%Y')
+    max_year = datetime.date.today().year + 5
+
+    def parse_item(item):
+        # Returns (date, year) for one item; (None, None) if not usable. A
+        # null 'date' value must not reach strptime, which raises TypeError.
+        result, value, year_only = None, item.get('date') or '', False
+        if not value:
+            return None, None
+
+        for pattern in common_patterns:
+            try:
+                result = datetime.datetime.strptime(value, pattern)
+            except ValueError:
+                continue
+            else:
+                year_only = pattern == '%Y'
+                break
+
+        if result is None:
+            print('fallback for {}'.format(value), file=sys.stderr)
+            try:
+                result = dateparser.parse(value)
+            except TypeError as err:
+                print("{} date parsing failed with: {}".format(value, err),
+                      file=sys.stderr)
+                return None, None
+
+        if result is None:
+            # Unparsable date.
+            return None, None
+
+        if result.year < 1000 or result.year > max_year:
+            # Skip possibly bogus dates.
+            return None, None
+
+        if year_only:
+            return None, result.year
+        return result.date(), result.year
+
+    # First pass: items with a known dateType, in priority order.
+    for prio in date_type_prio:
+        for item in dates:
+            if item.get('dateType') != prio:
+                continue
+            release_date, release_year = parse_item(item)
+            if release_year is not None:
+                return release_date, release_year
+
+    # Second pass: any item, in list order.
+    for item in dates:
+        release_date, release_year = parse_item(item)
+        if release_year is not None:
+            return release_date, release_year
+
+    return None, None
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index 9c542fc6..ab67a310 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -2,10 +2,12 @@
 Test datacite importer.
 """
 
+import collections
 import datetime
 import pytest
 import gzip
 from fatcat_tools.importers import DataciteImporter, JsonLinePusher
+from fatcat_tools.importers.datacite import find_original_language_title, parse_datacite_titles, parse_datacite_dates
 from fixtures import api
 import json
 
@@ -22,7 +24,6 @@ def datacite_importer_existing(api):
         yield DataciteImporter(api, issn_file, extid_map_file='tests/files/example_map.sqlite3',
                                bezerk_mode=False)
 
-
 @pytest.mark.skip(reason="larger datacite import slows tests down")
 def test_datacite_importer_huge(datacite_importer):
     last_index = datacite_importer.api.get_changelog(limit=1)[0].index
@@ -35,6 +36,179 @@ def test_datacite_importer_huge(datacite_importer):
     assert len(release.contribs) == 3
 
 
+def test_find_original_language_title():
+    """
+    Original language title may be a string or a dict; case.about names a case.
+    """
+    Case = collections.namedtuple('Case', 'about input result')
+    cases = [
+        Case('defaults to None', {}, None),
+        Case('ignore unknown keys', {'broken': 'kv'}, None),
+        Case('just a title', {'title': 'Noise Reduction'}, None),
+        Case('same title should be ignored', {
+            'title': 'Noise Reduction',
+            'original_language_title': 'Noise Reduction'
+        }, None),
+        Case('empty subdict is ignored', {
+            'title': 'Noise Reduction',
+            'original_language_title': {},
+        }, None),
+        Case('unknown subdict keys are ignored', {
+            'title': 'Noise Reduction',
+            'original_language_title': {'broken': 'kv'},
+        }, None),
+        Case('original string', {
+            'title': 'Noise Reduction',
+            'original_language_title': 'Подавление шума',
+        }, 'Подавление шума'),
+        Case('language tag is ignored, since its broken', {
+            'title': 'Noise Reduction',
+            'original_language_title': {
+                'language': 'ja',
+                '__content__': 'Noise Reduction'
+            },
+        }, None),
+        Case('do not care about language', {
+            'title': 'Noise Reduction',
+            'original_language_title': {
+                'language': 'ja',
+                '__content__': 'Rauschunterdrückung',
+            },
+        }, 'Rauschunterdrückung'),
+        Case('ignore excessive questionmarks', {
+            'title': 'Noise Reduction',
+            'original_language_title': {
+                'language': 'ja',
+                '__content__': '???? However',
+            },
+        }, None),
+    ]
+
+    for case in cases:
+        result = find_original_language_title(case.input)
+        assert result == case.result, case.about
+
+def test_parse_datacite_titles():
+    """
+    Each case maps a list of datacite title items to the expected 3-tuple
+    (title, original_language_title, subtitle); case.about labels a failure.
+    """
+    Case = collections.namedtuple('Case', 'about input result')
+    cases = [
+        Case('handle None', None, (None, None, None)),
+        Case('empty list', [], (None, None, None)),
+        Case('empty item', [{}], (None, None, None)),
+        Case('broken keys', [{'broken': 'kv'}], (None, None, None)),
+        Case('title only', [{'title': 'Total carbon dioxide'}],
+             ('Total carbon dioxide', None, None),
+        ),
+        Case('title and subtitle', [
+            {'title': 'Total carbon dioxide'},
+            {'title': 'Station TT043_7-9', 'titleType': 'Subtitle'},
+        ],
+             ('Total carbon dioxide', None, 'Station TT043_7-9'),
+        ),
+        Case('title, subtitle order does not matter', [
+            {'title': 'Station TT043_7-9', 'titleType': 'Subtitle'},
+            {'title': 'Total carbon dioxide'},
+        ],
+             ('Total carbon dioxide', None, 'Station TT043_7-9'),
+        ),
+        Case('multiple titles, first wins', [
+            {'title': 'Total carbon dioxide'},
+            {'title': 'Meeting Heterogeneity'},
+        ],
+             ('Total carbon dioxide', None, None),
+        ),
+        Case('multiple titles, plus sub', [
+            {'title': 'Total carbon dioxide'},
+            {'title': 'Meeting Heterogeneity'},
+            {'title': 'Station TT043_7-9', 'titleType': 'Subtitle'},
+        ],
+             ('Total carbon dioxide', None, 'Station TT043_7-9'),
+        ),
+        Case('multiple titles, multiple subs', [
+            {'title': 'Total carbon dioxide'},
+            {'title': 'Meeting Heterogeneity'},
+            {'title': 'Station TT043_7-9', 'titleType': 'Subtitle'},
+            {'title': 'Some other subtitle', 'titleType': 'Subtitle'},
+        ],
+             ('Total carbon dioxide', None, 'Station TT043_7-9'),
+        ),
+        Case('title, original, sub', [
+            {'title': 'Total carbon dioxide', 'original_language_title': 'Всего углекислого газа'},
+            {'title': 'Station TT043_7-9', 'titleType': 'Subtitle'},
+        ],
+             ('Total carbon dioxide', 'Всего углекислого газа', 'Station TT043_7-9'),
+        ),
+        Case('title, original same as title, sub', [
+            {'title': 'Total carbon dioxide', 'original_language_title': {
+                '__content__': 'Total carbon dioxide',
+            }},
+            {'title': 'Station TT043_7-9', 'titleType': 'Subtitle'},
+        ],
+             ('Total carbon dioxide', None, 'Station TT043_7-9'),
+        ),
+        Case('title, original dict, sub', [
+            {'title': 'Total carbon dioxide', 'original_language_title': {
+                '__content__': 'Всего углекислого газа',
+            }},
+            {'title': 'Station TT043_7-9', 'titleType': 'Subtitle'},
+        ],
+             ('Total carbon dioxide', 'Всего углекислого газа', 'Station TT043_7-9'),
+        ),
+    ]
+
+    for case in cases:
+        result = parse_datacite_titles(case.input)
+        assert result == case.result, case.about
+
+def test_parse_datacite_dates():
+    """
+    Each case maps a list of date items to (release_date, release_year).
+    """
+    Case = collections.namedtuple('Case', 'about input result')
+    cases = [
+        Case('None is None', None, (None, None)),
+        Case('empty list is None', [], (None, None)),
+        Case('empty item is None', [{}], (None, None)),
+        Case('year only', [{'date': '2019'}], (None, 2019)),
+        Case('first wins', [{'date': '2019'}, {'date': '2020'}], (None, 2019)),
+        Case('skip bogus year', [{'date': 'abc'}, {'date': '2020'}], (None, 2020)),
+        Case('first with type', [
+            {'date': '2019', 'dateType': 'Accepted'}, {'date': '2020'}
+        ], (None, 2019)),
+        Case('full date', [
+            {'date': '2019-12-01', 'dateType': 'Valid'},
+        ], (datetime.date(2019, 12, 1), 2019)),
+        Case('date type prio', [
+            {'date': '2000-12-01', 'dateType': 'Valid'},
+            {'date': '2010-01-01', 'dateType': 'Updated'},
+        ], (datetime.date(2000, 12, 1), 2000)),
+        Case('date type prio, Available > Updated', [
+            {'date': '2010-01-01', 'dateType': 'Updated'},
+            {'date': '2000-12-01', 'dateType': 'Available'},
+        ], (datetime.date(2000, 12, 1), 2000)),
+        Case('allow different date formats, Available > Updated', [
+            {'date': '2010-01-01T10:00:00', 'dateType': 'Updated'},
+            {'date': '2000-12-01T10:00:00', 'dateType': 'Available'},
+        ], (datetime.date(2000, 12, 1), 2000)),
+        Case('allow trailing Z in timestamps, Available > Updated', [
+            {'date': '2010-01-01T10:00:00Z', 'dateType': 'Updated'},
+            {'date': '2000-12-01T10:00:00Z', 'dateType': 'Available'},
+        ], (datetime.date(2000, 12, 1), 2000)),
+        Case('allow fuzzy date formats, Available > Updated', [
+            {'date': '2010', 'dateType': 'Updated'},
+            {'date': '2000 Dec 01', 'dateType': 'Available'},
+        ], (datetime.date(2000, 12, 1), 2000)),
+        Case('ignore broken date', [
+            {'date': 'Febrrr 45', 'dateType': 'Updated'},
+        ], (None, None)),
+    ]
+    for case in cases:
+        result = parse_datacite_dates(case.input)
+        assert result == case.result, case.about
+
 def test_datacite_importer(datacite_importer):
     last_index = datacite_importer.api.get_changelog(limit=1)[0].index
     with open('tests/files/datacite_sample.jsonl', 'r') as f:
@@ -75,7 +249,7 @@ def test_datacite_dict_parse(datacite_importer):
         assert r.release_type == "article"
         assert r.release_stage == "published"
         assert r.license_slug == None
-        assert r.original_title == "Triticum turgidum L. subsp. durum (Desf.) Husn. 97090"
+        assert r.original_title == None
         assert r.ext_ids.doi == "10.18730/8dym9"
         assert r.ext_ids.isbn13 == None
         assert r.language == "enc"
-- 
cgit v1.2.3


From 9a2a7e35948e350aaf40b07d4d4427d288970d3f Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Thu, 26 Dec 2019 23:52:40 +0100
Subject: datacite: adjust tests

---
 python/tests/import_datacite.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'python/tests')

diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index ab67a310..bc47a185 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -252,7 +252,7 @@ def test_datacite_dict_parse(datacite_importer):
         assert r.original_title == None
         assert r.ext_ids.doi == "10.18730/8dym9"
         assert r.ext_ids.isbn13 == None
-        assert r.language == "enc"
+        assert r.language == "en"
         assert r.subtitle == None
         assert r.release_date == None
         assert r.release_year == 1986
@@ -262,7 +262,6 @@ def test_datacite_dict_parse(datacite_importer):
         assert 'funder' not in r.extra['datacite']
         # matched by ISSN, so shouldn't be in there
         #assert extra['container_name'] == "International Journal of Quantum Chemistry"
-        assert r.extra['datacite']['url'] == 'https://ssl.fao.org/glis/doi/10.18730/8DYM9'
         assert r.extra['datacite']['subjects'] == [{'subject': 'Plant Genetic Resource for Food and Agriculture'}]
         assert len(r.abstracts) == 1
         assert len(r.abstracts[0].content) == 421
-- 
cgit v1.2.3


From 96e38edde79735b4080ec08d57e9f54759e97b61 Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Thu, 2 Jan 2020 17:35:54 +0100
Subject: datacite: add conversion fixtures

The `test_datacite_conversions` function compares an input (datacite)
document to an expected output (release entity as JSON). To add a case,
add an input file (datacite_doc_NN.json) and an expected output file
(datacite_result_NN.json), then increase the counter in the range loop
within the test.

To view input and result side by side with vim, change into the test
directory and run:

    tests/files/datacite $ ./caseview.sh 18
---
 python/tests/files/datacite/caseview.sh            |  17 +
 python/tests/files/datacite/datacite_doc_00.json   | 140 +++++
 python/tests/files/datacite/datacite_doc_01.json   |  81 +++
 python/tests/files/datacite/datacite_doc_02.json   |  85 +++
 python/tests/files/datacite/datacite_doc_03.json   |  70 +++
 python/tests/files/datacite/datacite_doc_04.json   |  80 +++
 python/tests/files/datacite/datacite_doc_05.json   | 598 +++++++++++++++++++++
 python/tests/files/datacite/datacite_doc_06.json   |  83 +++
 python/tests/files/datacite/datacite_doc_07.json   | 120 +++++
 python/tests/files/datacite/datacite_doc_08.json   | 105 ++++
 python/tests/files/datacite/datacite_doc_09.json   | 130 +++++
 python/tests/files/datacite/datacite_doc_10.json   |  83 +++
 python/tests/files/datacite/datacite_doc_11.json   |  86 +++
 python/tests/files/datacite/datacite_doc_12.json   | 103 ++++
 python/tests/files/datacite/datacite_doc_13.json   |  86 +++
 python/tests/files/datacite/datacite_doc_14.json   | 166 ++++++
 python/tests/files/datacite/datacite_doc_15.json   |  79 +++
 python/tests/files/datacite/datacite_doc_16.json   |  80 +++
 python/tests/files/datacite/datacite_doc_17.json   |  72 +++
 python/tests/files/datacite/datacite_doc_18.json   |  79 +++
 python/tests/files/datacite/datacite_doc_19.json   |  79 +++
 python/tests/files/datacite/datacite_doc_20.json   |  42 ++
 python/tests/files/datacite/datacite_doc_21.json   |  42 ++
 python/tests/files/datacite/datacite_doc_22.json   |  44 ++
 python/tests/files/datacite/datacite_doc_23.json   |  44 ++
 .../tests/files/datacite/datacite_result_00.json   |  87 +++
 .../tests/files/datacite/datacite_result_01.json   |  32 ++
 .../tests/files/datacite/datacite_result_02.json   |  36 ++
 .../tests/files/datacite/datacite_result_03.json   |  19 +
 .../tests/files/datacite/datacite_result_04.json   |  28 +
 .../tests/files/datacite/datacite_result_05.json   | 530 ++++++++++++++++++
 .../tests/files/datacite/datacite_result_06.json   |  26 +
 .../tests/files/datacite/datacite_result_07.json   |  73 +++
 .../tests/files/datacite/datacite_result_08.json   |  53 ++
 .../tests/files/datacite/datacite_result_09.json   |  35 ++
 .../tests/files/datacite/datacite_result_10.json   |  32 ++
 .../tests/files/datacite/datacite_result_11.json   |  21 +
 .../tests/files/datacite/datacite_result_12.json   |  44 ++
 .../tests/files/datacite/datacite_result_13.json   |  28 +
 .../tests/files/datacite/datacite_result_14.json   | 110 ++++
 .../tests/files/datacite/datacite_result_15.json   |  22 +
 .../tests/files/datacite/datacite_result_16.json   |  31 ++
 .../tests/files/datacite/datacite_result_17.json   |  20 +
 .../tests/files/datacite/datacite_result_18.json   |  15 +
 .../tests/files/datacite/datacite_result_19.json   |  15 +
 .../tests/files/datacite/datacite_result_20.json   |  14 +
 .../tests/files/datacite/datacite_result_21.json   |  15 +
 .../tests/files/datacite/datacite_result_22.json   |  22 +
 .../tests/files/datacite/datacite_result_23.json   |  22 +
 python/tests/import_datacite.py                    |  26 +-
 50 files changed, 3949 insertions(+), 1 deletion(-)
 create mode 100755 python/tests/files/datacite/caseview.sh
 create mode 100644 python/tests/files/datacite/datacite_doc_00.json
 create mode 100644 python/tests/files/datacite/datacite_doc_01.json
 create mode 100644 python/tests/files/datacite/datacite_doc_02.json
 create mode 100644 python/tests/files/datacite/datacite_doc_03.json
 create mode 100644 python/tests/files/datacite/datacite_doc_04.json
 create mode 100644 python/tests/files/datacite/datacite_doc_05.json
 create mode 100644 python/tests/files/datacite/datacite_doc_06.json
 create mode 100644 python/tests/files/datacite/datacite_doc_07.json
 create mode 100644 python/tests/files/datacite/datacite_doc_08.json
 create mode 100644 python/tests/files/datacite/datacite_doc_09.json
 create mode 100644 python/tests/files/datacite/datacite_doc_10.json
 create mode 100644 python/tests/files/datacite/datacite_doc_11.json
 create mode 100644 python/tests/files/datacite/datacite_doc_12.json
 create mode 100644 python/tests/files/datacite/datacite_doc_13.json
 create mode 100644 python/tests/files/datacite/datacite_doc_14.json
 create mode 100644 python/tests/files/datacite/datacite_doc_15.json
 create mode 100644 python/tests/files/datacite/datacite_doc_16.json
 create mode 100644 python/tests/files/datacite/datacite_doc_17.json
 create mode 100644 python/tests/files/datacite/datacite_doc_18.json
 create mode 100644 python/tests/files/datacite/datacite_doc_19.json
 create mode 100644 python/tests/files/datacite/datacite_doc_20.json
 create mode 100644 python/tests/files/datacite/datacite_doc_21.json
 create mode 100644 python/tests/files/datacite/datacite_doc_22.json
 create mode 100644 python/tests/files/datacite/datacite_doc_23.json
 create mode 100644 python/tests/files/datacite/datacite_result_00.json
 create mode 100644 python/tests/files/datacite/datacite_result_01.json
 create mode 100644 python/tests/files/datacite/datacite_result_02.json
 create mode 100644 python/tests/files/datacite/datacite_result_03.json
 create mode 100644 python/tests/files/datacite/datacite_result_04.json
 create mode 100644 python/tests/files/datacite/datacite_result_05.json
 create mode 100644 python/tests/files/datacite/datacite_result_06.json
 create mode 100644 python/tests/files/datacite/datacite_result_07.json
 create mode 100644 python/tests/files/datacite/datacite_result_08.json
 create mode 100644 python/tests/files/datacite/datacite_result_09.json
 create mode 100644 python/tests/files/datacite/datacite_result_10.json
 create mode 100644 python/tests/files/datacite/datacite_result_11.json
 create mode 100644 python/tests/files/datacite/datacite_result_12.json
 create mode 100644 python/tests/files/datacite/datacite_result_13.json
 create mode 100644 python/tests/files/datacite/datacite_result_14.json
 create mode 100644 python/tests/files/datacite/datacite_result_15.json
 create mode 100644 python/tests/files/datacite/datacite_result_16.json
 create mode 100644 python/tests/files/datacite/datacite_result_17.json
 create mode 100644 python/tests/files/datacite/datacite_result_18.json
 create mode 100644 python/tests/files/datacite/datacite_result_19.json
 create mode 100644 python/tests/files/datacite/datacite_result_20.json
 create mode 100644 python/tests/files/datacite/datacite_result_21.json
 create mode 100644 python/tests/files/datacite/datacite_result_22.json
 create mode 100644 python/tests/files/datacite/datacite_result_23.json

(limited to 'python/tests')

diff --git a/python/tests/files/datacite/caseview.sh b/python/tests/files/datacite/caseview.sh
new file mode 100755
index 00000000..d1e98c04
--- /dev/null
+++ b/python/tests/files/datacite/caseview.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Open input and output in vertical vim split.
+#
+# $ caseview 13
+#
+view() {
+    if [ -z "$1" ]; then
+        echo usage: "$0" CASE-NUMBER
+        exit 1
+    else
+        padded=$(printf "%02d\n" "$1")
+        vim -O "datacite_doc_$padded.json" "datacite_result_$padded.json"
+    fi
+}
+
+view "$@"
diff --git a/python/tests/files/datacite/datacite_doc_00.json b/python/tests/files/datacite/datacite_doc_00.json
new file mode 100644
index 00000000..248f525f
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_00.json
@@ -0,0 +1,140 @@
+{
+  "id": "10.1007/s10870-008-9413-z",
+  "type": "dois",
+  "attributes": {
+    "doi": "10.1007/s10870-008-9413-z",
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.1007/s10870-008-9413-z",
+        "identifierType": "DOI"
+      },
+      {
+        "identifier": "s10870-008-9413-z",
+        "identifierType": "Publisher ID"
+      }
+    ],
+    "creators": [
+      {
+        "name": "Li, Qian-Jin",
+        "nameType": "Personal",
+        "givenName": "Qian-Jin",
+        "familyName": "Li",
+        "affiliation": []
+      },
+      {
+        "name": "Yang, Chun-Long",
+        "nameType": "Personal",
+        "givenName": "Chun-Long",
+        "familyName": "Yang",
+        "affiliation": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "Synthesis and Crystal Structure of a Compound with Two Conformational Isomers: N-(2-methylbenzoyl)-N′-(4-nitrophenyl)thiourea"
+      }
+    ],
+    "publisher": "Springer Science and Business Media LLC",
+    "container": {
+      "type": "Journal",
+      "issue": "12",
+      "title": "Journal of Chemical Crystallography",
+      "volume": "38",
+      "lastPage": "930",
+      "firstPage": "927",
+      "identifier": "1074-1542",
+      "identifierType": "ISSN"
+    },
+    "publicationYear": 2008,
+    "subjects": [],
+    "contributors": [],
+    "dates": [
+      {
+        "date": "2008-05-30",
+        "dateType": "Issued"
+      },
+      {
+        "date": "2019-05-31T04:04:23Z",
+        "dateType": "Updated"
+      }
+    ],
+    "language": null,
+    "types": {
+      "ris": "JOUR",
+      "bibtex": "article",
+      "citeproc": "article-journal",
+      "schemaOrg": "ScholarlyArticle",
+      "resourceType": "JournalArticle",
+      "resourceTypeGeneral": "Text"
+    },
+    "relatedIdentifiers": [
+      {
+        "relationType": "IsPartOf",
+        "relatedIdentifier": "1074-1542",
+        "resourceTypeGeneral": "Collection",
+        "relatedIdentifierType": "ISSN"
+      },
+      {
+        "relationType": "References",
+        "relatedIdentifier": "10.1016/j.bmcl.2005.09.033",
+        "relatedIdentifierType": "DOI"
+      },
+      {
+        "relationType": "References",
+        "relatedIdentifier": "10.1016/s0022-1139(02)00330-5",
+        "relatedIdentifierType": "DOI"
+      },
+      {
+        "relationType": "References",
+        "relatedIdentifier": "10.1016/s0010-8545(01)00337-x",
+        "relatedIdentifierType": "DOI"
+      },
+      {
+        "relationType": "References",
+        "relatedIdentifier": "10.1016/j.tetlet.2005.06.135",
+        "relatedIdentifierType": "DOI"
+      },
+      {
+        "relationType": "References",
+        "relatedIdentifier": "10.1039/p298700000s1",
+        "relatedIdentifierType": "DOI"
+      },
+      {
+        "relationType": "References",
+        "relatedIdentifier": "10.1002/anie.199515551",
+        "relatedIdentifierType": "DOI"
+      }
+    ],
+    "sizes": [],
+    "formats": [],
+    "version": null,
+    "rightsList": [
+      {
+        "rightsUri": "http://www.springer.com/tdm"
+      }
+    ],
+    "descriptions": [],
+    "geoLocations": [],
+    "fundingReferences": [],
+    "url": "http://link.springer.com/10.1007/s10870-008-9413-z",
+    "contentUrl": null,
+    "metadataVersion": 1,
+    "schemaVersion": "http://datacite.org/schema/kernel-4",
+    "source": "levriero",
+    "isActive": true,
+    "state": "findable",
+    "reason": null,
+    "created": "2019-06-18T14:52:19.000Z",
+    "registered": null,
+    "published": "2008",
+    "updated": "2019-08-03T00:03:40.000Z"
+  },
+  "relationships": {
+    "client": {
+      "data": {
+        "id": "crossref.citations",
+        "type": "clients"
+      }
+    }
+  }
+}
diff --git a/python/tests/files/datacite/datacite_doc_01.json b/python/tests/files/datacite/datacite_doc_01.json
new file mode 100644
index 00000000..c4ef6e45
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_01.json
@@ -0,0 +1,81 @@
+{
+  "id": "10.11588/diglit.25558.39",
+  "type": "dois",
+  "attributes": {
+    "doi": "10.11588/diglit.25558.39",
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.11588/diglit.25558.39",
+        "identifierType": "DOI"
+      }
+    ],
+    "creators": [
+      {
+        "name": "Dargenty, G.",
+        "nameType": "Personal",
+        "givenName": "G.",
+        "familyName": "Dargenty",
+        "affiliation": []
+      }
+    ],
+    "titles": [
+      {
+        "lang": "de",
+        "title": "Ferdinand Gaillard, [1]: né à Paris le 16 janvier 1834, mort à Paris le 19 janvier 1887"
+      }
+    ],
+    "publisher": "University Library Heidelberg",
+    "container": {},
+    "publicationYear": 1887,
+    "subjects": [],
+    "contributors": [],
+    "dates": [
+      {
+        "date": "1887",
+        "dateType": "Issued"
+      }
+    ],
+    "language": "fre",
+    "types": {
+      "ris": "RPRT",
+      "bibtex": "article",
+      "citeproc": "article-journal",
+      "schemaOrg": "ScholarlyArticle",
+      "resourceType": "DigitalisatDigital copy",
+      "resourceTypeGeneral": "Text"
+    },
+    "relatedIdentifiers": [],
+    "sizes": [],
+    "formats": [],
+    "version": null,
+    "rightsList": [
+      {
+        "lang": "de",
+        "rights": "Standard (Creative Commons - Namensnennung - Weitergabe unter gleichen Bedingungen) - http://www.ub.uni-heidelberg.de/helios/digi/nutzung/Welcome.html"
+      }
+    ],
+    "descriptions": [],
+    "geoLocations": [],
+    "fundingReferences": [],
+    "url": "http://digi.ub.uni-heidelberg.de/diglit/art1887_1/0172",
+    "contentUrl": null,
+    "metadataVersion": 4,
+    "schemaVersion": "http://datacite.org/schema/kernel-4",
+    "source": null,
+    "isActive": true,
+    "state": "findable",
+    "reason": null,
+    "created": "2016-12-08T07:43:15.000Z",
+    "registered": "2016-12-08T07:43:15.000Z",
+    "published": "1887",
+    "updated": "2019-08-02T14:27:33.000Z"
+  },
+  "relationships": {
+    "client": {
+      "data": {
+        "id": "gesis.ubhd",
+        "type": "clients"
+      }
+    }
+  }
+}
diff --git a/python/tests/files/datacite/datacite_doc_02.json b/python/tests/files/datacite/datacite_doc_02.json
new file mode 100644
index 00000000..8b9a594e
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_02.json
@@ -0,0 +1,85 @@
+{
+  "id": "10.11588/diglit.37715.57",
+  "type": "dois",
+  "attributes": {
+    "doi": "10.11588/diglit.37715.57",
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.11588/diglit.37715.57",
+        "identifierType": "DOI"
+      }
+    ],
+    "creators": [
+      {
+        "name": "Weyersberg, Albert",
+        "nameType": "Personal",
+        "givenName": "Albert",
+        "familyName": "Weyersberg",
+        "affiliation": []
+      }
+    ],
+    "titles": [
+      {
+        "lang": "de",
+        "title": "Solinger Schwertschmiede-Familien, [4]"
+      }
+    ],
+    "publisher": "University Library Heidelberg",
+    "container": {},
+    "publicationYear": 1897,
+    "subjects": [],
+    "contributors": [],
+    "dates": [
+      {
+        "date": "1897",
+        "dateType": "Issued"
+      }
+    ],
+    "language": "ger",
+    "types": {
+      "ris": "RPRT",
+      "bibtex": "article",
+      "citeproc": "article-journal",
+      "schemaOrg": "ScholarlyArticle",
+      "resourceType": "DigitalisatDigital copy",
+      "resourceTypeGeneral": "Text"
+    },
+    "relatedIdentifiers": [],
+    "sizes": [],
+    "formats": [],
+    "version": null,
+    "rightsList": [
+      {
+        "lang": "de",
+        "rights": "Creative Commons - Namensnennung - Weitergabe unter gleichen Bedingungen - https://creativecommons.org/licenses/by-sa/3.0/de/"
+      },
+      {
+        "lang": "en",
+        "rights": "Creative Commons - Namensnennung - Weitergabe unter gleichen Bedingungen - https://creativecommons.org/licenses/by-sa/3.0/"
+      }
+    ],
+    "descriptions": [],
+    "geoLocations": [],
+    "fundingReferences": [],
+    "url": "https://digi.ub.uni-heidelberg.de/diglit/zhwk1897_1899/0131",
+    "contentUrl": null,
+    "metadataVersion": 2,
+    "schemaVersion": "http://datacite.org/schema/kernel-4",
+    "source": "mds",
+    "isActive": true,
+    "state": "findable",
+    "reason": null,
+    "created": "2018-11-29T12:04:12.000Z",
+    "registered": "2018-11-29T12:04:13.000Z",
+    "published": "1897",
+    "updated": "2019-08-02T21:31:04.000Z"
+  },
+  "relationships": {
+    "client": {
+      "data": {
+        "id": "gesis.ubhd",
+        "type": "clients"
+      }
+    }
+  }
+}
diff --git a/python/tests/files/datacite/datacite_doc_03.json b/python/tests/files/datacite/datacite_doc_03.json
new file mode 100644
index 00000000..e77a359c
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_03.json
@@ -0,0 +1,70 @@
+{
+  "id": "10.13140/rg.2.2.30434.53446",
+  "type": "dois",
+  "attributes": {
+    "doi": "10.13140/rg.2.2.30434.53446",
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.13140/rg.2.2.30434.53446",
+        "identifierType": "DOI"
+      }
+    ],
+    "creators": [
+      {
+        "name": "Mastura Yahya",
+        "affiliation": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "midterm ah30903"
+      }
+    ],
+    "publisher": "Unpublished",
+    "container": {},
+    "publicationYear": 2016,
+    "subjects": [],
+    "contributors": [],
+    "dates": [
+      {
+        "date": "2016",
+        "dateType": "Issued"
+      }
+    ],
+    "language": "ms",
+    "types": {
+      "ris": "GEN",
+      "bibtex": "misc",
+      "citeproc": "article",
+      "schemaOrg": "CreativeWork"
+    },
+    "relatedIdentifiers": [],
+    "sizes": [],
+    "formats": [],
+    "version": null,
+    "rightsList": [],
+    "descriptions": [],
+    "geoLocations": [],
+    "fundingReferences": [],
+    "url": "http://rgdoi.net/10.13140/RG.2.2.30434.53446",
+    "contentUrl": null,
+    "metadataVersion": 0,
+    "schemaVersion": "http://datacite.org/schema/kernel-3",
+    "source": null,
+    "isActive": true,
+    "state": "findable",
+    "reason": null,
+    "created": "2016-11-03T09:07:08.000Z",
+    "registered": "2016-11-03T09:07:09.000Z",
+    "published": "2016",
+    "updated": "2019-08-02T12:51:15.000Z"
+  },
+  "relationships": {
+    "client": {
+      "data": {
+        "id": "rg.rg",
+        "type": "clients"
+      }
+    }
+  }
+}
diff --git a/python/tests/files/datacite/datacite_doc_04.json b/python/tests/files/datacite/datacite_doc_04.json
new file mode 100644
index 00000000..8655a26a
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_04.json
@@ -0,0 +1,80 @@
+{
+  "id": "10.14288/1.0080520",
+  "type": "dois",
+  "attributes": {
+    "doi": "10.14288/1.0080520",
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.14288/1.0080520",
+        "identifierType": "DOI"
+      }
+    ],
+    "creators": [
+      {
+        "name": "Nicollerat, Marc Andre",
+        "nameType": "Personal",
+        "givenName": "Marc Andre",
+        "familyName": "Nicollerat",
+        "affiliation": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "On chain maps inducing isomorphisms in homology"
+      }
+    ],
+    "publisher": "University of British Columbia",
+    "container": {},
+    "publicationYear": 1973,
+    "subjects": [],
+    "contributors": [],
+    "dates": [
+      {
+        "date": "1973",
+        "dateType": "Issued"
+      }
+    ],
+    "language": "en",
+    "types": {
+      "ris": "RPRT",
+      "bibtex": "article",
+      "citeproc": "article-journal",
+      "schemaOrg": "ScholarlyArticle",
+      "resourceType": "Text",
+      "resourceTypeGeneral": "Text"
+    },
+    "relatedIdentifiers": [],
+    "sizes": [],
+    "formats": [],
+    "version": null,
+    "rightsList": [],
+    "descriptions": [
+      {
+        "description": "Let A be an abelian category, I the full subcategory of A consisting of injective objects of A, and K(A) the category whose objects are cochain complexes of elements of A, and whose morphisms are homotopy classes of cochain maps.  In (5), lemma 4.6., p. 42, R. Hartshorne has proved that, under certain conditions, a cochain complex X˙ ε. |KA)| can be embedded in a complex I˙ ε. |K(I)| in such a way that I˙ has the same cohomology as X˙.  In Chapter I we show that the construction given in the two first parts of Hartshorne's Lemma is natural i.e. there exists a functor  J : K(A) → K(I) and a natural transformation [formula omitted]  (where E : K(I) → K(A) is the embedding functor) such that [formula omitted] is  injective and induces isomorphism in cohomology. The question whether the construction given in the third part of the lemma is functorial is still open.  We also prove that J is left adjoint to E, so that K(I) is a reflective subcategory of K(A).  In the special case where A is a category [formula omitted] of left A-modules, and [formula omitted] the category of cochain complexes in [formula omitted] and cochain maps (not homotopy classes), we prove the existence of a functor [formula omitted]  In Chapter II we study the natural homomorphism [formula omitted]   where A, B are rings, and M, L, N modules or chain complexes. In particular we give several sufficient conditions under which v is an isomorphism, or induces isomorphism in homology.  In the appendix we give a detailed proof of Hartshorne's Lemma. We think that this is useful, as no complete proof is, to our knowledge, to be found in the literature.",
+        "descriptionType": "Abstract"
+      }
+    ],
+    "geoLocations": [],
+    "fundingReferences": [],
+    "url": "https://doi.library.ubc.ca/10.14288/1.0080520",
+    "contentUrl": null,
+    "metadataVersion": 5,
+    "schemaVersion": "http://datacite.org/schema/kernel-3",
+    "source": null,
+    "isActive": true,
+    "state": "findable",
+    "reason": null,
+    "created": "2015-11-11T11:12:34.000Z",
+    "registered": "2015-11-11T11:12:35.000Z",
+    "published": "1973",
+    "updated": "2019-08-02T09:43:14.000Z"
+  },
+  "relationships": {
+    "client": {
+      "data": {
+        "id": "cisti.ubc",
+        "type": "clients"
+      }
+    }
+  }
+}
diff --git a/python/tests/files/datacite/datacite_doc_05.json b/python/tests/files/datacite/datacite_doc_05.json
new file mode 100644
index 00000000..75e68e9d
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_05.json
@@ -0,0 +1,598 @@
+{
+  "id": "10.15156/bio/sh409843.07fu",
+  "type": "dois",
+  "attributes": {
+    "doi": "10.15156/bio/sh409843.07fu",
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.15156/bio/sh409843.07fu",
+        "identifierType": "DOI"
+      }
+    ],
+    "creators": [
+      {
+        "name": "Kõljalg, Urmas",
+        "nameType": "Personal",
+        "givenName": "Urmas",
+        "familyName": "Kõljalg",
+        "affiliation": []
+      },
+      {
+        "name": "Abarenkov, Kessy",
+        "nameType": "Personal",
+        "givenName": "Kessy",
+        "familyName": "Abarenkov",
+        "affiliation": []
+      },
+      {
+        "name": "Nilsson, R. Henrik",
+        "nameType": "Personal",
+        "givenName": "R. Henrik",
+        "familyName": "Nilsson",
+        "affiliation": []
+      },
+      {
+        "name": "Larsson, Karl-Henrik",
+        "nameType": "Personal",
+        "givenName": "Karl-Henrik",
+        "familyName": "Larsson",
+        "affiliation": []
+      },
+      {
+        "name": "Aas, Anders Bjørnsgard",
+        "nameType": "Personal",
+        "givenName": "Anders Bjørnsgard",
+        "familyName": "Aas",
+        "affiliation": []
+      },
+      {
+        "name": "Adams, Rachel",
+        "nameType": "Personal",
+        "givenName": "Rachel",
+        "familyName": "Adams",
+        "affiliation": []
+      },
+      {
+        "name": "Alves, Artur",
+        "nameType": "Personal",
+        "givenName": "Artur",
+        "familyName": "Alves",
+        "affiliation": []
+      },
+      {
+        "name": "Ammirati, Joseph F.",
+        "nameType": "Personal",
+        "givenName": "Joseph F.",
+        "familyName": "Ammirati",
+        "affiliation": []
+      },
+      {
+        "name": "Arnold, A. Elizabeth",
+        "nameType": "Personal",
+        "givenName": "A. Elizabeth",
+        "familyName": "Arnold",
+        "affiliation": []
+      },
+      {
+        "name": "Bahram, Mohammad",
+        "nameType": "Personal",
+        "givenName": "Mohammad",
+        "familyName": "Bahram",
+        "affiliation": []
+      },
+      {
+        "name": "Bengtsson-Palme, Johan",
+        "nameType": "Personal",
+        "givenName": "Johan",
+        "familyName": "Bengtsson-Palme",
+        "affiliation": []
+      },
+      {
+        "name": "Berlin, Anna",
+        "nameType": "Personal",
+        "givenName": "Anna",
+        "familyName": "Berlin",
+        "affiliation": []
+      },
+      {
+        "name": "Botnen, Synnøve",
+        "nameType": "Personal",
+        "givenName": "Synnøve",
+        "familyName": "Botnen",
+        "affiliation": []
+      },
+      {
+        "name": "Bourlat, Sarah",
+        "nameType": "Personal",
+        "givenName": "Sarah",
+        "familyName": "Bourlat",
+        "affiliation": []
+      },
+      {
+        "name": "Cheeke, Tanya",
+        "nameType": "Personal",
+        "givenName": "Tanya",
+        "familyName": "Cheeke",
+        "affiliation": []
+      },
+      {
+        "name": "Dima, Bálint",
+        "nameType": "Personal",
+        "givenName": "Bálint",
+        "familyName": "Dima",
+        "affiliation": []
+      },
+      {
+        "name": "Drenkhan, Rein",
+        "nameType": "Personal",
+        "givenName": "Rein",
+        "familyName": "Drenkhan",
+        "affiliation": []
+      },
+      {
+        "name": "Duarte, Camila",
+        "nameType": "Personal",
+        "givenName": "Camila",
+        "familyName": "Duarte",
+        "affiliation": []
+      },
+      {
+        "name": "Dueñas, Margarita",
+        "nameType": "Personal",
+        "givenName": "Margarita",
+        "familyName": "Dueñas",
+        "affiliation": []
+      },
+      {
+        "name": "Eberhardt, Ursula",
+        "nameType": "Personal",
+        "givenName": "Ursula",
+        "familyName": "Eberhardt",
+        "affiliation": []
+      },
+      {
+        "name": "Friberg, Hanna",
+        "nameType": "Personal",
+        "givenName": "Hanna",
+        "familyName": "Friberg",
+        "affiliation": []
+      },
+      {
+        "name": "Frøslev, Tobias G.",
+        "nameType": "Personal",
+        "givenName": "Tobias G.",
+        "familyName": "Frøslev",
+        "affiliation": []
+      },
+      {
+        "name": "Garnica, Sigisfredo",
+        "nameType": "Personal",
+        "givenName": "Sigisfredo",
+        "familyName": "Garnica",
+        "affiliation": []
+      },
+      {
+        "name": "Geml, József",
+        "nameType": "Personal",
+        "givenName": "József",
+        "familyName": "Geml",
+        "affiliation": []
+      },
+      {
+        "name": "Ghobad-Nejhad, Masoomeh",
+        "nameType": "Personal",
+        "givenName": "Masoomeh",
+        "familyName": "Ghobad-Nejhad",
+        "affiliation": []
+      },
+      {
+        "name": "Grebenc, Tine",
+        "nameType": "Personal",
+        "givenName": "Tine",
+        "familyName": "Grebenc",
+        "affiliation": []
+      },
+      {
+        "name": "Griffith, Gareth W.",
+        "nameType": "Personal",
+        "givenName": "Gareth W.",
+        "familyName": "Griffith",
+        "affiliation": []
+      },
+      {
+        "name": "Hampe, Felix",
+        "nameType": "Personal",
+        "givenName": "Felix",
+        "familyName": "Hampe",
+        "affiliation": []
+      },
+      {
+        "name": "Kennedy, Peter",
+        "nameType": "Personal",
+        "givenName": "Peter",
+        "familyName": "Kennedy",
+        "affiliation": []
+      },
+      {
+        "name": "Khomich, Maryia",
+        "nameType": "Personal",
+        "givenName": "Maryia",
+        "familyName": "Khomich",
+        "affiliation": []
+      },
+      {
+        "name": "Kohout, Petr",
+        "nameType": "Personal",
+        "givenName": "Petr",
+        "familyName": "Kohout",
+        "affiliation": []
+      },
+      {
+        "name": "Kollom, Anu",
+        "nameType": "Personal",
+        "givenName": "Anu",
+        "familyName": "Kollom",
+        "affiliation": []
+      },
+      {
+        "name": "Larsson, Ellen",
+        "nameType": "Personal",
+        "givenName": "Ellen",
+        "familyName": "Larsson",
+        "affiliation": []
+      },
+      {
+        "name": "Laszlo, Irinyi",
+        "nameType": "Personal",
+        "givenName": "Irinyi",
+        "familyName": "Laszlo",
+        "affiliation": []
+      },
+      {
+        "name": "Leavitt, Steven",
+        "nameType": "Personal",
+        "givenName": "Steven",
+        "familyName": "Leavitt",
+        "affiliation": []
+      },
+      {
+        "name": "Liimatainen, Kare",
+        "nameType": "Personal",
+        "givenName": "Kare",
+        "familyName": "Liimatainen",
+        "affiliation": []
+      },
+      {
+        "name": "Lindahl, Björn",
+        "nameType": "Personal",
+        "givenName": "Björn",
+        "familyName": "Lindahl",
+        "affiliation": []
+      },
+      {
+        "name": "Lodge, Deborah J.",
+        "nameType": "Personal",
+        "givenName": "Deborah J.",
+        "familyName": "Lodge",
+        "affiliation": []
+      },
+      {
+        "name": "Lumbsch, Helge Thorsten",
+        "nameType": "Personal",
+        "givenName": "Helge Thorsten",
+        "familyName": "Lumbsch",
+        "affiliation": []
+      },
+      {
+        "name": "Martín Esteban, María Paz",
+        "nameType": "Personal",
+        "givenName": "María Paz",
+        "familyName": "Martín Esteban",
+        "affiliation": []
+      },
+      {
+        "name": "Meyer, Wieland",
+        "nameType": "Personal",
+        "givenName": "Wieland",
+        "familyName": "Meyer",
+        "affiliation": []
+      },
+      {
+        "name": "Miettinen, Otto",
+        "nameType": "Personal",
+        "givenName": "Otto",
+        "familyName": "Miettinen",
+        "affiliation": []
+      },
+      {
+        "name": "Nguyen, Nhu",
+        "nameType": "Personal",
+        "givenName": "Nhu",
+        "familyName": "Nguyen",
+        "affiliation": []
+      },
+      {
+        "name": "Niskanen, Tuula",
+        "nameType": "Personal",
+        "givenName": "Tuula",
+        "familyName": "Niskanen",
+        "affiliation": []
+      },
+      {
+        "name": "Oono, Ryoko",
+        "nameType": "Personal",
+        "givenName": "Ryoko",
+        "familyName": "Oono",
+        "affiliation": []
+      },
+      {
+        "name": "Öpik, Maarja",
+        "nameType": "Personal",
+        "givenName": "Maarja",
+        "familyName": "Öpik",
+        "affiliation": []
+      },
+      {
+        "name": "Ordynets, Alexander",
+        "nameType": "Personal",
+        "givenName": "Alexander",
+        "familyName": "Ordynets",
+        "affiliation": []
+      },
+      {
+        "name": "Pawłowska, Julia",
+        "nameType": "Personal",
+        "givenName": "Julia",
+        "familyName": "Pawłowska",
+        "affiliation": []
+      },
+      {
+        "name": "Peintner, Ursula",
+        "nameType": "Personal",
+        "givenName": "Ursula",
+        "familyName": "Peintner",
+        "affiliation": []
+      },
+      {
+        "name": "Pereira, Olinto Liparini",
+        "nameType": "Personal",
+        "givenName": "Olinto Liparini",
+        "familyName": "Pereira",
+        "affiliation": []
+      },
+      {
+        "name": "Pinho, Danilo Batista",
+        "nameType": "Personal",
+        "givenName": "Danilo Batista",
+        "familyName": "Pinho",
+        "affiliation": []
+      },
+      {
+        "name": "Põldmaa, Kadri",
+        "nameType": "Personal",
+        "givenName": "Kadri",
+        "familyName": "Põldmaa",
+        "affiliation": []
+      },
+      {
+        "name": "Runnel, Kadri",
+        "nameType": "Personal",
+        "givenName": "Kadri",
+        "familyName": "Runnel",
+        "affiliation": []
+      },
+      {
+        "name": "Ryberg, Martin",
+        "nameType": "Personal",
+        "givenName": "Martin",
+        "familyName": "Ryberg",
+        "affiliation": []
+      },
+      {
+        "name": "Saar, Irja",
+        "nameType": "Personal",
+        "givenName": "Irja",
+        "familyName": "Saar",
+        "affiliation": []
+      },
+      {
+        "name": "Sanli, Kemal",
+        "nameType": "Personal",
+        "givenName": "Kemal",
+        "familyName": "Sanli",
+        "affiliation": []
+      },
+      {
+        "name": "Scott, James",
+        "nameType": "Personal",
+        "givenName": "James",
+        "familyName": "Scott",
+        "affiliation": []
+      },
+      {
+        "name": "Spirin, Viacheslav",
+        "nameType": "Personal",
+        "givenName": "Viacheslav",
+        "familyName": "Spirin",
+        "affiliation": []
+      },
+      {
+        "name": "Suija, Ave",
+        "nameType": "Personal",
+        "givenName": "Ave",
+        "familyName": "Suija",
+        "affiliation": []
+      },
+      {
+        "name": "Svantesson, Sten",
+        "nameType": "Personal",
+        "givenName": "Sten",
+        "familyName": "Svantesson",
+        "affiliation": []
+      },
+      {
+        "name": "Tadych, Mariusz",
+        "nameType": "Personal",
+        "givenName": "Mariusz",
+        "familyName": "Tadych",
+        "affiliation": []
+      },
+      {
+        "name": "Takamatsu, Susumu",
+        "nameType": "Personal",
+        "givenName": "Susumu",
+        "familyName": "Takamatsu",
+        "affiliation": []
+      },
+      {
+        "name": "Tamm, Heidi",
+        "nameType": "Personal",
+        "givenName": "Heidi",
+        "familyName": "Tamm",
+        "affiliation": []
+      },
+      {
+        "name": "Taylor, AFS.",
+        "nameType": "Personal",
+        "givenName": "AFS.",
+        "familyName": "Taylor",
+        "affiliation": []
+      },
+      {
+        "name": "Tedersoo, Leho",
+        "nameType": "Personal",
+        "givenName": "Leho",
+        "familyName": "Tedersoo",
+        "affiliation": []
+      },
+      {
+        "name": "Telleria, M.T.",
+        "nameType": "Personal",
+        "givenName": "M.T.",
+        "familyName": "Telleria",
+        "affiliation": []
+      },
+      {
+        "name": "Udayanga, Dhanushka",
+        "nameType": "Personal",
+        "givenName": "Dhanushka",
+        "familyName": "Udayanga",
+        "affiliation": []
+      },
+      {
+        "name": "Unterseher, Martin",
+        "nameType": "Personal",
+        "givenName": "Martin",
+        "familyName": "Unterseher",
+        "affiliation": []
+      },
+      {
+        "name": "Volobuev, Sergey",
+        "nameType": "Personal",
+        "givenName": "Sergey",
+        "familyName": "Volobuev",
+        "affiliation": []
+      },
+      {
+        "name": "Weiss, Michael",
+        "nameType": "Personal",
+        "givenName": "Michael",
+        "familyName": "Weiss",
+        "affiliation": []
+      },
+      {
+        "name": "Wurzbacher, Christian",
+        "nameType": "Personal",
+        "givenName": "Christian",
+        "familyName": "Wurzbacher",
+        "affiliation": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "SH409843.07FU"
+      },
+      {
+        "title": "Gomphales",
+        "titleType": "Subtitle"
+      }
+    ],
+    "publisher": "UNITE Community",
+    "container": {},
+    "publicationYear": 2015,
+    "subjects": [],
+    "contributors": [
+      {
+        "name": "Kessy Abarenkov",
+        "affiliation": []
+      },
+      {
+        "name": "NHM UT-University Of Tartu; Natural History Museum And Botanic Garden",
+        "affiliation": []
+      }
+    ],
+    "dates": [
+      {
+        "date": "2016-04-22",
+        "dateType": "Updated"
+      },
+      {
+        "date": "2014-10-05",
+        "dateType": "Created"
+      },
+      {
+        "date": "2015",
+        "dateType": "Issued"
+      }
+    ],
+    "language": "eng",
+    "types": {
+      "ris": "DATA",
+      "bibtex": "misc",
+      "citeproc": "dataset",
+      "schemaOrg": "Dataset",
+      "resourceType": "Dataset/UNITE Species Hypothesis",
+      "resourceTypeGeneral": "Dataset"
+    },
+    "relatedIdentifiers": [],
+    "sizes": [],
+    "formats": [
+      "application/json"
+    ],
+    "version": null,
+    "rightsList": [
+      {
+        "rights": "Attribution-NonCommercial (CC BY-NC)",
+        "rightsUri": "http://creativecommons.org/licenses/by-nc/4.0"
+      }
+    ],
+    "descriptions": [
+      {
+        "description": "UNITE provides a unified way for delimiting, identifying, communicating, and working with DNA-based Species Hypotheses (SH). All fungal ITS sequences in the international nucleotide sequence databases are clustered to approximately the species level by applying a set of dynamic distance values (&lt;0.5 - 3.0%). All species hypotheses are given a unique, stable name in the form of a DOI, and their taxonomic and ecological annotations are verified through distributed, web-based third-party annotation efforts. SHs are connected to a taxon name and its classification as far as possible (phylum, class, order, etc.) by taking into account identifications for all sequences in the SH. An automatically or manually designated sequence is chosen to represent each such SH. These sequences are released (https://unite.ut.ee/repository.php) for use by the scientific community in, for example, local sequence similarity searches and next-generation sequencing analysis pipelines. The system and the data are updated automatically as the number of public fungal ITS sequences grows.",
+        "descriptionType": "Abstract"
+      }
+    ],
+    "geoLocations": [],
+    "fundingReferences": [],
+    "url": "https://plutof.ut.ee/#/datacite/10.15156/BIO/SH409843.07FU",
+    "contentUrl": null,
+    "metadataVersion": 1,
+    "schemaVersion": "http://datacite.org/schema/kernel-3",
+    "source": null,
+    "isActive": true,
+    "state": "findable",
+    "reason": null,
+    "created": "2015-06-05T10:23:18.000Z",
+    "registered": "2015-06-05T10:23:19.000Z",
+    "published": "2015",
+    "updated": "2019-08-02T07:45:28.000Z"
+  },
+  "relationships": {
+    "client": {
+      "data": {
+        "id": "estdoi.bio",
+        "type": "clients"
+      }
+    }
+  }
+}
diff --git a/python/tests/files/datacite/datacite_doc_06.json b/python/tests/files/datacite/datacite_doc_06.json
new file mode 100644
index 00000000..a7f3ee70
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_06.json
@@ -0,0 +1,83 @@
+{
+  "id": "10.16903/ethz-grs-d_006220",
+  "type": "dois",
+  "attributes": {
+    "doi": "10.16903/ethz-grs-d_006220",
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.16903/ethz-grs-d_006220",
+        "identifierType": "DOI"
+      }
+    ],
+    "creators": [
+      {
+        "name": "Crispijn De Passe (Der Ältere) (1564-1637)",
+        "nameType": "Personal",
+        "affiliation": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "Der Eifer (Sedulitas), Blatt 7 der Folge \"Die Tugenden\""
+      }
+    ],
+    "publisher": "n.a.",
+    "container": {},
+    "publicationYear": 1590,
+    "subjects": [],
+    "contributors": [],
+    "dates": [
+      {
+        "date": "1590",
+        "dateType": "Available"
+      },
+      {
+        "date": "1590",
+        "dateType": "Issued"
+      }
+    ],
+    "language": null,
+    "types": {
+      "ris": "GEN",
+      "bibtex": "misc",
+      "citeproc": "article",
+      "schemaOrg": "CreativeWork",
+      "resourceTypeGeneral": "InteractiveResource"
+    },
+    "relatedIdentifiers": [],
+    "sizes": [],
+    "formats": [
+      "Blattgrösse: 21.0 x 14.4 x 0.0 cm (beschnitten)",
+      "Kupferstich"
+    ],
+    "version": null,
+    "rightsList": [
+      {
+        "rights": "ETH-Bibliothek Zürich, Graphische Sammlung / D 6220 / Public Domain Mark 1.0"
+      }
+    ],
+    "descriptions": [],
+    "geoLocations": [],
+    "fundingReferences": [],
+    "url": "http://www.e-gs.ethz.ch/eMP/eMuseumPlus?service=ExternalInterface&module=collection&objectId=29469&viewType=detailView",
+    "contentUrl": null,
+    "metadataVersion": 1,
+    "schemaVersion": "http://datacite.org/schema/kernel-3",
+    "source": "mds",
+    "isActive": true,
+    "state": "findable",
+    "reason": null,
+    "created": "2017-12-13T12:03:09.000Z",
+    "registered": "2017-12-13T12:03:09.000Z",
+    "published": "1590",
+    "updated": "2019-08-02T17:20:02.000Z"
+  },
+  "relationships": {
+    "client": {
+      "data": {
+        "id": "ethz.gs",
+        "type": "clients"
+      }
+    }
+  }
+}
diff --git a/python/tests/files/datacite/datacite_doc_07.json b/python/tests/files/datacite/datacite_doc_07.json
new file mode 100644
index 00000000..c70695b6
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_07.json
@@ -0,0 +1,120 @@
+{
+  "id": "10.18462/iir.icr.2015.0926",
+  "type": "dois",
+  "attributes": {
+    "doi": "10.18462/iir.icr.2015.0926",
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.18462/iir.icr.2015.0926",
+        "identifierType": "DOI"
+      }
+    ],
+    "creators": [
+      {
+        "name": "ROTHUIZEN, E.",
+        "nameType": "Personal",
+        "givenName": "E.",
+        "familyName": "ROTHUIZEN",
+        "affiliation": []
+      },
+      {
+        "name": "ELMEGAARD, B.",
+        "nameType": "Personal",
+        "givenName": "B.",
+        "familyName": "ELMEGAARD",
+        "affiliation": []
+      },
+      {
+        "name": "MARKUSSEN W., B.",
+        "nameType": "Personal",
+        "givenName": "B.",
+        "familyName": "MARKUSSEN W.",
+        "affiliation": []
+      },
+      {
+        "name": "Et Al.",
+        "affiliation": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "High efficient heat pump system using storage tanks to increase cop by means of the ISEC concept. 1: model validation."
+      }
+    ],
+    "publisher": "International Institute of Refrigeration (IIR)",
+    "container": {},
+    "publicationYear": 2015,
+    "subjects": [
+      {
+        "subject": "HEAT PUMP"
+      },
+      {
+        "subject": "HOT WATER"
+      },
+      {
+        "subject": "HEAT TRANSFER"
+      },
+      {
+        "subject": "PERFORMANCE"
+      },
+      {
+        "subject": "THERMAL STORAGE"
+      },
+      {
+        "subject": "TANK"
+      },
+      {
+        "subject": "MODEL"
+      }
+    ],
+    "contributors": [],
+    "dates": [
+      {
+        "date": "2015",
+        "dateType": "Issued"
+      }
+    ],
+    "language": "eng",
+    "types": {
+      "ris": "DATA",
+      "bibtex": "misc",
+      "citeproc": "dataset",
+      "schemaOrg": "Dataset",
+      "resourceType": "Dataset",
+      "resourceTypeGeneral": "Dataset"
+    },
+    "relatedIdentifiers": [],
+    "sizes": [],
+    "formats": [],
+    "version": null,
+    "rightsList": [],
+    "descriptions": [
+      {
+        "description": "The purpose of the ISEC concept is to provide a high-efficient heat pump system for hot water production. The ISEC concept uses two storage tanks for the water, one discharged and one charged. Hot water for the industrial process is tapped from the charged tank, while the other tank is charging. Charging is done by circulating the water in the tank through the condenser of a heat pump several times and thereby gradually heating the water. The charging is done with a higher mass flow rate than the discharging to reach several circulations of the water during the time frame of one discharging. This result in a lower condensing temperature than if the water was heated in one step. Two test setups were built, one to test the performance of the heat pump gradually heating the water and one to investigate the stratification in the storage tanks. Furthermore, a dynamic model of the system was implemented in Dymola, and validated by the use of test data from the two experimental setups. This paper shows that there is a good consistency between the model and the experimental tests.",
+        "descriptionType": "Abstract"
+      }
+    ],
+    "geoLocations": [],
+    "fundingReferences": [],
+    "url": "http://www.iifiir.org/clientBookline/service/reference.asp?INSTANCE=EXPLOITATION&OUTPUT=PORTAL&DOCID=IFD_REFDOC_0015008&DOCBASE=IFD_REFDOC_EN&SETLANGUAGE=EN",
+    "contentUrl": null,
+    "metadataVersion": 0,
+    "schemaVersion": null,
+    "source": null,
+    "isActive": true,
+    "state": "findable",
+    "reason": null,
+    "created": "2016-11-21T13:08:14.000Z",
+    "registered": "2016-11-21T13:08:14.000Z",
+    "published": "2015",
+    "updated": "2019-08-16T18:00:59.000Z"
+  },
+  "relationships": {
+    "client": {
+      "data": {
+        "id": "inist.iif",
+        "type": "clients"
+      }
+    }
+  }
+}
diff --git a/python/tests/files/datacite/datacite_doc_08.json b/python/tests/files/datacite/datacite_doc_08.json
new file mode 100644
index 00000000..e9170788
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_08.json
@@ -0,0 +1,105 @@
+{
+  "id": "10.22004/ag.econ.284864",
+  "type": "dois",
+  "attributes": {
+    "doi": "10.22004/ag.econ.284864",
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.22004/ag.econ.284864",
+        "identifierType": "DOI"
+      }
+    ],
+    "creators": [
+      {
+        "name": "Kajisa, Kei",
+        "nameType": "Personal",
+        "givenName": "Kei",
+        "familyName": "Kajisa",
+        "affiliation": [],
+        "nameIdentifiers": []
+      },
+      {
+        "name": "Kajisa, Kei",
+        "nameType": "Personal",
+        "givenName": "Kei",
+        "familyName": "Kajisa",
+        "affiliation": [],
+        "nameIdentifiers": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "Irrigation Policies under Rapid Industrialization and Labor Migration: Lessons from Japan, China and India"
+      }
+    ],
+    "publisher": "Unknown",
+    "container": {},
+    "publicationYear": 2017,
+    "subjects": [
+      {
+        "subject": "Land Economics/Use"
+      },
+      {
+        "subject": "irrigation",
+        "subjectScheme": "keyword"
+      },
+      {
+        "subject": "industrialization",
+        "subjectScheme": "keyword"
+      },
+      {
+        "subject": "collective action",
+        "subjectScheme": "keyword"
+      }
+    ],
+    "contributors": [],
+    "dates": [
+      {
+        "date": "2017",
+        "dateType": "Issued"
+      }
+    ],
+    "language": "eng",
+    "types": {
+      "ris": "RPRT",
+      "bibtex": "article",
+      "citeproc": "article-journal",
+      "schemaOrg": "ScholarlyArticle",
+      "resourceType": "Text",
+      "resourceTypeGeneral": "Text"
+    },
+    "relatedIdentifiers": [],
+    "sizes": [],
+    "formats": [],
+    "version": null,
+    "rightsList": [],
+    "descriptions": [
+      {
+        "description": "International society recognizes that the scarcity of fresh water is increasing and farming sectors suffer from lack of irrigation water. However, if we look at this issue with a framework of relative factor endowment, a different view will arise. In emerging states with rapid industrialization and labor migration, labor scarcity increases at a faster pace than that of irrigation water. Using the historical review of Japan’s irrigation policies as well as the case studies of India and China, this paper shows that the introduction of policies which do not reflect the actual relative resource scarcity may mislead the development path. We argue that under increasing relative labor scarcity it is important to realize the substitution of capital for labor for surface irrigation system management and that the substitution needs public support because the service of surface irrigation system has some externalities. Through this argument, this paper also intends to shed the light back to the role of the state for local resource management which seems to be unfairly undervalued since the boom of community participatory approach in the 1980s.",
+        "descriptionType": "Abstract"
+      }
+    ],
+    "geoLocations": [],
+    "fundingReferences": [],
+    "url": "https://ageconsearch.umn.edu/record/284864",
+    "contentUrl": null,
+    "metadataVersion": 1,
+    "schemaVersion": null,
+    "source": "mds",
+    "isActive": true,
+    "state": "findable",
+    "reason": null,
+    "created": "2019-08-24T07:46:47.000Z",
+    "registered": "2019-08-24T07:46:47.000Z",
+    "published": "2017",
+    "updated": "2019-08-25T09:38:33.000Z"
+  },
+  "relationships": {
+    "client": {
+      "data": {
+        "id": "tind.agecon",
+        "type": "clients"
+      }
+    }
+  }
+}
diff --git a/python/tests/files/datacite/datacite_doc_09.json b/python/tests/files/datacite/datacite_doc_09.json
new file mode 100644
index 00000000..d09af545
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_09.json
@@ -0,0 +1,130 @@
+{
+  "id": "10.2314/gbv:880813733",
+  "type": "dois",
+  "attributes": {
+    "doi": "10.2314/gbv:880813733",
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.2314/gbv:880813733",
+        "identifierType": "DOI"
+      },
+      {
+        "identifier": "880813733",
+        "identifierType": "ppn"
+      },
+      {
+        "identifier": "03WKCF3C",
+        "identifierType": "contract"
+      },
+      {
+        "identifier": "01132105",
+        "identifierType": "contract"
+      },
+      {
+        "identifier": "GBV:880813733",
+        "identifierType": "firstid"
+      },
+      {
+        "identifier": "TIBKAT:880813733",
+        "identifierType": "ftx-id"
+      }
+    ],
+    "creators": [
+      {
+        "name": "Kirstaedter, Nils",
+        "nameType": "Personal",
+        "givenName": "Nils",
+        "familyName": "Kirstaedter",
+        "affiliation": [],
+        "nameIdentifiers": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "BrightLas : TP3.3. Module für Direktdiodenstrahlquellen bis 4kW und Untersuchungen zur Leistungsskalierung (Diodemodul) : zum Verbundvorhaben Direktdiodenlaseranlagen und -systeme (VP3) im Förderschwerpunkt innovative regionale Wachstumskerne, BMBF : Abschlussbericht"
+      },
+      {
+        "title": "Module für Direktdiodenstrahlquellen bis 4kW und Untersuchungen zur Leistungsskalierung (Diodemodul)",
+        "titleType": "AlternativeTitle"
+      },
+      {
+        "title": "Direktdiodenlaseranlagen und -systeme (VP3)",
+        "titleType": "AlternativeTitle"
+      }
+    ],
+    "publisher": "[Lumics GmbH]",
+    "container": {},
+    "publicationYear": 2016,
+    "subjects": [
+      {
+        "subject": "Direktdiodenlasersysteme"
+      },
+      {
+        "subject": "Physics",
+        "subjectScheme": "linsearch"
+      }
+    ],
+    "contributors": [
+      {
+        "name": "TIB-Technische Informationsbibliothek Universitätsbibliothek Hannover",
+        "nameType": "Organizational",
+        "affiliation": [],
+        "contributorType": "HostingInstitution",
+        "nameIdentifiers": []
+      },
+      {
+        "name": "Technische Informationsbibliothek (TIB)",
+        "affiliation": [],
+        "contributorType": "DataManager",
+        "nameIdentifiers": []
+      }
+    ],
+    "dates": [
+      {
+        "date": "2016",
+        "dateType": "Issued"
+      }
+    ],
+    "language": "de",
+    "types": {
+      "ris": "RPRT",
+      "bibtex": "article",
+      "citeproc": "report",
+      "schemaOrg": "ScholarlyArticle",
+      "resourceType": "Report",
+      "resourceTypeGeneral": "Text"
+    },
+    "relatedIdentifiers": [],
+    "sizes": [
+      "1 Online-Ressource (10 Seiten, 1,40 MB)"
+    ],
+    "formats": [
+      "application/pdf"
+    ],
+    "version": "1.0",
+    "rightsList": [],
+    "descriptions": [],
+    "geoLocations": [],
+    "fundingReferences": [],
+    "url": "https://www.tib.eu/suchen/id/TIBKAT:880813733/",
+    "contentUrl": null,
+    "metadataVersion": 9,
+    "schemaVersion": "http://datacite.org/schema/kernel-4",
+    "source": "mds",
+    "isActive": true,
+    "state": "findable",
+    "reason": null,
+    "created": "2017-02-25T00:00:18.000Z",
+    "registered": "2017-02-25T00:00:19.000Z",
+    "published": "2016",
+    "updated": "2019-08-03T05:53:51.000Z"
+  },
+  "relationships": {
+    "client": {
+      "data": {
+        "id": "tib.tib",
+        "type": "clients"
+      }
+    }
+  }
+}
diff --git a/python/tests/files/datacite/datacite_doc_10.json b/python/tests/files/datacite/datacite_doc_10.json
new file mode 100644
index 00000000..d40fc272
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_10.json
@@ -0,0 +1,83 @@
+{
+  "id": "10.25549/wpacards-m6171",
+  "type": "dois",
+  "attributes": {
+    "doi": "10.25549/wpacards-m6171",
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.25549/wpacards-m6171",
+        "identifierType": "DOI"
+      }
+    ],
+    "creators": [
+      {
+        "name": "Unknown",
+        "affiliation": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "WPA household census for 210 E VERNON, Los Angeles"
+      }
+    ],
+    "publisher": "University of Southern California Digital Library (USC.DL)",
+    "container": {},
+    "publicationYear": 2012,
+    "subjects": [
+      {
+        "subject": "housing areas"
+      },
+      {
+        "subject": "Dwellings"
+      }
+    ],
+    "contributors": [],
+    "dates": [
+      {
+        "date": "2012",
+        "dateType": "Issued"
+      }
+    ],
+    "language": "eng",
+    "types": {
+      "ris": "DATA",
+      "bibtex": "misc",
+      "citeproc": "dataset",
+      "schemaOrg": "Dataset",
+      "resourceType": "Dataset",
+      "resourceTypeGeneral": "Dataset"
+    },
+    "relatedIdentifiers": [],
+    "sizes": [],
+    "formats": [],
+    "version": null,
+    "rightsList": [],
+    "descriptions": [
+      {
+        "descriptionType": "Abstract"
+      }
+    ],
+    "geoLocations": [],
+    "fundingReferences": [],
+    "url": "http://digitallibrary.usc.edu/cdm/ref/collection/p15799coll8/id/2608",
+    "contentUrl": null,
+    "metadataVersion": 0,
+    "schemaVersion": "http://datacite.org/schema/kernel-4",
+    "source": "mds",
+    "isActive": true,
+    "state": "findable",
+    "reason": null,
+    "created": "2018-09-09T08:32:09.000Z",
+    "registered": "2018-09-09T08:33:10.000Z",
+    "published": "2012",
+    "updated": "2019-08-02T20:03:32.000Z"
+  },
+  "relationships": {
+    "client": {
+      "data": {
+        "id": "usc.dl",
+        "type": "clients"
+      }
+    }
+  }
+}
diff --git a/python/tests/files/datacite/datacite_doc_11.json b/python/tests/files/datacite/datacite_doc_11.json
new file mode 100644
index 00000000..50fe8363
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_11.json
@@ -0,0 +1,86 @@
+{
+  "id": "10.3932/ethz-a-000055869",
+  "type": "dois",
+  "attributes": {
+    "doi": "10.3932/ethz-a-000055869",
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.3932/ethz-a-000055869",
+        "identifierType": "DOI"
+      }
+    ],
+    "creators": [
+      {
+        "name": "Comet Photo AG (Zürich)",
+        "affiliation": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "N1 bei Safenwil"
+      }
+    ],
+    "publisher": "ETH-Bibliothek Zürich, Bildarchiv",
+    "container": {},
+    "publicationYear": 1965,
+    "subjects": [],
+    "contributors": [],
+    "dates": [
+      {
+        "date": "1965",
+        "dateType": "Available"
+      },
+      {
+        "date": "1965",
+        "dateType": "Issued"
+      }
+    ],
+    "language": "de",
+    "types": {
+      "ris": "FIGURE",
+      "bibtex": "misc",
+      "citeproc": "graphic",
+      "schemaOrg": "ImageObject",
+      "resourceTypeGeneral": "Image"
+    },
+    "relatedIdentifiers": [],
+    "sizes": [],
+    "formats": [
+      "TIFF-Bild"
+    ],
+    "version": null,
+    "rightsList": [],
+    "descriptions": [
+      {
+        "description": "Download und Nutzung frei",
+        "descriptionType": "Other"
+      },
+      {
+        "description": "10, N1, Genève, Bern, Zürich, Sankt Gallen, Sankt Margrethen, Strassen, Strassenbau, 2.",
+        "descriptionType": "Other"
+      }
+    ],
+    "geoLocations": [],
+    "fundingReferences": [],
+    "url": "http://ba.e-pics.ethz.ch/link.jsp?id=44861",
+    "contentUrl": null,
+    "metadataVersion": 6,
+    "schemaVersion": "http://datacite.org/schema/kernel-3",
+    "source": "mds",
+    "isActive": true,
+    "state": "findable",
+    "reason": null,
+    "created": "2019-03-04T23:56:42.000Z",
+    "registered": "2019-07-30T13:17:45.000Z",
+    "published": "1965",
+    "updated": "2019-08-02T22:08:26.000Z"
+  },
+  "relationships": {
+    "client": {
+      "data": {
+        "id": "ethz.epics-ba",
+        "type": "clients"
+      }
+    }
+  }
+}
diff --git a/python/tests/files/datacite/datacite_doc_12.json b/python/tests/files/datacite/datacite_doc_12.json
new file mode 100644
index 00000000..31c0f0ca
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_12.json
@@ -0,0 +1,103 @@
+{
+  "id": "10.5167/uzh-171449",
+  "type": "dois",
+  "attributes": {
+    "doi": "10.5167/uzh-171449",
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.5167/uzh-171449",
+        "identifierType": "DOI"
+      }
+    ],
+    "creators": [
+      {
+        "name": "Spanias, Charalampos",
+        "nameType": "Personal",
+        "givenName": "Charalampos",
+        "familyName": "Spanias",
+        "affiliation": [],
+        "nameIdentifiers": []
+      },
+      {
+        "name": "Nikolaidis, Pantelis T",
+        "nameType": "Personal",
+        "givenName": "Pantelis T",
+        "familyName": "Nikolaidis",
+        "affiliation": [],
+        "nameIdentifiers": []
+      },
+      {
+        "name": "Rosemann, Thomas",
+        "nameType": "Personal",
+        "givenName": "Thomas",
+        "familyName": "Rosemann",
+        "affiliation": [],
+        "nameIdentifiers": []
+      },
+      {
+        "name": "Knechtle, Beat",
+        "nameType": "Personal",
+        "givenName": "Beat",
+        "familyName": "Knechtle",
+        "affiliation": [],
+        "nameIdentifiers": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "Anthropometric and Physiological Profile of Mixed Martial Art Athletes: A Brief Review"
+      }
+    ],
+    "publisher": "MDPI Publishing",
+    "container": {},
+    "publicationYear": 2019,
+    "subjects": [],
+    "contributors": [],
+    "dates": [
+      {
+        "date": "2019-06-14",
+        "dateType": "Available"
+      },
+      {
+        "date": "2019",
+        "dateType": "Issued"
+      }
+    ],
+    "language": null,
+    "types": {
+      "ris": "RPRT",
+      "bibtex": "article",
+      "citeproc": "article-journal",
+      "schemaOrg": "ScholarlyArticle",
+      "resourceTypeGeneral": "Text"
+    },
+    "relatedIdentifiers": [],
+    "sizes": [],
+    "formats": [],
+    "version": null,
+    "rightsList": [],
+    "descriptions": [],
+    "geoLocations": [],
+    "fundingReferences": [],
+    "url": "https://www.zora.uzh.ch/id/eprint/171449",
+    "contentUrl": null,
+    "metadataVersion": 0,
+    "schemaVersion": null,
+    "source": "mds",
+    "isActive": true,
+    "state": "findable",
+    "reason": null,
+    "created": "2019-06-27T01:01:35.000Z",
+    "registered": "2019-06-27T01:01:36.000Z",
+    "published": "2019",
+    "updated": "2019-09-26T16:44:24.000Z"
+  },
+  "relationships": {
+    "client": {
+      "data": {
+        "id": "ethz.zora",
+        "type": "clients"
+      }
+    }
+  }
+}
diff --git a/python/tests/files/datacite/datacite_doc_13.json b/python/tests/files/datacite/datacite_doc_13.json
new file mode 100644
index 00000000..ff6eb229
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_13.json
@@ -0,0 +1,86 @@
+{
+  "id": "10.5169/seals-314104",
+  "type": "dois",
+  "attributes": {
+    "doi": "10.5169/seals-314104",
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.5169/seals-314104",
+        "identifierType": "DOI"
+      }
+    ],
+    "creators": [
+      {
+        "name": "O.M.",
+        "affiliation": []
+      },
+      {
+        "name": "Hiltbrunner, Hermann",
+        "nameType": "Personal",
+        "givenName": "Hermann",
+        "familyName": "Hiltbrunner",
+        "affiliation": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "[Müssen wir des Glücks uns schämen?]"
+      }
+    ],
+    "publisher": "Buchdruckerei Büchler & Co.",
+    "container": {},
+    "publicationYear": 1940,
+    "subjects": [],
+    "contributors": [],
+    "dates": [
+      {
+        "date": "1940-10-05",
+        "dateType": "Available"
+      },
+      {
+        "date": "1940",
+        "dateType": "Issued"
+      }
+    ],
+    "language": null,
+    "types": {
+      "ris": "JOUR",
+      "bibtex": "article",
+      "citeproc": "article-journal",
+      "schemaOrg": "ScholarlyArticle",
+      "resourceType": "Journal Article",
+      "resourceTypeGeneral": "Text"
+    },
+    "relatedIdentifiers": [],
+    "sizes": [],
+    "formats": [
+      "text/html",
+      "application/pdf"
+    ],
+    "version": null,
+    "rightsList": [],
+    "descriptions": [],
+    "geoLocations": [],
+    "fundingReferences": [],
+    "url": "https://www.e-periodica.ch/digbib/view?pid=sle-001:1940-1941:45::13",
+    "contentUrl": null,
+    "metadataVersion": 17,
+    "schemaVersion": "http://datacite.org/schema/kernel-3",
+    "source": null,
+    "isActive": true,
+    "state": "findable",
+    "reason": null,
+    "created": "2013-03-22T14:02:08.000Z",
+    "registered": "2013-03-22T13:58:11.000Z",
+    "published": "1940",
+    "updated": "2019-08-02T02:22:55.000Z"
+  },
+  "relationships": {
+    "client": {
+      "data": {
+        "id": "ethz.seals",
+        "type": "clients"
+      }
+    }
+  }
+}
diff --git a/python/tests/files/datacite/datacite_doc_14.json b/python/tests/files/datacite/datacite_doc_14.json
new file mode 100644
index 00000000..b1e1ebf2
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_14.json
@@ -0,0 +1,166 @@
+{
+  "id": "10.5517/cc7gns3",
+  "type": "dois",
+  "attributes": {
+    "doi": "10.5517/cc7gns3",
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.5517/cc7gns3",
+        "identifierType": "DOI"
+      },
+      {
+        "identifier": "222635",
+        "identifierType": "CCDC"
+      }
+    ],
+    "creators": [
+      {
+        "name": "Stulz, E.",
+        "nameType": "Personal",
+        "givenName": "E.",
+        "familyName": "Stulz",
+        "affiliation": []
+      },
+      {
+        "name": "Scott, S.M.",
+        "nameType": "Personal",
+        "givenName": "S.M.",
+        "familyName": "Scott",
+        "affiliation": []
+      },
+      {
+        "name": "Ng, Yiu-Fai",
+        "nameType": "Personal",
+        "givenName": "Yiu-Fai",
+        "familyName": "Ng",
+        "affiliation": []
+      },
+      {
+        "name": "Bond, A.D.",
+        "nameType": "Personal",
+        "givenName": "A.D.",
+        "familyName": "Bond",
+        "affiliation": []
+      },
+      {
+        "name": "Teat, S.J.",
+        "nameType": "Personal",
+        "givenName": "S.J.",
+        "familyName": "Teat",
+        "affiliation": []
+      },
+      {
+        "name": "Darling, S.L.",
+        "nameType": "Personal",
+        "givenName": "S.L.",
+        "familyName": "Darling",
+        "affiliation": []
+      },
+      {
+        "name": "Feeder, N.",
+        "nameType": "Personal",
+        "givenName": "N.",
+        "familyName": "Feeder",
+        "affiliation": []
+      },
+      {
+        "name": "Sanders, J.K.M.",
+        "nameType": "Personal",
+        "givenName": "J.K.M.",
+        "familyName": "Sanders",
+        "affiliation": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "CCDC 222635: Experimental Crystal Structure Determination"
+      }
+    ],
+    "publisher": "Cambridge Crystallographic Data Centre",
+    "container": {},
+    "publicationYear": 2004,
+    "subjects": [
+      {
+        "subject": "Crystal Structure"
+      },
+      {
+        "subject": "Experimental 3D Coordinates"
+      },
+      {
+        "subject": "Crystal System"
+      },
+      {
+        "subject": "Space Group"
+      },
+      {
+        "subject": "Cell Parameters"
+      },
+      {
+        "subject": "Crystallography"
+      },
+      {
+        "subject": "bis(mu~2~-5-(3,5-Di-t-butylphenyl)-15-(4-(2-(diphenylphosphino)ethynyl)phenyl)-2,8,12,18-tetrahexyl-3,7,13,17-tetramethylporphyrinato)-(5,15-bis(3,5-di-t-butylphenyl)-2,8,12,18-tetraethyl-3,7,13,17-tetramethylporphyrinato)-di-nickel-ruthenium chloroform solvate"
+      }
+    ],
+    "contributors": [],
+    "dates": [
+      {
+        "date": "2004",
+        "dateType": "Issued"
+      }
+    ],
+    "language": "eng",
+    "types": {
+      "ris": "DATA",
+      "bibtex": "misc",
+      "citeproc": "dataset",
+      "schemaOrg": "Dataset",
+      "resourceTypeGeneral": "Dataset"
+    },
+    "relatedIdentifiers": [
+      {
+        "relationType": "IsSupplementTo",
+        "relatedIdentifier": "10.1021/ic034699w",
+        "relatedIdentifierType": "DOI"
+      }
+    ],
+    "sizes": [],
+    "formats": [
+      "CIF"
+    ],
+    "version": null,
+    "rightsList": [],
+    "descriptions": [
+      {
+        "description": "Related Article: E.Stulz, S.M.Scott, Yiu-Fai Ng, A.D.Bond, S.J.Teat, S.L.Darling, N.Feeder, J.K.M.Sanders|2003|Inorg.Chem.|42|6564|doi:10.1021/ic034699w",
+        "descriptionType": "Other"
+      },
+      {
+        "description": "An entry from the Cambridge Structural Database, the world’s repository for small molecule crystal structures. The entry contains experimental data from a crystal diffraction study. The deposited dataset for this entry is freely available from the CCDC and typically includes 3D coordinates, cell parameters, space group, experimental conditions and quality measures.",
+        "descriptionType": "Abstract"
+      }
+    ],
+    "geoLocations": [],
+    "fundingReferences": [],
+    "url": "http://www.ccdc.cam.ac.uk/services/structure_request?id=doi:10.5517/cc7gns3&sid=DataCite",
+    "contentUrl": null,
+    "metadataVersion": 2,
+    "schemaVersion": "http://datacite.org/schema/kernel-3",
+    "source": null,
+    "isActive": true,
+    "state": "findable",
+    "reason": null,
+    "created": "2014-03-18T07:28:28.000Z",
+    "registered": "2014-03-18T07:28:29.000Z",
+    "published": "2004",
+    "updated": "2019-08-02T03:38:32.000Z"
+  },
+  "relationships": {
+    "client": {
+      "data": {
+        "id": "ccdc.csd",
+        "type": "clients"
+      }
+    }
+  }
+}
diff --git a/python/tests/files/datacite/datacite_doc_15.json b/python/tests/files/datacite/datacite_doc_15.json
new file mode 100644
index 00000000..5b4ee8ec
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_15.json
@@ -0,0 +1,79 @@
+{
+  "id": "10.6073/pasta/95296d8416aae24f3d39b4ecb27f0b28",
+  "type": "dois",
+  "attributes": {
+    "doi": "10.6073/pasta/95296d8416aae24f3d39b4ecb27f0b28",
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.6073/pasta/95296d8416aae24f3d39b4ecb27f0b28",
+        "identifierType": "DOI"
+      },
+      {
+        "identifier": "https://pasta.lternet.edu/package/eml/knb-lter-vcr/102/16",
+        "identifierType": "URL"
+      }
+    ],
+    "creators": [
+      {
+        "name": "Richardson, David",
+        "nameType": "Personal",
+        "givenName": "David",
+        "familyName": "Richardson",
+        "affiliation": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "Parramore Island of the Virginia Coast Reserve Permanent Plot Resurvey: Tree data 1997"
+      }
+    ],
+    "publisher": "Environmental Data Initiative",
+    "container": {},
+    "publicationYear": 2017,
+    "subjects": [],
+    "contributors": [],
+    "dates": [
+      {
+        "date": "2017",
+        "dateType": "Issued"
+      }
+    ],
+    "language": null,
+    "types": {
+      "ris": "DATA",
+      "bibtex": "misc",
+      "citeproc": "dataset",
+      "schemaOrg": "Dataset",
+      "resourceType": "dataPackage",
+      "resourceTypeGeneral": "Dataset"
+    },
+    "relatedIdentifiers": [],
+    "sizes": [],
+    "formats": [],
+    "version": null,
+    "rightsList": [],
+    "descriptions": [],
+    "geoLocations": [],
+    "fundingReferences": [],
+    "url": "https://portal.lternet.edu/nis/mapbrowse?packageid=knb-lter-vcr.102.16",
+    "contentUrl": null,
+    "metadataVersion": 1,
+    "schemaVersion": "http://datacite.org/schema/kernel-2.2",
+    "source": null,
+    "isActive": true,
+    "state": "findable",
+    "reason": null,
+    "created": "2017-02-01T18:20:04.000Z",
+    "registered": "2017-02-01T18:20:05.000Z",
+    "published": "2017",
+    "updated": "2019-08-02T14:16:49.000Z"
+  },
+  "relationships": {
+    "client": {
+      "data": {
+        "id": "edi.edi",
+        "type": "clients"
+      }
+    }
+  }
+}
diff --git a/python/tests/files/datacite/datacite_doc_16.json b/python/tests/files/datacite/datacite_doc_16.json
new file mode 100644
index 00000000..5af7fbe1
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_16.json
@@ -0,0 +1,80 @@
+{
+  "id": "10.6084/m9.figshare.1282478",
+  "type": "dois",
+  "attributes": {
+    "doi": "10.6084/m9.figshare.1282478",
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.6084/m9.figshare.1282478",
+        "identifierType": "DOI"
+      }
+    ],
+    "creators": [
+      {
+        "name": "Sochi, Taha",
+        "nameType": "Personal",
+        "givenName": "Taha",
+        "familyName": "Sochi",
+        "affiliation": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "Testing the Connectivity of Networks"
+      }
+    ],
+    "publisher": "Figshare",
+    "container": {},
+    "publicationYear": 2014,
+    "subjects": [],
+    "contributors": [],
+    "dates": [
+      {
+        "date": "2014",
+        "dateType": "Issued"
+      }
+    ],
+    "language": null,
+    "types": {
+      "ris": "DATA",
+      "bibtex": "misc",
+      "citeproc": "dataset",
+      "schemaOrg": "Dataset",
+      "resourceType": "Paper",
+      "resourceTypeGeneral": "Dataset"
+    },
+    "relatedIdentifiers": [],
+    "sizes": [],
+    "formats": [],
+    "version": null,
+    "rightsList": [
+      {
+        "rights": "CC-BY",
+        "rightsUri": "http://creativecommons.org/licenses/by/3.0/us"
+      }
+    ],
+    "descriptions": [],
+    "geoLocations": [],
+    "fundingReferences": [],
+    "url": "http://figshare.com/articles/Testing_the_Connectivity_of_Networks/1282478",
+    "contentUrl": null,
+    "metadataVersion": 0,
+    "schemaVersion": "http://datacite.org/schema/kernel-3",
+    "source": null,
+    "isActive": true,
+    "state": "findable",
+    "reason": null,
+    "created": "2014-12-31T15:38:16.000Z",
+    "registered": "2014-12-31T15:38:18.000Z",
+    "published": "2014",
+    "updated": "2019-08-02T04:52:11.000Z"
+  },
+  "relationships": {
+    "client": {
+      "data": {
+        "id": "figshare.ars",
+        "type": "clients"
+      }
+    }
+  }
+}
diff --git a/python/tests/files/datacite/datacite_doc_17.json b/python/tests/files/datacite/datacite_doc_17.json
new file mode 100644
index 00000000..f1363a61
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_17.json
@@ -0,0 +1,72 @@
+{
+  "id": "10.7910/dvn/tsqfwc/yytj22",
+  "type": "dois",
+  "attributes": {
+    "doi": "10.7910/dvn/tsqfwc/yytj22",
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.7910/dvn/tsqfwc/yytj22",
+        "identifierType": "DOI"
+      }
+    ],
+    "creators": [
+      {
+        "name": "Di Giovanna, Antonino Paolo (University Of Florence)",
+        "nameType": "Personal",
+        "affiliation": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "gel_BSA-FITC_Markov_segmntation0343.tif"
+      }
+    ],
+    "publisher": "Harvard Dataverse",
+    "container": {},
+    "publicationYear": 2018,
+    "subjects": [],
+    "contributors": [],
+    "dates": [
+      {
+        "date": "2018",
+        "dateType": "Issued"
+      }
+    ],
+    "language": null,
+    "types": {
+      "ris": "DATA",
+      "bibtex": "misc",
+      "citeproc": "dataset",
+      "schemaOrg": "Dataset",
+      "resourceTypeGeneral": "Dataset"
+    },
+    "relatedIdentifiers": [],
+    "sizes": [],
+    "formats": [],
+    "version": null,
+    "rightsList": [],
+    "descriptions": [],
+    "geoLocations": [],
+    "fundingReferences": [],
+    "url": "https://dataverse.harvard.edu/file.xhtml?persistentId=doi:10.7910/DVN/TSQFWC/YYTJ22",
+    "contentUrl": null,
+    "metadataVersion": 0,
+    "schemaVersion": "http://datacite.org/schema/kernel-4",
+    "source": "mds",
+    "isActive": true,
+    "state": "findable",
+    "reason": null,
+    "created": "2018-08-22T17:36:10.000Z",
+    "registered": "2018-08-22T17:37:30.000Z",
+    "published": "2018",
+    "updated": "2019-08-02T19:43:20.000Z"
+  },
+  "relationships": {
+    "client": {
+      "data": {
+        "id": "gdcc.harvard-dv",
+        "type": "clients"
+      }
+    }
+  }
+}
diff --git a/python/tests/files/datacite/datacite_doc_18.json b/python/tests/files/datacite/datacite_doc_18.json
new file mode 100644
index 00000000..f6bc81a6
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_18.json
@@ -0,0 +1,79 @@
+{
+  "id": "10.7916/d81z522m",
+  "type": "dois",
+  "attributes": {
+    "doi": "10.7916/d81z522m",
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.7916/d81z522m",
+        "identifierType": "DOI"
+      }
+    ],
+    "creators": [
+      {
+        "name": "(:Unav)",
+        "affiliation": [],
+        "nameIdentifiers": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "Eastern questionnaire, answer sheet for Interviewee 53215, page 064"
+      }
+    ],
+    "publisher": "Columbia University",
+    "container": {},
+    "publicationYear": 2017,
+    "subjects": [],
+    "contributors": [],
+    "dates": [
+      {
+        "date": "2017-08-21",
+        "dateType": "Created"
+      },
+      {
+        "date": "2019-08-04",
+        "dateType": "Updated"
+      },
+      {
+        "date": "2017",
+        "dateType": "Issued"
+      }
+    ],
+    "language": null,
+    "types": {
+      "ris": "GEN",
+      "bibtex": "misc",
+      "citeproc": "article",
+      "schemaOrg": "CreativeWork"
+    },
+    "relatedIdentifiers": [],
+    "sizes": [],
+    "formats": [],
+    "version": null,
+    "rightsList": [],
+    "descriptions": [],
+    "geoLocations": [],
+    "fundingReferences": [],
+    "url": "https://dlc.library.columbia.edu/lcaaj/cul:k3j9kd52d6",
+    "contentUrl": null,
+    "metadataVersion": 2,
+    "schemaVersion": "http://datacite.org/schema/kernel-3",
+    "source": "ez",
+    "isActive": true,
+    "state": "findable",
+    "reason": null,
+    "created": "2017-11-29T02:15:31.000Z",
+    "registered": "2017-11-29T02:15:32.000Z",
+    "published": "2017",
+    "updated": "2019-08-04T13:17:58.000Z"
+  },
+  "relationships": {
+    "client": {
+      "data": {
+        "id": "cul.columbia",
+        "type": "clients"
+      }
+    }
+  }
+}
diff --git a/python/tests/files/datacite/datacite_doc_19.json b/python/tests/files/datacite/datacite_doc_19.json
new file mode 100644
index 00000000..c0bc25ba
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_19.json
@@ -0,0 +1,79 @@
+{
+  "id": "10.7916/d86x0cg1",
+  "type": "dois",
+  "attributes": {
+    "doi": "10.7916/d86x0cg1",
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.7916/d86x0cg1",
+        "identifierType": "DOI"
+      }
+    ],
+    "creators": [
+      {
+        "name": "(:Unav)",
+        "affiliation": [],
+        "nameIdentifiers": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "Eastern questionnaire, answer sheet for Interviewee 55236, page 092"
+      }
+    ],
+    "publisher": "Columbia University",
+    "container": {},
+    "publicationYear": 2017,
+    "subjects": [],
+    "contributors": [],
+    "dates": [
+      {
+        "date": "2017-08-24",
+        "dateType": "Created"
+      },
+      {
+        "date": "2019-08-04",
+        "dateType": "Updated"
+      },
+      {
+        "date": "2017",
+        "dateType": "Issued"
+      }
+    ],
+    "language": null,
+    "types": {
+      "ris": "GEN",
+      "bibtex": "misc",
+      "citeproc": "article",
+      "schemaOrg": "CreativeWork"
+    },
+    "relatedIdentifiers": [],
+    "sizes": [],
+    "formats": [],
+    "version": null,
+    "rightsList": [],
+    "descriptions": [],
+    "geoLocations": [],
+    "fundingReferences": [],
+    "url": "https://dlc.library.columbia.edu/lcaaj/cul:44j0zpc98s",
+    "contentUrl": null,
+    "metadataVersion": 3,
+    "schemaVersion": "http://datacite.org/schema/kernel-3",
+    "source": "ez",
+    "isActive": true,
+    "state": "findable",
+    "reason": null,
+    "created": "2017-11-29T09:29:33.000Z",
+    "registered": "2017-11-29T09:29:34.000Z",
+    "published": "2017",
+    "updated": "2019-08-04T23:43:40.000Z"
+  },
+  "relationships": {
+    "client": {
+      "data": {
+        "id": "cul.columbia",
+        "type": "clients"
+      }
+    }
+  }
+}
diff --git a/python/tests/files/datacite/datacite_doc_20.json b/python/tests/files/datacite/datacite_doc_20.json
new file mode 100644
index 00000000..964e2cbb
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_20.json
@@ -0,0 +1,42 @@
+{
+    "attributes": {
+      "doi": "10.7916/d86x0cg1",
+      "creators": [
+        {
+          "name": "(:Unav)",
+          "affiliation": [],
+          "nameIdentifiers": []
+        }
+      ],
+      "titles": [
+        {
+          "title": "<h1>Eastern questionnaire</h1>"
+        }
+      ],
+      "publicationYear": 2017,
+      "dates": [
+        {
+          "date": "2017-08-24",
+          "dateType": "Created"
+        },
+        {
+          "date": "2019-08-04",
+          "dateType": "Updated"
+        },
+        {
+          "date": "2017",
+          "dateType": "Issued"
+        }
+      ],
+      "language": null,
+      "types": {
+        "ris": "GEN",
+        "bibtex": "misc",
+        "citeproc": "article",
+        "schemaOrg": "CreativeWork"
+      },
+      "isActive": true,
+      "state": "findable"
+    }
+  }
+  
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_doc_21.json b/python/tests/files/datacite/datacite_doc_21.json
new file mode 100644
index 00000000..cae7f40f
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_21.json
@@ -0,0 +1,42 @@
+{
+    "attributes": {
+      "doi": "10.7916/d86x0cg1",
+      "creators": [
+        {
+          "name": "(:Unav)",
+          "affiliation": [],
+          "nameIdentifiers": []
+        }
+      ],
+      "titles": [
+        {
+          "title": "ABC"
+        }
+      ],
+      "publicationYear": 2017,
+      "language": "GERMAN",
+      "types": {
+        "ris": "GEN",
+        "bibtex": "misc",
+        "citeproc": "article",
+        "schemaOrg": "CreativeWork"
+      },
+      "dates": [
+        {
+          "date": "2017-08-24",
+          "dateType": "Created"
+        },
+        {
+          "date": "2019-08-04",
+          "dateType": "Updated"
+        },
+        {
+          "date": "2017",
+          "dateType": "Issued"
+        }
+      ],
+      "isActive": true,
+      "state": "findable"
+    }
+  }
+  
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_doc_22.json b/python/tests/files/datacite/datacite_doc_22.json
new file mode 100644
index 00000000..42448ddf
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_22.json
@@ -0,0 +1,44 @@
+{
+    "attributes": {
+      "doi": "10.7916/d86x0cg1",
+      "creators": [
+        {
+          "name": "Anton Welch",
+          "affiliation": [
+            "Department of pataphysics"
+          ],
+          "nameIdentifiers": []
+        }
+      ],
+      "titles": [
+        {
+          "title": "ABC"
+        }
+      ],
+      "publicationYear": 2017,
+      "language": "GERMAN",
+      "types": {
+        "ris": "GEN",
+        "bibtex": "misc",
+        "citeproc": "article",
+        "schemaOrg": "CreativeWork"
+      },
+      "dates": [
+        {
+          "date": "2017-08-24",
+          "dateType": "Created"
+        },
+        {
+          "date": "2019-08-04",
+          "dateType": "Updated"
+        },
+        {
+          "date": "2017",
+          "dateType": "Issued"
+        }
+      ],
+      "isActive": true,
+      "state": "findable"
+    }
+  }
+
diff --git a/python/tests/files/datacite/datacite_doc_23.json b/python/tests/files/datacite/datacite_doc_23.json
new file mode 100644
index 00000000..1e5bcc3f
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_23.json
@@ -0,0 +1,44 @@
+{
+    "attributes": {
+      "doi": "10.7916/d86x0cg1\u2013xxx",
+      "creators": [
+        {
+          "name": "Anton Welch",
+          "affiliation": [
+            "Department of pataphysics"
+          ],
+          "nameIdentifiers": []
+        }
+      ],
+      "titles": [
+        {
+          "title": "ABC"
+        }
+      ],
+      "publicationYear": 2017,
+      "language": "GERMAN",
+      "types": {
+        "ris": "GEN",
+        "bibtex": "misc",
+        "citeproc": "article",
+        "schemaOrg": "CreativeWork"
+      },
+      "dates": [
+        {
+          "date": "2017-08-24",
+          "dateType": "Created"
+        },
+        {
+          "date": "2019-08-04",
+          "dateType": "Updated"
+        },
+        {
+          "date": "2017",
+          "dateType": "Issued"
+        }
+      ],
+      "isActive": true,
+      "state": "findable"
+    }
+  }
+
diff --git a/python/tests/files/datacite/datacite_result_00.json b/python/tests/files/datacite/datacite_result_00.json
new file mode 100644
index 00000000..085e23f3
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_00.json
@@ -0,0 +1,87 @@
+{
+    "extra": {
+        "container_name": "Journal of Chemical Crystallography",
+        "datacite": {
+            "license": [
+                {
+                    "rightsUri": "http://www.springer.com/tdm"
+                }
+            ],
+            "relations": [
+                {
+                    "relationType": "IsPartOf",
+                    "relatedIdentifier": "1074-1542",
+                    "resourceTypeGeneral": "Collection",
+                    "relatedIdentifierType": "ISSN"
+                }
+            ]
+        }
+    },
+    "title": "Synthesis and Crystal Structure of a Compound with Two Conformational Isomers: N-(2-methylbenzoyl)-N\u2032-(4-nitrophenyl)thiourea",
+    "release_type": "article-journal",
+    "release_stage": "published",
+    "release_date": "2019-05-31",
+    "release_year": 2019,
+    "ext_ids": {
+        "doi": "10.1007/s10870-008-9413-z"
+    },
+    "volume": "38",
+    "issue": "12",
+    "pages": "927-930",
+    "publisher": "Springer Science and Business Media LLC",
+    "contribs": [
+        {
+            "index": 0,
+            "raw_name": "Li, Qian-Jin",
+            "given_name": "Qian-Jin",
+            "surname": "Li",
+            "role": "author"
+        },
+        {
+            "index": 1,
+            "raw_name": "Yang, Chun-Long",
+            "given_name": "Chun-Long",
+            "surname": "Yang",
+            "role": "author"
+        }
+    ],
+    "refs": [
+        {
+            "index": 0,
+            "extra": {
+                "doi": "10.1016/j.bmcl.2005.09.033"
+            }
+        },
+        {
+            "index": 1,
+            "extra": {
+                "doi": "10.1016/s0022-1139(02)00330-5"
+            }
+        },
+        {
+            "index": 2,
+            "extra": {
+                "doi": "10.1016/s0010-8545(01)00337-x"
+            }
+        },
+        {
+            "index": 3,
+            "extra": {
+                "doi": "10.1016/j.tetlet.2005.06.135"
+            }
+        },
+        {
+            "index": 4,
+            "extra": {
+                "doi": "10.1039/p298700000s1"
+            }
+        },
+        {
+            "index": 5,
+            "extra": {
+                "doi": "10.1002/anie.199515551"
+            }
+        }
+    ],
+    "abstracts": []
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_01.json b/python/tests/files/datacite/datacite_result_01.json
new file mode 100644
index 00000000..f8c6b930
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_01.json
@@ -0,0 +1,32 @@
+{
+    "extra": {
+        "datacite": {
+            "license": [
+                {
+                    "lang": "de",
+                    "rights": "Standard (Creative Commons - Namensnennung - Weitergabe unter gleichen Bedingungen) - http://www.ub.uni-heidelberg.de/helios/digi/nutzung/Welcome.html"
+                }
+            ]
+        }
+    },
+    "title": "Ferdinand Gaillard, [1]: n\u00e9 \u00e0 Paris le 16 janvier 1834, mort \u00e0 Paris le 19 janvier 1887",
+    "release_type": "article-journal",
+    "release_stage": "published",
+    "release_year": 1887,
+    "ext_ids": {
+        "doi": "10.11588/diglit.25558.39"
+    },
+    "publisher": "University Library Heidelberg",
+    "language": "fr",
+    "contribs": [
+        {
+            "index": 0,
+            "raw_name": "Dargenty, G.",
+            "given_name": "G.",
+            "surname": "Dargenty",
+            "role": "author"
+        }
+    ],
+    "refs": [],
+    "abstracts": []
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_02.json b/python/tests/files/datacite/datacite_result_02.json
new file mode 100644
index 00000000..f8b85f38
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_02.json
@@ -0,0 +1,36 @@
+{
+    "extra": {
+        "datacite": {
+            "license": [
+                {
+                    "lang": "de",
+                    "rights": "Creative Commons - Namensnennung - Weitergabe unter gleichen Bedingungen - https://creativecommons.org/licenses/by-sa/3.0/de/"
+                },
+                {
+                    "lang": "en",
+                    "rights": "Creative Commons - Namensnennung - Weitergabe unter gleichen Bedingungen - https://creativecommons.org/licenses/by-sa/3.0/"
+                }
+            ]
+        }
+    },
+    "title": "Solinger Schwertschmiede-Familien, [4]",
+    "release_type": "article-journal",
+    "release_stage": "published",
+    "release_year": 1897,
+    "ext_ids": {
+        "doi": "10.11588/diglit.37715.57"
+    },
+    "publisher": "University Library Heidelberg",
+    "language": "de",
+    "contribs": [
+        {
+            "index": 0,
+            "raw_name": "Weyersberg, Albert",
+            "given_name": "Albert",
+            "surname": "Weyersberg",
+            "role": "author"
+        }
+    ],
+    "refs": [],
+    "abstracts": []
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_03.json b/python/tests/files/datacite/datacite_result_03.json
new file mode 100644
index 00000000..3e3c2bd5
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_03.json
@@ -0,0 +1,19 @@
+{
+    "extra": {},
+    "title": "midterm ah30903",
+    "release_type": "article",
+    "release_year": 2016,
+    "ext_ids": {
+        "doi": "10.13140/rg.2.2.30434.53446"
+    },
+    "language": "ms",
+    "contribs": [
+        {
+            "index": 0,
+            "raw_name": "Mastura Yahya",
+            "role": "author"
+        }
+    ],
+    "refs": [],
+    "abstracts": []
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_04.json b/python/tests/files/datacite/datacite_result_04.json
new file mode 100644
index 00000000..7ca70d6c
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_04.json
@@ -0,0 +1,28 @@
+{
+    "extra": {},
+    "title": "On chain maps inducing isomorphisms in homology",
+    "release_type": "article-journal",
+    "release_stage": "published",
+    "release_year": 1973,
+    "ext_ids": {
+        "doi": "10.14288/1.0080520"
+    },
+    "publisher": "University of British Columbia",
+    "language": "en",
+    "contribs": [
+        {
+            "index": 0,
+            "raw_name": "Nicollerat, Marc Andre",
+            "given_name": "Marc Andre",
+            "surname": "Nicollerat",
+            "role": "author"
+        }
+    ],
+    "refs": [],
+    "abstracts": [
+        {
+            "content": "Let A be an abelian category, I the full subcategory of A consisting of injective objects of A, and K(A) the category whose objects are cochain complexes of elements of A, and whose morphisms are homotopy classes of cochain maps.  In (5), lemma 4.6., p. 42, R. Hartshorne has proved that, under certain conditions, a cochain complex X\u02d9 \u03b5. |KA)| can be embedded in a complex I\u02d9 \u03b5. |K(I)| in such a way that I\u02d9 has the same cohomology as X\u02d9.  In Chapter I we show that the construction given in the two first parts of Hartshorne's Lemma is natural i.e. there exists a functor  J : K(A) \u2192 K(I) and a natural transformation [formula omitted]  (where E : K(I) \u2192 K(A) is the embedding functor) such that [formula omitted] is  injective and induces isomorphism in cohomology. The question whether the construction given in the third part of the lemma is functorial is still open.  We also prove that J is left adjoint to E, so that K(I) is a reflective subcategory of K(A).  In the special case where A is a category [formula omitted] of left A-modules, and [formula omitted] the category of cochain complexes in [formula omitted] and cochain maps (not homotopy classes), we prove the existence of a functor [formula omitted]  In Chapter II we study the natural homomorphism [formula omitted]   where A, B are rings, and M, L, N modules or chain complexes. In particular we give several sufficient conditions under which v is an isomorphism, or induces isomorphism in homology.  In the appendix we give a detailed proof of Hartshorne's Lemma. We think that this is useful, as no complete proof is, to our knowledge, to be found in the literature.",
+            "mimetype": "text/plain"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_05.json b/python/tests/files/datacite/datacite_result_05.json
new file mode 100644
index 00000000..e61769de
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_05.json
@@ -0,0 +1,530 @@
+{
+    "extra": {
+        "datacite": {
+            "license": [
+                {
+                    "rights": "Attribution-NonCommercial (CC BY-NC)",
+                    "rightsUri": "http://creativecommons.org/licenses/by-nc/4.0"
+                }
+            ]
+        }
+    },
+    "title": "SH409843.07FU",
+    "subtitle": "Gomphales",
+    "release_type": "dataset",
+    "release_stage": "published",
+    "release_date": "2014-10-05",
+    "release_year": 2014,
+    "ext_ids": {
+        "doi": "10.15156/bio/sh409843.07fu"
+    },
+    "publisher": "UNITE Community",
+    "language": "en",
+    "license_slug": "CC-BY-NC",
+    "contribs": [
+        {
+            "index": 0,
+            "raw_name": "K\u00f5ljalg, Urmas",
+            "given_name": "Urmas",
+            "surname": "K\u00f5ljalg",
+            "role": "author"
+        },
+        {
+            "index": 1,
+            "raw_name": "Abarenkov, Kessy",
+            "given_name": "Kessy",
+            "surname": "Abarenkov",
+            "role": "author"
+        },
+        {
+            "index": 2,
+            "raw_name": "Nilsson, R. Henrik",
+            "given_name": "R. Henrik",
+            "surname": "Nilsson",
+            "role": "author"
+        },
+        {
+            "index": 3,
+            "raw_name": "Larsson, Karl-Henrik",
+            "given_name": "Karl-Henrik",
+            "surname": "Larsson",
+            "role": "author"
+        },
+        {
+            "index": 4,
+            "raw_name": "Aas, Anders Bj\u00f8rnsgard",
+            "given_name": "Anders Bj\u00f8rnsgard",
+            "surname": "Aas",
+            "role": "author"
+        },
+        {
+            "index": 5,
+            "raw_name": "Adams, Rachel",
+            "given_name": "Rachel",
+            "surname": "Adams",
+            "role": "author"
+        },
+        {
+            "index": 6,
+            "raw_name": "Alves, Artur",
+            "given_name": "Artur",
+            "surname": "Alves",
+            "role": "author"
+        },
+        {
+            "index": 7,
+            "raw_name": "Ammirati, Joseph F.",
+            "given_name": "Joseph F.",
+            "surname": "Ammirati",
+            "role": "author"
+        },
+        {
+            "index": 8,
+            "raw_name": "Arnold, A. Elizabeth",
+            "given_name": "A. Elizabeth",
+            "surname": "Arnold",
+            "role": "author"
+        },
+        {
+            "index": 9,
+            "raw_name": "Bahram, Mohammad",
+            "given_name": "Mohammad",
+            "surname": "Bahram",
+            "role": "author"
+        },
+        {
+            "index": 10,
+            "raw_name": "Bengtsson-Palme, Johan",
+            "given_name": "Johan",
+            "surname": "Bengtsson-Palme",
+            "role": "author"
+        },
+        {
+            "index": 11,
+            "raw_name": "Berlin, Anna",
+            "given_name": "Anna",
+            "surname": "Berlin",
+            "role": "author"
+        },
+        {
+            "index": 12,
+            "raw_name": "Botnen, Synn\u00f8ve",
+            "given_name": "Synn\u00f8ve",
+            "surname": "Botnen",
+            "role": "author"
+        },
+        {
+            "index": 13,
+            "raw_name": "Bourlat, Sarah",
+            "given_name": "Sarah",
+            "surname": "Bourlat",
+            "role": "author"
+        },
+        {
+            "index": 14,
+            "raw_name": "Cheeke, Tanya",
+            "given_name": "Tanya",
+            "surname": "Cheeke",
+            "role": "author"
+        },
+        {
+            "index": 15,
+            "raw_name": "Dima, B\u00e1lint",
+            "given_name": "B\u00e1lint",
+            "surname": "Dima",
+            "role": "author"
+        },
+        {
+            "index": 16,
+            "raw_name": "Drenkhan, Rein",
+            "given_name": "Rein",
+            "surname": "Drenkhan",
+            "role": "author"
+        },
+        {
+            "index": 17,
+            "raw_name": "Duarte, Camila",
+            "given_name": "Camila",
+            "surname": "Duarte",
+            "role": "author"
+        },
+        {
+            "index": 18,
+            "raw_name": "Due\u00f1as, Margarita",
+            "given_name": "Margarita",
+            "surname": "Due\u00f1as",
+            "role": "author"
+        },
+        {
+            "index": 19,
+            "raw_name": "Eberhardt, Ursula",
+            "given_name": "Ursula",
+            "surname": "Eberhardt",
+            "role": "author"
+        },
+        {
+            "index": 20,
+            "raw_name": "Friberg, Hanna",
+            "given_name": "Hanna",
+            "surname": "Friberg",
+            "role": "author"
+        },
+        {
+            "index": 21,
+            "raw_name": "Fr\u00f8slev, Tobias G.",
+            "given_name": "Tobias G.",
+            "surname": "Fr\u00f8slev",
+            "role": "author"
+        },
+        {
+            "index": 22,
+            "raw_name": "Garnica, Sigisfredo",
+            "given_name": "Sigisfredo",
+            "surname": "Garnica",
+            "role": "author"
+        },
+        {
+            "index": 23,
+            "raw_name": "Geml, J\u00f3zsef",
+            "given_name": "J\u00f3zsef",
+            "surname": "Geml",
+            "role": "author"
+        },
+        {
+            "index": 24,
+            "raw_name": "Ghobad-Nejhad, Masoomeh",
+            "given_name": "Masoomeh",
+            "surname": "Ghobad-Nejhad",
+            "role": "author"
+        },
+        {
+            "index": 25,
+            "raw_name": "Grebenc, Tine",
+            "given_name": "Tine",
+            "surname": "Grebenc",
+            "role": "author"
+        },
+        {
+            "index": 26,
+            "raw_name": "Griffith, Gareth W.",
+            "given_name": "Gareth W.",
+            "surname": "Griffith",
+            "role": "author"
+        },
+        {
+            "index": 27,
+            "raw_name": "Hampe, Felix",
+            "given_name": "Felix",
+            "surname": "Hampe",
+            "role": "author"
+        },
+        {
+            "index": 28,
+            "raw_name": "Kennedy, Peter",
+            "given_name": "Peter",
+            "surname": "Kennedy",
+            "role": "author"
+        },
+        {
+            "index": 29,
+            "raw_name": "Khomich, Maryia",
+            "given_name": "Maryia",
+            "surname": "Khomich",
+            "role": "author"
+        },
+        {
+            "index": 30,
+            "raw_name": "Kohout, Petr",
+            "given_name": "Petr",
+            "surname": "Kohout",
+            "role": "author"
+        },
+        {
+            "index": 31,
+            "raw_name": "Kollom, Anu",
+            "given_name": "Anu",
+            "surname": "Kollom",
+            "role": "author"
+        },
+        {
+            "index": 32,
+            "raw_name": "Larsson, Ellen",
+            "given_name": "Ellen",
+            "surname": "Larsson",
+            "role": "author"
+        },
+        {
+            "index": 33,
+            "raw_name": "Laszlo, Irinyi",
+            "given_name": "Irinyi",
+            "surname": "Laszlo",
+            "role": "author"
+        },
+        {
+            "index": 34,
+            "raw_name": "Leavitt, Steven",
+            "given_name": "Steven",
+            "surname": "Leavitt",
+            "role": "author"
+        },
+        {
+            "index": 35,
+            "raw_name": "Liimatainen, Kare",
+            "given_name": "Kare",
+            "surname": "Liimatainen",
+            "role": "author"
+        },
+        {
+            "index": 36,
+            "raw_name": "Lindahl, Bj\u00f6rn",
+            "given_name": "Bj\u00f6rn",
+            "surname": "Lindahl",
+            "role": "author"
+        },
+        {
+            "index": 37,
+            "raw_name": "Lodge, Deborah J.",
+            "given_name": "Deborah J.",
+            "surname": "Lodge",
+            "role": "author"
+        },
+        {
+            "index": 38,
+            "raw_name": "Lumbsch, Helge Thorsten",
+            "given_name": "Helge Thorsten",
+            "surname": "Lumbsch",
+            "role": "author"
+        },
+        {
+            "index": 39,
+            "raw_name": "Mart\u00edn Esteban, Mar\u00eda Paz",
+            "given_name": "Mar\u00eda Paz",
+            "surname": "Mart\u00edn Esteban",
+            "role": "author"
+        },
+        {
+            "index": 40,
+            "raw_name": "Meyer, Wieland",
+            "given_name": "Wieland",
+            "surname": "Meyer",
+            "role": "author"
+        },
+        {
+            "index": 41,
+            "raw_name": "Miettinen, Otto",
+            "given_name": "Otto",
+            "surname": "Miettinen",
+            "role": "author"
+        },
+        {
+            "index": 42,
+            "raw_name": "Nguyen, Nhu",
+            "given_name": "Nhu",
+            "surname": "Nguyen",
+            "role": "author"
+        },
+        {
+            "index": 43,
+            "raw_name": "Niskanen, Tuula",
+            "given_name": "Tuula",
+            "surname": "Niskanen",
+            "role": "author"
+        },
+        {
+            "index": 44,
+            "raw_name": "Oono, Ryoko",
+            "given_name": "Ryoko",
+            "surname": "Oono",
+            "role": "author"
+        },
+        {
+            "index": 45,
+            "raw_name": "\u00d6pik, Maarja",
+            "given_name": "Maarja",
+            "surname": "\u00d6pik",
+            "role": "author"
+        },
+        {
+            "index": 46,
+            "raw_name": "Ordynets, Alexander",
+            "given_name": "Alexander",
+            "surname": "Ordynets",
+            "role": "author"
+        },
+        {
+            "index": 47,
+            "raw_name": "Paw\u0142owska, Julia",
+            "given_name": "Julia",
+            "surname": "Paw\u0142owska",
+            "role": "author"
+        },
+        {
+            "index": 48,
+            "raw_name": "Peintner, Ursula",
+            "given_name": "Ursula",
+            "surname": "Peintner",
+            "role": "author"
+        },
+        {
+            "index": 49,
+            "raw_name": "Pereira, Olinto Liparini",
+            "given_name": "Olinto Liparini",
+            "surname": "Pereira",
+            "role": "author"
+        },
+        {
+            "index": 50,
+            "raw_name": "Pinho, Danilo Batista",
+            "given_name": "Danilo Batista",
+            "surname": "Pinho",
+            "role": "author"
+        },
+        {
+            "index": 51,
+            "raw_name": "P\u00f5ldmaa, Kadri",
+            "given_name": "Kadri",
+            "surname": "P\u00f5ldmaa",
+            "role": "author"
+        },
+        {
+            "index": 52,
+            "raw_name": "Runnel, Kadri",
+            "given_name": "Kadri",
+            "surname": "Runnel",
+            "role": "author"
+        },
+        {
+            "index": 53,
+            "raw_name": "Ryberg, Martin",
+            "given_name": "Martin",
+            "surname": "Ryberg",
+            "role": "author"
+        },
+        {
+            "index": 54,
+            "raw_name": "Saar, Irja",
+            "given_name": "Irja",
+            "surname": "Saar",
+            "role": "author"
+        },
+        {
+            "index": 55,
+            "raw_name": "Sanli, Kemal",
+            "given_name": "Kemal",
+            "surname": "Sanli",
+            "role": "author"
+        },
+        {
+            "index": 56,
+            "raw_name": "Scott, James",
+            "given_name": "James",
+            "surname": "Scott",
+            "role": "author"
+        },
+        {
+            "index": 57,
+            "raw_name": "Spirin, Viacheslav",
+            "given_name": "Viacheslav",
+            "surname": "Spirin",
+            "role": "author"
+        },
+        {
+            "index": 58,
+            "raw_name": "Suija, Ave",
+            "given_name": "Ave",
+            "surname": "Suija",
+            "role": "author"
+        },
+        {
+            "index": 59,
+            "raw_name": "Svantesson, Sten",
+            "given_name": "Sten",
+            "surname": "Svantesson",
+            "role": "author"
+        },
+        {
+            "index": 60,
+            "raw_name": "Tadych, Mariusz",
+            "given_name": "Mariusz",
+            "surname": "Tadych",
+            "role": "author"
+        },
+        {
+            "index": 61,
+            "raw_name": "Takamatsu, Susumu",
+            "given_name": "Susumu",
+            "surname": "Takamatsu",
+            "role": "author"
+        },
+        {
+            "index": 62,
+            "raw_name": "Tamm, Heidi",
+            "given_name": "Heidi",
+            "surname": "Tamm",
+            "role": "author"
+        },
+        {
+            "index": 63,
+            "raw_name": "Taylor, AFS.",
+            "given_name": "AFS.",
+            "surname": "Taylor",
+            "role": "author"
+        },
+        {
+            "index": 64,
+            "raw_name": "Tedersoo, Leho",
+            "given_name": "Leho",
+            "surname": "Tedersoo",
+            "role": "author"
+        },
+        {
+            "index": 65,
+            "raw_name": "Telleria, M.T.",
+            "given_name": "M.T.",
+            "surname": "Telleria",
+            "role": "author"
+        },
+        {
+            "index": 66,
+            "raw_name": "Udayanga, Dhanushka",
+            "given_name": "Dhanushka",
+            "surname": "Udayanga",
+            "role": "author"
+        },
+        {
+            "index": 67,
+            "raw_name": "Unterseher, Martin",
+            "given_name": "Martin",
+            "surname": "Unterseher",
+            "role": "author"
+        },
+        {
+            "index": 68,
+            "raw_name": "Volobuev, Sergey",
+            "given_name": "Sergey",
+            "surname": "Volobuev",
+            "role": "author"
+        },
+        {
+            "index": 69,
+            "raw_name": "Weiss, Michael",
+            "given_name": "Michael",
+            "surname": "Weiss",
+            "role": "author"
+        },
+        {
+            "index": 70,
+            "raw_name": "Wurzbacher, Christian",
+            "given_name": "Christian",
+            "surname": "Wurzbacher",
+            "role": "author"
+        }
+    ],
+    "refs": [],
+    "abstracts": [
+        {
+            "content": "UNITE provides a unified way for delimiting, identifying, communicating, and working with DNA-based Species Hypotheses (SH). All fungal ITS sequences in the international nucleotide sequence databases are clustered to approximately the species level by applying a set of dynamic distance values (&lt;0.5 - 3.0%). All species hypotheses are given a unique, stable name in the form of a DOI, and their taxonomic and ecological annotations are verified through distributed, web-based third-party annotation efforts. SHs are connected to a taxon name and its classification as far as possible (phylum, class, order, etc.) by taking into account identifications for all sequences in the SH. An automatically or manually designated sequence is chosen to represent each such SH. These sequences are released (https://unite.ut.ee/repository.php) for use by the scientific community in, for example, local sequence similarity searches and next-generation sequencing analysis pipelines. The system and the data are updated automatically as the number of public fungal ITS sequences grows.",
+            "mimetype": "text/plain"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_06.json b/python/tests/files/datacite/datacite_result_06.json
new file mode 100644
index 00000000..61f2549d
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_06.json
@@ -0,0 +1,26 @@
+{
+    "extra": {
+        "datacite": {
+            "license": [
+                {
+                    "rights": "ETH-Bibliothek Z\u00fcrich, Graphische Sammlung / D 6220 / Public Domain Mark 1.0"
+                }
+            ]
+        }
+    },
+    "title": "Der Eifer (Sedulitas), Blatt 7 der Folge \"Die Tugenden\"",
+    "release_type": "article",
+    "release_year": 1590,
+    "ext_ids": {
+        "doi": "10.16903/ethz-grs-d_006220"
+    },
+    "contribs": [
+        {
+            "index": 0,
+            "raw_name": "Crispijn De Passe (Der \u00c4ltere) (1564-1637)",
+            "role": "author"
+        }
+    ],
+    "refs": [],
+    "abstracts": []
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_07.json b/python/tests/files/datacite/datacite_result_07.json
new file mode 100644
index 00000000..324bb663
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_07.json
@@ -0,0 +1,73 @@
+{
+    "extra": {
+        "datacite": {
+            "subjects": [
+                {
+                    "subject": "HEAT PUMP"
+                },
+                {
+                    "subject": "HOT WATER"
+                },
+                {
+                    "subject": "HEAT TRANSFER"
+                },
+                {
+                    "subject": "PERFORMANCE"
+                },
+                {
+                    "subject": "THERMAL STORAGE"
+                },
+                {
+                    "subject": "TANK"
+                },
+                {
+                    "subject": "MODEL"
+                }
+            ]
+        }
+    },
+    "title": "High efficient heat pump system using storage tanks to increase cop by means of the ISEC concept. 1: model validation.",
+    "release_type": "dataset",
+    "release_stage": "published",
+    "release_year": 2015,
+    "ext_ids": {
+        "doi": "10.18462/iir.icr.2015.0926"
+    },
+    "publisher": "International Institute of Refrigeration (IIR)",
+    "language": "en",
+    "contribs": [
+        {
+            "index": 0,
+            "raw_name": "ROTHUIZEN, E.",
+            "given_name": "E.",
+            "surname": "ROTHUIZEN",
+            "role": "author"
+        },
+        {
+            "index": 1,
+            "raw_name": "ELMEGAARD, B.",
+            "given_name": "B.",
+            "surname": "ELMEGAARD",
+            "role": "author"
+        },
+        {
+            "index": 2,
+            "raw_name": "MARKUSSEN W., B.",
+            "given_name": "B.",
+            "surname": "MARKUSSEN W.",
+            "role": "author"
+        },
+        {
+            "index": 3,
+            "raw_name": "Et Al.",
+            "role": "author"
+        }
+    ],
+    "refs": [],
+    "abstracts": [
+        {
+            "content": "The purpose of the ISEC concept is to provide a high-efficient heat pump system for hot water production. The ISEC concept uses two storage tanks for the water, one discharged and one charged. Hot water for the industrial process is tapped from the charged tank, while the other tank is charging. Charging is done by circulating the water in the tank through the condenser of a heat pump several times and thereby gradually heating the water. The charging is done with a higher mass flow rate than the discharging to reach several circulations of the water during the time frame of one discharging. This result in a lower condensing temperature than if the water was heated in one step. Two test setups were built, one to test the performance of the heat pump gradually heating the water and one to investigate the stratification in the storage tanks. Furthermore, a dynamic model of the system was implemented in Dymola, and validated by the use of test data from the two experimental setups. This paper shows that there is a good consistency between the model and the experimental tests.",
+            "mimetype": "text/plain"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_08.json b/python/tests/files/datacite/datacite_result_08.json
new file mode 100644
index 00000000..281c3679
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_08.json
@@ -0,0 +1,53 @@
+{
+    "extra": {
+        "datacite": {
+            "subjects": [
+                {
+                    "subject": "Land Economics/Use"
+                },
+                {
+                    "subject": "irrigation",
+                    "subjectScheme": "keyword"
+                },
+                {
+                    "subject": "industrialization",
+                    "subjectScheme": "keyword"
+                },
+                {
+                    "subject": "collective action",
+                    "subjectScheme": "keyword"
+                }
+            ]
+        }
+    },
+    "title": "Irrigation Policies under Rapid Industrialization and Labor Migration: Lessons from Japan, China and India",
+    "release_type": "article-journal",
+    "release_year": 2017,
+    "ext_ids": {
+        "doi": "10.22004/ag.econ.284864"
+    },
+    "language": "en",
+    "contribs": [
+        {
+            "index": 0,
+            "raw_name": "Kajisa, Kei",
+            "given_name": "Kei",
+            "surname": "Kajisa",
+            "role": "author"
+        },
+        {
+            "index": 1,
+            "raw_name": "Kajisa, Kei",
+            "given_name": "Kei",
+            "surname": "Kajisa",
+            "role": "author"
+        }
+    ],
+    "refs": [],
+    "abstracts": [
+        {
+            "content": "International society recognizes that the scarcity of fresh water is increasing and farming sectors suffer from lack of irrigation water. However, if we look at this issue with a framework of relative factor endowment, a different view will arise. In emerging states with rapid industrialization and labor migration, labor scarcity increases at a faster pace than that of irrigation water. Using the historical review of Japan\u2019s irrigation policies as well as the case studies of India and China, this paper shows that the introduction of policies which do not reflect the actual relative resource scarcity may mislead the development path. We argue that under increasing relative labor scarcity it is important to realize the substitution of capital for labor for surface irrigation system management and that the substitution needs public support because the service of surface irrigation system has some externalities. Through this argument, this paper also intends to shed the light back to the role of the state for local resource management which seems to be unfairly undervalued since the boom of community participatory approach in the 1980s.",
+            "mimetype": "text/plain"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_09.json b/python/tests/files/datacite/datacite_result_09.json
new file mode 100644
index 00000000..01f92f85
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_09.json
@@ -0,0 +1,35 @@
+{
+    "extra": {
+        "datacite": {
+            "subjects": [
+                {
+                    "subject": "Direktdiodenlasersysteme"
+                },
+                {
+                    "subject": "Physics",
+                    "subjectScheme": "linsearch"
+                }
+            ]
+        }
+    },
+    "title": "BrightLas : TP3.3. Module f\u00fcr Direktdiodenstrahlquellen bis 4kW und Untersuchungen zur Leistungsskalierung (Diodemodul) : zum Verbundvorhaben Direktdiodenlaseranlagen und -systeme (VP3) im F\u00f6rderschwerpunkt innovative regionale Wachstumskerne, BMBF : Abschlussbericht",
+    "release_type": "report",
+    "release_stage": "published",
+    "release_year": 2016,
+    "ext_ids": {
+        "doi": "10.2314/gbv:880813733"
+    },
+    "publisher": "[Lumics GmbH]",
+    "language": "de",
+    "contribs": [
+        {
+            "index": 0,
+            "raw_name": "Kirstaedter, Nils",
+            "given_name": "Nils",
+            "surname": "Kirstaedter",
+            "role": "author"
+        }
+    ],
+    "refs": [],
+    "abstracts": []
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_10.json b/python/tests/files/datacite/datacite_result_10.json
new file mode 100644
index 00000000..325facf7
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_10.json
@@ -0,0 +1,32 @@
+{
+    "extra": {
+        "datacite": {
+            "subjects": [
+                {
+                    "subject": "housing areas"
+                },
+                {
+                    "subject": "Dwellings"
+                }
+            ]
+        }
+    },
+    "title": "WPA household census for 210 E VERNON, Los Angeles",
+    "release_type": "dataset",
+    "release_stage": "published",
+    "release_year": 2012,
+    "ext_ids": {
+        "doi": "10.25549/wpacards-m6171"
+    },
+    "publisher": "University of Southern California Digital Library (USC.DL)",
+    "language": "en",
+    "contribs": [
+        {
+            "index": 0,
+            "raw_name": "Unknown",
+            "role": "author"
+        }
+    ],
+    "refs": [],
+    "abstracts": []
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_11.json b/python/tests/files/datacite/datacite_result_11.json
new file mode 100644
index 00000000..037c5ac2
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_11.json
@@ -0,0 +1,21 @@
+{
+    "extra": {},
+    "title": "N1 bei Safenwil",
+    "release_type": "graphic",
+    "release_stage": "published",
+    "release_year": 1965,
+    "ext_ids": {
+        "doi": "10.3932/ethz-a-000055869"
+    },
+    "publisher": "ETH-Bibliothek Z\u00fcrich, Bildarchiv",
+    "language": "de",
+    "contribs": [
+        {
+            "index": 0,
+            "raw_name": "Comet Photo AG (Z\u00fcrich)",
+            "role": "author"
+        }
+    ],
+    "refs": [],
+    "abstracts": []
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_12.json b/python/tests/files/datacite/datacite_result_12.json
new file mode 100644
index 00000000..6b6cad4a
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_12.json
@@ -0,0 +1,44 @@
+{
+    "extra": {},
+    "title": "Anthropometric and Physiological Profile of Mixed Martial Art Athletes: A Brief Review",
+    "release_type": "article-journal",
+    "release_stage": "published",
+    "release_date": "2019-06-14",
+    "release_year": 2019,
+    "ext_ids": {
+        "doi": "10.5167/uzh-171449"
+    },
+    "publisher": "MDPI Publishing",
+    "contribs": [
+        {
+            "index": 0,
+            "raw_name": "Spanias, Charalampos",
+            "given_name": "Charalampos",
+            "surname": "Spanias",
+            "role": "author"
+        },
+        {
+            "index": 1,
+            "raw_name": "Nikolaidis, Pantelis T",
+            "given_name": "Pantelis T",
+            "surname": "Nikolaidis",
+            "role": "author"
+        },
+        {
+            "index": 2,
+            "raw_name": "Rosemann, Thomas",
+            "given_name": "Thomas",
+            "surname": "Rosemann",
+            "role": "author"
+        },
+        {
+            "index": 3,
+            "raw_name": "Knechtle, Beat",
+            "given_name": "Beat",
+            "surname": "Knechtle",
+            "role": "author"
+        }
+    ],
+    "refs": [],
+    "abstracts": []
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_13.json b/python/tests/files/datacite/datacite_result_13.json
new file mode 100644
index 00000000..3da3816d
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_13.json
@@ -0,0 +1,28 @@
+{
+    "extra": {},
+    "title": "[M\u00fcssen wir des Gl\u00fccks uns sch\u00e4men?]",
+    "release_type": "article-journal",
+    "release_stage": "published",
+    "release_date": "1940-10-05",
+    "release_year": 1940,
+    "ext_ids": {
+        "doi": "10.5169/seals-314104"
+    },
+    "publisher": "Buchdruckerei B\u00fcchler & Co.",
+    "contribs": [
+        {
+            "index": 0,
+            "raw_name": "O.M.",
+            "role": "author"
+        },
+        {
+            "index": 1,
+            "raw_name": "Hiltbrunner, Hermann",
+            "given_name": "Hermann",
+            "surname": "Hiltbrunner",
+            "role": "author"
+        }
+    ],
+    "refs": [],
+    "abstracts": []
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_14.json b/python/tests/files/datacite/datacite_result_14.json
new file mode 100644
index 00000000..94c00472
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_14.json
@@ -0,0 +1,110 @@
+{
+    "extra": {
+        "datacite": {
+            "subjects": [
+                {
+                    "subject": "Crystal Structure"
+                },
+                {
+                    "subject": "Experimental 3D Coordinates"
+                },
+                {
+                    "subject": "Crystal System"
+                },
+                {
+                    "subject": "Space Group"
+                },
+                {
+                    "subject": "Cell Parameters"
+                },
+                {
+                    "subject": "Crystallography"
+                },
+                {
+                    "subject": "bis(mu~2~-5-(3,5-Di-t-butylphenyl)-15-(4-(2-(diphenylphosphino)ethynyl)phenyl)-2,8,12,18-tetrahexyl-3,7,13,17-tetramethylporphyrinato)-(5,15-bis(3,5-di-t-butylphenyl)-2,8,12,18-tetraethyl-3,7,13,17-tetramethylporphyrinato)-di-nickel-ruthenium chloroform solvate"
+                }
+            ],
+            "relations": [
+                {
+                    "relationType": "IsSupplementTo",
+                    "relatedIdentifier": "10.1021/ic034699w",
+                    "relatedIdentifierType": "DOI"
+                }
+            ]
+        }
+    },
+    "title": "CCDC 222635: Experimental Crystal Structure Determination",
+    "release_type": "dataset",
+    "release_stage": "published",
+    "release_year": 2004,
+    "ext_ids": {
+        "doi": "10.5517/cc7gns3"
+    },
+    "publisher": "Cambridge Crystallographic Data Centre",
+    "language": "en",
+    "contribs": [
+        {
+            "index": 0,
+            "raw_name": "Stulz, E.",
+            "given_name": "E.",
+            "surname": "Stulz",
+            "role": "author"
+        },
+        {
+            "index": 1,
+            "raw_name": "Scott, S.M.",
+            "given_name": "S.M.",
+            "surname": "Scott",
+            "role": "author"
+        },
+        {
+            "index": 2,
+            "raw_name": "Ng, Yiu-Fai",
+            "given_name": "Yiu-Fai",
+            "surname": "Ng",
+            "role": "author"
+        },
+        {
+            "index": 3,
+            "raw_name": "Bond, A.D.",
+            "given_name": "A.D.",
+            "surname": "Bond",
+            "role": "author"
+        },
+        {
+            "index": 4,
+            "raw_name": "Teat, S.J.",
+            "given_name": "S.J.",
+            "surname": "Teat",
+            "role": "author"
+        },
+        {
+            "index": 5,
+            "raw_name": "Darling, S.L.",
+            "given_name": "S.L.",
+            "surname": "Darling",
+            "role": "author"
+        },
+        {
+            "index": 6,
+            "raw_name": "Feeder, N.",
+            "given_name": "N.",
+            "surname": "Feeder",
+            "role": "author"
+        },
+        {
+            "index": 7,
+            "raw_name": "Sanders, J.K.M.",
+            "given_name": "J.K.M.",
+            "surname": "Sanders",
+            "role": "author"
+        }
+    ],
+    "refs": [],
+    "abstracts": [
+        {
+            "content": "An entry from the Cambridge Structural Database, the world\u2019s repository for small molecule crystal structures. The entry contains experimental data from a crystal diffraction study. The deposited dataset for this entry is freely available from the CCDC and typically includes 3D coordinates, cell parameters, space group, experimental conditions and quality measures.",
+            "mimetype": "text/plain"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_15.json b/python/tests/files/datacite/datacite_result_15.json
new file mode 100644
index 00000000..0614f6ba
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_15.json
@@ -0,0 +1,22 @@
+{
+    "extra": {},
+    "title": "Parramore Island of the Virginia Coast Reserve Permanent Plot Resurvey: Tree data 1997",
+    "release_type": "dataset",
+    "release_stage": "published",
+    "release_year": 2017,
+    "ext_ids": {
+        "doi": "10.6073/pasta/95296d8416aae24f3d39b4ecb27f0b28"
+    },
+    "publisher": "Environmental Data Initiative",
+    "contribs": [
+        {
+            "index": 0,
+            "raw_name": "Richardson, David",
+            "given_name": "David",
+            "surname": "Richardson",
+            "role": "author"
+        }
+    ],
+    "refs": [],
+    "abstracts": []
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_16.json b/python/tests/files/datacite/datacite_result_16.json
new file mode 100644
index 00000000..1d861cf6
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_16.json
@@ -0,0 +1,31 @@
+{
+    "extra": {
+        "datacite": {
+            "license": [
+                {
+                    "rights": "CC-BY",
+                    "rightsUri": "http://creativecommons.org/licenses/by/3.0/us"
+                }
+            ]
+        }
+    },
+    "title": "Testing the Connectivity of Networks",
+    "release_type": "dataset",
+    "release_stage": "published",
+    "release_year": 2014,
+    "ext_ids": {
+        "doi": "10.6084/m9.figshare.1282478"
+    },
+    "publisher": "Figshare",
+    "contribs": [
+        {
+            "index": 0,
+            "raw_name": "Sochi, Taha",
+            "given_name": "Taha",
+            "surname": "Sochi",
+            "role": "author"
+        }
+    ],
+    "refs": [],
+    "abstracts": []
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_17.json b/python/tests/files/datacite/datacite_result_17.json
new file mode 100644
index 00000000..0852a09e
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_17.json
@@ -0,0 +1,20 @@
+{
+    "extra": {},
+    "title": "gel_BSA-FITC_Markov_segmntation0343.tif",
+    "release_type": "dataset",
+    "release_stage": "published",
+    "release_year": 2018,
+    "ext_ids": {
+        "doi": "10.7910/dvn/tsqfwc/yytj22"
+    },
+    "publisher": "Harvard Dataverse",
+    "contribs": [
+        {
+            "index": 0,
+            "raw_name": "Di Giovanna, Antonino Paolo (University Of Florence)",
+            "role": "author"
+        }
+    ],
+    "refs": [],
+    "abstracts": []
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_18.json b/python/tests/files/datacite/datacite_result_18.json
new file mode 100644
index 00000000..12ab39fe
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_18.json
@@ -0,0 +1,15 @@
+{
+    "extra": {},
+    "title": "Eastern questionnaire, answer sheet for Interviewee 53215, page 064",
+    "release_type": "article",
+    "release_stage": "published",
+    "release_date": "2017-08-21",
+    "release_year": 2017,
+    "ext_ids": {
+        "doi": "10.7916/d81z522m"
+    },
+    "publisher": "Columbia University",
+    "contribs": [],
+    "refs": [],
+    "abstracts": []
+}
diff --git a/python/tests/files/datacite/datacite_result_19.json b/python/tests/files/datacite/datacite_result_19.json
new file mode 100644
index 00000000..1505db92
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_19.json
@@ -0,0 +1,15 @@
+{
+    "extra": {},
+    "title": "Eastern questionnaire, answer sheet for Interviewee 55236, page 092",
+    "release_type": "article",
+    "release_stage": "published",
+    "release_date": "2017-08-24",
+    "release_year": 2017,
+    "ext_ids": {
+        "doi": "10.7916/d86x0cg1"
+    },
+    "publisher": "Columbia University",
+    "contribs": [],
+    "refs": [],
+    "abstracts": []
+}
diff --git a/python/tests/files/datacite/datacite_result_20.json b/python/tests/files/datacite/datacite_result_20.json
new file mode 100644
index 00000000..1868eede
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_20.json
@@ -0,0 +1,14 @@
+{
+    "extra": {},
+    "title": "<h1>Eastern questionnaire</h1>",
+    "release_type": "article",
+    "release_stage": "published",
+    "release_date": "2017-08-24",
+    "release_year": 2017,
+    "ext_ids": {
+        "doi": "10.7916/d86x0cg1"
+    },
+    "contribs": [],
+    "refs": [],
+    "abstracts": []
+}
diff --git a/python/tests/files/datacite/datacite_result_21.json b/python/tests/files/datacite/datacite_result_21.json
new file mode 100644
index 00000000..9214065a
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_21.json
@@ -0,0 +1,15 @@
+{
+    "extra": {},
+    "title": "ABC",
+    "release_type": "article",
+    "release_stage": "published",
+    "release_date": "2017-08-24",
+    "release_year": 2017,
+    "ext_ids": {
+        "doi": "10.7916/d86x0cg1"
+    },
+    "contribs": [],
+    "refs": [],
+    "abstracts": [],
+    "language": "de"
+}
diff --git a/python/tests/files/datacite/datacite_result_22.json b/python/tests/files/datacite/datacite_result_22.json
new file mode 100644
index 00000000..e9939e09
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_22.json
@@ -0,0 +1,22 @@
+{
+    "extra": {},
+    "title": "ABC",
+    "release_type": "article",
+    "release_stage": "published",
+    "release_date": "2017-08-24",
+    "release_year": 2017,
+    "ext_ids": {
+        "doi": "10.7916/d86x0cg1"
+    },
+    "contribs": [
+        {
+            "raw_affiliation": "Department of pataphysics",
+            "index": 0,
+            "raw_name": "Anton Welch",
+            "role": "author"
+        }
+    ],
+    "refs": [],
+    "abstracts": [],
+    "language": "de"
+}
diff --git a/python/tests/files/datacite/datacite_result_23.json b/python/tests/files/datacite/datacite_result_23.json
new file mode 100644
index 00000000..2bf66eae
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_23.json
@@ -0,0 +1,22 @@
+{
+    "extra": {},
+    "title": "ABC",
+    "release_type": "article",
+    "release_stage": "published",
+    "release_date": "2017-08-24",
+    "release_year": 2017,
+    "ext_ids": {
+        "doi": "10.7916/d86x0cg1-xxx"
+    },
+    "contribs": [
+        {
+            "index": 0,
+            "raw_name": "Anton Welch",
+            "role": "author",
+            "raw_affiliation": "Department of pataphysics"
+        }
+    ],
+    "refs": [],
+    "abstracts": [],
+    "language": "de"
+}
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index bc47a185..cdc165d7 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -7,7 +7,8 @@ import datetime
 import pytest
 import gzip
 from fatcat_tools.importers import DataciteImporter, JsonLinePusher
-from fatcat_tools.importers.datacite import find_original_language_title, parse_datacite_titles, parse_datacite_dates
+from fatcat_tools.importers.datacite import find_original_language_title, parse_datacite_titles, parse_datacite_dates, clean_doi
+from fatcat_tools.transforms import entity_to_dict
 from fixtures import api
 import json
 
@@ -270,3 +271,26 @@ def test_datacite_dict_parse(datacite_importer):
         assert r.contribs[0].given_name == None
         assert r.contribs[0].surname == None
         assert len(r.refs) == 0
+
+def test_clean_doi():
+    assert clean_doi("123") == "123"  # nothing to clean, returned as is
+    assert clean_doi("10.25513/1812-3996.2017.1.34\u201342") == "10.25513/1812-3996.2017.1.34-42"  # U+2013 becomes "-"
+
+def test_datacite_conversions(datacite_importer):
+    """
+    Convert each datacite sample document into a release entity and compare it
+    with the stored expected JSON. The sample count (24) is hardcoded for now.
+    """
+    datacite_importer.debug = True
+    for i in range(24):
+        src = 'tests/files/datacite/datacite_doc_{0:02d}.json'.format(i)
+        dst = 'tests/files/datacite/datacite_result_{0:02d}.json'.format(i)
+        print('testing mapping from {} => {}'.format(src, dst))
+        with open(src, 'r') as f:
+            release = datacite_importer.parse_record(json.load(f))
+        result = entity_to_dict(release)
+        with open(dst, 'r') as f:
+            expected = json.load(f)
+
+        assert result == expected, 'output mismatch in {}'.format(dst)
+
-- 
cgit v1.2.3


From be43049db0da2df4343bd5e1392d6c5201fc67d0 Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Thu, 2 Jan 2020 18:11:35 +0100
Subject: datacite: address raw_name index form comment

> The convention for display_name and raw_name is to be how the name
would normally be printed, not in index form (surname comma given_name).
So we might need to un-encode names like "Tricart, Pierre".

Use an additional `index_form_to_display_name` function to convert index
form to display form, heuristically.
---
 python/fatcat_tools/importers/datacite.py          |  43 +++++++
 .../tests/files/datacite/datacite_result_00.json   |   4 +-
 .../tests/files/datacite/datacite_result_01.json   |   2 +-
 .../tests/files/datacite/datacite_result_02.json   |   2 +-
 .../tests/files/datacite/datacite_result_04.json   |   2 +-
 .../tests/files/datacite/datacite_result_05.json   | 142 ++++++++++-----------
 .../tests/files/datacite/datacite_result_07.json   |   6 +-
 .../tests/files/datacite/datacite_result_08.json   |   4 +-
 .../tests/files/datacite/datacite_result_09.json   |   2 +-
 .../tests/files/datacite/datacite_result_12.json   |   8 +-
 .../tests/files/datacite/datacite_result_13.json   |   2 +-
 .../tests/files/datacite/datacite_result_14.json   |  16 +--
 .../tests/files/datacite/datacite_result_15.json   |   2 +-
 .../tests/files/datacite/datacite_result_16.json   |   2 +-
 .../tests/files/datacite/datacite_result_18.json   |   2 +-
 .../tests/files/datacite/datacite_result_19.json   |   2 +-
 .../tests/files/datacite/datacite_result_20.json   |   2 +-
 .../tests/files/datacite/datacite_result_21.json   |   6 +-
 .../tests/files/datacite/datacite_result_22.json   |  10 +-
 .../tests/files/datacite/datacite_result_23.json   |   6 +-
 python/tests/import_datacite.py                    |  18 ++-
 21 files changed, 171 insertions(+), 112 deletions(-)

(limited to 'python/tests')

diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index a03587c0..bd135569 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -331,6 +331,10 @@ class DataciteImporter(EntityImporter):
                 if name in ('(:Unav)', 'NA', 'NN', '(:Null)'):
                     continue
 
+                # Unpack name, if we have an index form (e.g. 'Razis, Panos A') into 'Panos A Razis'.
+                if name:
+                    name = index_form_to_display_name(name)
+
                 contribs.append(
                     fatcat_openapi_client.ReleaseContrib(
                         creator_id=creator_id,
@@ -859,3 +863,42 @@ def clean_doi(doi):
         doi = doi.replace(c, "-")
     return doi
 
+def index_form_to_display_name(s):
+    """
+    Heuristically convert an index form name ('Razis, Panos A') into display
+    form ('Panos A Razis'). Anything that does not look like a single
+    'surname, given name' pair is returned unchanged.
+    """
+    # Only exactly one comma is unambiguous; more commas are usually lists,
+    # e.g. "Dr. Hina, Dr. Muhammad Usman Shahid, Dr. Muhammad Zeeshan Khan".
+    if s.count(',') != 1:
+        return s
+    # Parentheses and asterisks hint at annotations, not plain names.
+    if any(char in s for char in '()*'):
+        return s
+    # Lowercased markers of institutional names ('service' covers 'services').
+    stopwords = (
+        'archive',
+        'collection',
+        'coordinator',
+        'department',
+        'germany',
+        'international',
+        'national',
+        'netherlands',
+        'office',
+        'organisation',
+        'organization',
+        'service',
+        'united states',
+        'university',
+        'verein',
+        'volkshochschule',
+    )
+    lowered = s.lower()
+    if any(stop in lowered for stop in stopwords):
+        return s
+
+    # Strip the result, so an empty half ('Razis,') leaves no stray space.
+    surname, given = (part.strip() for part in s.split(','))
+    return '{} {}'.format(given, surname).strip()
diff --git a/python/tests/files/datacite/datacite_result_00.json b/python/tests/files/datacite/datacite_result_00.json
index 085e23f3..a4b28076 100644
--- a/python/tests/files/datacite/datacite_result_00.json
+++ b/python/tests/files/datacite/datacite_result_00.json
@@ -32,14 +32,14 @@
     "contribs": [
         {
             "index": 0,
-            "raw_name": "Li, Qian-Jin",
+            "raw_name": "Qian-Jin Li",
             "given_name": "Qian-Jin",
             "surname": "Li",
             "role": "author"
         },
         {
             "index": 1,
-            "raw_name": "Yang, Chun-Long",
+            "raw_name": "Chun-Long Yang",
             "given_name": "Chun-Long",
             "surname": "Yang",
             "role": "author"
diff --git a/python/tests/files/datacite/datacite_result_01.json b/python/tests/files/datacite/datacite_result_01.json
index f8c6b930..46be2515 100644
--- a/python/tests/files/datacite/datacite_result_01.json
+++ b/python/tests/files/datacite/datacite_result_01.json
@@ -21,7 +21,7 @@
     "contribs": [
         {
             "index": 0,
-            "raw_name": "Dargenty, G.",
+            "raw_name": "G. Dargenty",
             "given_name": "G.",
             "surname": "Dargenty",
             "role": "author"
diff --git a/python/tests/files/datacite/datacite_result_02.json b/python/tests/files/datacite/datacite_result_02.json
index f8b85f38..bdcb4951 100644
--- a/python/tests/files/datacite/datacite_result_02.json
+++ b/python/tests/files/datacite/datacite_result_02.json
@@ -25,7 +25,7 @@
     "contribs": [
         {
             "index": 0,
-            "raw_name": "Weyersberg, Albert",
+            "raw_name": "Albert Weyersberg",
             "given_name": "Albert",
             "surname": "Weyersberg",
             "role": "author"
diff --git a/python/tests/files/datacite/datacite_result_04.json b/python/tests/files/datacite/datacite_result_04.json
index 7ca70d6c..54b19ef9 100644
--- a/python/tests/files/datacite/datacite_result_04.json
+++ b/python/tests/files/datacite/datacite_result_04.json
@@ -12,7 +12,7 @@
     "contribs": [
         {
             "index": 0,
-            "raw_name": "Nicollerat, Marc Andre",
+            "raw_name": "Marc Andre Nicollerat",
             "given_name": "Marc Andre",
             "surname": "Nicollerat",
             "role": "author"
diff --git a/python/tests/files/datacite/datacite_result_05.json b/python/tests/files/datacite/datacite_result_05.json
index e61769de..a790c26e 100644
--- a/python/tests/files/datacite/datacite_result_05.json
+++ b/python/tests/files/datacite/datacite_result_05.json
@@ -24,497 +24,497 @@
     "contribs": [
         {
             "index": 0,
-            "raw_name": "K\u00f5ljalg, Urmas",
+            "raw_name": "Urmas K\u00f5ljalg",
             "given_name": "Urmas",
             "surname": "K\u00f5ljalg",
             "role": "author"
         },
         {
             "index": 1,
-            "raw_name": "Abarenkov, Kessy",
+            "raw_name": "Kessy Abarenkov",
             "given_name": "Kessy",
             "surname": "Abarenkov",
             "role": "author"
         },
         {
             "index": 2,
-            "raw_name": "Nilsson, R. Henrik",
+            "raw_name": "R. Henrik Nilsson",
             "given_name": "R. Henrik",
             "surname": "Nilsson",
             "role": "author"
         },
         {
             "index": 3,
-            "raw_name": "Larsson, Karl-Henrik",
+            "raw_name": "Karl-Henrik Larsson",
             "given_name": "Karl-Henrik",
             "surname": "Larsson",
             "role": "author"
         },
         {
             "index": 4,
-            "raw_name": "Aas, Anders Bj\u00f8rnsgard",
+            "raw_name": "Anders Bj\u00f8rnsgard Aas",
             "given_name": "Anders Bj\u00f8rnsgard",
             "surname": "Aas",
             "role": "author"
         },
         {
             "index": 5,
-            "raw_name": "Adams, Rachel",
+            "raw_name": "Rachel Adams",
             "given_name": "Rachel",
             "surname": "Adams",
             "role": "author"
         },
         {
             "index": 6,
-            "raw_name": "Alves, Artur",
+            "raw_name": "Artur Alves",
             "given_name": "Artur",
             "surname": "Alves",
             "role": "author"
         },
         {
             "index": 7,
-            "raw_name": "Ammirati, Joseph F.",
+            "raw_name": "Joseph F. Ammirati",
             "given_name": "Joseph F.",
             "surname": "Ammirati",
             "role": "author"
         },
         {
             "index": 8,
-            "raw_name": "Arnold, A. Elizabeth",
+            "raw_name": "A. Elizabeth Arnold",
             "given_name": "A. Elizabeth",
             "surname": "Arnold",
             "role": "author"
         },
         {
             "index": 9,
-            "raw_name": "Bahram, Mohammad",
+            "raw_name": "Mohammad Bahram",
             "given_name": "Mohammad",
             "surname": "Bahram",
             "role": "author"
         },
         {
             "index": 10,
-            "raw_name": "Bengtsson-Palme, Johan",
+            "raw_name": "Johan Bengtsson-Palme",
             "given_name": "Johan",
             "surname": "Bengtsson-Palme",
             "role": "author"
         },
         {
             "index": 11,
-            "raw_name": "Berlin, Anna",
+            "raw_name": "Anna Berlin",
             "given_name": "Anna",
             "surname": "Berlin",
             "role": "author"
         },
         {
             "index": 12,
-            "raw_name": "Botnen, Synn\u00f8ve",
+            "raw_name": "Synn\u00f8ve Botnen",
             "given_name": "Synn\u00f8ve",
             "surname": "Botnen",
             "role": "author"
         },
         {
             "index": 13,
-            "raw_name": "Bourlat, Sarah",
+            "raw_name": "Sarah Bourlat",
             "given_name": "Sarah",
             "surname": "Bourlat",
             "role": "author"
         },
         {
             "index": 14,
-            "raw_name": "Cheeke, Tanya",
+            "raw_name": "Tanya Cheeke",
             "given_name": "Tanya",
             "surname": "Cheeke",
             "role": "author"
         },
         {
             "index": 15,
-            "raw_name": "Dima, B\u00e1lint",
+            "raw_name": "B\u00e1lint Dima",
             "given_name": "B\u00e1lint",
             "surname": "Dima",
             "role": "author"
         },
         {
             "index": 16,
-            "raw_name": "Drenkhan, Rein",
+            "raw_name": "Rein Drenkhan",
             "given_name": "Rein",
             "surname": "Drenkhan",
             "role": "author"
         },
         {
             "index": 17,
-            "raw_name": "Duarte, Camila",
+            "raw_name": "Camila Duarte",
             "given_name": "Camila",
             "surname": "Duarte",
             "role": "author"
         },
         {
             "index": 18,
-            "raw_name": "Due\u00f1as, Margarita",
+            "raw_name": "Margarita Due\u00f1as",
             "given_name": "Margarita",
             "surname": "Due\u00f1as",
             "role": "author"
         },
         {
             "index": 19,
-            "raw_name": "Eberhardt, Ursula",
+            "raw_name": "Ursula Eberhardt",
             "given_name": "Ursula",
             "surname": "Eberhardt",
             "role": "author"
         },
         {
             "index": 20,
-            "raw_name": "Friberg, Hanna",
+            "raw_name": "Hanna Friberg",
             "given_name": "Hanna",
             "surname": "Friberg",
             "role": "author"
         },
         {
             "index": 21,
-            "raw_name": "Fr\u00f8slev, Tobias G.",
+            "raw_name": "Tobias G. Fr\u00f8slev",
             "given_name": "Tobias G.",
             "surname": "Fr\u00f8slev",
             "role": "author"
         },
         {
             "index": 22,
-            "raw_name": "Garnica, Sigisfredo",
+            "raw_name": "Sigisfredo Garnica",
             "given_name": "Sigisfredo",
             "surname": "Garnica",
             "role": "author"
         },
         {
             "index": 23,
-            "raw_name": "Geml, J\u00f3zsef",
+            "raw_name": "J\u00f3zsef Geml",
             "given_name": "J\u00f3zsef",
             "surname": "Geml",
             "role": "author"
         },
         {
             "index": 24,
-            "raw_name": "Ghobad-Nejhad, Masoomeh",
+            "raw_name": "Masoomeh Ghobad-Nejhad",
             "given_name": "Masoomeh",
             "surname": "Ghobad-Nejhad",
             "role": "author"
         },
         {
             "index": 25,
-            "raw_name": "Grebenc, Tine",
+            "raw_name": "Tine Grebenc",
             "given_name": "Tine",
             "surname": "Grebenc",
             "role": "author"
         },
         {
             "index": 26,
-            "raw_name": "Griffith, Gareth W.",
+            "raw_name": "Gareth W. Griffith",
             "given_name": "Gareth W.",
             "surname": "Griffith",
             "role": "author"
         },
         {
             "index": 27,
-            "raw_name": "Hampe, Felix",
+            "raw_name": "Felix Hampe",
             "given_name": "Felix",
             "surname": "Hampe",
             "role": "author"
         },
         {
             "index": 28,
-            "raw_name": "Kennedy, Peter",
+            "raw_name": "Peter Kennedy",
             "given_name": "Peter",
             "surname": "Kennedy",
             "role": "author"
         },
         {
             "index": 29,
-            "raw_name": "Khomich, Maryia",
+            "raw_name": "Maryia Khomich",
             "given_name": "Maryia",
             "surname": "Khomich",
             "role": "author"
         },
         {
             "index": 30,
-            "raw_name": "Kohout, Petr",
+            "raw_name": "Petr Kohout",
             "given_name": "Petr",
             "surname": "Kohout",
             "role": "author"
         },
         {
             "index": 31,
-            "raw_name": "Kollom, Anu",
+            "raw_name": "Anu Kollom",
             "given_name": "Anu",
             "surname": "Kollom",
             "role": "author"
         },
         {
             "index": 32,
-            "raw_name": "Larsson, Ellen",
+            "raw_name": "Ellen Larsson",
             "given_name": "Ellen",
             "surname": "Larsson",
             "role": "author"
         },
         {
             "index": 33,
-            "raw_name": "Laszlo, Irinyi",
+            "raw_name": "Irinyi Laszlo",
             "given_name": "Irinyi",
             "surname": "Laszlo",
             "role": "author"
         },
         {
             "index": 34,
-            "raw_name": "Leavitt, Steven",
+            "raw_name": "Steven Leavitt",
             "given_name": "Steven",
             "surname": "Leavitt",
             "role": "author"
         },
         {
             "index": 35,
-            "raw_name": "Liimatainen, Kare",
+            "raw_name": "Kare Liimatainen",
             "given_name": "Kare",
             "surname": "Liimatainen",
             "role": "author"
         },
         {
             "index": 36,
-            "raw_name": "Lindahl, Bj\u00f6rn",
+            "raw_name": "Bj\u00f6rn Lindahl",
             "given_name": "Bj\u00f6rn",
             "surname": "Lindahl",
             "role": "author"
         },
         {
             "index": 37,
-            "raw_name": "Lodge, Deborah J.",
+            "raw_name": "Deborah J. Lodge",
             "given_name": "Deborah J.",
             "surname": "Lodge",
             "role": "author"
         },
         {
             "index": 38,
-            "raw_name": "Lumbsch, Helge Thorsten",
+            "raw_name": "Helge Thorsten Lumbsch",
             "given_name": "Helge Thorsten",
             "surname": "Lumbsch",
             "role": "author"
         },
         {
             "index": 39,
-            "raw_name": "Mart\u00edn Esteban, Mar\u00eda Paz",
+            "raw_name": "Mar\u00eda Paz Mart\u00edn Esteban",
             "given_name": "Mar\u00eda Paz",
             "surname": "Mart\u00edn Esteban",
             "role": "author"
         },
         {
             "index": 40,
-            "raw_name": "Meyer, Wieland",
+            "raw_name": "Wieland Meyer",
             "given_name": "Wieland",
             "surname": "Meyer",
             "role": "author"
         },
         {
             "index": 41,
-            "raw_name": "Miettinen, Otto",
+            "raw_name": "Otto Miettinen",
             "given_name": "Otto",
             "surname": "Miettinen",
             "role": "author"
         },
         {
             "index": 42,
-            "raw_name": "Nguyen, Nhu",
+            "raw_name": "Nhu Nguyen",
             "given_name": "Nhu",
             "surname": "Nguyen",
             "role": "author"
         },
         {
             "index": 43,
-            "raw_name": "Niskanen, Tuula",
+            "raw_name": "Tuula Niskanen",
             "given_name": "Tuula",
             "surname": "Niskanen",
             "role": "author"
         },
         {
             "index": 44,
-            "raw_name": "Oono, Ryoko",
+            "raw_name": "Ryoko Oono",
             "given_name": "Ryoko",
             "surname": "Oono",
             "role": "author"
         },
         {
             "index": 45,
-            "raw_name": "\u00d6pik, Maarja",
+            "raw_name": "Maarja \u00d6pik",
             "given_name": "Maarja",
             "surname": "\u00d6pik",
             "role": "author"
         },
         {
             "index": 46,
-            "raw_name": "Ordynets, Alexander",
+            "raw_name": "Alexander Ordynets",
             "given_name": "Alexander",
             "surname": "Ordynets",
             "role": "author"
         },
         {
             "index": 47,
-            "raw_name": "Paw\u0142owska, Julia",
+            "raw_name": "Julia Paw\u0142owska",
             "given_name": "Julia",
             "surname": "Paw\u0142owska",
             "role": "author"
         },
         {
             "index": 48,
-            "raw_name": "Peintner, Ursula",
+            "raw_name": "Ursula Peintner",
             "given_name": "Ursula",
             "surname": "Peintner",
             "role": "author"
         },
         {
             "index": 49,
-            "raw_name": "Pereira, Olinto Liparini",
+            "raw_name": "Olinto Liparini Pereira",
             "given_name": "Olinto Liparini",
             "surname": "Pereira",
             "role": "author"
         },
         {
             "index": 50,
-            "raw_name": "Pinho, Danilo Batista",
+            "raw_name": "Danilo Batista Pinho",
             "given_name": "Danilo Batista",
             "surname": "Pinho",
             "role": "author"
         },
         {
             "index": 51,
-            "raw_name": "P\u00f5ldmaa, Kadri",
+            "raw_name": "Kadri P\u00f5ldmaa",
             "given_name": "Kadri",
             "surname": "P\u00f5ldmaa",
             "role": "author"
         },
         {
             "index": 52,
-            "raw_name": "Runnel, Kadri",
+            "raw_name": "Kadri Runnel",
             "given_name": "Kadri",
             "surname": "Runnel",
             "role": "author"
         },
         {
             "index": 53,
-            "raw_name": "Ryberg, Martin",
+            "raw_name": "Martin Ryberg",
             "given_name": "Martin",
             "surname": "Ryberg",
             "role": "author"
         },
         {
             "index": 54,
-            "raw_name": "Saar, Irja",
+            "raw_name": "Irja Saar",
             "given_name": "Irja",
             "surname": "Saar",
             "role": "author"
         },
         {
             "index": 55,
-            "raw_name": "Sanli, Kemal",
+            "raw_name": "Kemal Sanli",
             "given_name": "Kemal",
             "surname": "Sanli",
             "role": "author"
         },
         {
             "index": 56,
-            "raw_name": "Scott, James",
+            "raw_name": "James Scott",
             "given_name": "James",
             "surname": "Scott",
             "role": "author"
         },
         {
             "index": 57,
-            "raw_name": "Spirin, Viacheslav",
+            "raw_name": "Viacheslav Spirin",
             "given_name": "Viacheslav",
             "surname": "Spirin",
             "role": "author"
         },
         {
             "index": 58,
-            "raw_name": "Suija, Ave",
+            "raw_name": "Ave Suija",
             "given_name": "Ave",
             "surname": "Suija",
             "role": "author"
         },
         {
             "index": 59,
-            "raw_name": "Svantesson, Sten",
+            "raw_name": "Sten Svantesson",
             "given_name": "Sten",
             "surname": "Svantesson",
             "role": "author"
         },
         {
             "index": 60,
-            "raw_name": "Tadych, Mariusz",
+            "raw_name": "Mariusz Tadych",
             "given_name": "Mariusz",
             "surname": "Tadych",
             "role": "author"
         },
         {
             "index": 61,
-            "raw_name": "Takamatsu, Susumu",
+            "raw_name": "Susumu Takamatsu",
             "given_name": "Susumu",
             "surname": "Takamatsu",
             "role": "author"
         },
         {
             "index": 62,
-            "raw_name": "Tamm, Heidi",
+            "raw_name": "Heidi Tamm",
             "given_name": "Heidi",
             "surname": "Tamm",
             "role": "author"
         },
         {
             "index": 63,
-            "raw_name": "Taylor, AFS.",
+            "raw_name": "AFS. Taylor",
             "given_name": "AFS.",
             "surname": "Taylor",
             "role": "author"
         },
         {
             "index": 64,
-            "raw_name": "Tedersoo, Leho",
+            "raw_name": "Leho Tedersoo",
             "given_name": "Leho",
             "surname": "Tedersoo",
             "role": "author"
         },
         {
             "index": 65,
-            "raw_name": "Telleria, M.T.",
+            "raw_name": "M.T. Telleria",
             "given_name": "M.T.",
             "surname": "Telleria",
             "role": "author"
         },
         {
             "index": 66,
-            "raw_name": "Udayanga, Dhanushka",
+            "raw_name": "Dhanushka Udayanga",
             "given_name": "Dhanushka",
             "surname": "Udayanga",
             "role": "author"
         },
         {
             "index": 67,
-            "raw_name": "Unterseher, Martin",
+            "raw_name": "Martin Unterseher",
             "given_name": "Martin",
             "surname": "Unterseher",
             "role": "author"
         },
         {
             "index": 68,
-            "raw_name": "Volobuev, Sergey",
+            "raw_name": "Sergey Volobuev",
             "given_name": "Sergey",
             "surname": "Volobuev",
             "role": "author"
         },
         {
             "index": 69,
-            "raw_name": "Weiss, Michael",
+            "raw_name": "Michael Weiss",
             "given_name": "Michael",
             "surname": "Weiss",
             "role": "author"
         },
         {
             "index": 70,
-            "raw_name": "Wurzbacher, Christian",
+            "raw_name": "Christian Wurzbacher",
             "given_name": "Christian",
             "surname": "Wurzbacher",
             "role": "author"
diff --git a/python/tests/files/datacite/datacite_result_07.json b/python/tests/files/datacite/datacite_result_07.json
index 324bb663..f572263c 100644
--- a/python/tests/files/datacite/datacite_result_07.json
+++ b/python/tests/files/datacite/datacite_result_07.json
@@ -38,21 +38,21 @@
     "contribs": [
         {
             "index": 0,
-            "raw_name": "ROTHUIZEN, E.",
+            "raw_name": "E. ROTHUIZEN",
             "given_name": "E.",
             "surname": "ROTHUIZEN",
             "role": "author"
         },
         {
             "index": 1,
-            "raw_name": "ELMEGAARD, B.",
+            "raw_name": "B. ELMEGAARD",
             "given_name": "B.",
             "surname": "ELMEGAARD",
             "role": "author"
         },
         {
             "index": 2,
-            "raw_name": "MARKUSSEN W., B.",
+            "raw_name": "B. MARKUSSEN W.",
             "given_name": "B.",
             "surname": "MARKUSSEN W.",
             "role": "author"
diff --git a/python/tests/files/datacite/datacite_result_08.json b/python/tests/files/datacite/datacite_result_08.json
index 281c3679..581ca1eb 100644
--- a/python/tests/files/datacite/datacite_result_08.json
+++ b/python/tests/files/datacite/datacite_result_08.json
@@ -30,14 +30,14 @@
     "contribs": [
         {
             "index": 0,
-            "raw_name": "Kajisa, Kei",
+            "raw_name": "Kei Kajisa",
             "given_name": "Kei",
             "surname": "Kajisa",
             "role": "author"
         },
         {
             "index": 1,
-            "raw_name": "Kajisa, Kei",
+            "raw_name": "Kei Kajisa",
             "given_name": "Kei",
             "surname": "Kajisa",
             "role": "author"
diff --git a/python/tests/files/datacite/datacite_result_09.json b/python/tests/files/datacite/datacite_result_09.json
index 01f92f85..db103d2b 100644
--- a/python/tests/files/datacite/datacite_result_09.json
+++ b/python/tests/files/datacite/datacite_result_09.json
@@ -24,7 +24,7 @@
     "contribs": [
         {
             "index": 0,
-            "raw_name": "Kirstaedter, Nils",
+            "raw_name": "Nils Kirstaedter",
             "given_name": "Nils",
             "surname": "Kirstaedter",
             "role": "author"
diff --git a/python/tests/files/datacite/datacite_result_12.json b/python/tests/files/datacite/datacite_result_12.json
index 6b6cad4a..192062e3 100644
--- a/python/tests/files/datacite/datacite_result_12.json
+++ b/python/tests/files/datacite/datacite_result_12.json
@@ -12,28 +12,28 @@
     "contribs": [
         {
             "index": 0,
-            "raw_name": "Spanias, Charalampos",
+            "raw_name": "Charalampos Spanias",
             "given_name": "Charalampos",
             "surname": "Spanias",
             "role": "author"
         },
         {
             "index": 1,
-            "raw_name": "Nikolaidis, Pantelis T",
+            "raw_name": "Pantelis T Nikolaidis",
             "given_name": "Pantelis T",
             "surname": "Nikolaidis",
             "role": "author"
         },
         {
             "index": 2,
-            "raw_name": "Rosemann, Thomas",
+            "raw_name": "Thomas Rosemann",
             "given_name": "Thomas",
             "surname": "Rosemann",
             "role": "author"
         },
         {
             "index": 3,
-            "raw_name": "Knechtle, Beat",
+            "raw_name": "Beat Knechtle",
             "given_name": "Beat",
             "surname": "Knechtle",
             "role": "author"
diff --git a/python/tests/files/datacite/datacite_result_13.json b/python/tests/files/datacite/datacite_result_13.json
index 3da3816d..c8971667 100644
--- a/python/tests/files/datacite/datacite_result_13.json
+++ b/python/tests/files/datacite/datacite_result_13.json
@@ -17,7 +17,7 @@
         },
         {
             "index": 1,
-            "raw_name": "Hiltbrunner, Hermann",
+            "raw_name": "Hermann Hiltbrunner",
             "given_name": "Hermann",
             "surname": "Hiltbrunner",
             "role": "author"
diff --git a/python/tests/files/datacite/datacite_result_14.json b/python/tests/files/datacite/datacite_result_14.json
index 94c00472..94ad000a 100644
--- a/python/tests/files/datacite/datacite_result_14.json
+++ b/python/tests/files/datacite/datacite_result_14.json
@@ -45,56 +45,56 @@
     "contribs": [
         {
             "index": 0,
-            "raw_name": "Stulz, E.",
+            "raw_name": "E. Stulz",
             "given_name": "E.",
             "surname": "Stulz",
             "role": "author"
         },
         {
             "index": 1,
-            "raw_name": "Scott, S.M.",
+            "raw_name": "S.M. Scott",
             "given_name": "S.M.",
             "surname": "Scott",
             "role": "author"
         },
         {
             "index": 2,
-            "raw_name": "Ng, Yiu-Fai",
+            "raw_name": "Yiu-Fai Ng",
             "given_name": "Yiu-Fai",
             "surname": "Ng",
             "role": "author"
         },
         {
             "index": 3,
-            "raw_name": "Bond, A.D.",
+            "raw_name": "A.D. Bond",
             "given_name": "A.D.",
             "surname": "Bond",
             "role": "author"
         },
         {
             "index": 4,
-            "raw_name": "Teat, S.J.",
+            "raw_name": "S.J. Teat",
             "given_name": "S.J.",
             "surname": "Teat",
             "role": "author"
         },
         {
             "index": 5,
-            "raw_name": "Darling, S.L.",
+            "raw_name": "S.L. Darling",
             "given_name": "S.L.",
             "surname": "Darling",
             "role": "author"
         },
         {
             "index": 6,
-            "raw_name": "Feeder, N.",
+            "raw_name": "N. Feeder",
             "given_name": "N.",
             "surname": "Feeder",
             "role": "author"
         },
         {
             "index": 7,
-            "raw_name": "Sanders, J.K.M.",
+            "raw_name": "J.K.M. Sanders",
             "given_name": "J.K.M.",
             "surname": "Sanders",
             "role": "author"
diff --git a/python/tests/files/datacite/datacite_result_15.json b/python/tests/files/datacite/datacite_result_15.json
index 0614f6ba..bdeb8426 100644
--- a/python/tests/files/datacite/datacite_result_15.json
+++ b/python/tests/files/datacite/datacite_result_15.json
@@ -11,7 +11,7 @@
     "contribs": [
         {
             "index": 0,
-            "raw_name": "Richardson, David",
+            "raw_name": "David Richardson",
             "given_name": "David",
             "surname": "Richardson",
             "role": "author"
diff --git a/python/tests/files/datacite/datacite_result_16.json b/python/tests/files/datacite/datacite_result_16.json
index 1d861cf6..ea8c2e59 100644
--- a/python/tests/files/datacite/datacite_result_16.json
+++ b/python/tests/files/datacite/datacite_result_16.json
@@ -20,7 +20,7 @@
     "contribs": [
         {
             "index": 0,
-            "raw_name": "Sochi, Taha",
+            "raw_name": "Taha Sochi",
             "given_name": "Taha",
             "surname": "Sochi",
             "role": "author"
diff --git a/python/tests/files/datacite/datacite_result_18.json b/python/tests/files/datacite/datacite_result_18.json
index 12ab39fe..274858c3 100644
--- a/python/tests/files/datacite/datacite_result_18.json
+++ b/python/tests/files/datacite/datacite_result_18.json
@@ -12,4 +12,4 @@
     "contribs": [],
     "refs": [],
     "abstracts": []
-}
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_19.json b/python/tests/files/datacite/datacite_result_19.json
index 1505db92..8d797268 100644
--- a/python/tests/files/datacite/datacite_result_19.json
+++ b/python/tests/files/datacite/datacite_result_19.json
@@ -12,4 +12,4 @@
     "contribs": [],
     "refs": [],
     "abstracts": []
-}
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_20.json b/python/tests/files/datacite/datacite_result_20.json
index 1868eede..97d7ae75 100644
--- a/python/tests/files/datacite/datacite_result_20.json
+++ b/python/tests/files/datacite/datacite_result_20.json
@@ -11,4 +11,4 @@
     "contribs": [],
     "refs": [],
     "abstracts": []
-}
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_21.json b/python/tests/files/datacite/datacite_result_21.json
index 9214065a..0a05a7cd 100644
--- a/python/tests/files/datacite/datacite_result_21.json
+++ b/python/tests/files/datacite/datacite_result_21.json
@@ -8,8 +8,8 @@
     "ext_ids": {
         "doi": "10.7916/d86x0cg1"
     },
+    "language": "de",
     "contribs": [],
     "refs": [],
-    "abstracts": [],
-    "language": "de"
-}
+    "abstracts": []
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_22.json b/python/tests/files/datacite/datacite_result_22.json
index e9939e09..9e4225b5 100644
--- a/python/tests/files/datacite/datacite_result_22.json
+++ b/python/tests/files/datacite/datacite_result_22.json
@@ -8,15 +8,15 @@
     "ext_ids": {
         "doi": "10.7916/d86x0cg1"
     },
+    "language": "de",
     "contribs": [
         {
-            "raw_affiliation": "Department of pataphysics",
             "index": 0,
             "raw_name": "Anton Welch",
-            "role": "author"
+            "role": "author",
+            "raw_affiliation": "Department of pataphysics"
         }
     ],
     "refs": [],
-    "abstracts": [],
-    "language": "de"
-}
+    "abstracts": []
+}
\ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_23.json b/python/tests/files/datacite/datacite_result_23.json
index 2bf66eae..46f60492 100644
--- a/python/tests/files/datacite/datacite_result_23.json
+++ b/python/tests/files/datacite/datacite_result_23.json
@@ -8,6 +8,7 @@
     "ext_ids": {
         "doi": "10.7916/d86x0cg1-xxx"
     },
+    "language": "de",
     "contribs": [
         {
             "index": 0,
@@ -17,6 +18,5 @@
         }
     ],
     "refs": [],
-    "abstracts": [],
-    "language": "de"
-}
+    "abstracts": []
+}
\ No newline at end of file
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index cdc165d7..3e47fce8 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -7,7 +7,7 @@ import datetime
 import pytest
 import gzip
 from fatcat_tools.importers import DataciteImporter, JsonLinePusher
-from fatcat_tools.importers.datacite import find_original_language_title, parse_datacite_titles, parse_datacite_dates, clean_doi
+from fatcat_tools.importers.datacite import find_original_language_title, parse_datacite_titles, parse_datacite_dates, clean_doi, index_form_to_display_name
 from fatcat_tools.transforms import entity_to_dict
 from fixtures import api
 import json
@@ -294,3 +294,19 @@ def test_datacite_conversions(datacite_importer):
 
         assert result == expected
 
+def test_index_form_to_display_name():
+    """Index-form names are flipped to display order; other names stay as they are."""
+    expected_by_input = {
+        '': '',
+        'ABC': 'ABC',
+        'International Space Station': 'International Space Station',
+        'Jin, Shan': 'Shan Jin',
+        'Volkshochschule Der Bundesstadt Bonn': 'Volkshochschule Der Bundesstadt Bonn',
+        'Solomon, P. M.': 'P. M. Solomon',
+        'Sujeevan Ratnasingham': 'Sujeevan Ratnasingham',
+        'Paul Stöckli (1906-1991), Künstler': 'Paul Stöckli (1906-1991), Künstler',
+    }
+
+    for index_form, display_name in expected_by_input.items():
+        assert display_name == index_form_to_display_name(index_form)
+
-- 
cgit v1.2.3


From b33782cabf60ec8b90338abd4986338683c30b72 Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Thu, 2 Jan 2020 18:52:53 +0100
Subject: datacite: add helper script to create new test case

---
 python/tests/files/datacite/casecreate.sh | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100755 python/tests/files/datacite/casecreate.sh

(limited to 'python/tests')

diff --git a/python/tests/files/datacite/casecreate.sh b/python/tests/files/datacite/casecreate.sh
new file mode 100755
index 00000000..36ea08d2
--- /dev/null
+++ b/python/tests/files/datacite/casecreate.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#
+# casecreate.sh creates a new test case file pair by copying the last one.
+# Run it from the directory holding the datacite_doc_* and datacite_result_* files.
+set -eo pipefail
+# "|| true": with pipefail a no-match grep would abort here, before the check below.
+max=$(find . -name 'datacite_doc_*' | sort -n | tail -1 | grep -Eo '[0-9]+' || true)
+if [ -z "$max" ]; then
+    echo "failed, expected datacite_doc_[NUMBER]..."
+    exit 1
+fi
+new=$((max+1))
+cp "datacite_doc_$max.json" "datacite_doc_$new.json"
+cp "datacite_result_$max.json" "datacite_result_$new.json"
-- 
cgit v1.2.3


From 9d4385210518266d73964f140f47995774656c3f Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Thu, 2 Jan 2020 19:01:47 +0100
Subject: datacite: open case for editing after creation

---
 python/tests/files/datacite/casecreate.sh | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'python/tests')

diff --git a/python/tests/files/datacite/casecreate.sh b/python/tests/files/datacite/casecreate.sh
index 36ea08d2..82655dc3 100755
--- a/python/tests/files/datacite/casecreate.sh
+++ b/python/tests/files/datacite/casecreate.sh
@@ -12,3 +12,5 @@ fi
 new=$((max+1))
 cp "datacite_doc_$max.json" "datacite_doc_$new.json"
 cp "datacite_result_$max.json" "datacite_result_$new.json"
+# Open the new case in the optional viewer; "if" keeps the exit status 0 when it is absent.
+if [ -f ./caseview.sh ]; then ./caseview.sh "$new"; fi
-- 
cgit v1.2.3


From 61f0bbfbfdaf41be799fa41c88077806ef913188 Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Thu, 2 Jan 2020 19:02:04 +0100
Subject: datacite: add another test case

---
 python/tests/files/datacite/datacite_doc_24.json   | 48 ++++++++++++++++++++++
 .../tests/files/datacite/datacite_result_24.json   | 22 ++++++++++
 python/tests/import_datacite.py                    |  2 +-
 3 files changed, 71 insertions(+), 1 deletion(-)
 create mode 100644 python/tests/files/datacite/datacite_doc_24.json
 create mode 100644 python/tests/files/datacite/datacite_result_24.json

(limited to 'python/tests')

diff --git a/python/tests/files/datacite/datacite_doc_24.json b/python/tests/files/datacite/datacite_doc_24.json
new file mode 100644
index 00000000..6123350b
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_24.json
@@ -0,0 +1,48 @@
+{
+    "attributes": {
+      "doi": "10.7916/d86x0cg1",
+      "creators": [
+        {
+          "name": "Anton Welch",
+          "affiliation": [
+            "Department of pataphysics"
+          ],
+          "nameIdentifiers": []
+        }
+      ],
+      "titles": [
+        {
+          "title": "ABC"
+        },
+        {
+          "title": "DEF",
+          "titleType": "Subtitle"
+        }
+      ],
+      "publicationYear": 2016,
+      "language": "DE-CH",
+      "types": {
+        "ris": "GEN",
+        "bibtex": "misc",
+        "citeproc": "article",
+        "schemaOrg": "CreativeWork"
+      },
+      "dates": [
+        {
+          "date": "2017-08-24",
+          "dateType": "Created"
+        },
+        {
+          "date": "2019-08-04",
+          "dateType": "Updated"
+        },
+        {
+          "date": "2017",
+          "dateType": "Issued"
+        }
+      ],
+      "isActive": true,
+      "state": "findable"
+    }
+  }
+
diff --git a/python/tests/files/datacite/datacite_result_24.json b/python/tests/files/datacite/datacite_result_24.json
new file mode 100644
index 00000000..42859275
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_24.json
@@ -0,0 +1,22 @@
+{
+    "extra": {},
+    "title": "ABC",
+    "subtitle": "DEF",
+    "release_type": "article",
+    "release_stage": "published",
+    "release_date": "2017-08-24",
+    "release_year": 2017,
+    "ext_ids": {
+        "doi": "10.7916/d86x0cg1"
+    },
+    "contribs": [
+        {
+            "index": 0,
+            "raw_name": "Anton Welch",
+            "role": "author",
+            "raw_affiliation": "Department of pataphysics"
+        }
+    ],
+    "refs": [],
+    "abstracts": []
+}
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index 3e47fce8..54a529c5 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -282,7 +282,7 @@ def test_datacite_conversions(datacite_importer):
     for now.
     """
     datacite_importer.debug = True
-    for i in range(24):
+    for i in range(25):
         src = 'tests/files/datacite/datacite_doc_{0:02d}.json'.format(i)
         dst = 'tests/files/datacite/datacite_result_{0:02d}.json'.format(i)
         print('testing mapping from {} => {}'.format(src, dst))
-- 
cgit v1.2.3


From 391565cbbc0ba17ffd8c4f5d88d4dfda8a8b323c Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Fri, 3 Jan 2020 13:46:05 +0100
Subject: datacite: remove --lang-detect flag

Language detection for abstracts now always runs; the estimated time for a
single langdetect call is on the order of 50ms.
---
 python/fatcat_import.py                             |  4 ----
 python/fatcat_tools/importers/datacite.py           | 17 ++++++-----------
 python/tests/files/datacite/datacite_result_04.json |  5 +++--
 python/tests/files/datacite/datacite_result_05.json |  5 +++--
 python/tests/files/datacite/datacite_result_07.json |  5 +++--
 python/tests/files/datacite/datacite_result_08.json |  5 +++--
 python/tests/files/datacite/datacite_result_14.json |  5 +++--
 7 files changed, 21 insertions(+), 25 deletions(-)

(limited to 'python/tests')

diff --git a/python/fatcat_import.py b/python/fatcat_import.py
index a17029cc..6b04d547 100755
--- a/python/fatcat_import.py
+++ b/python/fatcat_import.py
@@ -172,7 +172,6 @@ def run_datacite(args):
         edit_batch_size=args.batch_size,
         bezerk_mode=args.bezerk_mode,
         debug=args.debug,
-        lang_detect=args.lang_detect,
         extid_map_file=args.extid_map_file,
         insert_log_file=args.insert_log_file)
     if args.kafka_mode:
@@ -474,9 +473,6 @@ def main():
     sub_datacite.add_argument('--debug',
         action='store_true',
         help="write converted JSON to stdout")
-    sub_datacite.add_argument('--lang-detect',
-        action='store_true',
-        help="try to detect language (slow)")
     sub_datacite.add_argument('--insert-log-file',
         default='',
         type=str,
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index bd135569..8034a5c1 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -196,7 +196,6 @@ class DataciteImporter(EntityImporter):
                  api,
                  issn_map_file,
                  debug=False,
-                 lang_detect=False,
                  insert_log_file=None,
                  **kwargs):
 
@@ -225,12 +224,9 @@ class DataciteImporter(EntityImporter):
 
         self.read_issn_map_file(issn_map_file)
         self.debug = debug
-        self.lang_detect = lang_detect
         self.insert_log_file = insert_log_file
 
-        print('datacite with debug={}, lang_detect={}'.format(
-            self.debug, self.lang_detect),
-              file=sys.stderr)
+        print('datacite with debug={}'.format(self.debug), file=sys.stderr)
 
     def lookup_ext_ids(self, doi):
         """
@@ -537,12 +533,11 @@ class DataciteImporter(EntityImporter):
             if len(text) > MAX_ABSTRACT_LENGTH:
                 text = text[:MAX_ABSTRACT_LENGTH] + " [...]"
             lang = None
-            if self.lang_detect:
-                try:
-                    lang = langdetect.detect(text)
-                except langdetect.lang_detect_exception.LangDetectException as err:
-                    print('[{}] language detection failed: {}'.format(doi, err),
-                          file=sys.stderr)
+            try:
+                lang = langdetect.detect(text)
+            except langdetect.lang_detect_exception.LangDetectException as err:
+                print('[{}] language detection failed: {}'.format(doi, err),
+                      file=sys.stderr)
             abstracts.append(
                 fatcat_openapi_client.ReleaseAbstract(
                     mimetype="text/plain",
diff --git a/python/tests/files/datacite/datacite_result_04.json b/python/tests/files/datacite/datacite_result_04.json
index 54b19ef9..94fa1f94 100644
--- a/python/tests/files/datacite/datacite_result_04.json
+++ b/python/tests/files/datacite/datacite_result_04.json
@@ -22,7 +22,8 @@
     "abstracts": [
         {
             "content": "Let A be an abelian category, I the full subcategory of A consisting of injective objects of A, and K(A) the category whose objects are cochain complexes of elements of A, and whose morphisms are homotopy classes of cochain maps.  In (5), lemma 4.6., p. 42, R. Hartshorne has proved that, under certain conditions, a cochain complex X\u02d9 \u03b5. |KA)| can be embedded in a complex I\u02d9 \u03b5. |K(I)| in such a way that I\u02d9 has the same cohomology as X\u02d9.  In Chapter I we show that the construction given in the two first parts of Hartshorne's Lemma is natural i.e. there exists a functor  J : K(A) \u2192 K(I) and a natural transformation [formula omitted]  (where E : K(I) \u2192 K(A) is the embedding functor) such that [formula omitted] is  injective and induces isomorphism in cohomology. The question whether the construction given in the third part of the lemma is functorial is still open.  We also prove that J is left adjoint to E, so that K(I) is a reflective subcategory of K(A).  In the special case where A is a category [formula omitted] of left A-modules, and [formula omitted] the category of cochain complexes in [formula omitted] and cochain maps (not homotopy classes), we prove the existence of a functor [formula omitted]  In Chapter II we study the natural homomorphism [formula omitted]   where A, B are rings, and M, L, N modules or chain complexes. In particular we give several sufficient conditions under which v is an isomorphism, or induces isomorphism in homology.  In the appendix we give a detailed proof of Hartshorne's Lemma. We think that this is useful, as no complete proof is, to our knowledge, to be found in the literature.",
-            "mimetype": "text/plain"
+            "mimetype": "text/plain",
+            "lang": "en"
         }
     ]
-}
\ No newline at end of file
+}
diff --git a/python/tests/files/datacite/datacite_result_05.json b/python/tests/files/datacite/datacite_result_05.json
index a790c26e..ff998c0f 100644
--- a/python/tests/files/datacite/datacite_result_05.json
+++ b/python/tests/files/datacite/datacite_result_05.json
@@ -524,7 +524,8 @@
     "abstracts": [
         {
             "content": "UNITE provides a unified way for delimiting, identifying, communicating, and working with DNA-based Species Hypotheses (SH). All fungal ITS sequences in the international nucleotide sequence databases are clustered to approximately the species level by applying a set of dynamic distance values (&lt;0.5 - 3.0%). All species hypotheses are given a unique, stable name in the form of a DOI, and their taxonomic and ecological annotations are verified through distributed, web-based third-party annotation efforts. SHs are connected to a taxon name and its classification as far as possible (phylum, class, order, etc.) by taking into account identifications for all sequences in the SH. An automatically or manually designated sequence is chosen to represent each such SH. These sequences are released (https://unite.ut.ee/repository.php) for use by the scientific community in, for example, local sequence similarity searches and next-generation sequencing analysis pipelines. The system and the data are updated automatically as the number of public fungal ITS sequences grows.",
-            "mimetype": "text/plain"
+            "mimetype": "text/plain",
+            "lang": "en"
         }
     ]
-}
\ No newline at end of file
+}
diff --git a/python/tests/files/datacite/datacite_result_07.json b/python/tests/files/datacite/datacite_result_07.json
index f572263c..f694ddef 100644
--- a/python/tests/files/datacite/datacite_result_07.json
+++ b/python/tests/files/datacite/datacite_result_07.json
@@ -67,7 +67,8 @@
     "abstracts": [
         {
             "content": "The purpose of the ISEC concept is to provide a high-efficient heat pump system for hot water production. The ISEC concept uses two storage tanks for the water, one discharged and one charged. Hot water for the industrial process is tapped from the charged tank, while the other tank is charging. Charging is done by circulating the water in the tank through the condenser of a heat pump several times and thereby gradually heating the water. The charging is done with a higher mass flow rate than the discharging to reach several circulations of the water during the time frame of one discharging. This result in a lower condensing temperature than if the water was heated in one step. Two test setups were built, one to test the performance of the heat pump gradually heating the water and one to investigate the stratification in the storage tanks. Furthermore, a dynamic model of the system was implemented in Dymola, and validated by the use of test data from the two experimental setups. This paper shows that there is a good consistency between the model and the experimental tests.",
-            "mimetype": "text/plain"
+            "mimetype": "text/plain",
+            "lang": "en"
         }
     ]
-}
\ No newline at end of file
+}
diff --git a/python/tests/files/datacite/datacite_result_08.json b/python/tests/files/datacite/datacite_result_08.json
index 581ca1eb..cc0e968b 100644
--- a/python/tests/files/datacite/datacite_result_08.json
+++ b/python/tests/files/datacite/datacite_result_08.json
@@ -47,7 +47,8 @@
     "abstracts": [
         {
             "content": "International society recognizes that the scarcity of fresh water is increasing and farming sectors suffer from lack of irrigation water. However, if we look at this issue with a framework of relative factor endowment, a different view will arise. In emerging states with rapid industrialization and labor migration, labor scarcity increases at a faster pace than that of irrigation water. Using the historical review of Japan\u2019s irrigation policies as well as the case studies of India and China, this paper shows that the introduction of policies which do not reflect the actual relative resource scarcity may mislead the development path. We argue that under increasing relative labor scarcity it is important to realize the substitution of capital for labor for surface irrigation system management and that the substitution needs public support because the service of surface irrigation system has some externalities. Through this argument, this paper also intends to shed the light back to the role of the state for local resource management which seems to be unfairly undervalued since the boom of community participatory approach in the 1980s.",
-            "mimetype": "text/plain"
+            "mimetype": "text/plain",
+            "lang": "en"
         }
     ]
-}
\ No newline at end of file
+}
diff --git a/python/tests/files/datacite/datacite_result_14.json b/python/tests/files/datacite/datacite_result_14.json
index 94ad000a..4521f891 100644
--- a/python/tests/files/datacite/datacite_result_14.json
+++ b/python/tests/files/datacite/datacite_result_14.json
@@ -104,7 +104,8 @@
     "abstracts": [
         {
             "content": "An entry from the Cambridge Structural Database, the world\u2019s repository for small molecule crystal structures. The entry contains experimental data from a crystal diffraction study. The deposited dataset for this entry is freely available from the CCDC and typically includes 3D coordinates, cell parameters, space group, experimental conditions and quality measures.",
-            "mimetype": "text/plain"
+            "mimetype": "text/plain",
+            "lang": "en"
         }
     ]
-}
\ No newline at end of file
+}
-- 
cgit v1.2.3


From e4402d6d4b162d57507d5beb57de88017cea549d Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Fri, 3 Jan 2020 19:51:53 +0100
Subject: datacite: prepare release_month (stub)

---
 python/fatcat_tools/importers/datacite.py | 20 ++++++++++----------
 python/tests/import_datacite.py           | 28 ++++++++++++++--------------
 2 files changed, 24 insertions(+), 24 deletions(-)

(limited to 'python/tests')

diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index d13e855e..45c8a421 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -378,7 +378,7 @@ class DataciteImporter(EntityImporter):
         # "attributes.dates[].dateType", values: "Accepted", "Available"
         # "Collected", "Copyrighted", "Created", "Issued", "Submitted",
         # "Updated", "Valid".
-        release_date, release_year = parse_datacite_dates(
+        release_date, release_month, release_year = parse_datacite_dates(
             attributes.get('dates', []))
 
         # Start with clear stages, e.g. published. TODO(martin): we could
@@ -762,10 +762,10 @@ def parse_datacite_dates(dates):
     Given a list of date fields (under .dates), return tuple, (release_date,
     release_year).
     """
-    release_date, release_year = None, None
+    release_date, release_month, release_year = None, None, None
 
     if not dates:
-        return release_date, release_year
+        return release_date, release_month, release_year
 
     if not isinstance(dates, list):
         raise ValueError('expected a list of date items')
@@ -789,7 +789,7 @@ def parse_datacite_dates(dates):
 
     def parse_item(item):
         result, value, year_only = None, item.get('date', ''), False
-        release_date, release_year = None, None
+        release_date, release_month, release_year = None, None, None
 
         for pattern in common_patterns:
             try:
@@ -808,24 +808,24 @@ def parse_datacite_dates(dates):
             except TypeError as err:
                 print("{} date parsing failed with: {}".format(value, err),
                       file=sys.stderr)
-                return result_date, result_year
+                return result_date, release_month, result_year
 
         if result is None:
             # Unparsable date.
-            return release_date, release_year
+            return release_date, release_month, release_year
 
         if not year_only:
             release_date = result.date()
         release_year = result.year
 
-        return release_date, release_year
+        return release_date, release_month, release_year
 
     for prio in date_type_prio:
         for item in dates:
             if not item.get('dateType') == prio:
                 continue
 
-            release_date, release_year = parse_item(item)
+            release_date, release_month, release_year = parse_item(item)
             if release_date is None and release_year is None:
                 continue
 
@@ -841,11 +841,11 @@ def parse_datacite_dates(dates):
 
     if release_date is None and release_year is None:
         for item in dates:
-            release_date, release_year = parse_item(item)
+            release_date, release_month, release_year = parse_item(item)
             if release_year or release_date:
                 break
 
-    return release_date, release_year
+    return release_date, release_month, release_year
 
 def clean_doi(doi):
     """
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index 54a529c5..29c608ee 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -170,41 +170,41 @@ def test_parse_datacite_dates():
     """
     Case = collections.namedtuple('Case', 'about input result')
     cases = [
-        Case('None is None', None, (None, None)),
-        Case('empty list is None', [], (None, None)),
-        Case('empty item is None', [{}], (None, None)),
-        Case('empty item is None', [{'date': '2019'}], (None, 2019)),
-        Case('first wins', [{'date': '2019'}, {'date': '2020'}], (None, 2019)),
-        Case('skip bogus year', [{'date': 'abc'}, {'date': '2020'}], (None, 2020)),
+        Case('None is None', None, (None, None, None)),
+        Case('empty list is None', [], (None, None, None)),
+        Case('empty item is None', [{}], (None, None, None)),
+        Case('empty item is None', [{'date': '2019'}], (None, None, 2019)),
+        Case('first wins', [{'date': '2019'}, {'date': '2020'}], (None, None, 2019)),
+        Case('skip bogus year', [{'date': 'abc'}, {'date': '2020'}], (None, None, 2020)),
         Case('first with type', [
             {'date': '2019', 'dateType': 'Accepted'}, {'date': '2020'}
-        ], (None, 2019)),
+        ], (None, None, 2019)),
         Case('full date', [
             {'date': '2019-12-01', 'dateType': 'Valid'},
-        ], (datetime.date(2019, 12, 1), 2019)),
+        ], (datetime.date(2019, 12, 1), None, 2019)),
         Case('date type prio', [
             {'date': '2000-12-01', 'dateType': 'Valid'},
             {'date': '2010-01-01', 'dateType': 'Updated'},
-        ], (datetime.date(2000, 12, 1), 2000)),
+        ], (datetime.date(2000, 12, 1), None, 2000)),
         Case('date type prio, Available > Updated', [
             {'date': '2010-01-01', 'dateType': 'Updated'},
             {'date': '2000-12-01', 'dateType': 'Available'},
-        ], (datetime.date(2000, 12, 1), 2000)),
+        ], (datetime.date(2000, 12, 1), None, 2000)),
         Case('allow different date formats, Available > Updated', [
             {'date': '2010-01-01T10:00:00', 'dateType': 'Updated'},
             {'date': '2000-12-01T10:00:00', 'dateType': 'Available'},
-        ], (datetime.date(2000, 12, 1), 2000)),
+        ], (datetime.date(2000, 12, 1), None, 2000)),
         Case('allow different date formats, Available > Updated', [
             {'date': '2010-01-01T10:00:00Z', 'dateType': 'Updated'},
             {'date': '2000-12-01T10:00:00Z', 'dateType': 'Available'},
-        ], (datetime.date(2000, 12, 1), 2000)),
+        ], (datetime.date(2000, 12, 1), None, 2000)),
         Case('allow fuzzy date formats, Available > Updated', [
             {'date': '2010', 'dateType': 'Updated'},
             {'date': '2000 Dec 01', 'dateType': 'Available'},
-        ], (datetime.date(2000, 12, 1), 2000)),
+        ], (datetime.date(2000, 12, 1), None, 2000)),
         Case('ignore broken date', [
             {'date': 'Febrrr 45', 'dateType': 'Updated'},
-        ], (None, None)),
+        ], (None, None, None)),
     ]
     for case in cases:
         result = parse_datacite_dates(case.input)
-- 
cgit v1.2.3


From 55dcece5a476b1492bf6c7f4597a469b48b41264 Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Fri, 3 Jan 2020 22:40:53 +0100
Subject: datacite: parse_datacite_dates returns month

As [...] we will soon add support for the release_month field in the release schema.
---
 python/fatcat_tools/importers/datacite.py | 45 ++++++++++++++++++++++++-------
 python/tests/import_datacite.py           | 23 +++++++++++-----
 2 files changed, 51 insertions(+), 17 deletions(-)

(limited to 'python/tests')

diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 45c8a421..5891f8de 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -9,6 +9,7 @@ functions (parse_datacite_...), which can be tested more easily.
 """
 
 from .common import EntityImporter, clean
+import collections
 import dateparser
 import datetime
 import fatcat_openapi_client
@@ -783,43 +784,68 @@ def parse_datacite_dates(dates):
         'Updated',
     )
 
+    # We need to note the granularity, since a string like "2019" would be
+    # parsed into "2019-01-01", even though the month is unknown. Use 3
+    # granularity types: 'y', 'm', 'd'.
+    Pattern = collections.namedtuple('Pattern', 'layout granularity')
+
     # Before using (expensive) dateparser, try a few common patterns.
-    common_patterns = ('%Y-%m-%d', '%Y-%m', '%Y-%m-%dT%H:%M:%SZ',
-                       '%Y-%m-%dT%H:%M:%S', '%Y')
+    common_patterns = (
+        Pattern('%Y-%m-%d', 'd'),
+        Pattern('%Y-%m', 'm'),
+        Pattern('%Y-%m-%dT%H:%M:%SZ', 'd'),
+        Pattern('%Y-%m-%dT%H:%M:%S', 'd'),
+        Pattern('%Y', 'y'),
+    )
 
     def parse_item(item):
         result, value, year_only = None, item.get('date', ''), False
         release_date, release_month, release_year = None, None, None
 
-        for pattern in common_patterns:
+        for layout, granularity in common_patterns:
             try:
-                result = datetime.datetime.strptime(value, pattern)
+                result = datetime.datetime.strptime(value, layout)
             except ValueError:
                 continue
             else:
-                if pattern == '%Y':
+                if granularity == 'y':
                     year_only = True
                 break
 
         if result is None:
             print('fallback for {}'.format(value), file=sys.stderr)
+            parser = dateparser.DateDataParser()
             try:
-                result = dateparser.parse(value)
+                # Results in a dict with keys: date_obj, period, locale.
+                parse_result = parser.get_date_data(value)
+
+                # A datetime object, later we need a date, only.
+                result = parse_result['date_obj']
+                if result is not None:
+                    if parse_result['period'] == 'year':
+                        return None, None, result.year
+                    elif parse_result['period'] == 'month':
+                        return None, result.month, result.year
+                    else:
+                        return result.date(), result.month, result.year
             except TypeError as err:
                 print("{} date parsing failed with: {}".format(value, err),
                       file=sys.stderr)
-                return result_date, release_month, result_year
 
         if result is None:
             # Unparsable date.
             return release_date, release_month, release_year
 
-        if not year_only:
+        if granularity != 'y':
             release_date = result.date()
         release_year = result.year
+        if granularity in ('m', 'd'):
+            release_month = result.month
 
         return release_date, release_month, release_year
 
+    today = datetime.date.today()
+
     for prio in date_type_prio:
         for item in dates:
             if not item.get('dateType') == prio:
@@ -829,8 +855,7 @@ def parse_datacite_dates(dates):
             if release_date is None and release_year is None:
                 continue
 
-            if release_year < 1000 or release_year > datetime.date.today(
-            ).year + 5:
+            if release_year < 1000 or release_year > today.year + 5:
                 # Skip possibly bogus dates.
                 release_year = None
                 continue
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index 29c608ee..c2fcdec9 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -173,7 +173,7 @@ def test_parse_datacite_dates():
         Case('None is None', None, (None, None, None)),
         Case('empty list is None', [], (None, None, None)),
         Case('empty item is None', [{}], (None, None, None)),
-        Case('empty item is None', [{'date': '2019'}], (None, None, 2019)),
+        Case('year only yields year only', [{'date': '2019'}], (None, None, 2019)),
         Case('first wins', [{'date': '2019'}, {'date': '2020'}], (None, None, 2019)),
         Case('skip bogus year', [{'date': 'abc'}, {'date': '2020'}], (None, None, 2020)),
         Case('first with type', [
@@ -181,27 +181,36 @@ def test_parse_datacite_dates():
         ], (None, None, 2019)),
         Case('full date', [
             {'date': '2019-12-01', 'dateType': 'Valid'},
-        ], (datetime.date(2019, 12, 1), None, 2019)),
+        ], (datetime.date(2019, 12, 1), 12, 2019)),
         Case('date type prio', [
             {'date': '2000-12-01', 'dateType': 'Valid'},
             {'date': '2010-01-01', 'dateType': 'Updated'},
-        ], (datetime.date(2000, 12, 1), None, 2000)),
+        ], (datetime.date(2000, 12, 1), 12, 2000)),
         Case('date type prio, Available > Updated', [
             {'date': '2010-01-01', 'dateType': 'Updated'},
             {'date': '2000-12-01', 'dateType': 'Available'},
-        ], (datetime.date(2000, 12, 1), None, 2000)),
+        ], (datetime.date(2000, 12, 1), 12, 2000)),
         Case('allow different date formats, Available > Updated', [
             {'date': '2010-01-01T10:00:00', 'dateType': 'Updated'},
             {'date': '2000-12-01T10:00:00', 'dateType': 'Available'},
-        ], (datetime.date(2000, 12, 1), None, 2000)),
+        ], (datetime.date(2000, 12, 1), 12, 2000)),
         Case('allow different date formats, Available > Updated', [
             {'date': '2010-01-01T10:00:00Z', 'dateType': 'Updated'},
             {'date': '2000-12-01T10:00:00Z', 'dateType': 'Available'},
-        ], (datetime.date(2000, 12, 1), None, 2000)),
+        ], (datetime.date(2000, 12, 1), 12, 2000)),
         Case('allow fuzzy date formats, Available > Updated', [
             {'date': '2010', 'dateType': 'Updated'},
             {'date': '2000 Dec 01', 'dateType': 'Available'},
-        ], (datetime.date(2000, 12, 1), None, 2000)),
+        ], (datetime.date(2000, 12, 1), 12, 2000)),
+        Case('fuzzy year only', [
+            {'date': 'Year 2010', 'dateType': 'Issued'},
+        ], (None, None, 2010)),
+        Case('fuzzy year and month', [
+            {'date': 'Year 2010 Feb', 'dateType': 'Issued'},
+        ], (None, 2, 2010)),
+        Case('fuzzy year, month, day', [
+            {'date': 'Year 2010 Feb 24', 'dateType': 'Issued'},
+        ], (datetime.date(2010, 2, 24), 2, 2010)),
         Case('ignore broken date', [
             {'date': 'Febrrr 45', 'dateType': 'Updated'},
         ], (None, None, None)),
-- 
cgit v1.2.3


From 328d7901df30ba94685d34d6a428e798b4604839 Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Fri, 3 Jan 2020 22:53:23 +0100
Subject: datacite: use normal.clean_doi

---
 python/fatcat_tools/importers/datacite.py | 12 +-----------
 python/tests/import_datacite.py           |  4 ----
 2 files changed, 1 insertion(+), 15 deletions(-)

(limited to 'python/tests')

diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 5891f8de..d0c75b6e 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -20,6 +20,7 @@ import langdetect
 import sqlite3
 import sys
 from fatcat_tools.transforms import entity_to_dict
+from fatcat_tools.normal import clean_doi
 
 
 # Cutoff length for abstracts.
@@ -872,17 +873,6 @@ def parse_datacite_dates(dates):
 
     return release_date, release_month, release_year
 
-def clean_doi(doi):
-    """
-    10.25513/1812-3996.2017.1.34–42 // 8211, Hex 2013, Octal 20023
-    See also: https://github.com/miku/throwaway-check-doi
-
-    Replace unicode HYPHEN..HORIZONTAL BAR with HYPHEN-MINUS.
-    """
-    for c in ('\u2010', '\u2011', '\u2012', '\u2013', '\u2014', '\u2015'):
-        doi = doi.replace(c, "-")
-    return doi
-
 def index_form_to_display_name(s):
     """
     Try to convert an index form name, like 'Razis, Panos A' into display_name,
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index c2fcdec9..881452ed 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -281,10 +281,6 @@ def test_datacite_dict_parse(datacite_importer):
         assert r.contribs[0].surname == None
         assert len(r.refs) == 0
 
-def test_clean_doi():
-    assert clean_doi("10.25513/1812-3996.2017.1.34\u201342") == "10.25513/1812-3996.2017.1.34-42"
-    assert "123" == clean_doi("123")
-
 def test_datacite_conversions(datacite_importer):
     """
     Datacite JSON to release entity JSON representation. The count is hardcoded
-- 
cgit v1.2.3


From e6feb6fd6d48f7b179389e79dfeb994d1b0f797b Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Sat, 4 Jan 2020 00:19:56 +0100
Subject: datacite: always include "datacite" key in extra

> always include extra values for the respective DOI registrars
(datacite, crossref, jalc), even if they are empty ({}), to be used as a
flag so we know which DOI registrar supplied the metadata.
---
 python/fatcat_tools/importers/datacite.py           | 4 ++--
 python/tests/files/datacite/datacite_result_03.json | 4 ++--
 python/tests/files/datacite/datacite_result_04.json | 2 +-
 python/tests/files/datacite/datacite_result_11.json | 4 ++--
 python/tests/files/datacite/datacite_result_12.json | 4 ++--
 python/tests/files/datacite/datacite_result_13.json | 4 ++--
 python/tests/files/datacite/datacite_result_15.json | 4 ++--
 python/tests/files/datacite/datacite_result_17.json | 4 ++--
 python/tests/files/datacite/datacite_result_18.json | 4 ++--
 python/tests/files/datacite/datacite_result_19.json | 4 ++--
 python/tests/files/datacite/datacite_result_20.json | 4 ++--
 python/tests/files/datacite/datacite_result_21.json | 4 ++--
 python/tests/files/datacite/datacite_result_22.json | 4 ++--
 python/tests/files/datacite/datacite_result_23.json | 4 ++--
 python/tests/files/datacite/datacite_result_24.json | 2 +-
 15 files changed, 28 insertions(+), 28 deletions(-)

(limited to 'python/tests')

diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index d0c75b6e..2fad1264 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -604,8 +604,8 @@ class DataciteImporter(EntityImporter):
         if not container_id and container_name:
             extra['container_name'] = container_name
 
-        if extra_datacite:
-            extra['datacite'] = extra_datacite
+        # Always include datacite key, even if value is empty (dict).
+        extra['datacite'] = extra_datacite
 
         extids = self.lookup_ext_ids(doi=doi)
 
diff --git a/python/tests/files/datacite/datacite_result_03.json b/python/tests/files/datacite/datacite_result_03.json
index 3e3c2bd5..e8367e8f 100644
--- a/python/tests/files/datacite/datacite_result_03.json
+++ b/python/tests/files/datacite/datacite_result_03.json
@@ -1,5 +1,5 @@
 {
-    "extra": {},
+    "extra": {"datacite": {}},
     "title": "midterm ah30903",
     "release_type": "article",
     "release_year": 2016,
@@ -16,4 +16,4 @@
     ],
     "refs": [],
     "abstracts": []
-}
\ No newline at end of file
+}
diff --git a/python/tests/files/datacite/datacite_result_04.json b/python/tests/files/datacite/datacite_result_04.json
index 94fa1f94..5b956836 100644
--- a/python/tests/files/datacite/datacite_result_04.json
+++ b/python/tests/files/datacite/datacite_result_04.json
@@ -1,5 +1,5 @@
 {
-    "extra": {},
+    "extra": {"datacite": {}},
     "title": "On chain maps inducing isomorphisms in homology",
     "release_type": "article-journal",
     "release_stage": "published",
diff --git a/python/tests/files/datacite/datacite_result_11.json b/python/tests/files/datacite/datacite_result_11.json
index 037c5ac2..3045701f 100644
--- a/python/tests/files/datacite/datacite_result_11.json
+++ b/python/tests/files/datacite/datacite_result_11.json
@@ -1,5 +1,5 @@
 {
-    "extra": {},
+    "extra": {"datacite": {}},
     "title": "N1 bei Safenwil",
     "release_type": "graphic",
     "release_stage": "published",
@@ -18,4 +18,4 @@
     ],
     "refs": [],
     "abstracts": []
-}
\ No newline at end of file
+}
diff --git a/python/tests/files/datacite/datacite_result_12.json b/python/tests/files/datacite/datacite_result_12.json
index 192062e3..5dbcd8d0 100644
--- a/python/tests/files/datacite/datacite_result_12.json
+++ b/python/tests/files/datacite/datacite_result_12.json
@@ -1,5 +1,5 @@
 {
-    "extra": {},
+    "extra": {"datacite": {}},
     "title": "Anthropometric and Physiological Profile of Mixed Martial Art Athletes: A Brief Review",
     "release_type": "article-journal",
     "release_stage": "published",
@@ -41,4 +41,4 @@
     ],
     "refs": [],
     "abstracts": []
-}
\ No newline at end of file
+}
diff --git a/python/tests/files/datacite/datacite_result_13.json b/python/tests/files/datacite/datacite_result_13.json
index c8971667..2509f27e 100644
--- a/python/tests/files/datacite/datacite_result_13.json
+++ b/python/tests/files/datacite/datacite_result_13.json
@@ -1,5 +1,5 @@
 {
-    "extra": {},
+    "extra": {"datacite": {}},
     "title": "[M\u00fcssen wir des Gl\u00fccks uns sch\u00e4men?]",
     "release_type": "article-journal",
     "release_stage": "published",
@@ -25,4 +25,4 @@
     ],
     "refs": [],
     "abstracts": []
-}
\ No newline at end of file
+}
diff --git a/python/tests/files/datacite/datacite_result_15.json b/python/tests/files/datacite/datacite_result_15.json
index bdeb8426..1b430a7d 100644
--- a/python/tests/files/datacite/datacite_result_15.json
+++ b/python/tests/files/datacite/datacite_result_15.json
@@ -1,5 +1,5 @@
 {
-    "extra": {},
+    "extra": {"datacite": {}},
     "title": "Parramore Island of the Virginia Coast Reserve Permanent Plot Resurvey: Tree data 1997",
     "release_type": "dataset",
     "release_stage": "published",
@@ -19,4 +19,4 @@
     ],
     "refs": [],
     "abstracts": []
-}
\ No newline at end of file
+}
diff --git a/python/tests/files/datacite/datacite_result_17.json b/python/tests/files/datacite/datacite_result_17.json
index 0852a09e..73b082d9 100644
--- a/python/tests/files/datacite/datacite_result_17.json
+++ b/python/tests/files/datacite/datacite_result_17.json
@@ -1,5 +1,5 @@
 {
-    "extra": {},
+    "extra": {"datacite": {}},
     "title": "gel_BSA-FITC_Markov_segmntation0343.tif",
     "release_type": "dataset",
     "release_stage": "published",
@@ -17,4 +17,4 @@
     ],
     "refs": [],
     "abstracts": []
-}
\ No newline at end of file
+}
diff --git a/python/tests/files/datacite/datacite_result_18.json b/python/tests/files/datacite/datacite_result_18.json
index 274858c3..d0b53222 100644
--- a/python/tests/files/datacite/datacite_result_18.json
+++ b/python/tests/files/datacite/datacite_result_18.json
@@ -1,5 +1,5 @@
 {
-    "extra": {},
+    "extra": {"datacite": {}},
     "title": "Eastern questionnaire, answer sheet for Interviewee 53215, page 064",
     "release_type": "article",
     "release_stage": "published",
@@ -12,4 +12,4 @@
     "contribs": [],
     "refs": [],
     "abstracts": []
-}
\ No newline at end of file
+}
diff --git a/python/tests/files/datacite/datacite_result_19.json b/python/tests/files/datacite/datacite_result_19.json
index 8d797268..55b43684 100644
--- a/python/tests/files/datacite/datacite_result_19.json
+++ b/python/tests/files/datacite/datacite_result_19.json
@@ -1,5 +1,5 @@
 {
-    "extra": {},
+    "extra": {"datacite": {}},
     "title": "Eastern questionnaire, answer sheet for Interviewee 55236, page 092",
     "release_type": "article",
     "release_stage": "published",
@@ -12,4 +12,4 @@
     "contribs": [],
     "refs": [],
     "abstracts": []
-}
\ No newline at end of file
+}
diff --git a/python/tests/files/datacite/datacite_result_20.json b/python/tests/files/datacite/datacite_result_20.json
index 97d7ae75..48063d9d 100644
--- a/python/tests/files/datacite/datacite_result_20.json
+++ b/python/tests/files/datacite/datacite_result_20.json
@@ -1,5 +1,5 @@
 {
-    "extra": {},
+    "extra": {"datacite": {}},
     "title": "<h1>Eastern questionnaire</h1>",
     "release_type": "article",
     "release_stage": "published",
@@ -11,4 +11,4 @@
     "contribs": [],
     "refs": [],
     "abstracts": []
-}
\ No newline at end of file
+}
diff --git a/python/tests/files/datacite/datacite_result_21.json b/python/tests/files/datacite/datacite_result_21.json
index 0a05a7cd..99dcad1b 100644
--- a/python/tests/files/datacite/datacite_result_21.json
+++ b/python/tests/files/datacite/datacite_result_21.json
@@ -1,5 +1,5 @@
 {
-    "extra": {},
+    "extra": {"datacite": {}},
     "title": "ABC",
     "release_type": "article",
     "release_stage": "published",
@@ -12,4 +12,4 @@
     "contribs": [],
     "refs": [],
     "abstracts": []
-}
\ No newline at end of file
+}
diff --git a/python/tests/files/datacite/datacite_result_22.json b/python/tests/files/datacite/datacite_result_22.json
index 9e4225b5..30d75a3d 100644
--- a/python/tests/files/datacite/datacite_result_22.json
+++ b/python/tests/files/datacite/datacite_result_22.json
@@ -1,5 +1,5 @@
 {
-    "extra": {},
+    "extra": {"datacite": {}},
     "title": "ABC",
     "release_type": "article",
     "release_stage": "published",
@@ -19,4 +19,4 @@
     ],
     "refs": [],
     "abstracts": []
-}
\ No newline at end of file
+}
diff --git a/python/tests/files/datacite/datacite_result_23.json b/python/tests/files/datacite/datacite_result_23.json
index 46f60492..f79053df 100644
--- a/python/tests/files/datacite/datacite_result_23.json
+++ b/python/tests/files/datacite/datacite_result_23.json
@@ -1,5 +1,5 @@
 {
-    "extra": {},
+    "extra": {"datacite": {}},
     "title": "ABC",
     "release_type": "article",
     "release_stage": "published",
@@ -19,4 +19,4 @@
     ],
     "refs": [],
     "abstracts": []
-}
\ No newline at end of file
+}
diff --git a/python/tests/files/datacite/datacite_result_24.json b/python/tests/files/datacite/datacite_result_24.json
index 42859275..a7fc59ba 100644
--- a/python/tests/files/datacite/datacite_result_24.json
+++ b/python/tests/files/datacite/datacite_result_24.json
@@ -1,5 +1,5 @@
 {
-    "extra": {},
+    "extra": {"datacite": {}},
     "title": "ABC",
     "subtitle": "DEF",
     "release_type": "article",
-- 
cgit v1.2.3


From 3590cf0e06b6c4f1b1c9621a94c9567e398bca04 Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Mon, 6 Jan 2020 21:47:13 +0100
Subject: datacite: clean abstracts, use unknown value tokens

Datacite defines placeholders for unknown values:

* https://support.datacite.org/docs/schema-values-unknown-information-v43

Clean abstracts.
---
 python/fatcat_tools/importers/datacite.py          | 30 +++++++++++++++++++---
 .../tests/files/datacite/datacite_result_05.json   |  2 +-
 .../tests/files/datacite/datacite_result_08.json   |  2 +-
 .../tests/files/datacite/datacite_result_14.json   |  2 +-
 4 files changed, 29 insertions(+), 7 deletions(-)

(limited to 'python/tests')

diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index c3d6138e..f9d1b49a 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -125,6 +125,29 @@ DATACITE_TYPE_MAP = {
     }
 }
 
+# DATACITE_UNKNOWN_MARKERS via https://support.datacite.org/docs/schema-values-unknown-information-v43.
+DATACITE_UNKNOWN_MARKERS = (
+    '(:unac)',  # temporarily inaccessible
+    '(:unal)',  # unallowed, suppressed intentionally
+    '(:unap)',  # not applicable, makes no sense
+    '(:unas)',  # value unassigned (e.g., Untitled)
+    '(:unav)',  # value unavailable, possibly unknown
+    '(:unkn)',  # known to be unknown (e.g., Anonymous, Inconnue)
+    '(:none)',  # never had a value, never will
+    '(:null)',  # explicitly and meaningfully empty
+    '(:tba)',  # to be assigned or announced later
+    '(:etal)',  # too numerous to list (et alia)
+)
+
+# UNKNOWN_MARKERS joins official datacite markers with generic tokens marking
+# unknown values.
+UNKNOWN_MARKERS = set(DATACITE_UNKNOWN_MARKERS).union(set((
+    'NA',
+    'NN',
+    'n.a.',
+    '[s.n.]',
+)))
+
 # TODO(martin): merge this with other maps, maybe.
 LICENSE_SLUG_MAP = {
     "//creativecommons.org/licenses/by/2.0/": "CC-BY",
@@ -326,7 +349,7 @@ class DataciteImporter(EntityImporter):
                 if raw_affiliation == '':
                     continue
 
-                if name in ('(:Unav)', 'NA', 'NN', '(:Null)'):
+                if name.lower() in UNKNOWN_MARKERS:
                     continue
 
                 # Unpack name, if we have an index form (e.g. 'Razis, Panos A') into 'Panos A razis'.
@@ -345,7 +368,7 @@ class DataciteImporter(EntityImporter):
                     ))
             elif nameType == 'Organizational':
                 name = c.get('name', '') or ''
-                if name == 'NN':
+                if name in UNKNOWN_MARKERS:
                     continue
                 if len(name) < 3:
                     continue
@@ -394,8 +417,7 @@ class DataciteImporter(EntityImporter):
         # Publisher. A few NA values. A few bogus values.
         publisher = attributes.get('publisher')
 
-        if publisher in ('(:unav)', 'Unknown', 'n.a.', '[s.n.]', '(:unap)',
-                         '(:none)', 'Unpublished'):
+        if publisher in UNKNOWN_MARKERS | set(('Unpublished', 'Unknown')):
             publisher = None
             release_stage = None
         if publisher is not None and len(publisher) > 80:
diff --git a/python/tests/files/datacite/datacite_result_05.json b/python/tests/files/datacite/datacite_result_05.json
index ff998c0f..1840884e 100644
--- a/python/tests/files/datacite/datacite_result_05.json
+++ b/python/tests/files/datacite/datacite_result_05.json
@@ -523,7 +523,7 @@
     "refs": [],
     "abstracts": [
         {
-            "content": "UNITE provides a unified way for delimiting, identifying, communicating, and working with DNA-based Species Hypotheses (SH). All fungal ITS sequences in the international nucleotide sequence databases are clustered to approximately the species level by applying a set of dynamic distance values (&lt;0.5 - 3.0%). All species hypotheses are given a unique, stable name in the form of a DOI, and their taxonomic and ecological annotations are verified through distributed, web-based third-party annotation efforts. SHs are connected to a taxon name and its classification as far as possible (phylum, class, order, etc.) by taking into account identifications for all sequences in the SH. An automatically or manually designated sequence is chosen to represent each such SH. These sequences are released (https://unite.ut.ee/repository.php) for use by the scientific community in, for example, local sequence similarity searches and next-generation sequencing analysis pipelines. The system and the data are updated automatically as the number of public fungal ITS sequences grows.",
+            "content": "UNITE provides a unified way for delimiting, identifying, communicating, and working with DNA-based Species Hypotheses (SH). All fungal ITS sequences in the international nucleotide sequence databases are clustered to approximately the species level by applying a set of dynamic distance values (<0.5 - 3.0%). All species hypotheses are given a unique, stable name in the form of a DOI, and their taxonomic and ecological annotations are verified through distributed, web-based third-party annotation efforts. SHs are connected to a taxon name and its classification as far as possible (phylum, class, order, etc.) by taking into account identifications for all sequences in the SH. An automatically or manually designated sequence is chosen to represent each such SH. These sequences are released (https://unite.ut.ee/repository.php) for use by the scientific community in, for example, local sequence similarity searches and next-generation sequencing analysis pipelines. The system and the data are updated automatically as the number of public fungal ITS sequences grows.",
             "mimetype": "text/plain",
             "lang": "en"
         }
diff --git a/python/tests/files/datacite/datacite_result_08.json b/python/tests/files/datacite/datacite_result_08.json
index cc0e968b..46ef5b44 100644
--- a/python/tests/files/datacite/datacite_result_08.json
+++ b/python/tests/files/datacite/datacite_result_08.json
@@ -46,7 +46,7 @@
     "refs": [],
     "abstracts": [
         {
-            "content": "International society recognizes that the scarcity of fresh water is increasing and farming sectors suffer from lack of irrigation water. However, if we look at this issue with a framework of relative factor endowment, a different view will arise. In emerging states with rapid industrialization and labor migration, labor scarcity increases at a faster pace than that of irrigation water. Using the historical review of Japan\u2019s irrigation policies as well as the case studies of India and China, this paper shows that the introduction of policies which do not reflect the actual relative resource scarcity may mislead the development path. We argue that under increasing relative labor scarcity it is important to realize the substitution of capital for labor for surface irrigation system management and that the substitution needs public support because the service of surface irrigation system has some externalities. Through this argument, this paper also intends to shed the light back to the role of the state for local resource management which seems to be unfairly undervalued since the boom of community participatory approach in the 1980s.",
+            "content": "International society recognizes that the scarcity of fresh water is increasing and farming sectors suffer from lack of irrigation water. However, if we look at this issue with a framework of relative factor endowment, a different view will arise. In emerging states with rapid industrialization and labor migration, labor scarcity increases at a faster pace than that of irrigation water. Using the historical review of Japan's irrigation policies as well as the case studies of India and China, this paper shows that the introduction of policies which do not reflect the actual relative resource scarcity may mislead the development path. We argue that under increasing relative labor scarcity it is important to realize the substitution of capital for labor for surface irrigation system management and that the substitution needs public support because the service of surface irrigation system has some externalities. Through this argument, this paper also intends to shed the light back to the role of the state for local resource management which seems to be unfairly undervalued since the boom of community participatory approach in the 1980s.",
             "mimetype": "text/plain",
             "lang": "en"
         }
diff --git a/python/tests/files/datacite/datacite_result_14.json b/python/tests/files/datacite/datacite_result_14.json
index 4521f891..c3719aeb 100644
--- a/python/tests/files/datacite/datacite_result_14.json
+++ b/python/tests/files/datacite/datacite_result_14.json
@@ -103,7 +103,7 @@
     "refs": [],
     "abstracts": [
         {
-            "content": "An entry from the Cambridge Structural Database, the world\u2019s repository for small molecule crystal structures. The entry contains experimental data from a crystal diffraction study. The deposited dataset for this entry is freely available from the CCDC and typically includes 3D coordinates, cell parameters, space group, experimental conditions and quality measures.",
+            "content": "An entry from the Cambridge Structural Database, the world's repository for small molecule crystal structures. The entry contains experimental data from a crystal diffraction study. The deposited dataset for this entry is freely available from the CCDC and typically includes 3D coordinates, cell parameters, space group, experimental conditions and quality measures.",
             "mimetype": "text/plain",
             "lang": "en"
         }
-- 
cgit v1.2.3


From 171c4ae9f48984438e59bf521b3ec9dd78ce6d3d Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Mon, 6 Jan 2020 22:25:26 +0100
Subject: datacite: indicate mismatched file in test

---
 python/tests/import_datacite.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'python/tests')

diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index 881452ed..9ee479e8 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -297,7 +297,7 @@ def test_datacite_conversions(datacite_importer):
         with open(dst, 'r') as f:
            expected = json.loads(f.read())
 
-        assert result == expected
+        assert result == expected, 'output mismatch in {}'.format(dst)
 
 def test_index_form_to_display_name():
     Case = collections.namedtuple('Case', 'input output')
-- 
cgit v1.2.3


From ff37b97e4bbf642efbd830111fe3dbd45ae56dad Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Mon, 6 Jan 2020 22:25:53 +0100
Subject: datacite: include month in extra

> include release_month as a top-level extra field [...] to
auto-populate the schema field from that
---
 python/fatcat_tools/importers/datacite.py           | 2 ++
 python/tests/files/datacite/datacite_result_00.json | 3 ++-
 python/tests/files/datacite/datacite_result_05.json | 3 ++-
 python/tests/files/datacite/datacite_result_12.json | 2 +-
 python/tests/files/datacite/datacite_result_13.json | 2 +-
 python/tests/files/datacite/datacite_result_18.json | 2 +-
 python/tests/files/datacite/datacite_result_19.json | 2 +-
 python/tests/files/datacite/datacite_result_20.json | 2 +-
 python/tests/files/datacite/datacite_result_21.json | 2 +-
 python/tests/files/datacite/datacite_result_22.json | 2 +-
 python/tests/files/datacite/datacite_result_23.json | 2 +-
 python/tests/files/datacite/datacite_result_24.json | 2 +-
 12 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'python/tests')

diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index f9d1b49a..a673f00b 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -603,6 +603,8 @@ class DataciteImporter(EntityImporter):
             extra_datacite['license'] = license_extra
         if attributes.get('subjects'):
             extra_datacite['subjects'] = attributes['subjects']
+        if release_month:
+            extra_datacite['month'] = release_month
 
         # Include certain relations from relatedIdentifiers. Keeping the
         # original structure of data here, which is a list of dicts, with
diff --git a/python/tests/files/datacite/datacite_result_00.json b/python/tests/files/datacite/datacite_result_00.json
index a4b28076..ad917b92 100644
--- a/python/tests/files/datacite/datacite_result_00.json
+++ b/python/tests/files/datacite/datacite_result_00.json
@@ -2,6 +2,7 @@
     "extra": {
         "container_name": "Journal of Chemical Crystallography",
         "datacite": {
+            "month": 5,
             "license": [
                 {
                     "rightsUri": "http://www.springer.com/tdm"
@@ -84,4 +85,4 @@
         }
     ],
     "abstracts": []
-}
\ No newline at end of file
+}
diff --git a/python/tests/files/datacite/datacite_result_05.json b/python/tests/files/datacite/datacite_result_05.json
index 1840884e..cea2a25c 100644
--- a/python/tests/files/datacite/datacite_result_05.json
+++ b/python/tests/files/datacite/datacite_result_05.json
@@ -6,7 +6,8 @@
                     "rights": "Attribution-NonCommercial (CC BY-NC)",
                     "rightsUri": "http://creativecommons.org/licenses/by-nc/4.0"
                 }
-            ]
+            ],
+            "month": 10
         }
     },
     "title": "SH409843.07FU",
diff --git a/python/tests/files/datacite/datacite_result_12.json b/python/tests/files/datacite/datacite_result_12.json
index 5dbcd8d0..646299cf 100644
--- a/python/tests/files/datacite/datacite_result_12.json
+++ b/python/tests/files/datacite/datacite_result_12.json
@@ -1,5 +1,5 @@
 {
-    "extra": {"datacite": {}},
+    "extra": {"datacite": {"month": 6}},
     "title": "Anthropometric and Physiological Profile of Mixed Martial Art Athletes: A Brief Review",
     "release_type": "article-journal",
     "release_stage": "published",
diff --git a/python/tests/files/datacite/datacite_result_13.json b/python/tests/files/datacite/datacite_result_13.json
index 2509f27e..fea722c7 100644
--- a/python/tests/files/datacite/datacite_result_13.json
+++ b/python/tests/files/datacite/datacite_result_13.json
@@ -1,5 +1,5 @@
 {
-    "extra": {"datacite": {}},
+    "extra": {"datacite": {"month": 10}},
     "title": "[M\u00fcssen wir des Gl\u00fccks uns sch\u00e4men?]",
     "release_type": "article-journal",
     "release_stage": "published",
diff --git a/python/tests/files/datacite/datacite_result_18.json b/python/tests/files/datacite/datacite_result_18.json
index d0b53222..6599fe08 100644
--- a/python/tests/files/datacite/datacite_result_18.json
+++ b/python/tests/files/datacite/datacite_result_18.json
@@ -1,5 +1,5 @@
 {
-    "extra": {"datacite": {}},
+    "extra": {"datacite": {"month": 8}},
     "title": "Eastern questionnaire, answer sheet for Interviewee 53215, page 064",
     "release_type": "article",
     "release_stage": "published",
diff --git a/python/tests/files/datacite/datacite_result_19.json b/python/tests/files/datacite/datacite_result_19.json
index 55b43684..5598ccee 100644
--- a/python/tests/files/datacite/datacite_result_19.json
+++ b/python/tests/files/datacite/datacite_result_19.json
@@ -1,5 +1,5 @@
 {
-    "extra": {"datacite": {}},
+    "extra": {"datacite": {"month": 8}},
     "title": "Eastern questionnaire, answer sheet for Interviewee 55236, page 092",
     "release_type": "article",
     "release_stage": "published",
diff --git a/python/tests/files/datacite/datacite_result_20.json b/python/tests/files/datacite/datacite_result_20.json
index 48063d9d..ec2dfc38 100644
--- a/python/tests/files/datacite/datacite_result_20.json
+++ b/python/tests/files/datacite/datacite_result_20.json
@@ -1,5 +1,5 @@
 {
-    "extra": {"datacite": {}},
+    "extra": {"datacite": {"month": 8}},
     "title": "<h1>Eastern questionnaire</h1>",
     "release_type": "article",
     "release_stage": "published",
diff --git a/python/tests/files/datacite/datacite_result_21.json b/python/tests/files/datacite/datacite_result_21.json
index 99dcad1b..b5e2207a 100644
--- a/python/tests/files/datacite/datacite_result_21.json
+++ b/python/tests/files/datacite/datacite_result_21.json
@@ -1,5 +1,5 @@
 {
-    "extra": {"datacite": {}},
+    "extra": {"datacite": {"month": 8}},
     "title": "ABC",
     "release_type": "article",
     "release_stage": "published",
diff --git a/python/tests/files/datacite/datacite_result_22.json b/python/tests/files/datacite/datacite_result_22.json
index 30d75a3d..bd1290c2 100644
--- a/python/tests/files/datacite/datacite_result_22.json
+++ b/python/tests/files/datacite/datacite_result_22.json
@@ -1,5 +1,5 @@
 {
-    "extra": {"datacite": {}},
+    "extra": {"datacite": {"month": 8}},
     "title": "ABC",
     "release_type": "article",
     "release_stage": "published",
diff --git a/python/tests/files/datacite/datacite_result_23.json b/python/tests/files/datacite/datacite_result_23.json
index f79053df..599d1b37 100644
--- a/python/tests/files/datacite/datacite_result_23.json
+++ b/python/tests/files/datacite/datacite_result_23.json
@@ -1,5 +1,5 @@
 {
-    "extra": {"datacite": {}},
+    "extra": {"datacite": {"month": 8}},
     "title": "ABC",
     "release_type": "article",
     "release_stage": "published",
diff --git a/python/tests/files/datacite/datacite_result_24.json b/python/tests/files/datacite/datacite_result_24.json
index a7fc59ba..a3649867 100644
--- a/python/tests/files/datacite/datacite_result_24.json
+++ b/python/tests/files/datacite/datacite_result_24.json
@@ -1,5 +1,5 @@
 {
-    "extra": {"datacite": {}},
+    "extra": {"datacite": {"month": 8}},
     "title": "ABC",
     "subtitle": "DEF",
     "release_type": "article",
-- 
cgit v1.2.3


From d38dda53dd29024c8c855c64dfbb1529d0aaac83 Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Mon, 6 Jan 2020 22:30:20 +0100
Subject: datacite: month field should be top-level

---
 python/fatcat_tools/importers/datacite.py           | 4 ++--
 python/tests/files/datacite/datacite_result_00.json | 4 ++--
 python/tests/files/datacite/datacite_result_05.json | 6 +++---
 python/tests/files/datacite/datacite_result_12.json | 2 +-
 python/tests/files/datacite/datacite_result_13.json | 2 +-
 python/tests/files/datacite/datacite_result_18.json | 2 +-
 python/tests/files/datacite/datacite_result_19.json | 2 +-
 python/tests/files/datacite/datacite_result_20.json | 2 +-
 python/tests/files/datacite/datacite_result_21.json | 2 +-
 python/tests/files/datacite/datacite_result_22.json | 2 +-
 python/tests/files/datacite/datacite_result_23.json | 2 +-
 python/tests/files/datacite/datacite_result_24.json | 2 +-
 12 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'python/tests')

diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index a673f00b..1cee6db3 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -603,8 +603,6 @@ class DataciteImporter(EntityImporter):
             extra_datacite['license'] = license_extra
         if attributes.get('subjects'):
             extra_datacite['subjects'] = attributes['subjects']
-        if release_month:
-            extra_datacite['month'] = release_month
 
         # Include certain relations from relatedIdentifiers. Keeping the
         # original structure of data here, which is a list of dicts, with
@@ -630,6 +628,8 @@ class DataciteImporter(EntityImporter):
 
         # Always include datacite key, even if value is empty (dict).
         extra['datacite'] = extra_datacite
+        if release_month:
+            extra['month'] = release_month
 
         extids = self.lookup_ext_ids(doi=doi)
 
diff --git a/python/tests/files/datacite/datacite_result_00.json b/python/tests/files/datacite/datacite_result_00.json
index ad917b92..e76aa391 100644
--- a/python/tests/files/datacite/datacite_result_00.json
+++ b/python/tests/files/datacite/datacite_result_00.json
@@ -2,7 +2,6 @@
     "extra": {
         "container_name": "Journal of Chemical Crystallography",
         "datacite": {
-            "month": 5,
             "license": [
                 {
                     "rightsUri": "http://www.springer.com/tdm"
@@ -16,7 +15,8 @@
                     "relatedIdentifierType": "ISSN"
                 }
             ]
-        }
+        },
+        "month": 5
     },
     "title": "Synthesis and Crystal Structure of a Compound with Two Conformational Isomers: N-(2-methylbenzoyl)-N\u2032-(4-nitrophenyl)thiourea",
     "release_type": "article-journal",
diff --git a/python/tests/files/datacite/datacite_result_05.json b/python/tests/files/datacite/datacite_result_05.json
index cea2a25c..1352fe29 100644
--- a/python/tests/files/datacite/datacite_result_05.json
+++ b/python/tests/files/datacite/datacite_result_05.json
@@ -6,9 +6,9 @@
                     "rights": "Attribution-NonCommercial (CC BY-NC)",
                     "rightsUri": "http://creativecommons.org/licenses/by-nc/4.0"
                 }
-            ],
-            "month": 10
-        }
+            ]
+        },
+        "month": 10
     },
     "title": "SH409843.07FU",
     "subtitle": "Gomphales",
diff --git a/python/tests/files/datacite/datacite_result_12.json b/python/tests/files/datacite/datacite_result_12.json
index 646299cf..c3a9071c 100644
--- a/python/tests/files/datacite/datacite_result_12.json
+++ b/python/tests/files/datacite/datacite_result_12.json
@@ -1,5 +1,5 @@
 {
-    "extra": {"datacite": {"month": 6}},
+    "extra": {"datacite": {}, "month": 6},
     "title": "Anthropometric and Physiological Profile of Mixed Martial Art Athletes: A Brief Review",
     "release_type": "article-journal",
     "release_stage": "published",
diff --git a/python/tests/files/datacite/datacite_result_13.json b/python/tests/files/datacite/datacite_result_13.json
index fea722c7..d6ed2985 100644
--- a/python/tests/files/datacite/datacite_result_13.json
+++ b/python/tests/files/datacite/datacite_result_13.json
@@ -1,5 +1,5 @@
 {
-    "extra": {"datacite": {"month": 10}},
+    "extra": {"datacite": {}, "month": 10},
     "title": "[M\u00fcssen wir des Gl\u00fccks uns sch\u00e4men?]",
     "release_type": "article-journal",
     "release_stage": "published",
diff --git a/python/tests/files/datacite/datacite_result_18.json b/python/tests/files/datacite/datacite_result_18.json
index 6599fe08..fb109de2 100644
--- a/python/tests/files/datacite/datacite_result_18.json
+++ b/python/tests/files/datacite/datacite_result_18.json
@@ -1,5 +1,5 @@
 {
-    "extra": {"datacite": {"month": 8}},
+    "extra": {"datacite": {}, "month": 8},
     "title": "Eastern questionnaire, answer sheet for Interviewee 53215, page 064",
     "release_type": "article",
     "release_stage": "published",
diff --git a/python/tests/files/datacite/datacite_result_19.json b/python/tests/files/datacite/datacite_result_19.json
index 5598ccee..85bada92 100644
--- a/python/tests/files/datacite/datacite_result_19.json
+++ b/python/tests/files/datacite/datacite_result_19.json
@@ -1,5 +1,5 @@
 {
-    "extra": {"datacite": {"month": 8}},
+    "extra": {"datacite": {}, "month": 8},
     "title": "Eastern questionnaire, answer sheet for Interviewee 55236, page 092",
     "release_type": "article",
     "release_stage": "published",
diff --git a/python/tests/files/datacite/datacite_result_20.json b/python/tests/files/datacite/datacite_result_20.json
index ec2dfc38..891cb41e 100644
--- a/python/tests/files/datacite/datacite_result_20.json
+++ b/python/tests/files/datacite/datacite_result_20.json
@@ -1,5 +1,5 @@
 {
-    "extra": {"datacite": {"month": 8}},
+    "extra": {"datacite": {}, "month": 8},
     "title": "<h1>Eastern questionnaire</h1>",
     "release_type": "article",
     "release_stage": "published",
diff --git a/python/tests/files/datacite/datacite_result_21.json b/python/tests/files/datacite/datacite_result_21.json
index b5e2207a..73df8216 100644
--- a/python/tests/files/datacite/datacite_result_21.json
+++ b/python/tests/files/datacite/datacite_result_21.json
@@ -1,5 +1,5 @@
 {
-    "extra": {"datacite": {"month": 8}},
+    "extra": {"datacite": {}, "month": 8},
     "title": "ABC",
     "release_type": "article",
     "release_stage": "published",
diff --git a/python/tests/files/datacite/datacite_result_22.json b/python/tests/files/datacite/datacite_result_22.json
index bd1290c2..97f35da5 100644
--- a/python/tests/files/datacite/datacite_result_22.json
+++ b/python/tests/files/datacite/datacite_result_22.json
@@ -1,5 +1,5 @@
 {
-    "extra": {"datacite": {"month": 8}},
+    "extra": {"datacite": {}, "month": 8},
     "title": "ABC",
     "release_type": "article",
     "release_stage": "published",
diff --git a/python/tests/files/datacite/datacite_result_23.json b/python/tests/files/datacite/datacite_result_23.json
index 599d1b37..93385c70 100644
--- a/python/tests/files/datacite/datacite_result_23.json
+++ b/python/tests/files/datacite/datacite_result_23.json
@@ -1,5 +1,5 @@
 {
-    "extra": {"datacite": {"month": 8}},
+    "extra": {"datacite": {}, "month": 8},
     "title": "ABC",
     "release_type": "article",
     "release_stage": "published",
diff --git a/python/tests/files/datacite/datacite_result_24.json b/python/tests/files/datacite/datacite_result_24.json
index a3649867..cb08e67b 100644
--- a/python/tests/files/datacite/datacite_result_24.json
+++ b/python/tests/files/datacite/datacite_result_24.json
@@ -1,5 +1,5 @@
 {
-    "extra": {"datacite": {"month": 8}},
+    "extra": {"datacite": {}, "month": 8},
     "title": "ABC",
     "subtitle": "DEF",
     "release_type": "article",
-- 
cgit v1.2.3


From f9c711f77bba992e6e9e1d75929d35e8da828f61 Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Tue, 7 Jan 2020 15:20:25 +0100
Subject: datacite: adding datacite-specific extra metadata

* attributes.metadataVersion
* attributes.schemaVersion
* attributes.version (source dependent values, follows suggestions in
https://schema.datacite.org/meta/kernel-4.3/doc/DataCite-MetadataKernel_v4.3.pdf#page=26,
but values vary)

Furthermore:

* attributes.types.resourceTypeGeneral
* attributes.types.resourceType
---
 python/fatcat_tools/importers/datacite.py          |   28 +
 python/tests/files/datacite/datacite_doc_20.json   |   77 +-
 python/tests/files/datacite/datacite_doc_21.json   |   77 +-
 python/tests/files/datacite/datacite_doc_22.json   |   81 +-
 python/tests/files/datacite/datacite_doc_23.json   |   81 +-
 python/tests/files/datacite/datacite_doc_24.json   |   89 +-
 .../tests/files/datacite/datacite_result_00.json   |  168 ++--
 .../tests/files/datacite/datacite_result_01.json   |   62 +-
 .../tests/files/datacite/datacite_result_02.json   |   70 +-
 .../tests/files/datacite/datacite_result_03.json   |   38 +-
 .../tests/files/datacite/datacite_result_04.json   |   61 +-
 .../tests/files/datacite/datacite_result_05.json   | 1060 ++++++++++----------
 .../tests/files/datacite/datacite_result_06.json   |   49 +-
 .../tests/files/datacite/datacite_result_07.json   |  128 +--
 .../tests/files/datacite/datacite_result_08.json   |   97 +-
 .../tests/files/datacite/datacite_result_09.json   |   69 +-
 .../tests/files/datacite/datacite_result_10.json   |   61 +-
 .../tests/files/datacite/datacite_result_11.json   |   44 +-
 .../tests/files/datacite/datacite_result_12.json   |   87 +-
 .../tests/files/datacite/datacite_result_13.json   |   58 +-
 .../tests/files/datacite/datacite_result_14.json   |  189 ++--
 .../tests/files/datacite/datacite_result_15.json   |   47 +-
 .../tests/files/datacite/datacite_result_16.json   |   59 +-
 .../tests/files/datacite/datacite_result_17.json   |   41 +-
 .../tests/files/datacite/datacite_result_18.json   |   30 +-
 .../tests/files/datacite/datacite_result_19.json   |   30 +-
 .../tests/files/datacite/datacite_result_20.json   |   27 +-
 .../tests/files/datacite/datacite_result_21.json   |   29 +-
 .../tests/files/datacite/datacite_result_22.json   |   43 +-
 .../tests/files/datacite/datacite_result_23.json   |   43 +-
 .../tests/files/datacite/datacite_result_24.json   |   43 +-
 31 files changed, 1598 insertions(+), 1468 deletions(-)

(limited to 'python/tests')

diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index d7fbd269..c2725aeb 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -607,6 +607,25 @@ class DataciteImporter(EntityImporter):
         if attributes.get('subjects'):
             extra_datacite['subjects'] = attributes['subjects']
 
+        # Include version information.
+        metadata_version = attributes.get('metadataVersion') or ''
+        schema_version = attributes.get('schemaVersion') or ''
+
+        if metadata_version:
+            extra_datacite['metadataVersion'] = metadata_version
+        if schema_version:
+            extra_datacite['schemaVersion'] = schema_version
+
+        # Include resource types.
+        types = attributes.get('types', {}) or {}
+        resource_type = types.get('resourceType', '') or ''
+        resource_type_general = types.get('resourceTypeGeneral', '') or ''
+
+        if resource_type:
+            extra_datacite['resourceType'] = resource_type
+        if resource_type_general:
+            extra_datacite['resourceTypeGeneral'] = resource_type_general
+
         # Include certain relations from relatedIdentifiers. Keeping the
         # original structure of data here, which is a list of dicts, with
         # relation type, identifer and identifier type (mostly).
@@ -625,6 +644,14 @@ class DataciteImporter(EntityImporter):
 
         extra = dict()
 
+        # Version values vary by source; examples seen: "1.0.0", "v1.305.2019",
+        # "Final", "v1.0.0", "v0.3.0", "1", "0.19.0", "3.1", "v1.1", "{version}",
+        # "4.0", "10329", "11672", "11555", "v1.4.5", "2", "V1", "v3.0", "v0",
+        # "v0.6", "11124", "v1.0-beta", "1st Edition", "20191024", "v2.0.0",
+        # "v0.9.3", "10149", "2.0", null, "v0.1.1", "3.0", "1.0", "3", "v1.12.2",
+        # "20191018", "v0.3.1", "v1.0", "10161", "10010691", "10780", "Presentación"
+        version = attributes.get('version')
+
         # top-level extra keys
         if not container_id and container_name:
             extra['container_name'] = container_name
@@ -666,6 +693,7 @@ class DataciteImporter(EntityImporter):
             refs=refs,
             extra=extra,
             license_slug=license_slug,
+            version=version,
         )
         return re
 
diff --git a/python/tests/files/datacite/datacite_doc_20.json b/python/tests/files/datacite/datacite_doc_20.json
index 964e2cbb..cc6cc1fb 100644
--- a/python/tests/files/datacite/datacite_doc_20.json
+++ b/python/tests/files/datacite/datacite_doc_20.json
@@ -1,42 +1,41 @@
 {
-    "attributes": {
-      "doi": "10.7916/d86x0cg1",
-      "creators": [
-        {
-          "name": "(:Unav)",
-          "affiliation": [],
-          "nameIdentifiers": []
-        }
-      ],
-      "titles": [
-        {
-          "title": "<h1>Eastern questionnaire</h1>"
-        }
-      ],
-      "publicationYear": 2017,
-      "dates": [
-        {
-          "date": "2017-08-24",
-          "dateType": "Created"
-        },
-        {
-          "date": "2019-08-04",
-          "dateType": "Updated"
-        },
-        {
-          "date": "2017",
-          "dateType": "Issued"
-        }
-      ],
-      "language": null,
-      "types": {
-        "ris": "GEN",
-        "bibtex": "misc",
-        "citeproc": "article",
-        "schemaOrg": "CreativeWork"
+  "attributes": {
+    "doi": "10.7916/d86x0cg1",
+    "creators": [
+      {
+        "name": "(:Unav)",
+        "affiliation": [],
+        "nameIdentifiers": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "<h1>Eastern questionnaire</h1>"
+      }
+    ],
+    "publicationYear": 2017,
+    "dates": [
+      {
+        "date": "2017-08-24",
+        "dateType": "Created"
       },
-      "isActive": true,
-      "state": "findable"
-    }
+      {
+        "date": "2019-08-04",
+        "dateType": "Updated"
+      },
+      {
+        "date": "2017",
+        "dateType": "Issued"
+      }
+    ],
+    "language": null,
+    "types": {
+      "ris": "GEN",
+      "bibtex": "misc",
+      "citeproc": "article",
+      "schemaOrg": "CreativeWork"
+    },
+    "isActive": true,
+    "state": "findable"
   }
-  
\ No newline at end of file
+}
diff --git a/python/tests/files/datacite/datacite_doc_21.json b/python/tests/files/datacite/datacite_doc_21.json
index cae7f40f..04b196a6 100644
--- a/python/tests/files/datacite/datacite_doc_21.json
+++ b/python/tests/files/datacite/datacite_doc_21.json
@@ -1,42 +1,41 @@
 {
-    "attributes": {
-      "doi": "10.7916/d86x0cg1",
-      "creators": [
-        {
-          "name": "(:Unav)",
-          "affiliation": [],
-          "nameIdentifiers": []
-        }
-      ],
-      "titles": [
-        {
-          "title": "ABC"
-        }
-      ],
-      "publicationYear": 2017,
-      "language": "GERMAN",
-      "types": {
-        "ris": "GEN",
-        "bibtex": "misc",
-        "citeproc": "article",
-        "schemaOrg": "CreativeWork"
+  "attributes": {
+    "doi": "10.7916/d86x0cg1",
+    "creators": [
+      {
+        "name": "(:Unav)",
+        "affiliation": [],
+        "nameIdentifiers": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "ABC"
+      }
+    ],
+    "publicationYear": 2017,
+    "language": "GERMAN",
+    "types": {
+      "ris": "GEN",
+      "bibtex": "misc",
+      "citeproc": "article",
+      "schemaOrg": "CreativeWork"
+    },
+    "dates": [
+      {
+        "date": "2017-08-24",
+        "dateType": "Created"
       },
-      "dates": [
-        {
-          "date": "2017-08-24",
-          "dateType": "Created"
-        },
-        {
-          "date": "2019-08-04",
-          "dateType": "Updated"
-        },
-        {
-          "date": "2017",
-          "dateType": "Issued"
-        }
-      ],
-      "isActive": true,
-      "state": "findable"
-    }
+      {
+        "date": "2019-08-04",
+        "dateType": "Updated"
+      },
+      {
+        "date": "2017",
+        "dateType": "Issued"
+      }
+    ],
+    "isActive": true,
+    "state": "findable"
   }
-  
\ No newline at end of file
+}
diff --git a/python/tests/files/datacite/datacite_doc_22.json b/python/tests/files/datacite/datacite_doc_22.json
index 42448ddf..365b1361 100644
--- a/python/tests/files/datacite/datacite_doc_22.json
+++ b/python/tests/files/datacite/datacite_doc_22.json
@@ -1,44 +1,43 @@
 {
-    "attributes": {
-      "doi": "10.7916/d86x0cg1",
-      "creators": [
-        {
-          "name": "Anton Welch",
-          "affiliation": [
-            "Department of pataphysics"
-          ],
-          "nameIdentifiers": []
-        }
-      ],
-      "titles": [
-        {
-          "title": "ABC"
-        }
-      ],
-      "publicationYear": 2017,
-      "language": "GERMAN",
-      "types": {
-        "ris": "GEN",
-        "bibtex": "misc",
-        "citeproc": "article",
-        "schemaOrg": "CreativeWork"
+  "attributes": {
+    "doi": "10.7916/d86x0cg1",
+    "creators": [
+      {
+        "name": "Anton Welch",
+        "affiliation": [
+          "Department of pataphysics"
+        ],
+        "nameIdentifiers": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "ABC"
+      }
+    ],
+    "publicationYear": 2017,
+    "language": "GERMAN",
+    "types": {
+      "ris": "GEN",
+      "bibtex": "misc",
+      "citeproc": "article",
+      "schemaOrg": "CreativeWork"
+    },
+    "dates": [
+      {
+        "date": "2017-08-24",
+        "dateType": "Created"
       },
-      "dates": [
-        {
-          "date": "2017-08-24",
-          "dateType": "Created"
-        },
-        {
-          "date": "2019-08-04",
-          "dateType": "Updated"
-        },
-        {
-          "date": "2017",
-          "dateType": "Issued"
-        }
-      ],
-      "isActive": true,
-      "state": "findable"
-    }
+      {
+        "date": "2019-08-04",
+        "dateType": "Updated"
+      },
+      {
+        "date": "2017",
+        "dateType": "Issued"
+      }
+    ],
+    "isActive": true,
+    "state": "findable"
   }
-
+}
diff --git a/python/tests/files/datacite/datacite_doc_23.json b/python/tests/files/datacite/datacite_doc_23.json
index 1e5bcc3f..1dcdfc27 100644
--- a/python/tests/files/datacite/datacite_doc_23.json
+++ b/python/tests/files/datacite/datacite_doc_23.json
@@ -1,44 +1,43 @@
 {
-    "attributes": {
-      "doi": "10.7916/d86x0cg1\u2013xxx",
-      "creators": [
-        {
-          "name": "Anton Welch",
-          "affiliation": [
-            "Department of pataphysics"
-          ],
-          "nameIdentifiers": []
-        }
-      ],
-      "titles": [
-        {
-          "title": "ABC"
-        }
-      ],
-      "publicationYear": 2017,
-      "language": "GERMAN",
-      "types": {
-        "ris": "GEN",
-        "bibtex": "misc",
-        "citeproc": "article",
-        "schemaOrg": "CreativeWork"
+  "attributes": {
+    "doi": "10.7916/d86x0cg1–xxx",
+    "creators": [
+      {
+        "name": "Anton Welch",
+        "affiliation": [
+          "Department of pataphysics"
+        ],
+        "nameIdentifiers": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "ABC"
+      }
+    ],
+    "publicationYear": 2017,
+    "language": "GERMAN",
+    "types": {
+      "ris": "GEN",
+      "bibtex": "misc",
+      "citeproc": "article",
+      "schemaOrg": "CreativeWork"
+    },
+    "dates": [
+      {
+        "date": "2017-08-24",
+        "dateType": "Created"
       },
-      "dates": [
-        {
-          "date": "2017-08-24",
-          "dateType": "Created"
-        },
-        {
-          "date": "2019-08-04",
-          "dateType": "Updated"
-        },
-        {
-          "date": "2017",
-          "dateType": "Issued"
-        }
-      ],
-      "isActive": true,
-      "state": "findable"
-    }
+      {
+        "date": "2019-08-04",
+        "dateType": "Updated"
+      },
+      {
+        "date": "2017",
+        "dateType": "Issued"
+      }
+    ],
+    "isActive": true,
+    "state": "findable"
   }
-
+}
diff --git a/python/tests/files/datacite/datacite_doc_24.json b/python/tests/files/datacite/datacite_doc_24.json
index 6123350b..4ea6945f 100644
--- a/python/tests/files/datacite/datacite_doc_24.json
+++ b/python/tests/files/datacite/datacite_doc_24.json
@@ -1,48 +1,47 @@
 {
-    "attributes": {
-      "doi": "10.7916/d86x0cg1",
-      "creators": [
-        {
-          "name": "Anton Welch",
-          "affiliation": [
-            "Department of pataphysics"
-          ],
-          "nameIdentifiers": []
-        }
-      ],
-      "titles": [
-        {
-          "title": "ABC"
-        },
-        {
-          "title": "DEF",
-          "titleType": "Subtitle"
-        }
-      ],
-      "publicationYear": 2016,
-      "language": "DE-CH",
-      "types": {
-        "ris": "GEN",
-        "bibtex": "misc",
-        "citeproc": "article",
-        "schemaOrg": "CreativeWork"
+  "attributes": {
+    "doi": "10.7916/d86x0cg1",
+    "creators": [
+      {
+        "name": "Anton Welch",
+        "affiliation": [
+          "Department of pataphysics"
+        ],
+        "nameIdentifiers": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "ABC"
       },
-      "dates": [
-        {
-          "date": "2017-08-24",
-          "dateType": "Created"
-        },
-        {
-          "date": "2019-08-04",
-          "dateType": "Updated"
-        },
-        {
-          "date": "2017",
-          "dateType": "Issued"
-        }
-      ],
-      "isActive": true,
-      "state": "findable"
-    }
+      {
+        "title": "DEF",
+        "titleType": "Subtitle"
+      }
+    ],
+    "publicationYear": 2016,
+    "language": "DE-CH",
+    "types": {
+      "ris": "GEN",
+      "bibtex": "misc",
+      "citeproc": "article",
+      "schemaOrg": "CreativeWork"
+    },
+    "dates": [
+      {
+        "date": "2017-08-24",
+        "dateType": "Created"
+      },
+      {
+        "date": "2019-08-04",
+        "dateType": "Updated"
+      },
+      {
+        "date": "2017",
+        "dateType": "Issued"
+      }
+    ],
+    "isActive": true,
+    "state": "findable"
   }
-
+}
diff --git a/python/tests/files/datacite/datacite_result_00.json b/python/tests/files/datacite/datacite_result_00.json
index e76aa391..28da5397 100644
--- a/python/tests/files/datacite/datacite_result_00.json
+++ b/python/tests/files/datacite/datacite_result_00.json
@@ -1,88 +1,92 @@
 {
-    "extra": {
-        "container_name": "Journal of Chemical Crystallography",
-        "datacite": {
-            "license": [
-                {
-                    "rightsUri": "http://www.springer.com/tdm"
-                }
-            ],
-            "relations": [
-                {
-                    "relationType": "IsPartOf",
-                    "relatedIdentifier": "1074-1542",
-                    "resourceTypeGeneral": "Collection",
-                    "relatedIdentifierType": "ISSN"
-                }
-            ]
-        },
-        "month": 5
-    },
-    "title": "Synthesis and Crystal Structure of a Compound with Two Conformational Isomers: N-(2-methylbenzoyl)-N\u2032-(4-nitrophenyl)thiourea",
-    "release_type": "article-journal",
-    "release_stage": "published",
-    "release_date": "2019-05-31",
-    "release_year": 2019,
-    "ext_ids": {
-        "doi": "10.1007/s10870-008-9413-z"
-    },
-    "volume": "38",
-    "issue": "12",
-    "pages": "927-930",
-    "publisher": "Springer Science and Business Media LLC",
-    "contribs": [
+  "extra": {
+    "container_name": "Journal of Chemical Crystallography",
+    "datacite": {
+      "license": [
         {
-            "index": 0,
-            "raw_name": "Qian-Jin Li",
-            "given_name": "Qian-Jin",
-            "surname": "Li",
-            "role": "author"
-        },
-        {
-            "index": 1,
-            "raw_name": "Chun-Long Yang",
-            "given_name": "Chun-Long",
-            "surname": "Yang",
-            "role": "author"
+          "rightsUri": "http://www.springer.com/tdm"
         }
-    ],
-    "refs": [
-        {
-            "index": 0,
-            "extra": {
-                "doi": "10.1016/j.bmcl.2005.09.033"
-            }
-        },
-        {
-            "index": 1,
-            "extra": {
-                "doi": "10.1016/s0022-1139(02)00330-5"
-            }
-        },
-        {
-            "index": 2,
-            "extra": {
-                "doi": "10.1016/s0010-8545(01)00337-x"
-            }
-        },
-        {
-            "index": 3,
-            "extra": {
-                "doi": "10.1016/j.tetlet.2005.06.135"
-            }
-        },
-        {
-            "index": 4,
-            "extra": {
-                "doi": "10.1039/p298700000s1"
-            }
-        },
+      ],
+      "relations": [
         {
-            "index": 5,
-            "extra": {
-                "doi": "10.1002/anie.199515551"
-            }
+          "relationType": "IsPartOf",
+          "relatedIdentifier": "1074-1542",
+          "resourceTypeGeneral": "Collection",
+          "relatedIdentifierType": "ISSN"
         }
-    ],
-    "abstracts": []
+      ],
+      "resourceType": "JournalArticle",
+      "resourceTypeGeneral": "Text",
+      "schemaVersion": "http://datacite.org/schema/kernel-4",
+      "metadataVersion": 1
+    },
+    "month": 5
+  },
+  "title": "Synthesis and Crystal Structure of a Compound with Two Conformational Isomers: N-(2-methylbenzoyl)-N′-(4-nitrophenyl)thiourea",
+  "release_type": "article-journal",
+  "release_stage": "published",
+  "release_date": "2019-05-31",
+  "release_year": 2019,
+  "ext_ids": {
+    "doi": "10.1007/s10870-008-9413-z"
+  },
+  "volume": "38",
+  "issue": "12",
+  "pages": "927-930",
+  "publisher": "Springer Science and Business Media LLC",
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "Qian-Jin Li",
+      "given_name": "Qian-Jin",
+      "surname": "Li",
+      "role": "author"
+    },
+    {
+      "index": 1,
+      "raw_name": "Chun-Long Yang",
+      "given_name": "Chun-Long",
+      "surname": "Yang",
+      "role": "author"
+    }
+  ],
+  "refs": [
+    {
+      "index": 0,
+      "extra": {
+        "doi": "10.1016/j.bmcl.2005.09.033"
+      }
+    },
+    {
+      "index": 1,
+      "extra": {
+        "doi": "10.1016/s0022-1139(02)00330-5"
+      }
+    },
+    {
+      "index": 2,
+      "extra": {
+        "doi": "10.1016/s0010-8545(01)00337-x"
+      }
+    },
+    {
+      "index": 3,
+      "extra": {
+        "doi": "10.1016/j.tetlet.2005.06.135"
+      }
+    },
+    {
+      "index": 4,
+      "extra": {
+        "doi": "10.1039/p298700000s1"
+      }
+    },
+    {
+      "index": 5,
+      "extra": {
+        "doi": "10.1002/anie.199515551"
+      }
+    }
+  ],
+  "abstracts": []
 }
diff --git a/python/tests/files/datacite/datacite_result_01.json b/python/tests/files/datacite/datacite_result_01.json
index 46be2515..956357b8 100644
--- a/python/tests/files/datacite/datacite_result_01.json
+++ b/python/tests/files/datacite/datacite_result_01.json
@@ -1,32 +1,36 @@
 {
-    "extra": {
-        "datacite": {
-            "license": [
-                {
-                    "lang": "de",
-                    "rights": "Standard (Creative Commons - Namensnennung - Weitergabe unter gleichen Bedingungen) - http://www.ub.uni-heidelberg.de/helios/digi/nutzung/Welcome.html"
-                }
-            ]
-        }
-    },
-    "title": "Ferdinand Gaillard, [1]: n\u00e9 \u00e0 Paris le 16 janvier 1834, mort \u00e0 Paris le 19 janvier 1887",
-    "release_type": "article-journal",
-    "release_stage": "published",
-    "release_year": 1887,
-    "ext_ids": {
-        "doi": "10.11588/diglit.25558.39"
-    },
-    "publisher": "University Library Heidelberg",
-    "language": "fr",
-    "contribs": [
+  "extra": {
+    "datacite": {
+      "license": [
         {
-            "index": 0,
-            "raw_name": "G. Dargenty",
-            "given_name": "G.",
-            "surname": "Dargenty",
-            "role": "author"
+          "lang": "de",
+          "rights": "Standard (Creative Commons - Namensnennung - Weitergabe unter gleichen Bedingungen) - http://www.ub.uni-heidelberg.de/helios/digi/nutzung/Welcome.html"
         }
-    ],
-    "refs": [],
-    "abstracts": []
-}
\ No newline at end of file
+      ],
+      "metadataVersion": 4,
+      "resourceType": "DigitalisatDigital copy",
+      "resourceTypeGeneral": "Text",
+      "schemaVersion": "http://datacite.org/schema/kernel-4"
+    }
+  },
+  "title": "Ferdinand Gaillard, [1]: né à Paris le 16 janvier 1834, mort à Paris le 19 janvier 1887",
+  "release_type": "article-journal",
+  "release_stage": "published",
+  "release_year": 1887,
+  "ext_ids": {
+    "doi": "10.11588/diglit.25558.39"
+  },
+  "publisher": "University Library Heidelberg",
+  "language": "fr",
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "G. Dargenty",
+      "given_name": "G.",
+      "surname": "Dargenty",
+      "role": "author"
+    }
+  ],
+  "refs": [],
+  "abstracts": []
+}
diff --git a/python/tests/files/datacite/datacite_result_02.json b/python/tests/files/datacite/datacite_result_02.json
index bdcb4951..322baf59 100644
--- a/python/tests/files/datacite/datacite_result_02.json
+++ b/python/tests/files/datacite/datacite_result_02.json
@@ -1,36 +1,40 @@
 {
-    "extra": {
-        "datacite": {
-            "license": [
-                {
-                    "lang": "de",
-                    "rights": "Creative Commons - Namensnennung - Weitergabe unter gleichen Bedingungen - https://creativecommons.org/licenses/by-sa/3.0/de/"
-                },
-                {
-                    "lang": "en",
-                    "rights": "Creative Commons - Namensnennung - Weitergabe unter gleichen Bedingungen - https://creativecommons.org/licenses/by-sa/3.0/"
-                }
-            ]
-        }
-    },
-    "title": "Solinger Schwertschmiede-Familien, [4]",
-    "release_type": "article-journal",
-    "release_stage": "published",
-    "release_year": 1897,
-    "ext_ids": {
-        "doi": "10.11588/diglit.37715.57"
-    },
-    "publisher": "University Library Heidelberg",
-    "language": "de",
-    "contribs": [
+  "extra": {
+    "datacite": {
+      "license": [
+        {
+          "lang": "de",
+          "rights": "Creative Commons - Namensnennung - Weitergabe unter gleichen Bedingungen - https://creativecommons.org/licenses/by-sa/3.0/de/"
+        },
         {
-            "index": 0,
-            "raw_name": "Albert Weyersberg",
-            "given_name": "Albert",
-            "surname": "Weyersberg",
-            "role": "author"
+          "lang": "en",
+          "rights": "Creative Commons - Namensnennung - Weitergabe unter gleichen Bedingungen - https://creativecommons.org/licenses/by-sa/3.0/"
         }
-    ],
-    "refs": [],
-    "abstracts": []
-}
\ No newline at end of file
+      ],
+      "metadataVersion": 2,
+      "resourceType": "DigitalisatDigital copy",
+      "resourceTypeGeneral": "Text",
+      "schemaVersion": "http://datacite.org/schema/kernel-4"
+    }
+  },
+  "title": "Solinger Schwertschmiede-Familien, [4]",
+  "release_type": "article-journal",
+  "release_stage": "published",
+  "release_year": 1897,
+  "ext_ids": {
+    "doi": "10.11588/diglit.37715.57"
+  },
+  "publisher": "University Library Heidelberg",
+  "language": "de",
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "Albert Weyersberg",
+      "given_name": "Albert",
+      "surname": "Weyersberg",
+      "role": "author"
+    }
+  ],
+  "refs": [],
+  "abstracts": []
+}
diff --git a/python/tests/files/datacite/datacite_result_03.json b/python/tests/files/datacite/datacite_result_03.json
index e8367e8f..41d8d4cd 100644
--- a/python/tests/files/datacite/datacite_result_03.json
+++ b/python/tests/files/datacite/datacite_result_03.json
@@ -1,19 +1,23 @@
 {
-    "extra": {"datacite": {}},
-    "title": "midterm ah30903",
-    "release_type": "article",
-    "release_year": 2016,
-    "ext_ids": {
-        "doi": "10.13140/rg.2.2.30434.53446"
-    },
-    "language": "ms",
-    "contribs": [
-        {
-            "index": 0,
-            "raw_name": "Mastura Yahya",
-            "role": "author"
-        }
-    ],
-    "refs": [],
-    "abstracts": []
+  "extra": {
+    "datacite": {
+      "schemaVersion": "http://datacite.org/schema/kernel-3"
+    }
+  },
+  "title": "midterm ah30903",
+  "release_type": "article",
+  "release_year": 2016,
+  "ext_ids": {
+    "doi": "10.13140/rg.2.2.30434.53446"
+  },
+  "language": "ms",
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "Mastura Yahya",
+      "role": "author"
+    }
+  ],
+  "refs": [],
+  "abstracts": []
 }
diff --git a/python/tests/files/datacite/datacite_result_04.json b/python/tests/files/datacite/datacite_result_04.json
index 5b956836..0976e40e 100644
--- a/python/tests/files/datacite/datacite_result_04.json
+++ b/python/tests/files/datacite/datacite_result_04.json
@@ -1,29 +1,36 @@
 {
-    "extra": {"datacite": {}},
-    "title": "On chain maps inducing isomorphisms in homology",
-    "release_type": "article-journal",
-    "release_stage": "published",
-    "release_year": 1973,
-    "ext_ids": {
-        "doi": "10.14288/1.0080520"
-    },
-    "publisher": "University of British Columbia",
-    "language": "en",
-    "contribs": [
-        {
-            "index": 0,
-            "raw_name": "Marc Andre Nicollerat",
-            "given_name": "Marc Andre",
-            "surname": "Nicollerat",
-            "role": "author"
-        }
-    ],
-    "refs": [],
-    "abstracts": [
-        {
-            "content": "Let A be an abelian category, I the full subcategory of A consisting of injective objects of A, and K(A) the category whose objects are cochain complexes of elements of A, and whose morphisms are homotopy classes of cochain maps.  In (5), lemma 4.6., p. 42, R. Hartshorne has proved that, under certain conditions, a cochain complex X\u02d9 \u03b5. |KA)| can be embedded in a complex I\u02d9 \u03b5. |K(I)| in such a way that I\u02d9 has the same cohomology as X\u02d9.  In Chapter I we show that the construction given in the two first parts of Hartshorne's Lemma is natural i.e. there exists a functor  J : K(A) \u2192 K(I) and a natural transformation [formula omitted]  (where E : K(I) \u2192 K(A) is the embedding functor) such that [formula omitted] is  injective and induces isomorphism in cohomology. The question whether the construction given in the third part of the lemma is functorial is still open.  We also prove that J is left adjoint to E, so that K(I) is a reflective subcategory of K(A).  In the special case where A is a category [formula omitted] of left A-modules, and [formula omitted] the category of cochain complexes in [formula omitted] and cochain maps (not homotopy classes), we prove the existence of a functor [formula omitted]  In Chapter II we study the natural homomorphism [formula omitted]   where A, B are rings, and M, L, N modules or chain complexes. In particular we give several sufficient conditions under which v is an isomorphism, or induces isomorphism in homology.  In the appendix we give a detailed proof of Hartshorne's Lemma. We think that this is useful, as no complete proof is, to our knowledge, to be found in the literature.",
-            "mimetype": "text/plain",
-            "lang": "en"
-        }
-    ]
+  "extra": {
+    "datacite": {
+      "metadataVersion": 5,
+      "resourceType": "Text",
+      "resourceTypeGeneral": "Text",
+      "schemaVersion": "http://datacite.org/schema/kernel-3"
+    }
+  },
+  "title": "On chain maps inducing isomorphisms in homology",
+  "release_type": "article-journal",
+  "release_stage": "published",
+  "release_year": 1973,
+  "ext_ids": {
+    "doi": "10.14288/1.0080520"
+  },
+  "publisher": "University of British Columbia",
+  "language": "en",
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "Marc Andre Nicollerat",
+      "given_name": "Marc Andre",
+      "surname": "Nicollerat",
+      "role": "author"
+    }
+  ],
+  "refs": [],
+  "abstracts": [
+    {
+      "content": "Let A be an abelian category, I the full subcategory of A consisting of injective objects of A, and K(A) the category whose objects are cochain complexes of elements of A, and whose morphisms are homotopy classes of cochain maps.  In (5), lemma 4.6., p. 42, R. Hartshorne has proved that, under certain conditions, a cochain complex X˙ ε. |KA)| can be embedded in a complex I˙ ε. |K(I)| in such a way that I˙ has the same cohomology as X˙.  In Chapter I we show that the construction given in the two first parts of Hartshorne's Lemma is natural i.e. there exists a functor  J : K(A) → K(I) and a natural transformation [formula omitted]  (where E : K(I) → K(A) is the embedding functor) such that [formula omitted] is  injective and induces isomorphism in cohomology. The question whether the construction given in the third part of the lemma is functorial is still open.  We also prove that J is left adjoint to E, so that K(I) is a reflective subcategory of K(A).  In the special case where A is a category [formula omitted] of left A-modules, and [formula omitted] the category of cochain complexes in [formula omitted] and cochain maps (not homotopy classes), we prove the existence of a functor [formula omitted]  In Chapter II we study the natural homomorphism [formula omitted]   where A, B are rings, and M, L, N modules or chain complexes. In particular we give several sufficient conditions under which v is an isomorphism, or induces isomorphism in homology.  In the appendix we give a detailed proof of Hartshorne's Lemma. We think that this is useful, as no complete proof is, to our knowledge, to be found in the literature.",
+      "mimetype": "text/plain",
+      "lang": "en"
+    }
+  ]
 }
diff --git a/python/tests/files/datacite/datacite_result_05.json b/python/tests/files/datacite/datacite_result_05.json
index 1352fe29..961ad72a 100644
--- a/python/tests/files/datacite/datacite_result_05.json
+++ b/python/tests/files/datacite/datacite_result_05.json
@@ -1,532 +1,536 @@
 {
-    "extra": {
-        "datacite": {
-            "license": [
-                {
-                    "rights": "Attribution-NonCommercial (CC BY-NC)",
-                    "rightsUri": "http://creativecommons.org/licenses/by-nc/4.0"
-                }
-            ]
-        },
-        "month": 10
-    },
-    "title": "SH409843.07FU",
-    "subtitle": "Gomphales",
-    "release_type": "dataset",
-    "release_stage": "published",
-    "release_date": "2014-10-05",
-    "release_year": 2014,
-    "ext_ids": {
-        "doi": "10.15156/bio/sh409843.07fu"
-    },
-    "publisher": "UNITE Community",
-    "language": "en",
-    "license_slug": "CC-BY-NC",
-    "contribs": [
+  "extra": {
+    "datacite": {
+      "license": [
         {
-            "index": 0,
-            "raw_name": "Urmas K\u00f5ljalg",
-            "given_name": "Urmas",
-            "surname": "K\u00f5ljalg",
-            "role": "author"
-        },
-        {
-            "index": 1,
-            "raw_name": "Kessy Abarenkov",
-            "given_name": "Kessy",
-            "surname": "Abarenkov",
-            "role": "author"
-        },
-        {
-            "index": 2,
-            "raw_name": "R. Henrik Nilsson",
-            "given_name": "R. Henrik",
-            "surname": "Nilsson",
-            "role": "author"
-        },
-        {
-            "index": 3,
-            "raw_name": "Karl-Henrik Larsson",
-            "given_name": "Karl-Henrik",
-            "surname": "Larsson",
-            "role": "author"
-        },
-        {
-            "index": 4,
-            "raw_name": "Anders Bj\u00f8rnsgard Aas",
-            "given_name": "Anders Bj\u00f8rnsgard",
-            "surname": "Aas",
-            "role": "author"
-        },
-        {
-            "index": 5,
-            "raw_name": "Rachel Adams",
-            "given_name": "Rachel",
-            "surname": "Adams",
-            "role": "author"
-        },
-        {
-            "index": 6,
-            "raw_name": "Artur Alves",
-            "given_name": "Artur",
-            "surname": "Alves",
-            "role": "author"
-        },
-        {
-            "index": 7,
-            "raw_name": "Joseph F. Ammirati",
-            "given_name": "Joseph F.",
-            "surname": "Ammirati",
-            "role": "author"
-        },
-        {
-            "index": 8,
-            "raw_name": "A. Elizabeth Arnold",
-            "given_name": "A. Elizabeth",
-            "surname": "Arnold",
-            "role": "author"
-        },
-        {
-            "index": 9,
-            "raw_name": "Mohammad Bahram",
-            "given_name": "Mohammad",
-            "surname": "Bahram",
-            "role": "author"
-        },
-        {
-            "index": 10,
-            "raw_name": "Johan Bengtsson-Palme",
-            "given_name": "Johan",
-            "surname": "Bengtsson-Palme",
-            "role": "author"
-        },
-        {
-            "index": 11,
-            "raw_name": "Anna Berlin",
-            "given_name": "Anna",
-            "surname": "Berlin",
-            "role": "author"
-        },
-        {
-            "index": 12,
-            "raw_name": "Synn\u00f8ve Botnen",
-            "given_name": "Synn\u00f8ve",
-            "surname": "Botnen",
-            "role": "author"
-        },
-        {
-            "index": 13,
-            "raw_name": "Sarah Bourlat",
-            "given_name": "Sarah",
-            "surname": "Bourlat",
-            "role": "author"
-        },
-        {
-            "index": 14,
-            "raw_name": "Tanya Cheeke",
-            "given_name": "Tanya",
-            "surname": "Cheeke",
-            "role": "author"
-        },
-        {
-            "index": 15,
-            "raw_name": "B\u00e1lint Dima",
-            "given_name": "B\u00e1lint",
-            "surname": "Dima",
-            "role": "author"
-        },
-        {
-            "index": 16,
-            "raw_name": "Rein Drenkhan",
-            "given_name": "Rein",
-            "surname": "Drenkhan",
-            "role": "author"
-        },
-        {
-            "index": 17,
-            "raw_name": "Camila Duarte",
-            "given_name": "Camila",
-            "surname": "Duarte",
-            "role": "author"
-        },
-        {
-            "index": 18,
-            "raw_name": "Margarita Due\u00f1as",
-            "given_name": "Margarita",
-            "surname": "Due\u00f1as",
-            "role": "author"
-        },
-        {
-            "index": 19,
-            "raw_name": "Ursula Eberhardt",
-            "given_name": "Ursula",
-            "surname": "Eberhardt",
-            "role": "author"
-        },
-        {
-            "index": 20,
-            "raw_name": "Hanna Friberg",
-            "given_name": "Hanna",
-            "surname": "Friberg",
-            "role": "author"
-        },
-        {
-            "index": 21,
-            "raw_name": "Tobias G. Fr\u00f8slev",
-            "given_name": "Tobias G.",
-            "surname": "Fr\u00f8slev",
-            "role": "author"
-        },
-        {
-            "index": 22,
-            "raw_name": "Sigisfredo Garnica",
-            "given_name": "Sigisfredo",
-            "surname": "Garnica",
-            "role": "author"
-        },
-        {
-            "index": 23,
-            "raw_name": "J\u00f3zsef Geml",
-            "given_name": "J\u00f3zsef",
-            "surname": "Geml",
-            "role": "author"
-        },
-        {
-            "index": 24,
-            "raw_name": "Masoomeh Ghobad-Nejhad",
-            "given_name": "Masoomeh",
-            "surname": "Ghobad-Nejhad",
-            "role": "author"
-        },
-        {
-            "index": 25,
-            "raw_name": "Tine Grebenc",
-            "given_name": "Tine",
-            "surname": "Grebenc",
-            "role": "author"
-        },
-        {
-            "index": 26,
-            "raw_name": "Gareth W. Griffith",
-            "given_name": "Gareth W.",
-            "surname": "Griffith",
-            "role": "author"
-        },
-        {
-            "index": 27,
-            "raw_name": "Felix Hampe",
-            "given_name": "Felix",
-            "surname": "Hampe",
-            "role": "author"
-        },
-        {
-            "index": 28,
-            "raw_name": "Peter Kennedy",
-            "given_name": "Peter",
-            "surname": "Kennedy",
-            "role": "author"
-        },
-        {
-            "index": 29,
-            "raw_name": "Maryia Khomich",
-            "given_name": "Maryia",
-            "surname": "Khomich",
-            "role": "author"
-        },
-        {
-            "index": 30,
-            "raw_name": "Petr Kohout",
-            "given_name": "Petr",
-            "surname": "Kohout",
-            "role": "author"
-        },
-        {
-            "index": 31,
-            "raw_name": "Anu Kollom",
-            "given_name": "Anu",
-            "surname": "Kollom",
-            "role": "author"
-        },
-        {
-            "index": 32,
-            "raw_name": "Ellen Larsson",
-            "given_name": "Ellen",
-            "surname": "Larsson",
-            "role": "author"
-        },
-        {
-            "index": 33,
-            "raw_name": "Irinyi Laszlo",
-            "given_name": "Irinyi",
-            "surname": "Laszlo",
-            "role": "author"
-        },
-        {
-            "index": 34,
-            "raw_name": "Steven Leavitt",
-            "given_name": "Steven",
-            "surname": "Leavitt",
-            "role": "author"
-        },
-        {
-            "index": 35,
-            "raw_name": "Kare Liimatainen",
-            "given_name": "Kare",
-            "surname": "Liimatainen",
-            "role": "author"
-        },
-        {
-            "index": 36,
-            "raw_name": "Bj\u00f6rn Lindahl",
-            "given_name": "Bj\u00f6rn",
-            "surname": "Lindahl",
-            "role": "author"
-        },
-        {
-            "index": 37,
-            "raw_name": "Deborah J. Lodge",
-            "given_name": "Deborah J.",
-            "surname": "Lodge",
-            "role": "author"
-        },
-        {
-            "index": 38,
-            "raw_name": "Helge Thorsten Lumbsch",
-            "given_name": "Helge Thorsten",
-            "surname": "Lumbsch",
-            "role": "author"
-        },
-        {
-            "index": 39,
-            "raw_name": "Mar\u00eda Paz Mart\u00edn Esteban",
-            "given_name": "Mar\u00eda Paz",
-            "surname": "Mart\u00edn Esteban",
-            "role": "author"
-        },
-        {
-            "index": 40,
-            "raw_name": "Wieland Meyer",
-            "given_name": "Wieland",
-            "surname": "Meyer",
-            "role": "author"
-        },
-        {
-            "index": 41,
-            "raw_name": "Otto Miettinen",
-            "given_name": "Otto",
-            "surname": "Miettinen",
-            "role": "author"
-        },
-        {
-            "index": 42,
-            "raw_name": "Nhu Nguyen",
-            "given_name": "Nhu",
-            "surname": "Nguyen",
-            "role": "author"
-        },
-        {
-            "index": 43,
-            "raw_name": "Tuula Niskanen",
-            "given_name": "Tuula",
-            "surname": "Niskanen",
-            "role": "author"
-        },
-        {
-            "index": 44,
-            "raw_name": "Ryoko Oono",
-            "given_name": "Ryoko",
-            "surname": "Oono",
-            "role": "author"
-        },
-        {
-            "index": 45,
-            "raw_name": "Maarja \u00d6pik",
-            "given_name": "Maarja",
-            "surname": "\u00d6pik",
-            "role": "author"
-        },
-        {
-            "index": 46,
-            "raw_name": "Alexander Ordynets",
-            "given_name": "Alexander",
-            "surname": "Ordynets",
-            "role": "author"
-        },
-        {
-            "index": 47,
-            "raw_name": "Julia Paw\u0142owska",
-            "given_name": "Julia",
-            "surname": "Paw\u0142owska",
-            "role": "author"
-        },
-        {
-            "index": 48,
-            "raw_name": "Ursula Peintner",
-            "given_name": "Ursula",
-            "surname": "Peintner",
-            "role": "author"
-        },
-        {
-            "index": 49,
-            "raw_name": "Olinto Liparini Pereira",
-            "given_name": "Olinto Liparini",
-            "surname": "Pereira",
-            "role": "author"
-        },
-        {
-            "index": 50,
-            "raw_name": "Danilo Batista Pinho",
-            "given_name": "Danilo Batista",
-            "surname": "Pinho",
-            "role": "author"
-        },
-        {
-            "index": 51,
-            "raw_name": "Kadri P\u00f5ldmaa",
-            "given_name": "Kadri",
-            "surname": "P\u00f5ldmaa",
-            "role": "author"
-        },
-        {
-            "index": 52,
-            "raw_name": "Kadri Runnel",
-            "given_name": "Kadri",
-            "surname": "Runnel",
-            "role": "author"
-        },
-        {
-            "index": 53,
-            "raw_name": "Martin Ryberg",
-            "given_name": "Martin",
-            "surname": "Ryberg",
-            "role": "author"
-        },
-        {
-            "index": 54,
-            "raw_name": "Irja Saar",
-            "given_name": "Irja",
-            "surname": "Saar",
-            "role": "author"
-        },
-        {
-            "index": 55,
-            "raw_name": "Kemal Sanli",
-            "given_name": "Kemal",
-            "surname": "Sanli",
-            "role": "author"
-        },
-        {
-            "index": 56,
-            "raw_name": "James Scott",
-            "given_name": "James",
-            "surname": "Scott",
-            "role": "author"
-        },
-        {
-            "index": 57,
-            "raw_name": "Viacheslav Spirin",
-            "given_name": "Viacheslav",
-            "surname": "Spirin",
-            "role": "author"
-        },
-        {
-            "index": 58,
-            "raw_name": "Ave Suija",
-            "given_name": "Ave",
-            "surname": "Suija",
-            "role": "author"
-        },
-        {
-            "index": 59,
-            "raw_name": "Sten Svantesson",
-            "given_name": "Sten",
-            "surname": "Svantesson",
-            "role": "author"
-        },
-        {
-            "index": 60,
-            "raw_name": "Mariusz Tadych",
-            "given_name": "Mariusz",
-            "surname": "Tadych",
-            "role": "author"
-        },
-        {
-            "index": 61,
-            "raw_name": "Susumu Takamatsu",
-            "given_name": "Susumu",
-            "surname": "Takamatsu",
-            "role": "author"
-        },
-        {
-            "index": 62,
-            "raw_name": "Heidi Tamm",
-            "given_name": "Heidi",
-            "surname": "Tamm",
-            "role": "author"
-        },
-        {
-            "index": 63,
-            "raw_name": "AFS. Taylor",
-            "given_name": "AFS.",
-            "surname": "Taylor",
-            "role": "author"
-        },
-        {
-            "index": 64,
-            "raw_name": "Leho Tedersoo",
-            "given_name": "Leho",
-            "surname": "Tedersoo",
-            "role": "author"
-        },
-        {
-            "index": 65,
-            "raw_name": "M.T. Telleria",
-            "given_name": "M.T.",
-            "surname": "Telleria",
-            "role": "author"
-        },
-        {
-            "index": 66,
-            "raw_name": "Dhanushka Udayanga",
-            "given_name": "Dhanushka",
-            "surname": "Udayanga",
-            "role": "author"
-        },
-        {
-            "index": 67,
-            "raw_name": "Martin Unterseher",
-            "given_name": "Martin",
-            "surname": "Unterseher",
-            "role": "author"
-        },
-        {
-            "index": 68,
-            "raw_name": "Sergey Volobuev",
-            "given_name": "Sergey",
-            "surname": "Volobuev",
-            "role": "author"
-        },
-        {
-            "index": 69,
-            "raw_name": "Michael Weiss",
-            "given_name": "Michael",
-            "surname": "Weiss",
-            "role": "author"
-        },
-        {
-            "index": 70,
-            "raw_name": "Christian Wurzbacher",
-            "given_name": "Christian",
-            "surname": "Wurzbacher",
-            "role": "author"
-        }
-    ],
-    "refs": [],
-    "abstracts": [
-        {
-            "content": "UNITE provides a unified way for delimiting, identifying, communicating, and working with DNA-based Species Hypotheses (SH). All fungal ITS sequences in the international nucleotide sequence databases are clustered to approximately the species level by applying a set of dynamic distance values (<0.5 - 3.0%). All species hypotheses are given a unique, stable name in the form of a DOI, and their taxonomic and ecological annotations are verified through distributed, web-based third-party annotation efforts. SHs are connected to a taxon name and its classification as far as possible (phylum, class, order, etc.) by taking into account identifications for all sequences in the SH. An automatically or manually designated sequence is chosen to represent each such SH. These sequences are released (https://unite.ut.ee/repository.php) for use by the scientific community in, for example, local sequence similarity searches and next-generation sequencing analysis pipelines. The system and the data are updated automatically as the number of public fungal ITS sequences grows.",
-            "mimetype": "text/plain",
-            "lang": "en"
+          "rights": "Attribution-NonCommercial (CC BY-NC)",
+          "rightsUri": "http://creativecommons.org/licenses/by-nc/4.0"
         }
-    ]
+      ],
+      "metadataVersion": 1,
+      "resourceType": "Dataset/UNITE Species Hypothesis",
+      "resourceTypeGeneral": "Dataset",
+      "schemaVersion": "http://datacite.org/schema/kernel-3"
+    },
+    "month": 10
+  },
+  "title": "SH409843.07FU",
+  "subtitle": "Gomphales",
+  "release_type": "dataset",
+  "release_stage": "published",
+  "release_date": "2014-10-05",
+  "release_year": 2014,
+  "ext_ids": {
+    "doi": "10.15156/bio/sh409843.07fu"
+  },
+  "publisher": "UNITE Community",
+  "language": "en",
+  "license_slug": "CC-BY-NC",
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "Urmas Kõljalg",
+      "given_name": "Urmas",
+      "surname": "Kõljalg",
+      "role": "author"
+    },
+    {
+      "index": 1,
+      "raw_name": "Kessy Abarenkov",
+      "given_name": "Kessy",
+      "surname": "Abarenkov",
+      "role": "author"
+    },
+    {
+      "index": 2,
+      "raw_name": "R. Henrik Nilsson",
+      "given_name": "R. Henrik",
+      "surname": "Nilsson",
+      "role": "author"
+    },
+    {
+      "index": 3,
+      "raw_name": "Karl-Henrik Larsson",
+      "given_name": "Karl-Henrik",
+      "surname": "Larsson",
+      "role": "author"
+    },
+    {
+      "index": 4,
+      "raw_name": "Anders Bjørnsgard Aas",
+      "given_name": "Anders Bjørnsgard",
+      "surname": "Aas",
+      "role": "author"
+    },
+    {
+      "index": 5,
+      "raw_name": "Rachel Adams",
+      "given_name": "Rachel",
+      "surname": "Adams",
+      "role": "author"
+    },
+    {
+      "index": 6,
+      "raw_name": "Artur Alves",
+      "given_name": "Artur",
+      "surname": "Alves",
+      "role": "author"
+    },
+    {
+      "index": 7,
+      "raw_name": "Joseph F. Ammirati",
+      "given_name": "Joseph F.",
+      "surname": "Ammirati",
+      "role": "author"
+    },
+    {
+      "index": 8,
+      "raw_name": "A. Elizabeth Arnold",
+      "given_name": "A. Elizabeth",
+      "surname": "Arnold",
+      "role": "author"
+    },
+    {
+      "index": 9,
+      "raw_name": "Mohammad Bahram",
+      "given_name": "Mohammad",
+      "surname": "Bahram",
+      "role": "author"
+    },
+    {
+      "index": 10,
+      "raw_name": "Johan Bengtsson-Palme",
+      "given_name": "Johan",
+      "surname": "Bengtsson-Palme",
+      "role": "author"
+    },
+    {
+      "index": 11,
+      "raw_name": "Anna Berlin",
+      "given_name": "Anna",
+      "surname": "Berlin",
+      "role": "author"
+    },
+    {
+      "index": 12,
+      "raw_name": "Synnøve Botnen",
+      "given_name": "Synnøve",
+      "surname": "Botnen",
+      "role": "author"
+    },
+    {
+      "index": 13,
+      "raw_name": "Sarah Bourlat",
+      "given_name": "Sarah",
+      "surname": "Bourlat",
+      "role": "author"
+    },
+    {
+      "index": 14,
+      "raw_name": "Tanya Cheeke",
+      "given_name": "Tanya",
+      "surname": "Cheeke",
+      "role": "author"
+    },
+    {
+      "index": 15,
+      "raw_name": "Bálint Dima",
+      "given_name": "Bálint",
+      "surname": "Dima",
+      "role": "author"
+    },
+    {
+      "index": 16,
+      "raw_name": "Rein Drenkhan",
+      "given_name": "Rein",
+      "surname": "Drenkhan",
+      "role": "author"
+    },
+    {
+      "index": 17,
+      "raw_name": "Camila Duarte",
+      "given_name": "Camila",
+      "surname": "Duarte",
+      "role": "author"
+    },
+    {
+      "index": 18,
+      "raw_name": "Margarita Dueñas",
+      "given_name": "Margarita",
+      "surname": "Dueñas",
+      "role": "author"
+    },
+    {
+      "index": 19,
+      "raw_name": "Ursula Eberhardt",
+      "given_name": "Ursula",
+      "surname": "Eberhardt",
+      "role": "author"
+    },
+    {
+      "index": 20,
+      "raw_name": "Hanna Friberg",
+      "given_name": "Hanna",
+      "surname": "Friberg",
+      "role": "author"
+    },
+    {
+      "index": 21,
+      "raw_name": "Tobias G. Frøslev",
+      "given_name": "Tobias G.",
+      "surname": "Frøslev",
+      "role": "author"
+    },
+    {
+      "index": 22,
+      "raw_name": "Sigisfredo Garnica",
+      "given_name": "Sigisfredo",
+      "surname": "Garnica",
+      "role": "author"
+    },
+    {
+      "index": 23,
+      "raw_name": "József Geml",
+      "given_name": "József",
+      "surname": "Geml",
+      "role": "author"
+    },
+    {
+      "index": 24,
+      "raw_name": "Masoomeh Ghobad-Nejhad",
+      "given_name": "Masoomeh",
+      "surname": "Ghobad-Nejhad",
+      "role": "author"
+    },
+    {
+      "index": 25,
+      "raw_name": "Tine Grebenc",
+      "given_name": "Tine",
+      "surname": "Grebenc",
+      "role": "author"
+    },
+    {
+      "index": 26,
+      "raw_name": "Gareth W. Griffith",
+      "given_name": "Gareth W.",
+      "surname": "Griffith",
+      "role": "author"
+    },
+    {
+      "index": 27,
+      "raw_name": "Felix Hampe",
+      "given_name": "Felix",
+      "surname": "Hampe",
+      "role": "author"
+    },
+    {
+      "index": 28,
+      "raw_name": "Peter Kennedy",
+      "given_name": "Peter",
+      "surname": "Kennedy",
+      "role": "author"
+    },
+    {
+      "index": 29,
+      "raw_name": "Maryia Khomich",
+      "given_name": "Maryia",
+      "surname": "Khomich",
+      "role": "author"
+    },
+    {
+      "index": 30,
+      "raw_name": "Petr Kohout",
+      "given_name": "Petr",
+      "surname": "Kohout",
+      "role": "author"
+    },
+    {
+      "index": 31,
+      "raw_name": "Anu Kollom",
+      "given_name": "Anu",
+      "surname": "Kollom",
+      "role": "author"
+    },
+    {
+      "index": 32,
+      "raw_name": "Ellen Larsson",
+      "given_name": "Ellen",
+      "surname": "Larsson",
+      "role": "author"
+    },
+    {
+      "index": 33,
+      "raw_name": "Irinyi Laszlo",
+      "given_name": "Irinyi",
+      "surname": "Laszlo",
+      "role": "author"
+    },
+    {
+      "index": 34,
+      "raw_name": "Steven Leavitt",
+      "given_name": "Steven",
+      "surname": "Leavitt",
+      "role": "author"
+    },
+    {
+      "index": 35,
+      "raw_name": "Kare Liimatainen",
+      "given_name": "Kare",
+      "surname": "Liimatainen",
+      "role": "author"
+    },
+    {
+      "index": 36,
+      "raw_name": "Björn Lindahl",
+      "given_name": "Björn",
+      "surname": "Lindahl",
+      "role": "author"
+    },
+    {
+      "index": 37,
+      "raw_name": "Deborah J. Lodge",
+      "given_name": "Deborah J.",
+      "surname": "Lodge",
+      "role": "author"
+    },
+    {
+      "index": 38,
+      "raw_name": "Helge Thorsten Lumbsch",
+      "given_name": "Helge Thorsten",
+      "surname": "Lumbsch",
+      "role": "author"
+    },
+    {
+      "index": 39,
+      "raw_name": "María Paz Martín Esteban",
+      "given_name": "María Paz",
+      "surname": "Martín Esteban",
+      "role": "author"
+    },
+    {
+      "index": 40,
+      "raw_name": "Wieland Meyer",
+      "given_name": "Wieland",
+      "surname": "Meyer",
+      "role": "author"
+    },
+    {
+      "index": 41,
+      "raw_name": "Otto Miettinen",
+      "given_name": "Otto",
+      "surname": "Miettinen",
+      "role": "author"
+    },
+    {
+      "index": 42,
+      "raw_name": "Nhu Nguyen",
+      "given_name": "Nhu",
+      "surname": "Nguyen",
+      "role": "author"
+    },
+    {
+      "index": 43,
+      "raw_name": "Tuula Niskanen",
+      "given_name": "Tuula",
+      "surname": "Niskanen",
+      "role": "author"
+    },
+    {
+      "index": 44,
+      "raw_name": "Ryoko Oono",
+      "given_name": "Ryoko",
+      "surname": "Oono",
+      "role": "author"
+    },
+    {
+      "index": 45,
+      "raw_name": "Maarja Öpik",
+      "given_name": "Maarja",
+      "surname": "Öpik",
+      "role": "author"
+    },
+    {
+      "index": 46,
+      "raw_name": "Alexander Ordynets",
+      "given_name": "Alexander",
+      "surname": "Ordynets",
+      "role": "author"
+    },
+    {
+      "index": 47,
+      "raw_name": "Julia Pawłowska",
+      "given_name": "Julia",
+      "surname": "Pawłowska",
+      "role": "author"
+    },
+    {
+      "index": 48,
+      "raw_name": "Ursula Peintner",
+      "given_name": "Ursula",
+      "surname": "Peintner",
+      "role": "author"
+    },
+    {
+      "index": 49,
+      "raw_name": "Olinto Liparini Pereira",
+      "given_name": "Olinto Liparini",
+      "surname": "Pereira",
+      "role": "author"
+    },
+    {
+      "index": 50,
+      "raw_name": "Danilo Batista Pinho",
+      "given_name": "Danilo Batista",
+      "surname": "Pinho",
+      "role": "author"
+    },
+    {
+      "index": 51,
+      "raw_name": "Kadri Põldmaa",
+      "given_name": "Kadri",
+      "surname": "Põldmaa",
+      "role": "author"
+    },
+    {
+      "index": 52,
+      "raw_name": "Kadri Runnel",
+      "given_name": "Kadri",
+      "surname": "Runnel",
+      "role": "author"
+    },
+    {
+      "index": 53,
+      "raw_name": "Martin Ryberg",
+      "given_name": "Martin",
+      "surname": "Ryberg",
+      "role": "author"
+    },
+    {
+      "index": 54,
+      "raw_name": "Irja Saar",
+      "given_name": "Irja",
+      "surname": "Saar",
+      "role": "author"
+    },
+    {
+      "index": 55,
+      "raw_name": "Kemal Sanli",
+      "given_name": "Kemal",
+      "surname": "Sanli",
+      "role": "author"
+    },
+    {
+      "index": 56,
+      "raw_name": "James Scott",
+      "given_name": "James",
+      "surname": "Scott",
+      "role": "author"
+    },
+    {
+      "index": 57,
+      "raw_name": "Viacheslav Spirin",
+      "given_name": "Viacheslav",
+      "surname": "Spirin",
+      "role": "author"
+    },
+    {
+      "index": 58,
+      "raw_name": "Ave Suija",
+      "given_name": "Ave",
+      "surname": "Suija",
+      "role": "author"
+    },
+    {
+      "index": 59,
+      "raw_name": "Sten Svantesson",
+      "given_name": "Sten",
+      "surname": "Svantesson",
+      "role": "author"
+    },
+    {
+      "index": 60,
+      "raw_name": "Mariusz Tadych",
+      "given_name": "Mariusz",
+      "surname": "Tadych",
+      "role": "author"
+    },
+    {
+      "index": 61,
+      "raw_name": "Susumu Takamatsu",
+      "given_name": "Susumu",
+      "surname": "Takamatsu",
+      "role": "author"
+    },
+    {
+      "index": 62,
+      "raw_name": "Heidi Tamm",
+      "given_name": "Heidi",
+      "surname": "Tamm",
+      "role": "author"
+    },
+    {
+      "index": 63,
+      "raw_name": "AFS. Taylor",
+      "given_name": "AFS.",
+      "surname": "Taylor",
+      "role": "author"
+    },
+    {
+      "index": 64,
+      "raw_name": "Leho Tedersoo",
+      "given_name": "Leho",
+      "surname": "Tedersoo",
+      "role": "author"
+    },
+    {
+      "index": 65,
+      "raw_name": "M.T. Telleria",
+      "given_name": "M.T.",
+      "surname": "Telleria",
+      "role": "author"
+    },
+    {
+      "index": 66,
+      "raw_name": "Dhanushka Udayanga",
+      "given_name": "Dhanushka",
+      "surname": "Udayanga",
+      "role": "author"
+    },
+    {
+      "index": 67,
+      "raw_name": "Martin Unterseher",
+      "given_name": "Martin",
+      "surname": "Unterseher",
+      "role": "author"
+    },
+    {
+      "index": 68,
+      "raw_name": "Sergey Volobuev",
+      "given_name": "Sergey",
+      "surname": "Volobuev",
+      "role": "author"
+    },
+    {
+      "index": 69,
+      "raw_name": "Michael Weiss",
+      "given_name": "Michael",
+      "surname": "Weiss",
+      "role": "author"
+    },
+    {
+      "index": 70,
+      "raw_name": "Christian Wurzbacher",
+      "given_name": "Christian",
+      "surname": "Wurzbacher",
+      "role": "author"
+    }
+  ],
+  "refs": [],
+  "abstracts": [
+    {
+      "content": "UNITE provides a unified way for delimiting, identifying, communicating, and working with DNA-based Species Hypotheses (SH). All fungal ITS sequences in the international nucleotide sequence databases are clustered to approximately the species level by applying a set of dynamic distance values (<0.5 - 3.0%). All species hypotheses are given a unique, stable name in the form of a DOI, and their taxonomic and ecological annotations are verified through distributed, web-based third-party annotation efforts. SHs are connected to a taxon name and its classification as far as possible (phylum, class, order, etc.) by taking into account identifications for all sequences in the SH. An automatically or manually designated sequence is chosen to represent each such SH. These sequences are released (https://unite.ut.ee/repository.php) for use by the scientific community in, for example, local sequence similarity searches and next-generation sequencing analysis pipelines. The system and the data are updated automatically as the number of public fungal ITS sequences grows.",
+      "mimetype": "text/plain",
+      "lang": "en"
+    }
+  ]
 }
diff --git a/python/tests/files/datacite/datacite_result_06.json b/python/tests/files/datacite/datacite_result_06.json
index 61f2549d..18880100 100644
--- a/python/tests/files/datacite/datacite_result_06.json
+++ b/python/tests/files/datacite/datacite_result_06.json
@@ -1,26 +1,29 @@
 {
-    "extra": {
-        "datacite": {
-            "license": [
-                {
-                    "rights": "ETH-Bibliothek Z\u00fcrich, Graphische Sammlung / D 6220 / Public Domain Mark 1.0"
-                }
-            ]
-        }
-    },
-    "title": "Der Eifer (Sedulitas), Blatt 7 der Folge \"Die Tugenden\"",
-    "release_type": "article",
-    "release_year": 1590,
-    "ext_ids": {
-        "doi": "10.16903/ethz-grs-d_006220"
-    },
-    "contribs": [
+  "extra": {
+    "datacite": {
+      "license": [
         {
-            "index": 0,
-            "raw_name": "Crispijn De Passe (Der \u00c4ltere) (1564-1637)",
-            "role": "author"
+          "rights": "ETH-Bibliothek Zürich, Graphische Sammlung / D 6220 / Public Domain Mark 1.0"
         }
-    ],
-    "refs": [],
-    "abstracts": []
-}
\ No newline at end of file
+      ],
+      "metadataVersion": 1,
+      "resourceTypeGeneral": "InteractiveResource",
+      "schemaVersion": "http://datacite.org/schema/kernel-3"
+    }
+  },
+  "title": "Der Eifer (Sedulitas), Blatt 7 der Folge \"Die Tugenden\"",
+  "release_type": "article",
+  "release_year": 1590,
+  "ext_ids": {
+    "doi": "10.16903/ethz-grs-d_006220"
+  },
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "Crispijn De Passe (Der Ältere) (1564-1637)",
+      "role": "author"
+    }
+  ],
+  "refs": [],
+  "abstracts": []
+}
diff --git a/python/tests/files/datacite/datacite_result_07.json b/python/tests/files/datacite/datacite_result_07.json
index f694ddef..23b63d50 100644
--- a/python/tests/files/datacite/datacite_result_07.json
+++ b/python/tests/files/datacite/datacite_result_07.json
@@ -1,74 +1,76 @@
 {
-    "extra": {
-        "datacite": {
-            "subjects": [
-                {
-                    "subject": "HEAT PUMP"
-                },
-                {
-                    "subject": "HOT WATER"
-                },
-                {
-                    "subject": "HEAT TRANSFER"
-                },
-                {
-                    "subject": "PERFORMANCE"
-                },
-                {
-                    "subject": "THERMAL STORAGE"
-                },
-                {
-                    "subject": "TANK"
-                },
-                {
-                    "subject": "MODEL"
-                }
-            ]
-        }
-    },
-    "title": "High efficient heat pump system using storage tanks to increase cop by means of the ISEC concept. 1: model validation.",
-    "release_type": "dataset",
-    "release_stage": "published",
-    "release_year": 2015,
-    "ext_ids": {
-        "doi": "10.18462/iir.icr.2015.0926"
-    },
-    "publisher": "International Institute of Refrigeration (IIR)",
-    "language": "en",
-    "contribs": [
+  "extra": {
+    "datacite": {
+      "subjects": [
         {
-            "index": 0,
-            "raw_name": "E. ROTHUIZEN",
-            "given_name": "E.",
-            "surname": "ROTHUIZEN",
-            "role": "author"
+          "subject": "HEAT PUMP"
         },
         {
-            "index": 1,
-            "raw_name": "B. ELMEGAARD",
-            "given_name": "B.",
-            "surname": "ELMEGAARD",
-            "role": "author"
+          "subject": "HOT WATER"
         },
         {
-            "index": 2,
-            "raw_name": "B. MARKUSSEN W.",
-            "given_name": "B.",
-            "surname": "MARKUSSEN W.",
-            "role": "author"
+          "subject": "HEAT TRANSFER"
         },
         {
-            "index": 3,
-            "raw_name": "Et Al.",
-            "role": "author"
-        }
-    ],
-    "refs": [],
-    "abstracts": [
+          "subject": "PERFORMANCE"
+        },
+        {
+          "subject": "THERMAL STORAGE"
+        },
+        {
+          "subject": "TANK"
+        },
         {
-            "content": "The purpose of the ISEC concept is to provide a high-efficient heat pump system for hot water production. The ISEC concept uses two storage tanks for the water, one discharged and one charged. Hot water for the industrial process is tapped from the charged tank, while the other tank is charging. Charging is done by circulating the water in the tank through the condenser of a heat pump several times and thereby gradually heating the water. The charging is done with a higher mass flow rate than the discharging to reach several circulations of the water during the time frame of one discharging. This result in a lower condensing temperature than if the water was heated in one step. Two test setups were built, one to test the performance of the heat pump gradually heating the water and one to investigate the stratification in the storage tanks. Furthermore, a dynamic model of the system was implemented in Dymola, and validated by the use of test data from the two experimental setups. This paper shows that there is a good consistency between the model and the experimental tests.",
-            "mimetype": "text/plain",
-            "lang": "en"
+          "subject": "MODEL"
         }
-    ]
+      ],
+      "resourceType": "Dataset",
+      "resourceTypeGeneral": "Dataset"
+    }
+  },
+  "title": "High efficient heat pump system using storage tanks to increase cop by means of the ISEC concept. 1: model validation.",
+  "release_type": "dataset",
+  "release_stage": "published",
+  "release_year": 2015,
+  "ext_ids": {
+    "doi": "10.18462/iir.icr.2015.0926"
+  },
+  "publisher": "International Institute of Refrigeration (IIR)",
+  "language": "en",
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "E. ROTHUIZEN",
+      "given_name": "E.",
+      "surname": "ROTHUIZEN",
+      "role": "author"
+    },
+    {
+      "index": 1,
+      "raw_name": "B. ELMEGAARD",
+      "given_name": "B.",
+      "surname": "ELMEGAARD",
+      "role": "author"
+    },
+    {
+      "index": 2,
+      "raw_name": "B. MARKUSSEN W.",
+      "given_name": "B.",
+      "surname": "MARKUSSEN W.",
+      "role": "author"
+    },
+    {
+      "index": 3,
+      "raw_name": "Et Al.",
+      "role": "author"
+    }
+  ],
+  "refs": [],
+  "abstracts": [
+    {
+      "content": "The purpose of the ISEC concept is to provide a high-efficient heat pump system for hot water production. The ISEC concept uses two storage tanks for the water, one discharged and one charged. Hot water for the industrial process is tapped from the charged tank, while the other tank is charging. Charging is done by circulating the water in the tank through the condenser of a heat pump several times and thereby gradually heating the water. The charging is done with a higher mass flow rate than the discharging to reach several circulations of the water during the time frame of one discharging. This result in a lower condensing temperature than if the water was heated in one step. Two test setups were built, one to test the performance of the heat pump gradually heating the water and one to investigate the stratification in the storage tanks. Furthermore, a dynamic model of the system was implemented in Dymola, and validated by the use of test data from the two experimental setups. This paper shows that there is a good consistency between the model and the experimental tests.",
+      "mimetype": "text/plain",
+      "lang": "en"
+    }
+  ]
 }
diff --git a/python/tests/files/datacite/datacite_result_08.json b/python/tests/files/datacite/datacite_result_08.json
index 46ef5b44..ff942d0a 100644
--- a/python/tests/files/datacite/datacite_result_08.json
+++ b/python/tests/files/datacite/datacite_result_08.json
@@ -1,54 +1,57 @@
 {
-    "extra": {
-        "datacite": {
-            "subjects": [
-                {
-                    "subject": "Land Economics/Use"
-                },
-                {
-                    "subject": "irrigation",
-                    "subjectScheme": "keyword"
-                },
-                {
-                    "subject": "industrialization",
-                    "subjectScheme": "keyword"
-                },
-                {
-                    "subject": "collective action",
-                    "subjectScheme": "keyword"
-                }
-            ]
-        }
-    },
-    "title": "Irrigation Policies under Rapid Industrialization and Labor Migration: Lessons from Japan, China and India",
-    "release_type": "article-journal",
-    "release_year": 2017,
-    "ext_ids": {
-        "doi": "10.22004/ag.econ.284864"
-    },
-    "language": "en",
-    "contribs": [
+  "extra": {
+    "datacite": {
+      "subjects": [
         {
-            "index": 0,
-            "raw_name": "Kei Kajisa",
-            "given_name": "Kei",
-            "surname": "Kajisa",
-            "role": "author"
+          "subject": "Land Economics/Use"
         },
         {
-            "index": 1,
-            "raw_name": "Kei Kajisa",
-            "given_name": "Kei",
-            "surname": "Kajisa",
-            "role": "author"
-        }
-    ],
-    "refs": [],
-    "abstracts": [
+          "subject": "irrigation",
+          "subjectScheme": "keyword"
+        },
+        {
+          "subject": "industrialization",
+          "subjectScheme": "keyword"
+        },
         {
-            "content": "International society recognizes that the scarcity of fresh water is increasing and farming sectors suffer from lack of irrigation water. However, if we look at this issue with a framework of relative factor endowment, a different view will arise. In emerging states with rapid industrialization and labor migration, labor scarcity increases at a faster pace than that of irrigation water. Using the historical review of Japan's irrigation policies as well as the case studies of India and China, this paper shows that the introduction of policies which do not reflect the actual relative resource scarcity may mislead the development path. We argue that under increasing relative labor scarcity it is important to realize the substitution of capital for labor for surface irrigation system management and that the substitution needs public support because the service of surface irrigation system has some externalities. Through this argument, this paper also intends to shed the light back to the role of the state for local resource management which seems to be unfairly undervalued since the boom of community participatory approach in the 1980s.",
-            "mimetype": "text/plain",
-            "lang": "en"
+          "subject": "collective action",
+          "subjectScheme": "keyword"
         }
-    ]
+      ],
+      "metadataVersion": 1,
+      "resourceType": "Text",
+      "resourceTypeGeneral": "Text"
+    }
+  },
+  "title": "Irrigation Policies under Rapid Industrialization and Labor Migration: Lessons from Japan, China and India",
+  "release_type": "article-journal",
+  "release_year": 2017,
+  "ext_ids": {
+    "doi": "10.22004/ag.econ.284864"
+  },
+  "language": "en",
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "Kei Kajisa",
+      "given_name": "Kei",
+      "surname": "Kajisa",
+      "role": "author"
+    },
+    {
+      "index": 1,
+      "raw_name": "Kei Kajisa",
+      "given_name": "Kei",
+      "surname": "Kajisa",
+      "role": "author"
+    }
+  ],
+  "refs": [],
+  "abstracts": [
+    {
+      "content": "International society recognizes that the scarcity of fresh water is increasing and farming sectors suffer from lack of irrigation water. However, if we look at this issue with a framework of relative factor endowment, a different view will arise. In emerging states with rapid industrialization and labor migration, labor scarcity increases at a faster pace than that of irrigation water. Using the historical review of Japan's irrigation policies as well as the case studies of India and China, this paper shows that the introduction of policies which do not reflect the actual relative resource scarcity may mislead the development path. We argue that under increasing relative labor scarcity it is important to realize the substitution of capital for labor for surface irrigation system management and that the substitution needs public support because the service of surface irrigation system has some externalities. Through this argument, this paper also intends to shed the light back to the role of the state for local resource management which seems to be unfairly undervalued since the boom of community participatory approach in the 1980s.",
+      "mimetype": "text/plain",
+      "lang": "en"
+    }
+  ]
 }
diff --git a/python/tests/files/datacite/datacite_result_09.json b/python/tests/files/datacite/datacite_result_09.json
index db103d2b..fd873309 100644
--- a/python/tests/files/datacite/datacite_result_09.json
+++ b/python/tests/files/datacite/datacite_result_09.json
@@ -1,35 +1,40 @@
 {
-    "extra": {
-        "datacite": {
-            "subjects": [
-                {
-                    "subject": "Direktdiodenlasersysteme"
-                },
-                {
-                    "subject": "Physics",
-                    "subjectScheme": "linsearch"
-                }
-            ]
-        }
-    },
-    "title": "BrightLas : TP3.3. Module f\u00fcr Direktdiodenstrahlquellen bis 4kW und Untersuchungen zur Leistungsskalierung (Diodemodul) : zum Verbundvorhaben Direktdiodenlaseranlagen und -systeme (VP3) im F\u00f6rderschwerpunkt innovative regionale Wachstumskerne, BMBF : Abschlussbericht",
-    "release_type": "report",
-    "release_stage": "published",
-    "release_year": 2016,
-    "ext_ids": {
-        "doi": "10.2314/gbv:880813733"
-    },
-    "publisher": "[Lumics GmbH]",
-    "language": "de",
-    "contribs": [
+  "extra": {
+    "datacite": {
+      "subjects": [
+        {
+          "subject": "Direktdiodenlasersysteme"
+        },
         {
-            "index": 0,
-            "raw_name": "Nils Kirstaedter",
-            "given_name": "Nils",
-            "surname": "Kirstaedter",
-            "role": "author"
+          "subject": "Physics",
+          "subjectScheme": "linsearch"
         }
-    ],
-    "refs": [],
-    "abstracts": []
-}
\ No newline at end of file
+      ],
+      "metadataVersion": 9,
+      "resourceType": "Report",
+      "resourceTypeGeneral": "Text",
+      "schemaVersion": "http://datacite.org/schema/kernel-4"
+    }
+  },
+  "title": "BrightLas : TP3.3. Module für Direktdiodenstrahlquellen bis 4kW und Untersuchungen zur Leistungsskalierung (Diodemodul) : zum Verbundvorhaben Direktdiodenlaseranlagen und -systeme (VP3) im Förderschwerpunkt innovative regionale Wachstumskerne, BMBF : Abschlussbericht",
+  "release_type": "report",
+  "release_stage": "published",
+  "release_year": 2016,
+  "ext_ids": {
+    "doi": "10.2314/gbv:880813733"
+  },
+  "publisher": "[Lumics GmbH]",
+  "language": "de",
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "Nils Kirstaedter",
+      "given_name": "Nils",
+      "surname": "Kirstaedter",
+      "role": "author"
+    }
+  ],
+  "refs": [],
+  "abstracts": [],
+  "version": "1.0"
+}
diff --git a/python/tests/files/datacite/datacite_result_10.json b/python/tests/files/datacite/datacite_result_10.json
index 325facf7..8dea8957 100644
--- a/python/tests/files/datacite/datacite_result_10.json
+++ b/python/tests/files/datacite/datacite_result_10.json
@@ -1,32 +1,35 @@
 {
-    "extra": {
-        "datacite": {
-            "subjects": [
-                {
-                    "subject": "housing areas"
-                },
-                {
-                    "subject": "Dwellings"
-                }
-            ]
-        }
-    },
-    "title": "WPA household census for 210 E VERNON, Los Angeles",
-    "release_type": "dataset",
-    "release_stage": "published",
-    "release_year": 2012,
-    "ext_ids": {
-        "doi": "10.25549/wpacards-m6171"
-    },
-    "publisher": "University of Southern California Digital Library (USC.DL)",
-    "language": "en",
-    "contribs": [
+  "extra": {
+    "datacite": {
+      "subjects": [
+        {
+          "subject": "housing areas"
+        },
         {
-            "index": 0,
-            "raw_name": "Unknown",
-            "role": "author"
+          "subject": "Dwellings"
         }
-    ],
-    "refs": [],
-    "abstracts": []
-}
\ No newline at end of file
+      ],
+      "resourceType": "Dataset",
+      "resourceTypeGeneral": "Dataset",
+      "schemaVersion": "http://datacite.org/schema/kernel-4"
+    }
+  },
+  "title": "WPA household census for 210 E VERNON, Los Angeles",
+  "release_type": "dataset",
+  "release_stage": "published",
+  "release_year": 2012,
+  "ext_ids": {
+    "doi": "10.25549/wpacards-m6171"
+  },
+  "publisher": "University of Southern California Digital Library (USC.DL)",
+  "language": "en",
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "Unknown",
+      "role": "author"
+    }
+  ],
+  "refs": [],
+  "abstracts": []
+}
diff --git a/python/tests/files/datacite/datacite_result_11.json b/python/tests/files/datacite/datacite_result_11.json
index 3045701f..944ca718 100644
--- a/python/tests/files/datacite/datacite_result_11.json
+++ b/python/tests/files/datacite/datacite_result_11.json
@@ -1,21 +1,27 @@
 {
-    "extra": {"datacite": {}},
-    "title": "N1 bei Safenwil",
-    "release_type": "graphic",
-    "release_stage": "published",
-    "release_year": 1965,
-    "ext_ids": {
-        "doi": "10.3932/ethz-a-000055869"
-    },
-    "publisher": "ETH-Bibliothek Z\u00fcrich, Bildarchiv",
-    "language": "de",
-    "contribs": [
-        {
-            "index": 0,
-            "raw_name": "Comet Photo AG (Z\u00fcrich)",
-            "role": "author"
-        }
-    ],
-    "refs": [],
-    "abstracts": []
+  "extra": {
+    "datacite": {
+      "metadataVersion": 6,
+      "resourceTypeGeneral": "Image",
+      "schemaVersion": "http://datacite.org/schema/kernel-3"
+    }
+  },
+  "title": "N1 bei Safenwil",
+  "release_type": "graphic",
+  "release_stage": "published",
+  "release_year": 1965,
+  "ext_ids": {
+    "doi": "10.3932/ethz-a-000055869"
+  },
+  "publisher": "ETH-Bibliothek Zürich, Bildarchiv",
+  "language": "de",
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "Comet Photo AG (Zürich)",
+      "role": "author"
+    }
+  ],
+  "refs": [],
+  "abstracts": []
 }
diff --git a/python/tests/files/datacite/datacite_result_12.json b/python/tests/files/datacite/datacite_result_12.json
index c3a9071c..5e2a6281 100644
--- a/python/tests/files/datacite/datacite_result_12.json
+++ b/python/tests/files/datacite/datacite_result_12.json
@@ -1,44 +1,49 @@
 {
-    "extra": {"datacite": {}, "month": 6},
-    "title": "Anthropometric and Physiological Profile of Mixed Martial Art Athletes: A Brief Review",
-    "release_type": "article-journal",
-    "release_stage": "published",
-    "release_date": "2019-06-14",
-    "release_year": 2019,
-    "ext_ids": {
-        "doi": "10.5167/uzh-171449"
+  "extra": {
+    "datacite": {
+      "resourceTypeGeneral": "Text"
     },
-    "publisher": "MDPI Publishing",
-    "contribs": [
-        {
-            "index": 0,
-            "raw_name": "Charalampos Spanias",
-            "given_name": "Charalampos",
-            "surname": "Spanias",
-            "role": "author"
-        },
-        {
-            "index": 1,
-            "raw_name": "Pantelis T Nikolaidis",
-            "given_name": "Pantelis T",
-            "surname": "Nikolaidis",
-            "role": "author"
-        },
-        {
-            "index": 2,
-            "raw_name": "Thomas Rosemann",
-            "given_name": "Thomas",
-            "surname": "Rosemann",
-            "role": "author"
-        },
-        {
-            "index": 3,
-            "raw_name": "Beat Knechtle",
-            "given_name": "Beat",
-            "surname": "Knechtle",
-            "role": "author"
-        }
-    ],
-    "refs": [],
-    "abstracts": []
+    "month": 6
+  },
+  "title": "Anthropometric and Physiological Profile of Mixed Martial Art Athletes: A Brief Review",
+  "release_type": "article-journal",
+  "release_stage": "published",
+  "release_date": "2019-06-14",
+  "release_year": 2019,
+  "ext_ids": {
+    "doi": "10.5167/uzh-171449"
+  },
+  "publisher": "MDPI Publishing",
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "Charalampos Spanias",
+      "given_name": "Charalampos",
+      "surname": "Spanias",
+      "role": "author"
+    },
+    {
+      "index": 1,
+      "raw_name": "Pantelis T Nikolaidis",
+      "given_name": "Pantelis T",
+      "surname": "Nikolaidis",
+      "role": "author"
+    },
+    {
+      "index": 2,
+      "raw_name": "Thomas Rosemann",
+      "given_name": "Thomas",
+      "surname": "Rosemann",
+      "role": "author"
+    },
+    {
+      "index": 3,
+      "raw_name": "Beat Knechtle",
+      "given_name": "Beat",
+      "surname": "Knechtle",
+      "role": "author"
+    }
+  ],
+  "refs": [],
+  "abstracts": []
 }
diff --git a/python/tests/files/datacite/datacite_result_13.json b/python/tests/files/datacite/datacite_result_13.json
index d6ed2985..3dc7cafb 100644
--- a/python/tests/files/datacite/datacite_result_13.json
+++ b/python/tests/files/datacite/datacite_result_13.json
@@ -1,28 +1,36 @@
 {
-    "extra": {"datacite": {}, "month": 10},
-    "title": "[M\u00fcssen wir des Gl\u00fccks uns sch\u00e4men?]",
-    "release_type": "article-journal",
-    "release_stage": "published",
-    "release_date": "1940-10-05",
-    "release_year": 1940,
-    "ext_ids": {
-        "doi": "10.5169/seals-314104"
+  "extra": {
+    "datacite": {
+      "metadataVersion": 17,
+      "resourceType": "Journal Article",
+      "resourceTypeGeneral": "Text",
+      "schemaVersion": "http://datacite.org/schema/kernel-3"
     },
-    "publisher": "Buchdruckerei B\u00fcchler & Co.",
-    "contribs": [
-        {
-            "index": 0,
-            "raw_name": "O.M.",
-            "role": "author"
-        },
-        {
-            "index": 1,
-            "raw_name": "Hermann Hiltbrunner",
-            "given_name": "Hermann",
-            "surname": "Hiltbrunner",
-            "role": "author"
-        }
-    ],
-    "refs": [],
-    "abstracts": []
+    "month": 10
+  },
+  "title": "[Müssen wir des Glücks uns schämen?]",
+  "release_type": "article-journal",
+  "release_stage": "published",
+  "release_date": "1940-10-05",
+  "release_year": 1940,
+  "ext_ids": {
+    "doi": "10.5169/seals-314104"
+  },
+  "publisher": "Buchdruckerei Büchler & Co.",
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "O.M.",
+      "role": "author"
+    },
+    {
+      "index": 1,
+      "raw_name": "Hermann Hiltbrunner",
+      "given_name": "Hermann",
+      "surname": "Hiltbrunner",
+      "role": "author"
+    }
+  ],
+  "refs": [],
+  "abstracts": []
 }
diff --git a/python/tests/files/datacite/datacite_result_14.json b/python/tests/files/datacite/datacite_result_14.json
index c3719aeb..e28ee5c3 100644
--- a/python/tests/files/datacite/datacite_result_14.json
+++ b/python/tests/files/datacite/datacite_result_14.json
@@ -1,111 +1,114 @@
 {
-    "extra": {
-        "datacite": {
-            "subjects": [
-                {
-                    "subject": "Crystal Structure"
-                },
-                {
-                    "subject": "Experimental 3D Coordinates"
-                },
-                {
-                    "subject": "Crystal System"
-                },
-                {
-                    "subject": "Space Group"
-                },
-                {
-                    "subject": "Cell Parameters"
-                },
-                {
-                    "subject": "Crystallography"
-                },
-                {
-                    "subject": "bis(mu~2~-5-(3,5-Di-t-butylphenyl)-15-(4-(2-(diphenylphosphino)ethynyl)phenyl)-2,8,12,18-tetrahexyl-3,7,13,17-tetramethylporphyrinato)-(5,15-bis(3,5-di-t-butylphenyl)-2,8,12,18-tetraethyl-3,7,13,17-tetramethylporphyrinato)-di-nickel-ruthenium chloroform solvate"
-                }
-            ],
-            "relations": [
-                {
-                    "relationType": "IsSupplementTo",
-                    "relatedIdentifier": "10.1021/ic034699w",
-                    "relatedIdentifierType": "DOI"
-                }
-            ]
-        }
-    },
-    "title": "CCDC 222635: Experimental Crystal Structure Determination",
-    "release_type": "dataset",
-    "release_stage": "published",
-    "release_year": 2004,
-    "ext_ids": {
-        "doi": "10.5517/cc7gns3"
-    },
-    "publisher": "Cambridge Crystallographic Data Centre",
-    "language": "en",
-    "contribs": [
+  "extra": {
+    "datacite": {
+      "subjects": [
         {
-            "index": 0,
-            "raw_name": "E. Stulz",
-            "given_name": "E.",
-            "surname": "Stulz",
-            "role": "author"
+          "subject": "Crystal Structure"
         },
         {
-            "index": 1,
-            "raw_name": "S.M. Scott",
-            "given_name": "S.M.",
-            "surname": "Scott",
-            "role": "author"
+          "subject": "Experimental 3D Coordinates"
         },
         {
-            "index": 2,
-            "raw_name": "Yiu-Fai Ng",
-            "given_name": "Yiu-Fai",
-            "surname": "Ng",
-            "role": "author"
+          "subject": "Crystal System"
         },
         {
-            "index": 3,
-            "raw_name": "A.D. Bond",
-            "given_name": "A.D.",
-            "surname": "Bond",
-            "role": "author"
+          "subject": "Space Group"
         },
         {
-            "index": 4,
-            "raw_name": "S.J. Teat",
-            "given_name": "S.J.",
-            "surname": "Teat",
-            "role": "author"
+          "subject": "Cell Parameters"
         },
         {
-            "index": 5,
-            "raw_name": "S.L. Darling",
-            "given_name": "S.L.",
-            "surname": "Darling",
-            "role": "author"
+          "subject": "Crystallography"
         },
         {
-            "index": 6,
-            "raw_name": "N. Feeder",
-            "given_name": "N.",
-            "surname": "Feeder",
-            "role": "author"
-        },
-        {
-            "index": 7,
-            "raw_name": "J.K.M. Sanders",
-            "given_name": "J.K.M.",
-            "surname": "Sanders",
-            "role": "author"
+          "subject": "bis(mu~2~-5-(3,5-Di-t-butylphenyl)-15-(4-(2-(diphenylphosphino)ethynyl)phenyl)-2,8,12,18-tetrahexyl-3,7,13,17-tetramethylporphyrinato)-(5,15-bis(3,5-di-t-butylphenyl)-2,8,12,18-tetraethyl-3,7,13,17-tetramethylporphyrinato)-di-nickel-ruthenium chloroform solvate"
         }
-    ],
-    "refs": [],
-    "abstracts": [
+      ],
+      "relations": [
         {
-            "content": "An entry from the Cambridge Structural Database, the world's repository for small molecule crystal structures. The entry contains experimental data from a crystal diffraction study. The deposited dataset for this entry is freely available from the CCDC and typically includes 3D coordinates, cell parameters, space group, experimental conditions and quality measures.",
-            "mimetype": "text/plain",
-            "lang": "en"
+          "relationType": "IsSupplementTo",
+          "relatedIdentifier": "10.1021/ic034699w",
+          "relatedIdentifierType": "DOI"
         }
-    ]
+      ],
+      "metadataVersion": 2,
+      "resourceTypeGeneral": "Dataset",
+      "schemaVersion": "http://datacite.org/schema/kernel-3"
+    }
+  },
+  "title": "CCDC 222635: Experimental Crystal Structure Determination",
+  "release_type": "dataset",
+  "release_stage": "published",
+  "release_year": 2004,
+  "ext_ids": {
+    "doi": "10.5517/cc7gns3"
+  },
+  "publisher": "Cambridge Crystallographic Data Centre",
+  "language": "en",
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "E. Stulz",
+      "given_name": "E.",
+      "surname": "Stulz",
+      "role": "author"
+    },
+    {
+      "index": 1,
+      "raw_name": "S.M. Scott",
+      "given_name": "S.M.",
+      "surname": "Scott",
+      "role": "author"
+    },
+    {
+      "index": 2,
+      "raw_name": "Yiu-Fai Ng",
+      "given_name": "Yiu-Fai",
+      "surname": "Ng",
+      "role": "author"
+    },
+    {
+      "index": 3,
+      "raw_name": "A.D. Bond",
+      "given_name": "A.D.",
+      "surname": "Bond",
+      "role": "author"
+    },
+    {
+      "index": 4,
+      "raw_name": "S.J. Teat",
+      "given_name": "S.J.",
+      "surname": "Teat",
+      "role": "author"
+    },
+    {
+      "index": 5,
+      "raw_name": "S.L. Darling",
+      "given_name": "S.L.",
+      "surname": "Darling",
+      "role": "author"
+    },
+    {
+      "index": 6,
+      "raw_name": "N. Feeder",
+      "given_name": "N.",
+      "surname": "Feeder",
+      "role": "author"
+    },
+    {
+      "index": 7,
+      "raw_name": "J.K.M. Sanders",
+      "given_name": "J.K.M.",
+      "surname": "Sanders",
+      "role": "author"
+    }
+  ],
+  "refs": [],
+  "abstracts": [
+    {
+      "content": "An entry from the Cambridge Structural Database, the world's repository for small molecule crystal structures. The entry contains experimental data from a crystal diffraction study. The deposited dataset for this entry is freely available from the CCDC and typically includes 3D coordinates, cell parameters, space group, experimental conditions and quality measures.",
+      "mimetype": "text/plain",
+      "lang": "en"
+    }
+  ]
 }
diff --git a/python/tests/files/datacite/datacite_result_15.json b/python/tests/files/datacite/datacite_result_15.json
index 1b430a7d..3a03dfb6 100644
--- a/python/tests/files/datacite/datacite_result_15.json
+++ b/python/tests/files/datacite/datacite_result_15.json
@@ -1,22 +1,29 @@
 {
-    "extra": {"datacite": {}},
-    "title": "Parramore Island of the Virginia Coast Reserve Permanent Plot Resurvey: Tree data 1997",
-    "release_type": "dataset",
-    "release_stage": "published",
-    "release_year": 2017,
-    "ext_ids": {
-        "doi": "10.6073/pasta/95296d8416aae24f3d39b4ecb27f0b28"
-    },
-    "publisher": "Environmental Data Initiative",
-    "contribs": [
-        {
-            "index": 0,
-            "raw_name": "David Richardson",
-            "given_name": "David",
-            "surname": "Richardson",
-            "role": "author"
-        }
-    ],
-    "refs": [],
-    "abstracts": []
+  "extra": {
+    "datacite": {
+      "metadataVersion": 1,
+      "resourceType": "dataPackage",
+      "resourceTypeGeneral": "Dataset",
+      "schemaVersion": "http://datacite.org/schema/kernel-2.2"
+    }
+  },
+  "title": "Parramore Island of the Virginia Coast Reserve Permanent Plot Resurvey: Tree data 1997",
+  "release_type": "dataset",
+  "release_stage": "published",
+  "release_year": 2017,
+  "ext_ids": {
+    "doi": "10.6073/pasta/95296d8416aae24f3d39b4ecb27f0b28"
+  },
+  "publisher": "Environmental Data Initiative",
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "David Richardson",
+      "given_name": "David",
+      "surname": "Richardson",
+      "role": "author"
+    }
+  ],
+  "refs": [],
+  "abstracts": []
 }
diff --git a/python/tests/files/datacite/datacite_result_16.json b/python/tests/files/datacite/datacite_result_16.json
index ea8c2e59..8cf762b6 100644
--- a/python/tests/files/datacite/datacite_result_16.json
+++ b/python/tests/files/datacite/datacite_result_16.json
@@ -1,31 +1,34 @@
 {
-    "extra": {
-        "datacite": {
-            "license": [
-                {
-                    "rights": "CC-BY",
-                    "rightsUri": "http://creativecommons.org/licenses/by/3.0/us"
-                }
-            ]
-        }
-    },
-    "title": "Testing the Connectivity of Networks",
-    "release_type": "dataset",
-    "release_stage": "published",
-    "release_year": 2014,
-    "ext_ids": {
-        "doi": "10.6084/m9.figshare.1282478"
-    },
-    "publisher": "Figshare",
-    "contribs": [
+  "extra": {
+    "datacite": {
+      "license": [
         {
-            "index": 0,
-            "raw_name": "Taha Sochi",
-            "given_name": "Taha",
-            "surname": "Sochi",
-            "role": "author"
+          "rights": "CC-BY",
+          "rightsUri": "http://creativecommons.org/licenses/by/3.0/us"
         }
-    ],
-    "refs": [],
-    "abstracts": []
-}
\ No newline at end of file
+      ],
+      "resourceType": "Paper",
+      "resourceTypeGeneral": "Dataset",
+      "schemaVersion": "http://datacite.org/schema/kernel-3"
+    }
+  },
+  "title": "Testing the Connectivity of Networks",
+  "release_type": "dataset",
+  "release_stage": "published",
+  "release_year": 2014,
+  "ext_ids": {
+    "doi": "10.6084/m9.figshare.1282478"
+  },
+  "publisher": "Figshare",
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "Taha Sochi",
+      "given_name": "Taha",
+      "surname": "Sochi",
+      "role": "author"
+    }
+  ],
+  "refs": [],
+  "abstracts": []
+}
diff --git a/python/tests/files/datacite/datacite_result_17.json b/python/tests/files/datacite/datacite_result_17.json
index 73b082d9..6e8c4e34 100644
--- a/python/tests/files/datacite/datacite_result_17.json
+++ b/python/tests/files/datacite/datacite_result_17.json
@@ -1,20 +1,25 @@
 {
-    "extra": {"datacite": {}},
-    "title": "gel_BSA-FITC_Markov_segmntation0343.tif",
-    "release_type": "dataset",
-    "release_stage": "published",
-    "release_year": 2018,
-    "ext_ids": {
-        "doi": "10.7910/dvn/tsqfwc/yytj22"
-    },
-    "publisher": "Harvard Dataverse",
-    "contribs": [
-        {
-            "index": 0,
-            "raw_name": "Di Giovanna, Antonino Paolo (University Of Florence)",
-            "role": "author"
-        }
-    ],
-    "refs": [],
-    "abstracts": []
+  "extra": {
+    "datacite": {
+      "resourceTypeGeneral": "Dataset",
+      "schemaVersion": "http://datacite.org/schema/kernel-4"
+    }
+  },
+  "title": "gel_BSA-FITC_Markov_segmntation0343.tif",
+  "release_type": "dataset",
+  "release_stage": "published",
+  "release_year": 2018,
+  "ext_ids": {
+    "doi": "10.7910/dvn/tsqfwc/yytj22"
+  },
+  "publisher": "Harvard Dataverse",
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "Di Giovanna, Antonino Paolo (University Of Florence)",
+      "role": "author"
+    }
+  ],
+  "refs": [],
+  "abstracts": []
 }
diff --git a/python/tests/files/datacite/datacite_result_18.json b/python/tests/files/datacite/datacite_result_18.json
index fb109de2..43b46923 100644
--- a/python/tests/files/datacite/datacite_result_18.json
+++ b/python/tests/files/datacite/datacite_result_18.json
@@ -1,15 +1,21 @@
 {
-    "extra": {"datacite": {}, "month": 8},
-    "title": "Eastern questionnaire, answer sheet for Interviewee 53215, page 064",
-    "release_type": "article",
-    "release_stage": "published",
-    "release_date": "2017-08-21",
-    "release_year": 2017,
-    "ext_ids": {
-        "doi": "10.7916/d81z522m"
+  "extra": {
+    "datacite": {
+      "metadataVersion": 2,
+      "schemaVersion": "http://datacite.org/schema/kernel-3"
     },
-    "publisher": "Columbia University",
-    "contribs": [],
-    "refs": [],
-    "abstracts": []
+    "month": 8
+  },
+  "title": "Eastern questionnaire, answer sheet for Interviewee 53215, page 064",
+  "release_type": "article",
+  "release_stage": "published",
+  "release_date": "2017-08-21",
+  "release_year": 2017,
+  "ext_ids": {
+    "doi": "10.7916/d81z522m"
+  },
+  "publisher": "Columbia University",
+  "contribs": [],
+  "refs": [],
+  "abstracts": []
 }
diff --git a/python/tests/files/datacite/datacite_result_19.json b/python/tests/files/datacite/datacite_result_19.json
index 85bada92..8b91efe5 100644
--- a/python/tests/files/datacite/datacite_result_19.json
+++ b/python/tests/files/datacite/datacite_result_19.json
@@ -1,15 +1,21 @@
 {
-    "extra": {"datacite": {}, "month": 8},
-    "title": "Eastern questionnaire, answer sheet for Interviewee 55236, page 092",
-    "release_type": "article",
-    "release_stage": "published",
-    "release_date": "2017-08-24",
-    "release_year": 2017,
-    "ext_ids": {
-        "doi": "10.7916/d86x0cg1"
+  "extra": {
+    "datacite": {
+      "metadataVersion": 3,
+      "schemaVersion": "http://datacite.org/schema/kernel-3"
     },
-    "publisher": "Columbia University",
-    "contribs": [],
-    "refs": [],
-    "abstracts": []
+    "month": 8
+  },
+  "title": "Eastern questionnaire, answer sheet for Interviewee 55236, page 092",
+  "release_type": "article",
+  "release_stage": "published",
+  "release_date": "2017-08-24",
+  "release_year": 2017,
+  "ext_ids": {
+    "doi": "10.7916/d86x0cg1"
+  },
+  "publisher": "Columbia University",
+  "contribs": [],
+  "refs": [],
+  "abstracts": []
 }
diff --git a/python/tests/files/datacite/datacite_result_20.json b/python/tests/files/datacite/datacite_result_20.json
index 891cb41e..ed1f8885 100644
--- a/python/tests/files/datacite/datacite_result_20.json
+++ b/python/tests/files/datacite/datacite_result_20.json
@@ -1,14 +1,17 @@
 {
-    "extra": {"datacite": {}, "month": 8},
-    "title": "<h1>Eastern questionnaire</h1>",
-    "release_type": "article",
-    "release_stage": "published",
-    "release_date": "2017-08-24",
-    "release_year": 2017,
-    "ext_ids": {
-        "doi": "10.7916/d86x0cg1"
-    },
-    "contribs": [],
-    "refs": [],
-    "abstracts": []
+  "extra": {
+    "datacite": {},
+    "month": 8
+  },
+  "title": "<h1>Eastern questionnaire</h1>",
+  "release_type": "article",
+  "release_stage": "published",
+  "release_date": "2017-08-24",
+  "release_year": 2017,
+  "ext_ids": {
+    "doi": "10.7916/d86x0cg1"
+  },
+  "contribs": [],
+  "refs": [],
+  "abstracts": []
 }
diff --git a/python/tests/files/datacite/datacite_result_21.json b/python/tests/files/datacite/datacite_result_21.json
index 73df8216..1230abfa 100644
--- a/python/tests/files/datacite/datacite_result_21.json
+++ b/python/tests/files/datacite/datacite_result_21.json
@@ -1,15 +1,18 @@
 {
-    "extra": {"datacite": {}, "month": 8},
-    "title": "ABC",
-    "release_type": "article",
-    "release_stage": "published",
-    "release_date": "2017-08-24",
-    "release_year": 2017,
-    "ext_ids": {
-        "doi": "10.7916/d86x0cg1"
-    },
-    "language": "de",
-    "contribs": [],
-    "refs": [],
-    "abstracts": []
+  "extra": {
+    "datacite": {},
+    "month": 8
+  },
+  "title": "ABC",
+  "release_type": "article",
+  "release_stage": "published",
+  "release_date": "2017-08-24",
+  "release_year": 2017,
+  "ext_ids": {
+    "doi": "10.7916/d86x0cg1"
+  },
+  "language": "de",
+  "contribs": [],
+  "refs": [],
+  "abstracts": []
 }
diff --git a/python/tests/files/datacite/datacite_result_22.json b/python/tests/files/datacite/datacite_result_22.json
index 97f35da5..cba01531 100644
--- a/python/tests/files/datacite/datacite_result_22.json
+++ b/python/tests/files/datacite/datacite_result_22.json
@@ -1,22 +1,25 @@
 {
-    "extra": {"datacite": {}, "month": 8},
-    "title": "ABC",
-    "release_type": "article",
-    "release_stage": "published",
-    "release_date": "2017-08-24",
-    "release_year": 2017,
-    "ext_ids": {
-        "doi": "10.7916/d86x0cg1"
-    },
-    "language": "de",
-    "contribs": [
-        {
-            "index": 0,
-            "raw_name": "Anton Welch",
-            "role": "author",
-            "raw_affiliation": "Department of pataphysics"
-        }
-    ],
-    "refs": [],
-    "abstracts": []
+  "extra": {
+    "datacite": {},
+    "month": 8
+  },
+  "title": "ABC",
+  "release_type": "article",
+  "release_stage": "published",
+  "release_date": "2017-08-24",
+  "release_year": 2017,
+  "ext_ids": {
+    "doi": "10.7916/d86x0cg1"
+  },
+  "language": "de",
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "Anton Welch",
+      "role": "author",
+      "raw_affiliation": "Department of pataphysics"
+    }
+  ],
+  "refs": [],
+  "abstracts": []
 }
diff --git a/python/tests/files/datacite/datacite_result_23.json b/python/tests/files/datacite/datacite_result_23.json
index 93385c70..db622e1c 100644
--- a/python/tests/files/datacite/datacite_result_23.json
+++ b/python/tests/files/datacite/datacite_result_23.json
@@ -1,22 +1,25 @@
 {
-    "extra": {"datacite": {}, "month": 8},
-    "title": "ABC",
-    "release_type": "article",
-    "release_stage": "published",
-    "release_date": "2017-08-24",
-    "release_year": 2017,
-    "ext_ids": {
-        "doi": "10.7916/d86x0cg1-xxx"
-    },
-    "language": "de",
-    "contribs": [
-        {
-            "index": 0,
-            "raw_name": "Anton Welch",
-            "role": "author",
-            "raw_affiliation": "Department of pataphysics"
-        }
-    ],
-    "refs": [],
-    "abstracts": []
+  "extra": {
+    "datacite": {},
+    "month": 8
+  },
+  "title": "ABC",
+  "release_type": "article",
+  "release_stage": "published",
+  "release_date": "2017-08-24",
+  "release_year": 2017,
+  "ext_ids": {
+    "doi": "10.7916/d86x0cg1-xxx"
+  },
+  "language": "de",
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "Anton Welch",
+      "role": "author",
+      "raw_affiliation": "Department of pataphysics"
+    }
+  ],
+  "refs": [],
+  "abstracts": []
 }
diff --git a/python/tests/files/datacite/datacite_result_24.json b/python/tests/files/datacite/datacite_result_24.json
index cb08e67b..8338cf29 100644
--- a/python/tests/files/datacite/datacite_result_24.json
+++ b/python/tests/files/datacite/datacite_result_24.json
@@ -1,22 +1,25 @@
 {
-    "extra": {"datacite": {}, "month": 8},
-    "title": "ABC",
-    "subtitle": "DEF",
-    "release_type": "article",
-    "release_stage": "published",
-    "release_date": "2017-08-24",
-    "release_year": 2017,
-    "ext_ids": {
-        "doi": "10.7916/d86x0cg1"
-    },
-    "contribs": [
-        {
-            "index": 0,
-            "raw_name": "Anton Welch",
-            "role": "author",
-            "raw_affiliation": "Department of pataphysics"
-        }
-    ],
-    "refs": [],
-    "abstracts": []
+  "extra": {
+    "datacite": {},
+    "month": 8
+  },
+  "title": "ABC",
+  "subtitle": "DEF",
+  "release_type": "article",
+  "release_stage": "published",
+  "release_date": "2017-08-24",
+  "release_year": 2017,
+  "ext_ids": {
+    "doi": "10.7916/d86x0cg1"
+  },
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "Anton Welch",
+      "role": "author",
+      "raw_affiliation": "Department of pataphysics"
+    }
+  ],
+  "refs": [],
+  "abstracts": []
 }
-- 
cgit v1.2.3


From a23f73e37cd88de5467c47aa5f84b96448c5713d Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Wed, 8 Jan 2020 03:35:41 +0100
Subject: datacite: CCDC are entries, mostly

---
 python/fatcat_tools/importers/datacite.py           | 4 ++++
 python/tests/files/datacite/datacite_result_14.json | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'python/tests')

diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 58dfc556..587a65aa 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -543,6 +543,10 @@ class DataciteImporter(EntityImporter):
         if publisher == 'The Global Biodiversity Information Facility':
             release_type = 'stub'
 
+        # release_type exception: lots of "Experimental Crystal Structure Determination"
+        if publisher == 'Cambridge Crystallographic Data Centre':
+            release_type = 'entry'
+
         # Language values are varied ("ger", "es", "English", "ENG", "en-us",
         # "other", ...). Try to crush it with langcodes: "It may sound to you
         # like langcodes solves a pretty boring problem. At one level, that's
diff --git a/python/tests/files/datacite/datacite_result_14.json b/python/tests/files/datacite/datacite_result_14.json
index e28ee5c3..20f6bfd4 100644
--- a/python/tests/files/datacite/datacite_result_14.json
+++ b/python/tests/files/datacite/datacite_result_14.json
@@ -37,7 +37,7 @@
     }
   },
   "title": "CCDC 222635: Experimental Crystal Structure Determination",
-  "release_type": "dataset",
+  "release_type": "entry",
   "release_stage": "published",
   "release_year": 2004,
   "ext_ids": {
-- 
cgit v1.2.3


From a7e5460d6355dd0e99b08e480d4e50755fda3b16 Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Wed, 8 Jan 2020 03:47:10 +0100
Subject: datacite: mark additional files as stub

---
 python/fatcat_tools/importers/datacite.py          |  4 ++
 python/tests/files/datacite/datacite_doc_25.json   | 47 ++++++++++++++++++++++
 .../tests/files/datacite/datacite_result_25.json   | 25 ++++++++++++
 python/tests/import_datacite.py                    |  2 +-
 4 files changed, 77 insertions(+), 1 deletion(-)
 create mode 100644 python/tests/files/datacite/datacite_doc_25.json
 create mode 100644 python/tests/files/datacite/datacite_result_25.json

(limited to 'python/tests')

diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 587a65aa..90bc3db7 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -547,6 +547,10 @@ class DataciteImporter(EntityImporter):
         if publisher == 'Cambridge Crystallographic Data Centre':
             release_type = 'entry'
 
+        # Supplement files, e.g. "Additional file 1: ASE constructs in questionnaire."
+        if title.lower().startswith('additional file'):
+            release_type = 'stub'
+
         # Language values are varied ("ger", "es", "English", "ENG", "en-us",
         # "other", ...). Try to crush it with langcodes: "It may sound to you
         # like langcodes solves a pretty boring problem. At one level, that's
diff --git a/python/tests/files/datacite/datacite_doc_25.json b/python/tests/files/datacite/datacite_doc_25.json
new file mode 100644
index 00000000..60cd0ab7
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_25.json
@@ -0,0 +1,47 @@
+{
+  "attributes": {
+    "doi": "10.7916/d86x0cg1",
+    "creators": [
+      {
+        "name": "Anton Welch",
+        "affiliation": [
+          "Department of pataphysics"
+        ],
+        "nameIdentifiers": []
+      }
+    ],
+    "titles": [
+      {
+        "title": "Additional file 123: ABC"
+      },
+      {
+        "title": "DEF",
+        "titleType": "Subtitle"
+      }
+    ],
+    "publicationYear": 2016,
+    "language": "DE-CH",
+    "types": {
+      "ris": "GEN",
+      "bibtex": "misc",
+      "citeproc": "article",
+      "schemaOrg": "CreativeWork"
+    },
+    "dates": [
+      {
+        "date": "2017-08-24",
+        "dateType": "Created"
+      },
+      {
+        "date": "2019-08-04",
+        "dateType": "Updated"
+      },
+      {
+        "date": "2017",
+        "dateType": "Issued"
+      }
+    ],
+    "isActive": true,
+    "state": "findable"
+  }
+}
diff --git a/python/tests/files/datacite/datacite_result_25.json b/python/tests/files/datacite/datacite_result_25.json
new file mode 100644
index 00000000..8a370bbb
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_25.json
@@ -0,0 +1,25 @@
+{
+  "extra": {
+    "datacite": {},
+    "month": 8
+  },
+  "title": "Additional file 123: ABC",
+  "subtitle": "DEF",
+  "release_type": "stub",
+  "release_stage": "published",
+  "release_date": "2017-08-24",
+  "release_year": 2017,
+  "ext_ids": {
+    "doi": "10.7916/d86x0cg1"
+  },
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "Anton Welch",
+      "role": "author",
+      "raw_affiliation": "Department of pataphysics"
+    }
+  ],
+  "refs": [],
+  "abstracts": []
+}
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index 9ee479e8..7293ecac 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -287,7 +287,7 @@ def test_datacite_conversions(datacite_importer):
     for now.
     """
     datacite_importer.debug = True
-    for i in range(25):
+    for i in range(26):
         src = 'tests/files/datacite/datacite_doc_{0:02d}.json'.format(i)
         dst = 'tests/files/datacite/datacite_result_{0:02d}.json'.format(i)
         print('testing mapping from {} => {}'.format(src, dst))
-- 
cgit v1.2.3


From 791c21af58554203cbfa52a7ebc1d91db261daec Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Wed, 8 Jan 2020 03:56:28 +0100
Subject: datacite: adjust tests for release_month

---
 python/tests/files/datacite/datacite_result_00.json | 2 +-
 python/tests/files/datacite/datacite_result_05.json | 2 +-
 python/tests/files/datacite/datacite_result_12.json | 2 +-
 python/tests/files/datacite/datacite_result_13.json | 2 +-
 python/tests/files/datacite/datacite_result_18.json | 2 +-
 python/tests/files/datacite/datacite_result_19.json | 2 +-
 python/tests/files/datacite/datacite_result_20.json | 2 +-
 python/tests/files/datacite/datacite_result_21.json | 2 +-
 python/tests/files/datacite/datacite_result_22.json | 2 +-
 python/tests/files/datacite/datacite_result_23.json | 2 +-
 python/tests/files/datacite/datacite_result_24.json | 2 +-
 python/tests/files/datacite/datacite_result_25.json | 2 +-
 12 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'python/tests')

diff --git a/python/tests/files/datacite/datacite_result_00.json b/python/tests/files/datacite/datacite_result_00.json
index 28da5397..0a84e7bd 100644
--- a/python/tests/files/datacite/datacite_result_00.json
+++ b/python/tests/files/datacite/datacite_result_00.json
@@ -20,7 +20,7 @@
       "schemaVersion": "http://datacite.org/schema/kernel-4",
       "metadataVersion": 1
     },
-    "month": 5
+    "release_month": 5
   },
   "title": "Synthesis and Crystal Structure of a Compound with Two Conformational Isomers: N-(2-methylbenzoyl)-N′-(4-nitrophenyl)thiourea",
   "release_type": "article-journal",
diff --git a/python/tests/files/datacite/datacite_result_05.json b/python/tests/files/datacite/datacite_result_05.json
index 961ad72a..22542a10 100644
--- a/python/tests/files/datacite/datacite_result_05.json
+++ b/python/tests/files/datacite/datacite_result_05.json
@@ -12,7 +12,7 @@
       "resourceTypeGeneral": "Dataset",
       "schemaVersion": "http://datacite.org/schema/kernel-3"
     },
-    "month": 10
+    "release_month": 10
   },
   "title": "SH409843.07FU",
   "subtitle": "Gomphales",
diff --git a/python/tests/files/datacite/datacite_result_12.json b/python/tests/files/datacite/datacite_result_12.json
index 5e2a6281..6977ecea 100644
--- a/python/tests/files/datacite/datacite_result_12.json
+++ b/python/tests/files/datacite/datacite_result_12.json
@@ -3,7 +3,7 @@
     "datacite": {
       "resourceTypeGeneral": "Text"
     },
-    "month": 6
+    "release_month": 6
   },
   "title": "Anthropometric and Physiological Profile of Mixed Martial Art Athletes: A Brief Review",
   "release_type": "article-journal",
diff --git a/python/tests/files/datacite/datacite_result_13.json b/python/tests/files/datacite/datacite_result_13.json
index 3dc7cafb..91126c5a 100644
--- a/python/tests/files/datacite/datacite_result_13.json
+++ b/python/tests/files/datacite/datacite_result_13.json
@@ -6,7 +6,7 @@
       "resourceTypeGeneral": "Text",
       "schemaVersion": "http://datacite.org/schema/kernel-3"
     },
-    "month": 10
+    "release_month": 10
   },
   "title": "[Müssen wir des Glücks uns schämen?]",
   "release_type": "article-journal",
diff --git a/python/tests/files/datacite/datacite_result_18.json b/python/tests/files/datacite/datacite_result_18.json
index 43b46923..6e69bad2 100644
--- a/python/tests/files/datacite/datacite_result_18.json
+++ b/python/tests/files/datacite/datacite_result_18.json
@@ -4,7 +4,7 @@
       "metadataVersion": 2,
       "schemaVersion": "http://datacite.org/schema/kernel-3"
     },
-    "month": 8
+    "release_month": 8
   },
   "title": "Eastern questionnaire, answer sheet for Interviewee 53215, page 064",
   "release_type": "article",
diff --git a/python/tests/files/datacite/datacite_result_19.json b/python/tests/files/datacite/datacite_result_19.json
index 8b91efe5..2f2f217e 100644
--- a/python/tests/files/datacite/datacite_result_19.json
+++ b/python/tests/files/datacite/datacite_result_19.json
@@ -4,7 +4,7 @@
       "metadataVersion": 3,
       "schemaVersion": "http://datacite.org/schema/kernel-3"
     },
-    "month": 8
+    "release_month": 8
   },
   "title": "Eastern questionnaire, answer sheet for Interviewee 55236, page 092",
   "release_type": "article",
diff --git a/python/tests/files/datacite/datacite_result_20.json b/python/tests/files/datacite/datacite_result_20.json
index ed1f8885..0f99e2a2 100644
--- a/python/tests/files/datacite/datacite_result_20.json
+++ b/python/tests/files/datacite/datacite_result_20.json
@@ -1,7 +1,7 @@
 {
   "extra": {
     "datacite": {},
-    "month": 8
+    "release_month": 8
   },
   "title": "<h1>Eastern questionnaire</h1>",
   "release_type": "article",
diff --git a/python/tests/files/datacite/datacite_result_21.json b/python/tests/files/datacite/datacite_result_21.json
index 1230abfa..3dfcf1bf 100644
--- a/python/tests/files/datacite/datacite_result_21.json
+++ b/python/tests/files/datacite/datacite_result_21.json
@@ -1,7 +1,7 @@
 {
   "extra": {
     "datacite": {},
-    "month": 8
+    "release_month": 8
   },
   "title": "ABC",
   "release_type": "article",
diff --git a/python/tests/files/datacite/datacite_result_22.json b/python/tests/files/datacite/datacite_result_22.json
index cba01531..bd88c358 100644
--- a/python/tests/files/datacite/datacite_result_22.json
+++ b/python/tests/files/datacite/datacite_result_22.json
@@ -1,7 +1,7 @@
 {
   "extra": {
     "datacite": {},
-    "month": 8
+    "release_month": 8
   },
   "title": "ABC",
   "release_type": "article",
diff --git a/python/tests/files/datacite/datacite_result_23.json b/python/tests/files/datacite/datacite_result_23.json
index db622e1c..e82925af 100644
--- a/python/tests/files/datacite/datacite_result_23.json
+++ b/python/tests/files/datacite/datacite_result_23.json
@@ -1,7 +1,7 @@
 {
   "extra": {
     "datacite": {},
-    "month": 8
+    "release_month": 8
   },
   "title": "ABC",
   "release_type": "article",
diff --git a/python/tests/files/datacite/datacite_result_24.json b/python/tests/files/datacite/datacite_result_24.json
index 8338cf29..2d95d300 100644
--- a/python/tests/files/datacite/datacite_result_24.json
+++ b/python/tests/files/datacite/datacite_result_24.json
@@ -1,7 +1,7 @@
 {
   "extra": {
     "datacite": {},
-    "month": 8
+    "release_month": 8
   },
   "title": "ABC",
   "subtitle": "DEF",
diff --git a/python/tests/files/datacite/datacite_result_25.json b/python/tests/files/datacite/datacite_result_25.json
index 8a370bbb..aad6d17e 100644
--- a/python/tests/files/datacite/datacite_result_25.json
+++ b/python/tests/files/datacite/datacite_result_25.json
@@ -1,7 +1,7 @@
 {
   "extra": {
     "datacite": {},
-    "month": 8
+    "release_month": 8
   },
   "title": "Additional file 123: ABC",
   "subtitle": "DEF",
-- 
cgit v1.2.3


From 62d6a7e48d6bea1bc7f451c6043f38aee2051f9b Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Wed, 8 Jan 2020 22:33:58 +0100
Subject: datacite: factor out contributor handling

Use values from:

* attributes.creators[]
* attributes.contributors[]
---
 python/fatcat_tools/importers/datacite.py          | 183 ++++++++++++---------
 python/tests/files/datacite/datacite_doc_26.json   |  57 +++++++
 .../tests/files/datacite/datacite_result_05.json   |   6 +
 .../tests/files/datacite/datacite_result_09.json   |  11 ++
 .../tests/files/datacite/datacite_result_26.json   |  31 ++++
 python/tests/import_datacite.py                    |   4 +-
 6 files changed, 210 insertions(+), 82 deletions(-)
 create mode 100644 python/tests/files/datacite/datacite_doc_26.json
 create mode 100644 python/tests/files/datacite/datacite_result_26.json

(limited to 'python/tests')

diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index fc986994..9ca72758 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -303,88 +303,11 @@ class DataciteImporter(EntityImporter):
             print('[{}] skipping non-ascii doi for now'.format(doi))
             return None
 
-        # Contributors. Many nameIdentifierSchemes, we do not use (yet):
-        # "attributes.creators[].nameIdentifiers[].nameIdentifierScheme":
-        # ["LCNA", "GND", "email", "NAF", "OSF", "RRID", "ORCID",
-        # "SCOPUS", "NRCPID", "schema.org", "GRID", "MGDS", "VIAF", "JACoW-ID"].
-        contribs = []
-
-        # Names, that should be ignored right away.
-        name_blacklist = set(('Occdownload Gbif.Org',))
-
-        for i, c in enumerate(attributes['creators']):
-            nameType = c.get('nameType', '') or ''
-            if nameType in ('', 'Personal'):
-                creator_id = None
-                for nid in c.get('nameIdentifiers', []):
-                    name_scheme = nid.get('nameIdentifierScheme', '') or ''
-                    if not name_scheme.lower() == "orcid":
-                        continue
-                    orcid = nid.get('nameIdentifier',
-                                    '').replace('https://orcid.org/', '')
-                    if not orcid:
-                        continue
-                    creator_id = self.lookup_orcid(orcid)
-                    # TODO(martin): If creator_id is None, should we create creators?
-
-                # If there are multiple affiliation strings, use the first one.
-                affiliations = c.get('affiliation', []) or []
-                raw_affiliation = None
-                if len(affiliations) == 0:
-                    raw_affiliation = None
-                else:
-                    raw_affiliation = clean(affiliations[0])
-
-                name = c.get('name')
-                given_name = c.get('givenName')
-                surname = c.get('familyName')
-
-                if name:
-                    name = clean(name)
-
-                if name in name_blacklist:
-                    continue
-
-                if given_name:
-                    given_name = clean(given_name)
-
-                if surname:
-                    surname = clean(surname)
-
-                if not name:
-                    continue
-
-                if raw_affiliation == '':
-                    continue
 
-                if name.lower() in UNKNOWN_MARKERS:
-                    continue
+        creators = attributes.get('creators', []) or []
+        contributors = attributes.get('contributors', []) or []  # Much fewer than creators.
 
-                # Unpack name, if we have an index form (e.g. 'Razis, Panos A') into 'Panos A razis'.
-                if name:
-                    name = index_form_to_display_name(name)
-
-                contribs.append(
-                    fatcat_openapi_client.ReleaseContrib(
-                        creator_id=creator_id,
-                        index=i,
-                        raw_name=name,
-                        given_name=given_name,
-                        surname=surname,
-                        role='author',
-                        raw_affiliation=raw_affiliation,
-                    ))
-            elif nameType == 'Organizational':
-                name = c.get('name', '') or ''
-                if name in UNKNOWN_MARKERS:
-                    continue
-                if len(name) < 3:
-                    continue
-                extra = {'organization': name}
-                contribs.append(fatcat_openapi_client.ReleaseContrib(
-                    index=i, extra=extra))
-            else:
-                print('[{}] unknown name type: {}'.format(doi, nameType), file=sys.stderr)
+        contribs = self.parse_datacite_creators(creators) + self.parse_datacite_creators(contributors, role=None, set_index=False)
 
         # Title, may come with "attributes.titles[].titleType", like
         # "AlternativeTitle", "Other", "Subtitle", "TranslatedTitle"
@@ -767,6 +690,104 @@ class DataciteImporter(EntityImporter):
                     extra=self.editgroup_extra),
                 entity_list=batch))
 
+    def parse_datacite_creators(self, creators, role='author', set_index=True, doi=None):
+        """
+        Parses a list of creators into a list of ReleaseContrib objects. Set
+        set_index to False, if the index contrib field should be left blank.
+
+        Role is set on personal (or untyped) names only. The optional doi is
+        only used to tag log messages.
+        """
+        # Contributors. Many nameIdentifierSchemes, we do not use (yet):
+        # "attributes.creators[].nameIdentifiers[].nameIdentifierScheme":
+        # ["LCNA", "GND", "email", "NAF", "OSF", "RRID", "ORCID",
+        # "SCOPUS", "NRCPID", "schema.org", "GRID", "MGDS", "VIAF", "JACoW-ID"].
+        contribs = []
+
+        # Names, that should be ignored right away.
+        name_blacklist = set(('Occdownload Gbif.Org',))
+
+        for i, c in enumerate(creators):
+            if not set_index:
+                i = None
+            nameType = c.get('nameType', '') or ''
+            if nameType in ('', 'Personal'):
+                creator_id = None
+                # Keys may exist with a null value, hence the "or" fallbacks.
+                for nid in c.get('nameIdentifiers', []) or []:
+                    name_scheme = nid.get('nameIdentifierScheme', '') or ''
+                    if not name_scheme.lower() == "orcid":
+                        continue
+                    orcid = (nid.get('nameIdentifier', '') or '').replace('https://orcid.org/', '')
+                    if not orcid:
+                        continue
+                    creator_id = self.lookup_orcid(orcid)
+                    # TODO(martin): If creator_id is None, should we create creators?
+
+                # If there are multiple affiliation strings, use the first one.
+                affiliations = c.get('affiliation', []) or []
+                raw_affiliation = clean(affiliations[0]) if affiliations else None
+
+                name = c.get('name')
+                given_name = c.get('givenName')
+                surname = c.get('familyName')
+
+                if name:
+                    name = clean(name)
+                if not name:
+                    continue
+                if name in name_blacklist:
+                    continue
+                if name.lower() in UNKNOWN_MARKERS:
+                    continue
+                # Unpack name, if we have an index form (e.g. 'Razis, Panos A') into 'Panos A Razis'.
+                name = index_form_to_display_name(name)
+
+                if given_name:
+                    given_name = clean(given_name)
+                if surname:
+                    surname = clean(surname)
+                # NOTE: an affiliation that cleans to '' drops the whole contrib.
+                if raw_affiliation == '':
+                    continue
+
+                extra = None
+
+                # "DataManager", "DataCurator", "ContactPerson", "Distributor",
+                # "RegistrationAgency", "Sponsor", "Researcher",
+                # "RelatedPerson", "ProjectLeader", "Editor", "Other",
+                # "ProjectMember", "Funder", "RightsHolder", "DataCollector",
+                # "Supervisor", "Producer", "HostingInstitution", "ResearchGroup"
+                contributorType = c.get('contributorType', '') or ''
+
+                if contributorType:
+                    extra = {'type': contributorType}
+
+                contribs.append(
+                    fatcat_openapi_client.ReleaseContrib(
+                        creator_id=creator_id,
+                        index=i,
+                        raw_name=name,
+                        given_name=given_name,
+                        surname=surname,
+                        role=role,
+                        raw_affiliation=raw_affiliation,
+                        extra=extra,
+                    ))
+            elif nameType == 'Organizational':
+                name = c.get('name', '') or ''
+                if name in UNKNOWN_MARKERS:
+                    continue
+                if len(name) < 3:
+                    continue
+                extra = {'organization': name}
+                contribs.append(fatcat_openapi_client.ReleaseContrib(
+                    index=i, extra=extra))
+            else:
+                print('[{}] unknown name type: {}'.format(doi, nameType), file=sys.stderr)
+
+        return contribs
+
 
 def lookup_license_slug(raw):
     """
@@ -971,6 +992,8 @@ def index_form_to_display_name(s):
     if s.count(',') > 1:
         # "Dr. Hina, Dr. Muhammad Usman Shahid, Dr. Muhammad Zeeshan Khan"
         return s
+
+    # Not names, but sprinkled in fields where authors live.
     stopwords = [s.lower() for s in (
         'Archive',
         'Collection',
diff --git a/python/tests/files/datacite/datacite_doc_26.json b/python/tests/files/datacite/datacite_doc_26.json
new file mode 100644
index 00000000..c2abb1b2
--- /dev/null
+++ b/python/tests/files/datacite/datacite_doc_26.json
@@ -0,0 +1,57 @@
+{
+  "attributes": {
+    "doi": "10.7916/d86x0cg1",
+    "creators": [
+      {
+        "name": "Anton Welch",
+        "affiliation": [
+          "Department of pataphysics"
+        ],
+        "nameIdentifiers": []
+      }
+    ],
+    "contributors": [
+      {
+        "name": "Wemmer, David",
+        "nameType": "Personal",
+        "givenName": "David",
+        "familyName": "Wemmer",
+        "affiliation": [],
+        "contributorType": "Editor"
+      }
+    ],
+    "titles": [
+      {
+        "title": "Additional file 123: ABC"
+      },
+      {
+        "title": "DEF",
+        "titleType": "Subtitle"
+      }
+    ],
+    "publicationYear": 2016,
+    "language": "DE-CH",
+    "types": {
+      "ris": "GEN",
+      "bibtex": "misc",
+      "citeproc": "article",
+      "schemaOrg": "CreativeWork"
+    },
+    "dates": [
+      {
+        "date": "2017-08-24",
+        "dateType": "Created"
+      },
+      {
+        "date": "2019-08-04",
+        "dateType": "Updated"
+      },
+      {
+        "date": "2017",
+        "dateType": "Issued"
+      }
+    ],
+    "isActive": true,
+    "state": "findable"
+  }
+}
diff --git a/python/tests/files/datacite/datacite_result_05.json b/python/tests/files/datacite/datacite_result_05.json
index 22542a10..c4e5418d 100644
--- a/python/tests/files/datacite/datacite_result_05.json
+++ b/python/tests/files/datacite/datacite_result_05.json
@@ -523,6 +523,12 @@
       "given_name": "Christian",
       "surname": "Wurzbacher",
       "role": "author"
+    },
+    {
+      "raw_name": "Kessy Abarenkov"
+    },
+    {
+      "raw_name": "NHM UT-University Of Tartu; Natural History Museum And Botanic Garden"
     }
   ],
   "refs": [],
diff --git a/python/tests/files/datacite/datacite_result_09.json b/python/tests/files/datacite/datacite_result_09.json
index fd873309..c93dc769 100644
--- a/python/tests/files/datacite/datacite_result_09.json
+++ b/python/tests/files/datacite/datacite_result_09.json
@@ -32,6 +32,17 @@
       "given_name": "Nils",
       "surname": "Kirstaedter",
       "role": "author"
+    },
+    {
+      "extra": {
+        "organization": "TIB-Technische Informationsbibliothek Universitätsbibliothek Hannover"
+      }
+    },
+    {
+      "raw_name": "Technische Informationsbibliothek (TIB)",
+      "extra": {
+        "type": "DataManager"
+      }
     }
   ],
   "refs": [],
diff --git a/python/tests/files/datacite/datacite_result_26.json b/python/tests/files/datacite/datacite_result_26.json
new file mode 100644
index 00000000..8d26197c
--- /dev/null
+++ b/python/tests/files/datacite/datacite_result_26.json
@@ -0,0 +1,31 @@
+{
+  "extra": {
+    "datacite": {},
+    "release_month": 8
+  },
+  "title": "Additional file 123: ABC",
+  "subtitle": "DEF",
+  "release_type": "stub",
+  "release_stage": "published",
+  "release_date": "2017-08-24",
+  "release_year": 2017,
+  "ext_ids": {
+    "doi": "10.7916/d86x0cg1"
+  },
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "Anton Welch",
+      "role": "author",
+      "raw_affiliation": "Department of pataphysics"
+    },
+      {
+        "extra": {"type": "Editor"},
+        "raw_name": "David Wemmer",
+        "given_name": "David",
+        "surname": "Wemmer"
+      }
+  ],
+  "refs": [],
+  "abstracts": []
+}
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index 7293ecac..5ad7ef2c 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -275,7 +275,7 @@ def test_datacite_dict_parse(datacite_importer):
         assert r.extra['datacite']['subjects'] == [{'subject': 'Plant Genetic Resource for Food and Agriculture'}]
         assert len(r.abstracts) == 1
         assert len(r.abstracts[0].content) == 421
-        assert len(r.contribs) == 1
+        assert len(r.contribs) == 2
         assert r.contribs[0].raw_name == "GLIS Of The ITPGRFA"
         assert r.contribs[0].given_name == None
         assert r.contribs[0].surname == None
@@ -287,7 +287,7 @@ def test_datacite_conversions(datacite_importer):
     for now.
     """
     datacite_importer.debug = True
-    for i in range(26):
+    for i in range(27):
         src = 'tests/files/datacite/datacite_doc_{0:02d}.json'.format(i)
         dst = 'tests/files/datacite/datacite_result_{0:02d}.json'.format(i)
         print('testing mapping from {} => {}'.format(src, dst))
-- 
cgit v1.2.3


From d3deb36c26ae86c1763c33a8c356ecd5491caa40 Mon Sep 17 00:00:00 2001
From: Martin Czygan <martin.czygan@gmail.com>
Date: Wed, 8 Jan 2020 22:41:17 +0100
Subject: datacite: reformat test cases and use jq . --sort-keys

---
 python/tests/files/datacite/datacite_doc_00.json   | 158 ++---
 python/tests/files/datacite/datacite_doc_01.json   |  96 +--
 python/tests/files/datacite/datacite_doc_02.json   |  96 +--
 python/tests/files/datacite/datacite_doc_03.json   |  78 +--
 python/tests/files/datacite/datacite_doc_04.json   |  94 +--
 python/tests/files/datacite/datacite_doc_05.json   | 684 ++++++++++-----------
 python/tests/files/datacite/datacite_doc_06.json   |  90 +--
 python/tests/files/datacite/datacite_doc_07.json   | 120 ++--
 python/tests/files/datacite/datacite_doc_08.json   | 112 ++--
 python/tests/files/datacite/datacite_doc_09.json   | 140 ++---
 python/tests/files/datacite/datacite_doc_10.json   |  90 +--
 python/tests/files/datacite/datacite_doc_11.json   |  92 +--
 python/tests/files/datacite/datacite_doc_12.json   | 124 ++--
 python/tests/files/datacite/datacite_doc_13.json   |  98 +--
 python/tests/files/datacite/datacite_doc_14.json   | 188 +++---
 python/tests/files/datacite/datacite_doc_15.json   |  92 +--
 python/tests/files/datacite/datacite_doc_16.json   |  94 +--
 python/tests/files/datacite/datacite_doc_17.json   |  84 +--
 python/tests/files/datacite/datacite_doc_18.json   |  82 +--
 python/tests/files/datacite/datacite_doc_19.json   |  82 +--
 python/tests/files/datacite/datacite_doc_20.json   |  24 +-
 python/tests/files/datacite/datacite_doc_21.json   |  32 +-
 python/tests/files/datacite/datacite_doc_22.json   |  32 +-
 python/tests/files/datacite/datacite_doc_23.json   |  32 +-
 python/tests/files/datacite/datacite_doc_24.json   |  40 +-
 python/tests/files/datacite/datacite_doc_25.json   |  40 +-
 python/tests/files/datacite/datacite_doc_26.json   |  58 +-
 .../tests/files/datacite/datacite_result_00.json   |  86 +--
 .../tests/files/datacite/datacite_result_01.json   |  36 +-
 .../tests/files/datacite/datacite_result_02.json   |  36 +-
 .../tests/files/datacite/datacite_result_03.json   |  26 +-
 .../tests/files/datacite/datacite_result_04.json   |  48 +-
 .../tests/files/datacite/datacite_result_05.json   | 494 +++++++--------
 .../tests/files/datacite/datacite_result_06.json   |  26 +-
 .../tests/files/datacite/datacite_result_07.json   |  92 +--
 .../tests/files/datacite/datacite_result_08.json   |  66 +-
 .../tests/files/datacite/datacite_result_09.json   |  64 +-
 .../tests/files/datacite/datacite_result_10.json   |  40 +-
 .../tests/files/datacite/datacite_result_11.json   |  32 +-
 .../tests/files/datacite/datacite_result_12.json   |  56 +-
 .../tests/files/datacite/datacite_result_13.json   |  44 +-
 .../tests/files/datacite/datacite_result_14.json   | 152 ++---
 .../tests/files/datacite/datacite_result_15.json   |  34 +-
 .../tests/files/datacite/datacite_result_16.json   |  34 +-
 .../tests/files/datacite/datacite_result_17.json   |  30 +-
 .../tests/files/datacite/datacite_result_18.json   |  20 +-
 .../tests/files/datacite/datacite_result_19.json   |  20 +-
 .../tests/files/datacite/datacite_result_20.json   |  18 +-
 .../tests/files/datacite/datacite_result_21.json   |  20 +-
 .../tests/files/datacite/datacite_result_22.json   |  32 +-
 .../tests/files/datacite/datacite_result_23.json   |  32 +-
 .../tests/files/datacite/datacite_result_24.json   |  32 +-
 .../tests/files/datacite/datacite_result_25.json   |  32 +-
 .../tests/files/datacite/datacite_result_26.json   |  46 +-
 54 files changed, 2301 insertions(+), 2299 deletions(-)

(limited to 'python/tests')

diff --git a/python/tests/files/datacite/datacite_doc_00.json b/python/tests/files/datacite/datacite_doc_00.json
index 248f525f..f60b106f 100644
--- a/python/tests/files/datacite/datacite_doc_00.json
+++ b/python/tests/files/datacite/datacite_doc_00.json
@@ -1,53 +1,34 @@
 {
-  "id": "10.1007/s10870-008-9413-z",
-  "type": "dois",
   "attributes": {
-    "doi": "10.1007/s10870-008-9413-z",
-    "identifiers": [
-      {
-        "identifier": "https://doi.org/10.1007/s10870-008-9413-z",
-        "identifierType": "DOI"
-      },
-      {
-        "identifier": "s10870-008-9413-z",
-        "identifierType": "Publisher ID"
-      }
-    ],
+    "container": {
+      "firstPage": "927",
+      "identifier": "1074-1542",
+      "identifierType": "ISSN",
+      "issue": "12",
+      "lastPage": "930",
+      "title": "Journal of Chemical Crystallography",
+      "type": "Journal",
+      "volume": "38"
+    },
+    "contentUrl": null,
+    "contributors": [],
+    "created": "2019-06-18T14:52:19.000Z",
     "creators": [
       {
-        "name": "Li, Qian-Jin",
-        "nameType": "Personal",
-        "givenName": "Qian-Jin",
+        "affiliation": [],
         "familyName": "Li",
-        "affiliation": []
+        "givenName": "Qian-Jin",
+        "name": "Li, Qian-Jin",
+        "nameType": "Personal"
       },
       {
-        "name": "Yang, Chun-Long",
-        "nameType": "Personal",
-        "givenName": "Chun-Long",
+        "affiliation": [],
         "familyName": "Yang",
-        "affiliation": []
-      }
-    ],
-    "titles": [
-      {
-        "title": "Synthesis and Crystal Structure of a Compound with Two Conformational Isomers: N-(2-methylbenzoyl)-N′-(4-nitrophenyl)thiourea"
+        "givenName": "Chun-Long",
+        "name": "Yang, Chun-Long",
+        "nameType": "Personal"
       }
     ],
-    "publisher": "Springer Science and Business Media LLC",
-    "container": {
-      "type": "Journal",
-      "issue": "12",
-      "title": "Journal of Chemical Crystallography",
-      "volume": "38",
-      "lastPage": "930",
-      "firstPage": "927",
-      "identifier": "1074-1542",
-      "identifierType": "ISSN"
-    },
-    "publicationYear": 2008,
-    "subjects": [],
-    "contributors": [],
     "dates": [
       {
         "date": "2008-05-30",
@@ -58,77 +39,95 @@
         "dateType": "Updated"
       }
     ],
+    "descriptions": [],
+    "doi": "10.1007/s10870-008-9413-z",
+    "formats": [],
+    "fundingReferences": [],
+    "geoLocations": [],
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.1007/s10870-008-9413-z",
+        "identifierType": "DOI"
+      },
+      {
+        "identifier": "s10870-008-9413-z",
+        "identifierType": "Publisher ID"
+      }
+    ],
+    "isActive": true,
     "language": null,
-    "types": {
-      "ris": "JOUR",
-      "bibtex": "article",
-      "citeproc": "article-journal",
-      "schemaOrg": "ScholarlyArticle",
-      "resourceType": "JournalArticle",
-      "resourceTypeGeneral": "Text"
-    },
+    "metadataVersion": 1,
+    "publicationYear": 2008,
+    "published": "2008",
+    "publisher": "Springer Science and Business Media LLC",
+    "reason": null,
+    "registered": null,
     "relatedIdentifiers": [
       {
-        "relationType": "IsPartOf",
         "relatedIdentifier": "1074-1542",
-        "resourceTypeGeneral": "Collection",
-        "relatedIdentifierType": "ISSN"
+        "relatedIdentifierType": "ISSN",
+        "relationType": "IsPartOf",
+        "resourceTypeGeneral": "Collection"
       },
       {
-        "relationType": "References",
         "relatedIdentifier": "10.1016/j.bmcl.2005.09.033",
-        "relatedIdentifierType": "DOI"
+        "relatedIdentifierType": "DOI",
+        "relationType": "References"
       },
       {
-        "relationType": "References",
         "relatedIdentifier": "10.1016/s0022-1139(02)00330-5",
-        "relatedIdentifierType": "DOI"
+        "relatedIdentifierType": "DOI",
+        "relationType": "References"
       },
       {
-        "relationType": "References",
         "relatedIdentifier": "10.1016/s0010-8545(01)00337-x",
-        "relatedIdentifierType": "DOI"
+        "relatedIdentifierType": "DOI",
+        "relationType": "References"
       },
       {
-        "relationType": "References",
         "relatedIdentifier": "10.1016/j.tetlet.2005.06.135",
-        "relatedIdentifierType": "DOI"
+        "relatedIdentifierType": "DOI",
+        "relationType": "References"
       },
       {
-        "relationType": "References",
         "relatedIdentifier": "10.1039/p298700000s1",
-        "relatedIdentifierType": "DOI"
+        "relatedIdentifierType": "DOI",
+        "relationType": "References"
       },
       {
-        "relationType": "References",
         "relatedIdentifier": "10.1002/anie.199515551",
-        "relatedIdentifierType": "DOI"
+        "relatedIdentifierType": "DOI",
+        "relationType": "References"
       }
     ],
-    "sizes": [],
-    "formats": [],
-    "version": null,
     "rightsList": [
       {
         "rightsUri": "http://www.springer.com/tdm"
       }
     ],
-    "descriptions": [],
-    "geoLocations": [],
-    "fundingReferences": [],
-    "url": "http://link.springer.com/10.1007/s10870-008-9413-z",
-    "contentUrl": null,
-    "metadataVersion": 1,
     "schemaVersion": "http://datacite.org/schema/kernel-4",
+    "sizes": [],
     "source": "levriero",
-    "isActive": true,
     "state": "findable",
-    "reason": null,
-    "created": "2019-06-18T14:52:19.000Z",
-    "registered": null,
-    "published": "2008",
-    "updated": "2019-08-03T00:03:40.000Z"
+    "subjects": [],
+    "titles": [
+      {
+        "title": "Synthesis and Crystal Structure of a Compound with Two Conformational Isomers: N-(2-methylbenzoyl)-N′-(4-nitrophenyl)thiourea"
+      }
+    ],
+    "types": {
+      "bibtex": "article",
+      "citeproc": "article-journal",
+      "resourceType": "JournalArticle",
+      "resourceTypeGeneral": "Text",
+      "ris": "JOUR",
+      "schemaOrg": "ScholarlyArticle"
+    },
+    "updated": "2019-08-03T00:03:40.000Z",
+    "url": "http://link.springer.com/10.1007/s10870-008-9413-z",
+    "version": null
   },
+  "id": "10.1007/s10870-008-9413-z",
   "relationships": {
     "client": {
       "data": {
@@ -136,5 +135,6 @@
         "type": "clients"
       }
     }
-  }
+  },
+  "type": "dois"
 }
diff --git a/python/tests/files/datacite/datacite_doc_01.json b/python/tests/files/datacite/datacite_doc_01.json
index c4ef6e45..16a446b3 100644
--- a/python/tests/files/datacite/datacite_doc_01.json
+++ b/python/tests/files/datacite/datacite_doc_01.json
@@ -1,75 +1,74 @@
 {
-  "id": "10.11588/diglit.25558.39",
-  "type": "dois",
   "attributes": {
-    "doi": "10.11588/diglit.25558.39",
-    "identifiers": [
-      {
-        "identifier": "https://doi.org/10.11588/diglit.25558.39",
-        "identifierType": "DOI"
-      }
-    ],
+    "container": {},
+    "contentUrl": null,
+    "contributors": [],
+    "created": "2016-12-08T07:43:15.000Z",
     "creators": [
       {
-        "name": "Dargenty, G.",
-        "nameType": "Personal",
-        "givenName": "G.",
+        "affiliation": [],
         "familyName": "Dargenty",
-        "affiliation": []
-      }
-    ],
-    "titles": [
-      {
-        "lang": "de",
-        "title": "Ferdinand Gaillard, [1]: né à Paris le 16 janvier 1834, mort à Paris le 19 janvier 1887"
+        "givenName": "G.",
+        "name": "Dargenty, G.",
+        "nameType": "Personal"
       }
     ],
-    "publisher": "University Library Heidelberg",
-    "container": {},
-    "publicationYear": 1887,
-    "subjects": [],
-    "contributors": [],
     "dates": [
       {
         "date": "1887",
         "dateType": "Issued"
       }
     ],
+    "descriptions": [],
+    "doi": "10.11588/diglit.25558.39",
+    "formats": [],
+    "fundingReferences": [],
+    "geoLocations": [],
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.11588/diglit.25558.39",
+        "identifierType": "DOI"
+      }
+    ],
+    "isActive": true,
     "language": "fre",
-    "types": {
-      "ris": "RPRT",
-      "bibtex": "article",
-      "citeproc": "article-journal",
-      "schemaOrg": "ScholarlyArticle",
-      "resourceType": "DigitalisatDigital copy",
-      "resourceTypeGeneral": "Text"
-    },
+    "metadataVersion": 4,
+    "publicationYear": 1887,
+    "published": "1887",
+    "publisher": "University Library Heidelberg",
+    "reason": null,
+    "registered": "2016-12-08T07:43:15.000Z",
     "relatedIdentifiers": [],
-    "sizes": [],
-    "formats": [],
-    "version": null,
     "rightsList": [
       {
         "lang": "de",
         "rights": "Standard (Creative Commons - Namensnennung - Weitergabe unter gleichen Bedingungen) - http://www.ub.uni-heidelberg.de/helios/digi/nutzung/Welcome.html"
       }
     ],
-    "descriptions": [],
-    "geoLocations": [],
-    "fundingReferences": [],
-    "url": "http://digi.ub.uni-heidelberg.de/diglit/art1887_1/0172",
-    "contentUrl": null,
-    "metadataVersion": 4,
     "schemaVersion": "http://datacite.org/schema/kernel-4",
+    "sizes": [],
     "source": null,
-    "isActive": true,
     "state": "findable",
-    "reason": null,
-    "created": "2016-12-08T07:43:15.000Z",
-    "registered": "2016-12-08T07:43:15.000Z",
-    "published": "1887",
-    "updated": "2019-08-02T14:27:33.000Z"
+    "subjects": [],
+    "titles": [
+      {
+        "lang": "de",
+        "title": "Ferdinand Gaillard, [1]: né à Paris le 16 janvier 1834, mort à Paris le 19 janvier 1887"
+      }
+    ],
+    "types": {
+      "bibtex": "article",
+      "citeproc": "article-journal",
+      "resourceType": "DigitalisatDigital copy",
+      "resourceTypeGeneral": "Text",
+      "ris": "RPRT",
+      "schemaOrg": "ScholarlyArticle"
+    },
+    "updated": "2019-08-02T14:27:33.000Z",
+    "url": "http://digi.ub.uni-heidelberg.de/diglit/art1887_1/0172",
+    "version": null
   },
+  "id": "10.11588/diglit.25558.39",
   "relationships": {
     "client": {
       "data": {
@@ -77,5 +76,6 @@
         "type": "clients"
       }
     }
-  }
+  },
+  "type": "dois"
 }
diff --git a/python/tests/files/datacite/datacite_doc_02.json b/python/tests/files/datacite/datacite_doc_02.json
index 8b9a594e..139e2cb0 100644
--- a/python/tests/files/datacite/datacite_doc_02.json
+++ b/python/tests/files/datacite/datacite_doc_02.json
@@ -1,53 +1,44 @@
 {
-  "id": "10.11588/diglit.37715.57",
-  "type": "dois",
   "attributes": {
-    "doi": "10.11588/diglit.37715.57",
-    "identifiers": [
-      {
-        "identifier": "https://doi.org/10.11588/diglit.37715.57",
-        "identifierType": "DOI"
-      }
-    ],
+    "container": {},
+    "contentUrl": null,
+    "contributors": [],
+    "created": "2018-11-29T12:04:12.000Z",
     "creators": [
       {
-        "name": "Weyersberg, Albert",
-        "nameType": "Personal",
-        "givenName": "Albert",
+        "affiliation": [],
         "familyName": "Weyersberg",
-        "affiliation": []
-      }
-    ],
-    "titles": [
-      {
-        "lang": "de",
-        "title": "Solinger Schwertschmiede-Familien, [4]"
+        "givenName": "Albert",
+        "name": "Weyersberg, Albert",
+        "nameType": "Personal"
       }
     ],
-    "publisher": "University Library Heidelberg",
-    "container": {},
-    "publicationYear": 1897,
-    "subjects": [],
-    "contributors": [],
     "dates": [
       {
         "date": "1897",
         "dateType": "Issued"
       }
     ],
+    "descriptions": [],
+    "doi": "10.11588/diglit.37715.57",
+    "formats": [],
+    "fundingReferences": [],
+    "geoLocations": [],
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.11588/diglit.37715.57",
+        "identifierType": "DOI"
+      }
+    ],
+    "isActive": true,
     "language": "ger",
-    "types": {
-      "ris": "RPRT",
-      "bibtex": "article",
-      "citeproc": "article-journal",
-      "schemaOrg": "ScholarlyArticle",
-      "resourceType": "DigitalisatDigital copy",
-      "resourceTypeGeneral": "Text"
-    },
+    "metadataVersion": 2,
+    "publicationYear": 1897,
+    "published": "1897",
+    "publisher": "University Library Heidelberg",
+    "reason": null,
+    "registered": "2018-11-29T12:04:13.000Z",
     "relatedIdentifiers": [],
-    "sizes": [],
-    "formats": [],
-    "version": null,
     "rightsList": [
       {
         "lang": "de",
@@ -58,22 +49,30 @@
         "rights": "Creative Commons - Namensnennung - Weitergabe unter gleichen Bedingungen - https://creativecommons.org/licenses/by-sa/3.0/"
       }
     ],
-    "descriptions": [],
-    "geoLocations": [],
-    "fundingReferences": [],
-    "url": "https://digi.ub.uni-heidelberg.de/diglit/zhwk1897_1899/0131",
-    "contentUrl": null,
-    "metadataVersion": 2,
     "schemaVersion": "http://datacite.org/schema/kernel-4",
+    "sizes": [],
     "source": "mds",
-    "isActive": true,
     "state": "findable",
-    "reason": null,
-    "created": "2018-11-29T12:04:12.000Z",
-    "registered": "2018-11-29T12:04:13.000Z",
-    "published": "1897",
-    "updated": "2019-08-02T21:31:04.000Z"
+    "subjects": [],
+    "titles": [
+      {
+        "lang": "de",
+        "title": "Solinger Schwertschmiede-Familien, [4]"
+      }
+    ],
+    "types": {
+      "bibtex": "article",
+      "citeproc": "article-journal",
+      "resourceType": "DigitalisatDigital copy",
+      "resourceTypeGeneral": "Text",
+      "ris": "RPRT",
+      "schemaOrg": "ScholarlyArticle"
+    },
+    "updated": "2019-08-02T21:31:04.000Z",
+    "url": "https://digi.ub.uni-heidelberg.de/diglit/zhwk1897_1899/0131",
+    "version": null
   },
+  "id": "10.11588/diglit.37715.57",
   "relationships": {
     "client": {
       "data": {
@@ -81,5 +80,6 @@
         "type": "clients"
       }
     }
-  }
+  },
+  "type": "dois"
 }
diff --git a/python/tests/files/datacite/datacite_doc_03.json b/python/tests/files/datacite/datacite_doc_03.json
index e77a359c..80bacabc 100644
--- a/python/tests/files/datacite/datacite_doc_03.json
+++ b/python/tests/files/datacite/datacite_doc_03.json
@@ -1,64 +1,63 @@
 {
-  "id": "10.13140/rg.2.2.30434.53446",
-  "type": "dois",
   "attributes": {
-    "doi": "10.13140/rg.2.2.30434.53446",
-    "identifiers": [
+    "container": {},
+    "contentUrl": null,
+    "contributors": [],
+    "created": "2016-11-03T09:07:08.000Z",
+    "creators": [
       {
-        "identifier": "https://doi.org/10.13140/rg.2.2.30434.53446",
-        "identifierType": "DOI"
+        "affiliation": [],
+        "name": "Mastura Yahya"
       }
     ],
-    "creators": [
+    "dates": [
       {
-        "name": "Mastura Yahya",
-        "affiliation": []
+        "date": "2016",
+        "dateType": "Issued"
       }
     ],
-    "titles": [
+    "descriptions": [],
+    "doi": "10.13140/rg.2.2.30434.53446",
+    "formats": [],
+    "fundingReferences": [],
+    "geoLocations": [],
+    "identifiers": [
       {
-        "title": "midterm ah30903"
+        "identifier": "https://doi.org/10.13140/rg.2.2.30434.53446",
+        "identifierType": "DOI"
       }
     ],
-    "publisher": "Unpublished",
-    "container": {},
+    "isActive": true,
+    "language": "ms",
+    "metadataVersion": 0,
     "publicationYear": 2016,
+    "published": "2016",
+    "publisher": "Unpublished",
+    "reason": null,
+    "registered": "2016-11-03T09:07:09.000Z",
+    "relatedIdentifiers": [],
+    "rightsList": [],
+    "schemaVersion": "http://datacite.org/schema/kernel-3",
+    "sizes": [],
+    "source": null,
+    "state": "findable",
     "subjects": [],
-    "contributors": [],
-    "dates": [
+    "titles": [
       {
-        "date": "2016",
-        "dateType": "Issued"
+        "title": "midterm ah30903"
       }
     ],
-    "language": "ms",
     "types": {
-      "ris": "GEN",
       "bibtex": "misc",
       "citeproc": "article",
+      "ris": "GEN",
       "schemaOrg": "CreativeWork"
     },
-    "relatedIdentifiers": [],
-    "sizes": [],
-    "formats": [],
-    "version": null,
-    "rightsList": [],
-    "descriptions": [],
-    "geoLocations": [],
-    "fundingReferences": [],
+    "updated": "2019-08-02T12:51:15.000Z",
     "url": "http://rgdoi.net/10.13140/RG.2.2.30434.53446",
-    "contentUrl": null,
-    "metadataVersion": 0,
-    "schemaVersion": "http://datacite.org/schema/kernel-3",
-    "source": null,
-    "isActive": true,
-    "state": "findable",
-    "reason": null,
-    "created": "2016-11-03T09:07:08.000Z",
-    "registered": "2016-11-03T09:07:09.000Z",
-    "published": "2016",
-    "updated": "2019-08-02T12:51:15.000Z"
+    "version": null
   },
+  "id": "10.13140/rg.2.2.30434.53446",
   "relationships": {
     "client": {
       "data": {
@@ -66,5 +65,6 @@
         "type": "clients"
       }
     }
-  }
+  },
+  "type": "dois"
 }
diff --git a/python/tests/files/datacite/datacite_doc_04.json b/python/tests/files/datacite/datacite_doc_04.json
index 8655a26a..f7d06a75 100644
--- a/python/tests/files/datacite/datacite_doc_04.json
+++ b/python/tests/files/datacite/datacite_doc_04.json
@@ -1,74 +1,73 @@
 {
-  "id": "10.14288/1.0080520",
-  "type": "dois",
   "attributes": {
-    "doi": "10.14288/1.0080520",
-    "identifiers": [
-      {
-        "identifier": "https://doi.org/10.14288/1.0080520",
-        "identifierType": "DOI"
-      }
-    ],
+    "container": {},
+    "contentUrl": null,
+    "contributors": [],
+    "created": "2015-11-11T11:12:34.000Z",
     "creators": [
       {
-        "name": "Nicollerat, Marc Andre",
-        "nameType": "Personal",
-        "givenName": "Marc Andre",
+        "affiliation": [],
         "familyName": "Nicollerat",
-        "affiliation": []
-      }
-    ],
-    "titles": [
-      {
-        "title": "On chain maps inducing isomorphisms in homology"
+        "givenName": "Marc Andre",
+        "name": "Nicollerat, Marc Andre",
+        "nameType": "Personal"
       }
     ],
-    "publisher": "University of British Columbia",
-    "container": {},
-    "publicationYear": 1973,
-    "subjects": [],
-    "contributors": [],
     "dates": [
       {
         "date": "1973",
         "dateType": "Issued"
       }
     ],
-    "language": "en",
-    "types": {
-      "ris": "RPRT",
-      "bibtex": "article",
-      "citeproc": "article-journal",
-      "schemaOrg": "ScholarlyArticle",
-      "resourceType": "Text",
-      "resourceTypeGeneral": "Text"
-    },
-    "relatedIdentifiers": [],
-    "sizes": [],
-    "formats": [],
-    "version": null,
-    "rightsList": [],
     "descriptions": [
       {
         "description": "Let A be an abelian category, I the full subcategory of A consisting of injective objects of A, and K(A) the category whose objects are cochain complexes of elements of A, and whose morphisms are homotopy classes of cochain maps.  In (5), lemma 4.6., p. 42, R. Hartshorne has proved that, under certain conditions, a cochain complex X˙ ε. |KA)| can be embedded in a complex I˙ ε. |K(I)| in such a way that I˙ has the same cohomology as X˙.  In Chapter I we show that the construction given in the two first parts of Hartshorne's Lemma is natural i.e. there exists a functor  J : K(A) → K(I) and a natural transformation [formula omitted]  (where E : K(I) → K(A) is the embedding functor) such that [formula omitted] is  injective and induces isomorphism in cohomology. The question whether the construction given in the third part of the lemma is functorial is still open.  We also prove that J is left adjoint to E, so that K(I) is a reflective subcategory of K(A).  In the special case where A is a category [formula omitted] of left A-modules, and [formula omitted] the category of cochain complexes in [formula omitted] and cochain maps (not homotopy classes), we prove the existence of a functor [formula omitted]  In Chapter II we study the natural homomorphism [formula omitted]   where A, B are rings, and M, L, N modules or chain complexes. In particular we give several sufficient conditions under which v is an isomorphism, or induces isomorphism in homology.  In the appendix we give a detailed proof of Hartshorne's Lemma. We think that this is useful, as no complete proof is, to our knowledge, to be found in the literature.",
         "descriptionType": "Abstract"
       }
     ],
-    "geoLocations": [],
+    "doi": "10.14288/1.0080520",
+    "formats": [],
     "fundingReferences": [],
-    "url": "https://doi.library.ubc.ca/10.14288/1.0080520",
-    "contentUrl": null,
+    "geoLocations": [],
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.14288/1.0080520",
+        "identifierType": "DOI"
+      }
+    ],
+    "isActive": true,
+    "language": "en",
     "metadataVersion": 5,
+    "publicationYear": 1973,
+    "published": "1973",
+    "publisher": "University of British Columbia",
+    "reason": null,
+    "registered": "2015-11-11T11:12:35.000Z",
+    "relatedIdentifiers": [],
+    "rightsList": [],
     "schemaVersion": "http://datacite.org/schema/kernel-3",
+    "sizes": [],
     "source": null,
-    "isActive": true,
     "state": "findable",
-    "reason": null,
-    "created": "2015-11-11T11:12:34.000Z",
-    "registered": "2015-11-11T11:12:35.000Z",
-    "published": "1973",
-    "updated": "2019-08-02T09:43:14.000Z"
+    "subjects": [],
+    "titles": [
+      {
+        "title": "On chain maps inducing isomorphisms in homology"
+      }
+    ],
+    "types": {
+      "bibtex": "article",
+      "citeproc": "article-journal",
+      "resourceType": "Text",
+      "resourceTypeGeneral": "Text",
+      "ris": "RPRT",
+      "schemaOrg": "ScholarlyArticle"
+    },
+    "updated": "2019-08-02T09:43:14.000Z",
+    "url": "https://doi.library.ubc.ca/10.14288/1.0080520",
+    "version": null
   },
+  "id": "10.14288/1.0080520",
   "relationships": {
     "client": {
       "data": {
@@ -76,5 +75,6 @@
         "type": "clients"
       }
     }
-  }
+  },
+  "type": "dois"
 }
diff --git a/python/tests/files/datacite/datacite_doc_05.json b/python/tests/files/datacite/datacite_doc_05.json
index 75e68e9d..76fb73a8 100644
--- a/python/tests/files/datacite/datacite_doc_05.json
+++ b/python/tests/files/datacite/datacite_doc_05.json
@@ -1,534 +1,515 @@
 {
-  "id": "10.15156/bio/sh409843.07fu",
-  "type": "dois",
   "attributes": {
-    "doi": "10.15156/bio/sh409843.07fu",
-    "identifiers": [
+    "container": {},
+    "contentUrl": null,
+    "contributors": [
       {
-        "identifier": "https://doi.org/10.15156/bio/sh409843.07fu",
-        "identifierType": "DOI"
+        "affiliation": [],
+        "name": "Kessy Abarenkov"
+      },
+      {
+        "affiliation": [],
+        "name": "NHM UT-University Of Tartu; Natural History Museum And Botanic Garden"
       }
     ],
+    "created": "2015-06-05T10:23:18.000Z",
     "creators": [
       {
-        "name": "Kõljalg, Urmas",
-        "nameType": "Personal",
-        "givenName": "Urmas",
+        "affiliation": [],
         "familyName": "Kõljalg",
-        "affiliation": []
+        "givenName": "Urmas",
+        "name": "Kõljalg, Urmas",
+        "nameType": "Personal"
       },
       {
-        "name": "Abarenkov, Kessy",
-        "nameType": "Personal",
-        "givenName": "Kessy",
+        "affiliation": [],
         "familyName": "Abarenkov",
-        "affiliation": []
+        "givenName": "Kessy",
+        "name": "Abarenkov, Kessy",
+        "nameType": "Personal"
       },
       {
-        "name": "Nilsson, R. Henrik",
-        "nameType": "Personal",
-        "givenName": "R. Henrik",
+        "affiliation": [],
         "familyName": "Nilsson",
-        "affiliation": []
+        "givenName": "R. Henrik",
+        "name": "Nilsson, R. Henrik",
+        "nameType": "Personal"
       },
       {
-        "name": "Larsson, Karl-Henrik",
-        "nameType": "Personal",
-        "givenName": "Karl-Henrik",
+        "affiliation": [],
         "familyName": "Larsson",
-        "affiliation": []
+        "givenName": "Karl-Henrik",
+        "name": "Larsson, Karl-Henrik",
+        "nameType": "Personal"
       },
       {
-        "name": "Aas, Anders Bjørnsgard",
-        "nameType": "Personal",
-        "givenName": "Anders Bjørnsgard",
+        "affiliation": [],
         "familyName": "Aas",
-        "affiliation": []
+        "givenName": "Anders Bjørnsgard",
+        "name": "Aas, Anders Bjørnsgard",
+        "nameType": "Personal"
       },
       {
-        "name": "Adams, Rachel",
-        "nameType": "Personal",
-        "givenName": "Rachel",
+        "affiliation": [],
         "familyName": "Adams",
-        "affiliation": []
+        "givenName": "Rachel",
+        "name": "Adams, Rachel",
+        "nameType": "Personal"
       },
       {
-        "name": "Alves, Artur",
-        "nameType": "Personal",
-        "givenName": "Artur",
+        "affiliation": [],
         "familyName": "Alves",
-        "affiliation": []
+        "givenName": "Artur",
+        "name": "Alves, Artur",
+        "nameType": "Personal"
       },
       {
-        "name": "Ammirati, Joseph F.",
-        "nameType": "Personal",
-        "givenName": "Joseph F.",
+        "affiliation": [],
         "familyName": "Ammirati",
-        "affiliation": []
+        "givenName": "Joseph F.",
+        "name": "Ammirati, Joseph F.",
+        "nameType": "Personal"
       },
       {
-        "name": "Arnold, A. Elizabeth",
-        "nameType": "Personal",
-        "givenName": "A. Elizabeth",
+        "affiliation": [],
         "familyName": "Arnold",
-        "affiliation": []
+        "givenName": "A. Elizabeth",
+        "name": "Arnold, A. Elizabeth",
+        "nameType": "Personal"
       },
       {
-        "name": "Bahram, Mohammad",
-        "nameType": "Personal",
-        "givenName": "Mohammad",
+        "affiliation": [],
         "familyName": "Bahram",
-        "affiliation": []
+        "givenName": "Mohammad",
+        "name": "Bahram, Mohammad",
+        "nameType": "Personal"
       },
       {
-        "name": "Bengtsson-Palme, Johan",
-        "nameType": "Personal",
-        "givenName": "Johan",
+        "affiliation": [],
         "familyName": "Bengtsson-Palme",
-        "affiliation": []
+        "givenName": "Johan",
+        "name": "Bengtsson-Palme, Johan",
+        "nameType": "Personal"
       },
       {
-        "name": "Berlin, Anna",
-        "nameType": "Personal",
-        "givenName": "Anna",
+        "affiliation": [],
         "familyName": "Berlin",
-        "affiliation": []
+        "givenName": "Anna",
+        "name": "Berlin, Anna",
+        "nameType": "Personal"
       },
       {
-        "name": "Botnen, Synnøve",
-        "nameType": "Personal",
-        "givenName": "Synnøve",
+        "affiliation": [],
         "familyName": "Botnen",
-        "affiliation": []
+        "givenName": "Synnøve",
+        "name": "Botnen, Synnøve",
+        "nameType": "Personal"
       },
       {
-        "name": "Bourlat, Sarah",
-        "nameType": "Personal",
-        "givenName": "Sarah",
+        "affiliation": [],
         "familyName": "Bourlat",
-        "affiliation": []
+        "givenName": "Sarah",
+        "name": "Bourlat, Sarah",
+        "nameType": "Personal"
       },
       {
-        "name": "Cheeke, Tanya",
-        "nameType": "Personal",
-        "givenName": "Tanya",
+        "affiliation": [],
         "familyName": "Cheeke",
-        "affiliation": []
+        "givenName": "Tanya",
+        "name": "Cheeke, Tanya",
+        "nameType": "Personal"
       },
       {
-        "name": "Dima, Bálint",
-        "nameType": "Personal",
-        "givenName": "Bálint",
+        "affiliation": [],
         "familyName": "Dima",
-        "affiliation": []
+        "givenName": "Bálint",
+        "name": "Dima, Bálint",
+        "nameType": "Personal"
       },
       {
-        "name": "Drenkhan, Rein",
-        "nameType": "Personal",
-        "givenName": "Rein",
+        "affiliation": [],
         "familyName": "Drenkhan",
-        "affiliation": []
+        "givenName": "Rein",
+        "name": "Drenkhan, Rein",
+        "nameType": "Personal"
       },
       {
-        "name": "Duarte, Camila",
-        "nameType": "Personal",
-        "givenName": "Camila",
+        "affiliation": [],
         "familyName": "Duarte",
-        "affiliation": []
+        "givenName": "Camila",
+        "name": "Duarte, Camila",
+        "nameType": "Personal"
       },
       {
-        "name": "Dueñas, Margarita",
-        "nameType": "Personal",
-        "givenName": "Margarita",
+        "affiliation": [],
         "familyName": "Dueñas",
-        "affiliation": []
+        "givenName": "Margarita",
+        "name": "Dueñas, Margarita",
+        "nameType": "Personal"
       },
       {
-        "name": "Eberhardt, Ursula",
-        "nameType": "Personal",
-        "givenName": "Ursula",
+        "affiliation": [],
         "familyName": "Eberhardt",
-        "affiliation": []
+        "givenName": "Ursula",
+        "name": "Eberhardt, Ursula",
+        "nameType": "Personal"
       },
       {
-        "name": "Friberg, Hanna",
-        "nameType": "Personal",
-        "givenName": "Hanna",
+        "affiliation": [],
         "familyName": "Friberg",
-        "affiliation": []
+        "givenName": "Hanna",
+        "name": "Friberg, Hanna",
+        "nameType": "Personal"
       },
       {
-        "name": "Frøslev, Tobias G.",
-        "nameType": "Personal",
-        "givenName": "Tobias G.",
+        "affiliation": [],
         "familyName": "Frøslev",
-        "affiliation": []
+        "givenName": "Tobias G.",
+        "name": "Frøslev, Tobias G.",
+        "nameType": "Personal"
       },
       {
-        "name": "Garnica, Sigisfredo",
-        "nameType": "Personal",
-        "givenName": "Sigisfredo",
+        "affiliation": [],
         "familyName": "Garnica",
-        "affiliation": []
+        "givenName": "Sigisfredo",
+        "name": "Garnica, Sigisfredo",
+        "nameType": "Personal"
       },
       {
-        "name": "Geml, József",
-        "nameType": "Personal",
-        "givenName": "József",
+        "affiliation": [],
         "familyName": "Geml",
-        "affiliation": []
+        "givenName": "József",
+        "name": "Geml, József",
+        "nameType": "Personal"
       },
       {
-        "name": "Ghobad-Nejhad, Masoomeh",
-        "nameType": "Personal",
-        "givenName": "Masoomeh",
+        "affiliation": [],
         "familyName": "Ghobad-Nejhad",
-        "affiliation": []
+        "givenName": "Masoomeh",
+        "name": "Ghobad-Nejhad, Masoomeh",
+        "nameType": "Personal"
       },
       {
-        "name": "Grebenc, Tine",
-        "nameType": "Personal",
-        "givenName": "Tine",
+        "affiliation": [],
         "familyName": "Grebenc",
-        "affiliation": []
+        "givenName": "Tine",
+        "name": "Grebenc, Tine",
+        "nameType": "Personal"
       },
       {
-        "name": "Griffith, Gareth W.",
-        "nameType": "Personal",
-        "givenName": "Gareth W.",
+        "affiliation": [],
         "familyName": "Griffith",
-        "affiliation": []
+        "givenName": "Gareth W.",
+        "name": "Griffith, Gareth W.",
+        "nameType": "Personal"
       },
       {
-        "name": "Hampe, Felix",
-        "nameType": "Personal",
-        "givenName": "Felix",
+        "affiliation": [],
         "familyName": "Hampe",
-        "affiliation": []
+        "givenName": "Felix",
+        "name": "Hampe, Felix",
+        "nameType": "Personal"
       },
       {
-        "name": "Kennedy, Peter",
-        "nameType": "Personal",
-        "givenName": "Peter",
+        "affiliation": [],
         "familyName": "Kennedy",
-        "affiliation": []
+        "givenName": "Peter",
+        "name": "Kennedy, Peter",
+        "nameType": "Personal"
       },
       {
-        "name": "Khomich, Maryia",
-        "nameType": "Personal",
-        "givenName": "Maryia",
+        "affiliation": [],
         "familyName": "Khomich",
-        "affiliation": []
+        "givenName": "Maryia",
+        "name": "Khomich, Maryia",
+        "nameType": "Personal"
       },
       {
-        "name": "Kohout, Petr",
-        "nameType": "Personal",
-        "givenName": "Petr",
+        "affiliation": [],
         "familyName": "Kohout",
-        "affiliation": []
+        "givenName": "Petr",
+        "name": "Kohout, Petr",
+        "nameType": "Personal"
       },
       {
-        "name": "Kollom, Anu",
-        "nameType": "Personal",
-        "givenName": "Anu",
+        "affiliation": [],
         "familyName": "Kollom",
-        "affiliation": []
+        "givenName": "Anu",
+        "name": "Kollom, Anu",
+        "nameType": "Personal"
       },
       {
-        "name": "Larsson, Ellen",
-        "nameType": "Personal",
-        "givenName": "Ellen",
+        "affiliation": [],
         "familyName": "Larsson",
-        "affiliation": []
+        "givenName": "Ellen",
+        "name": "Larsson, Ellen",
+        "nameType": "Personal"
       },
       {
-        "name": "Laszlo, Irinyi",
-        "nameType": "Personal",
-        "givenName": "Irinyi",
+        "affiliation": [],
         "familyName": "Laszlo",
-        "affiliation": []
+        "givenName": "Irinyi",
+        "name": "Laszlo, Irinyi",
+        "nameType": "Personal"
       },
       {
-        "name": "Leavitt, Steven",
-        "nameType": "Personal",
-        "givenName": "Steven",
+        "affiliation": [],
         "familyName": "Leavitt",
-        "affiliation": []
+        "givenName": "Steven",
+        "name": "Leavitt, Steven",
+        "nameType": "Personal"
       },
       {
-        "name": "Liimatainen, Kare",
-        "nameType": "Personal",
-        "givenName": "Kare",
+        "affiliation": [],
         "familyName": "Liimatainen",
-        "affiliation": []
+        "givenName": "Kare",
+        "name": "Liimatainen, Kare",
+        "nameType": "Personal"
       },
       {
-        "name": "Lindahl, Björn",
-        "nameType": "Personal",
-        "givenName": "Björn",
+        "affiliation": [],
         "familyName": "Lindahl",
-        "affiliation": []
+        "givenName": "Björn",
+        "name": "Lindahl, Björn",
+        "nameType": "Personal"
       },
       {
-        "name": "Lodge, Deborah J.",
-        "nameType": "Personal",
-        "givenName": "Deborah J.",
+        "affiliation": [],
         "familyName": "Lodge",
-        "affiliation": []
+        "givenName": "Deborah J.",
+        "name": "Lodge, Deborah J.",
+        "nameType": "Personal"
       },
       {
-        "name": "Lumbsch, Helge Thorsten",
-        "nameType": "Personal",
-        "givenName": "Helge Thorsten",
+        "affiliation": [],
         "familyName": "Lumbsch",
-        "affiliation": []
+        "givenName": "Helge Thorsten",
+        "name": "Lumbsch, Helge Thorsten",
+        "nameType": "Personal"
       },
       {
-        "name": "Martín Esteban, María Paz",
-        "nameType": "Personal",
-        "givenName": "María Paz",
+        "affiliation": [],
         "familyName": "Martín Esteban",
-        "affiliation": []
+        "givenName": "María Paz",
+        "name": "Martín Esteban, María Paz",
+        "nameType": "Personal"
       },
       {
-        "name": "Meyer, Wieland",
-        "nameType": "Personal",
-        "givenName": "Wieland",
+        "affiliation": [],
         "familyName": "Meyer",
-        "affiliation": []
+        "givenName": "Wieland",
+        "name": "Meyer, Wieland",
+        "nameType": "Personal"
       },
       {
-        "name": "Miettinen, Otto",
-        "nameType": "Personal",
-        "givenName": "Otto",
+        "affiliation": [],
         "familyName": "Miettinen",
-        "affiliation": []
+        "givenName": "Otto",
+        "name": "Miettinen, Otto",
+        "nameType": "Personal"
       },
       {
-        "name": "Nguyen, Nhu",
-        "nameType": "Personal",
-        "givenName": "Nhu",
+        "affiliation": [],
         "familyName": "Nguyen",
-        "affiliation": []
+        "givenName": "Nhu",
+        "name": "Nguyen, Nhu",
+        "nameType": "Personal"
       },
       {
-        "name": "Niskanen, Tuula",
-        "nameType": "Personal",
-        "givenName": "Tuula",
+        "affiliation": [],
         "familyName": "Niskanen",
-        "affiliation": []
+        "givenName": "Tuula",
+        "name": "Niskanen, Tuula",
+        "nameType": "Personal"
       },
       {
-        "name": "Oono, Ryoko",
-        "nameType": "Personal",
-        "givenName": "Ryoko",
+        "affiliation": [],
         "familyName": "Oono",
-        "affiliation": []
+        "givenName": "Ryoko",
+        "name": "Oono, Ryoko",
+        "nameType": "Personal"
       },
       {
-        "name": "Öpik, Maarja",
-        "nameType": "Personal",
-        "givenName": "Maarja",
+        "affiliation": [],
         "familyName": "Öpik",
-        "affiliation": []
+        "givenName": "Maarja",
+        "name": "Öpik, Maarja",
+        "nameType": "Personal"
       },
       {
-        "name": "Ordynets, Alexander",
-        "nameType": "Personal",
-        "givenName": "Alexander",
+        "affiliation": [],
         "familyName": "Ordynets",
-        "affiliation": []
+        "givenName": "Alexander",
+        "name": "Ordynets, Alexander",
+        "nameType": "Personal"
       },
       {
-        "name": "Pawłowska, Julia",
-        "nameType": "Personal",
-        "givenName": "Julia",
+        "affiliation": [],
         "familyName": "Pawłowska",
-        "affiliation": []
+        "givenName": "Julia",
+        "name": "Pawłowska, Julia",
+        "nameType": "Personal"
       },
       {
-        "name": "Peintner, Ursula",
-        "nameType": "Personal",
-        "givenName": "Ursula",
+        "affiliation": [],
         "familyName": "Peintner",
-        "affiliation": []
+        "givenName": "Ursula",
+        "name": "Peintner, Ursula",
+        "nameType": "Personal"
       },
       {
-        "name": "Pereira, Olinto Liparini",
-        "nameType": "Personal",
-        "givenName": "Olinto Liparini",
+        "affiliation": [],
         "familyName": "Pereira",
-        "affiliation": []
+        "givenName": "Olinto Liparini",
+        "name": "Pereira, Olinto Liparini",
+        "nameType": "Personal"
       },
       {
-        "name": "Pinho, Danilo Batista",
-        "nameType": "Personal",
-        "givenName": "Danilo Batista",
+        "affiliation": [],
         "familyName": "Pinho",
-        "affiliation": []
+        "givenName": "Danilo Batista",
+        "name": "Pinho, Danilo Batista",
+        "nameType": "Personal"
       },
       {
-        "name": "Põldmaa, Kadri",
-        "nameType": "Personal",
-        "givenName": "Kadri",
+        "affiliation": [],
         "familyName": "Põldmaa",
-        "affiliation": []
+        "givenName": "Kadri",
+        "name": "Põldmaa, Kadri",
+        "nameType": "Personal"
       },
       {
-        "name": "Runnel, Kadri",
-        "nameType": "Personal",
-        "givenName": "Kadri",
+        "affiliation": [],
         "familyName": "Runnel",
-        "affiliation": []
+        "givenName": "Kadri",
+        "name": "Runnel, Kadri",
+        "nameType": "Personal"
       },
       {
-        "name": "Ryberg, Martin",
-        "nameType": "Personal",
-        "givenName": "Martin",
+        "affiliation": [],
         "familyName": "Ryberg",
-        "affiliation": []
+        "givenName": "Martin",
+        "name": "Ryberg, Martin",
+        "nameType": "Personal"
       },
       {
-        "name": "Saar, Irja",
-        "nameType": "Personal",
-        "givenName": "Irja",
+        "affiliation": [],
         "familyName": "Saar",
-        "affiliation": []
+        "givenName": "Irja",
+        "name": "Saar, Irja",
+        "nameType": "Personal"
       },
       {
-        "name": "Sanli, Kemal",
-        "nameType": "Personal",
-        "givenName": "Kemal",
+        "affiliation": [],
         "familyName": "Sanli",
-        "affiliation": []
+        "givenName": "Kemal",
+        "name": "Sanli, Kemal",
+        "nameType": "Personal"
       },
       {
-        "name": "Scott, James",
-        "nameType": "Personal",
-        "givenName": "James",
+        "affiliation": [],
         "familyName": "Scott",
-        "affiliation": []
+        "givenName": "James",
+        "name": "Scott, James",
+        "nameType": "Personal"
       },
       {
-        "name": "Spirin, Viacheslav",
-        "nameType": "Personal",
-        "givenName": "Viacheslav",
+        "affiliation": [],
         "familyName": "Spirin",
-        "affiliation": []
+        "givenName": "Viacheslav",
+        "name": "Spirin, Viacheslav",
+        "nameType": "Personal"
       },
       {
-        "name": "Suija, Ave",
-        "nameType": "Personal",
-        "givenName": "Ave",
+        "affiliation": [],
         "familyName": "Suija",
-        "affiliation": []
+        "givenName": "Ave",
+        "name": "Suija, Ave",
+        "nameType": "Personal"
       },
       {
-        "name": "Svantesson, Sten",
-        "nameType": "Personal",
-        "givenName": "Sten",
+        "affiliation": [],
         "familyName": "Svantesson",
-        "affiliation": []
+        "givenName": "Sten",
+        "name": "Svantesson, Sten",
+        "nameType": "Personal"
       },
       {
-        "name": "Tadych, Mariusz",
-        "nameType": "Personal",
-        "givenName": "Mariusz",
+        "affiliation": [],
         "familyName": "Tadych",
-        "affiliation": []
+        "givenName": "Mariusz",
+        "name": "Tadych, Mariusz",
+        "nameType": "Personal"
       },
       {
-        "name": "Takamatsu, Susumu",
-        "nameType": "Personal",
-        "givenName": "Susumu",
+        "affiliation": [],
         "familyName": "Takamatsu",
-        "affiliation": []
+        "givenName": "Susumu",
+        "name": "Takamatsu, Susumu",
+        "nameType": "Personal"
       },
       {
-        "name": "Tamm, Heidi",
-        "nameType": "Personal",
-        "givenName": "Heidi",
+        "affiliation": [],
         "familyName": "Tamm",
-        "affiliation": []
+        "givenName": "Heidi",
+        "name": "Tamm, Heidi",
+        "nameType": "Personal"
       },
       {
-        "name": "Taylor, AFS.",
-        "nameType": "Personal",
-        "givenName": "AFS.",
+        "affiliation": [],
         "familyName": "Taylor",
-        "affiliation": []
+        "givenName": "AFS.",
+        "name": "Taylor, AFS.",
+        "nameType": "Personal"
       },
       {
-        "name": "Tedersoo, Leho",
-        "nameType": "Personal",
-        "givenName": "Leho",
+        "affiliation": [],
         "familyName": "Tedersoo",
-        "affiliation": []
+        "givenName": "Leho",
+        "name": "Tedersoo, Leho",
+        "nameType": "Personal"
       },
       {
-        "name": "Telleria, M.T.",
-        "nameType": "Personal",
-        "givenName": "M.T.",
+        "affiliation": [],
         "familyName": "Telleria",
-        "affiliation": []
+        "givenName": "M.T.",
+        "name": "Telleria, M.T.",
+        "nameType": "Personal"
       },
       {
-        "name": "Udayanga, Dhanushka",
-        "nameType": "Personal",
-        "givenName": "Dhanushka",
+        "affiliation": [],
         "familyName": "Udayanga",
-        "affiliation": []
+        "givenName": "Dhanushka",
+        "name": "Udayanga, Dhanushka",
+        "nameType": "Personal"
       },
       {
-        "name": "Unterseher, Martin",
-        "nameType": "Personal",
-        "givenName": "Martin",
+        "affiliation": [],
         "familyName": "Unterseher",
-        "affiliation": []
+        "givenName": "Martin",
+        "name": "Unterseher, Martin",
+        "nameType": "Personal"
       },
       {
-        "name": "Volobuev, Sergey",
-        "nameType": "Personal",
-        "givenName": "Sergey",
+        "affiliation": [],
         "familyName": "Volobuev",
-        "affiliation": []
+        "givenName": "Sergey",
+        "name": "Volobuev, Sergey",
+        "nameType": "Personal"
       },
       {
-        "name": "Weiss, Michael",
-        "nameType": "Personal",
-        "givenName": "Michael",
+        "affiliation": [],
         "familyName": "Weiss",
-        "affiliation": []
+        "givenName": "Michael",
+        "name": "Weiss, Michael",
+        "nameType": "Personal"
       },
       {
-        "name": "Wurzbacher, Christian",
-        "nameType": "Personal",
-        "givenName": "Christian",
+        "affiliation": [],
         "familyName": "Wurzbacher",
-        "affiliation": []
-      }
-    ],
-    "titles": [
-      {
-        "title": "SH409843.07FU"
-      },
-      {
-        "title": "Gomphales",
-        "titleType": "Subtitle"
-      }
-    ],
-    "publisher": "UNITE Community",
-    "container": {},
-    "publicationYear": 2015,
-    "subjects": [],
-    "contributors": [
-      {
-        "name": "Kessy Abarenkov",
-        "affiliation": []
-      },
-      {
-        "name": "NHM UT-University Of Tartu; Natural History Museum And Botanic Garden",
-        "affiliation": []
+        "givenName": "Christian",
+        "name": "Wurzbacher, Christian",
+        "nameType": "Personal"
       }
     ],
     "dates": [
@@ -545,48 +526,66 @@
         "dateType": "Issued"
       }
     ],
-    "language": "eng",
-    "types": {
-      "ris": "DATA",
-      "bibtex": "misc",
-      "citeproc": "dataset",
-      "schemaOrg": "Dataset",
-      "resourceType": "Dataset/UNITE Species Hypothesis",
-      "resourceTypeGeneral": "Dataset"
-    },
-    "relatedIdentifiers": [],
-    "sizes": [],
+    "descriptions": [
+      {
+        "description": "UNITE provides a unified way for delimiting, identifying, communicating, and working with DNA-based Species Hypotheses (SH). All fungal ITS sequences in the international nucleotide sequence databases are clustered to approximately the species level by applying a set of dynamic distance values (&lt;0.5 - 3.0%). All species hypotheses are given a unique, stable name in the form of a DOI, and their taxonomic and ecological annotations are verified through distributed, web-based third-party annotation efforts. SHs are connected to a taxon name and its classification as far as possible (phylum, class, order, etc.) by taking into account identifications for all sequences in the SH. An automatically or manually designated sequence is chosen to represent each such SH. These sequences are released (https://unite.ut.ee/repository.php) for use by the scientific community in, for example, local sequence similarity searches and next-generation sequencing analysis pipelines. The system and the data are updated automatically as the number of public fungal ITS sequences grows.",
+        "descriptionType": "Abstract"
+      }
+    ],
+    "doi": "10.15156/bio/sh409843.07fu",
     "formats": [
       "application/json"
     ],
-    "version": null,
+    "fundingReferences": [],
+    "geoLocations": [],
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.15156/bio/sh409843.07fu",
+        "identifierType": "DOI"
+      }
+    ],
+    "isActive": true,
+    "language": "eng",
+    "metadataVersion": 1,
+    "publicationYear": 2015,
+    "published": "2015",
+    "publisher": "UNITE Community",
+    "reason": null,
+    "registered": "2015-06-05T10:23:19.000Z",
+    "relatedIdentifiers": [],
     "rightsList": [
       {
         "rights": "Attribution-NonCommercial (CC BY-NC)",
         "rightsUri": "http://creativecommons.org/licenses/by-nc/4.0"
       }
     ],
-    "descriptions": [
+    "schemaVersion": "http://datacite.org/schema/kernel-3",
+    "sizes": [],
+    "source": null,
+    "state": "findable",
+    "subjects": [],
+    "titles": [
       {
-        "description": "UNITE provides a unified way for delimiting, identifying, communicating, and working with DNA-based Species Hypotheses (SH). All fungal ITS sequences in the international nucleotide sequence databases are clustered to approximately the species level by applying a set of dynamic distance values (&lt;0.5 - 3.0%). All species hypotheses are given a unique, stable name in the form of a DOI, and their taxonomic and ecological annotations are verified through distributed, web-based third-party annotation efforts. SHs are connected to a taxon name and its classification as far as possible (phylum, class, order, etc.) by taking into account identifications for all sequences in the SH. An automatically or manually designated sequence is chosen to represent each such SH. These sequences are released (https://unite.ut.ee/repository.php) for use by the scientific community in, for example, local sequence similarity searches and next-generation sequencing analysis pipelines. The system and the data are updated automatically as the number of public fungal ITS sequences grows.",
-        "descriptionType": "Abstract"
+        "title": "SH409843.07FU"
+      },
+      {
+        "title": "Gomphales",
+        "titleType": "Subtitle"
       }
     ],
-    "geoLocations": [],
-    "fundingReferences": [],
+    "types": {
+      "bibtex": "misc",
+      "citeproc": "dataset",
+      "resourceType": "Dataset/UNITE Species Hypothesis",
+      "resourceTypeGeneral": "Dataset",
+      "ris": "DATA",
+      "schemaOrg": "Dataset"
+    },
+    "updated": "2019-08-02T07:45:28.000Z",
     "url": "https://plutof.ut.ee/#/datacite/10.15156/BIO/SH409843.07FU",
-    "contentUrl": null,
-    "metadataVersion": 1,
-    "schemaVersion": "http://datacite.org/schema/kernel-3",
-    "source": null,
-    "isActive": true,
-    "state": "findable",
-    "reason": null,
-    "created": "2015-06-05T10:23:18.000Z",
-    "registered": "2015-06-05T10:23:19.000Z",
-    "published": "2015",
-    "updated": "2019-08-02T07:45:28.000Z"
+    "version": null
   },
+  "id": "10.15156/bio/sh409843.07fu",
   "relationships": {
     "client": {
       "data": {
@@ -594,5 +593,6 @@
         "type": "clients"
       }
     }
-  }
+  },
+  "type": "dois"
 }
diff --git a/python/tests/files/datacite/datacite_doc_06.json b/python/tests/files/datacite/datacite_doc_06.json
index a7f3ee70..01cb2cb3 100644
--- a/python/tests/files/datacite/datacite_doc_06.json
+++ b/python/tests/files/datacite/datacite_doc_06.json
@@ -1,31 +1,16 @@
 {
-  "id": "10.16903/ethz-grs-d_006220",
-  "type": "dois",
   "attributes": {
-    "doi": "10.16903/ethz-grs-d_006220",
-    "identifiers": [
-      {
-        "identifier": "https://doi.org/10.16903/ethz-grs-d_006220",
-        "identifierType": "DOI"
-      }
-    ],
+    "container": {},
+    "contentUrl": null,
+    "contributors": [],
+    "created": "2017-12-13T12:03:09.000Z",
     "creators": [
       {
+        "affiliation": [],
         "name": "Crispijn De Passe (Der Ältere) (1564-1637)",
-        "nameType": "Personal",
-        "affiliation": []
+        "nameType": "Personal"
       }
     ],
-    "titles": [
-      {
-        "title": "Der Eifer (Sedulitas), Blatt 7 der Folge \"Die Tugenden\""
-      }
-    ],
-    "publisher": "n.a.",
-    "container": {},
-    "publicationYear": 1590,
-    "subjects": [],
-    "contributors": [],
     "dates": [
       {
         "date": "1590",
@@ -36,42 +21,56 @@
         "dateType": "Issued"
       }
     ],
-    "language": null,
-    "types": {
-      "ris": "GEN",
-      "bibtex": "misc",
-      "citeproc": "article",
-      "schemaOrg": "CreativeWork",
-      "resourceTypeGeneral": "InteractiveResource"
-    },
-    "relatedIdentifiers": [],
-    "sizes": [],
+    "descriptions": [],
+    "doi": "10.16903/ethz-grs-d_006220",
     "formats": [
       "Blattgrösse: 21.0 x 14.4 x 0.0 cm (beschnitten)",
       "Kupferstich"
     ],
-    "version": null,
+    "fundingReferences": [],
+    "geoLocations": [],
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.16903/ethz-grs-d_006220",
+        "identifierType": "DOI"
+      }
+    ],
+    "isActive": true,
+    "language": null,
+    "metadataVersion": 1,
+    "publicationYear": 1590,
+    "published": "1590",
+    "publisher": "n.a.",
+    "reason": null,
+    "registered": "2017-12-13T12:03:09.000Z",
+    "relatedIdentifiers": [],
     "rightsList": [
       {
         "rights": "ETH-Bibliothek Zürich, Graphische Sammlung / D 6220 / Public Domain Mark 1.0"
       }
     ],
-    "descriptions": [],
-    "geoLocations": [],
-    "fundingReferences": [],
-    "url": "http://www.e-gs.ethz.ch/eMP/eMuseumPlus?service=ExternalInterface&module=collection&objectId=29469&viewType=detailView",
-    "contentUrl": null,
-    "metadataVersion": 1,
     "schemaVersion": "http://datacite.org/schema/kernel-3",
+    "sizes": [],
     "source": "mds",
-    "isActive": true,
     "state": "findable",
-    "reason": null,
-    "created": "2017-12-13T12:03:09.000Z",
-    "registered": "2017-12-13T12:03:09.000Z",
-    "published": "1590",
-    "updated": "2019-08-02T17:20:02.000Z"
+    "subjects": [],
+    "titles": [
+      {
+        "title": "Der Eifer (Sedulitas), Blatt 7 der Folge \"Die Tugenden\""
+      }
+    ],
+    "types": {
+      "bibtex": "misc",
+      "citeproc": "article",
+      "resourceTypeGeneral": "InteractiveResource",
+      "ris": "GEN",
+      "schemaOrg": "CreativeWork"
+    },
+    "updated": "2019-08-02T17:20:02.000Z",
+    "url": "http://www.e-gs.ethz.ch/eMP/eMuseumPlus?service=ExternalInterface&module=collection&objectId=29469&viewType=detailView",
+    "version": null
   },
+  "id": "10.16903/ethz-grs-d_006220",
   "relationships": {
     "client": {
       "data": {
@@ -79,5 +78,6 @@
         "type": "clients"
       }
     }
-  }
+  },
+  "type": "dois"
 }
diff --git a/python/tests/files/datacite/datacite_doc_07.json b/python/tests/files/datacite/datacite_doc_07.json
index c70695b6..8e292fea 100644
--- a/python/tests/files/datacite/datacite_doc_07.json
+++ b/python/tests/files/datacite/datacite_doc_07.json
@@ -1,49 +1,72 @@
 {
-  "id": "10.18462/iir.icr.2015.0926",
-  "type": "dois",
   "attributes": {
-    "doi": "10.18462/iir.icr.2015.0926",
-    "identifiers": [
-      {
-        "identifier": "https://doi.org/10.18462/iir.icr.2015.0926",
-        "identifierType": "DOI"
-      }
-    ],
+    "container": {},
+    "contentUrl": null,
+    "contributors": [],
+    "created": "2016-11-21T13:08:14.000Z",
     "creators": [
       {
-        "name": "ROTHUIZEN, E.",
-        "nameType": "Personal",
-        "givenName": "E.",
+        "affiliation": [],
         "familyName": "ROTHUIZEN",
-        "affiliation": []
+        "givenName": "E.",
+        "name": "ROTHUIZEN, E.",
+        "nameType": "Personal"
       },
       {
-        "name": "ELMEGAARD, B.",
-        "nameType": "Personal",
-        "givenName": "B.",
+        "affiliation": [],
         "familyName": "ELMEGAARD",
-        "affiliation": []
+        "givenName": "B.",
+        "name": "ELMEGAARD, B.",
+        "nameType": "Personal"
       },
       {
-        "name": "MARKUSSEN W., B.",
-        "nameType": "Personal",
-        "givenName": "B.",
+        "affiliation": [],
         "familyName": "MARKUSSEN W.",
-        "affiliation": []
+        "givenName": "B.",
+        "name": "MARKUSSEN W., B.",
+        "nameType": "Personal"
       },
       {
-        "name": "Et Al.",
-        "affiliation": []
+        "affiliation": [],
+        "name": "Et Al."
       }
     ],
-    "titles": [
+    "dates": [
       {
-        "title": "High efficient heat pump system using storage tanks to increase cop by means of the ISEC concept. 1: model validation."
+        "date": "2015",
+        "dateType": "Issued"
       }
     ],
-    "publisher": "International Institute of Refrigeration (IIR)",
-    "container": {},
+    "descriptions": [
+      {
+        "description": "The purpose of the ISEC concept is to provide a high-efficient heat pump system for hot water production. The ISEC concept uses two storage tanks for the water, one discharged and one charged. Hot water for the industrial process is tapped from the charged tank, while the other tank is charging. Charging is done by circulating the water in the tank through the condenser of a heat pump several times and thereby gradually heating the water. The charging is done with a higher mass flow rate than the discharging to reach several circulations of the water during the time frame of one discharging. This result in a lower condensing temperature than if the water was heated in one step. Two test setups were built, one to test the performance of the heat pump gradually heating the water and one to investigate the stratification in the storage tanks. Furthermore, a dynamic model of the system was implemented in Dymola, and validated by the use of test data from the two experimental setups. This paper shows that there is a good consistency between the model and the experimental tests.",
+        "descriptionType": "Abstract"
+      }
+    ],
+    "doi": "10.18462/iir.icr.2015.0926",
+    "formats": [],
+    "fundingReferences": [],
+    "geoLocations": [],
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.18462/iir.icr.2015.0926",
+        "identifierType": "DOI"
+      }
+    ],
+    "isActive": true,
+    "language": "eng",
+    "metadataVersion": 0,
     "publicationYear": 2015,
+    "published": "2015",
+    "publisher": "International Institute of Refrigeration (IIR)",
+    "reason": null,
+    "registered": "2016-11-21T13:08:14.000Z",
+    "relatedIdentifiers": [],
+    "rightsList": [],
+    "schemaVersion": null,
+    "sizes": [],
+    "source": null,
+    "state": "findable",
     "subjects": [
       {
         "subject": "HEAT PUMP"
@@ -67,48 +90,24 @@
         "subject": "MODEL"
       }
     ],
-    "contributors": [],
-    "dates": [
+    "titles": [
       {
-        "date": "2015",
-        "dateType": "Issued"
+        "title": "High efficient heat pump system using storage tanks to increase cop by means of the ISEC concept. 1: model validation."
       }
     ],
-    "language": "eng",
     "types": {
-      "ris": "DATA",
       "bibtex": "misc",
       "citeproc": "dataset",
-      "schemaOrg": "Dataset",
       "resourceType": "Dataset",
-      "resourceTypeGeneral": "Dataset"
+      "resourceTypeGeneral": "Dataset",
+      "ris": "DATA",
+      "schemaOrg": "Dataset"
     },
-    "relatedIdentifiers": [],
-    "sizes": [],
-    "formats": [],
-    "version": null,
-    "rightsList": [],
-    "descriptions": [
-      {
-        "description": "The purpose of the ISEC concept is to provide a high-efficient heat pump system for hot water production. The ISEC concept uses two storage tanks for the water, one discharged and one charged. Hot water for the industrial process is tapped from the charged tank, while the other tank is charging. Charging is done by circulating the water in the tank through the condenser of a heat pump several times and thereby gradually heating the water. The charging is done with a higher mass flow rate than the discharging to reach several circulations of the water during the time frame of one discharging. This result in a lower condensing temperature than if the water was heated in one step. Two test setups were built, one to test the performance of the heat pump gradually heating the water and one to investigate the stratification in the storage tanks. Furthermore, a dynamic model of the system was implemented in Dymola, and validated by the use of test data from the two experimental setups. This paper shows that there is a good consistency between the model and the experimental tests.",
-        "descriptionType": "Abstract"
-      }
-    ],
-    "geoLocations": [],
-    "fundingReferences": [],
+    "updated": "2019-08-16T18:00:59.000Z",
     "url": "http://www.iifiir.org/clientBookline/service/reference.asp?INSTANCE=EXPLOITATION&OUTPUT=PORTAL&DOCID=IFD_REFDOC_0015008&DOCBASE=IFD_REFDOC_EN&SETLANGUAGE=EN",
-    "contentUrl": null,
-    "metadataVersion": 0,
-    "schemaVersion": null,
-    "source": null,
-    "isActive": true,
-    "state": "findable",
-    "reason": null,
-    "created": "2016-11-21T13:08:14.000Z",
-    "registered": "2016-11-21T13:08:14.000Z",
-    "published": "2015",
-    "updated": "2019-08-16T18:00:59.000Z"
+    "version": null
   },
+  "id": "10.18462/iir.icr.2015.0926",
   "relationships": {
     "client": {
       "data": {
@@ -116,5 +115,6 @@
         "type": "clients"
       }
     }
-  }
+  },
+  "type": "dois"
 }
diff --git a/python/tests/files/datacite/datacite_doc_08.json b/python/tests/files/datacite/datacite_doc_08.json
index e9170788..84f756e8 100644
--- a/python/tests/files/datacite/datacite_doc_08.json
+++ b/python/tests/files/datacite/datacite_doc_08.json
@@ -1,40 +1,63 @@
 {
-  "id": "10.22004/ag.econ.284864",
-  "type": "dois",
   "attributes": {
-    "doi": "10.22004/ag.econ.284864",
-    "identifiers": [
-      {
-        "identifier": "https://doi.org/10.22004/ag.econ.284864",
-        "identifierType": "DOI"
-      }
-    ],
+    "container": {},
+    "contentUrl": null,
+    "contributors": [],
+    "created": "2019-08-24T07:46:47.000Z",
     "creators": [
       {
-        "name": "Kajisa, Kei",
-        "nameType": "Personal",
-        "givenName": "Kei",
-        "familyName": "Kajisa",
         "affiliation": [],
-        "nameIdentifiers": []
+        "familyName": "Kajisa",
+        "givenName": "Kei",
+        "name": "Kajisa, Kei",
+        "nameIdentifiers": [],
+        "nameType": "Personal"
       },
       {
-        "name": "Kajisa, Kei",
-        "nameType": "Personal",
-        "givenName": "Kei",
-        "familyName": "Kajisa",
         "affiliation": [],
-        "nameIdentifiers": []
+        "familyName": "Kajisa",
+        "givenName": "Kei",
+        "name": "Kajisa, Kei",
+        "nameIdentifiers": [],
+        "nameType": "Personal"
       }
     ],
-    "titles": [
+    "dates": [
       {
-        "title": "Irrigation Policies under Rapid Industrialization and Labor Migration: Lessons from Japan, China and India"
+        "date": "2017",
+        "dateType": "Issued"
       }
     ],
-    "publisher": "Unknown",
-    "container": {},
+    "descriptions": [
+      {
+        "description": "International society recognizes that the scarcity of fresh water is increasing and farming sectors suffer from lack of irrigation water. However, if we look at this issue with a framework of relative factor endowment, a different view will arise. In emerging states with rapid industrialization and labor migration, labor scarcity increases at a faster pace than that of irrigation water. Using the historical review of Japan’s irrigation policies as well as the case studies of India and China, this paper shows that the introduction of policies which do not reflect the actual relative resource scarcity may mislead the development path. We argue that under increasing relative labor scarcity it is important to realize the substitution of capital for labor for surface irrigation system management and that the substitution needs public support because the service of surface irrigation system has some externalities. Through this argument, this paper also intends to shed the light back to the role of the state for local resource management which seems to be unfairly undervalued since the boom of community participatory approach in the 1980s.",
+        "descriptionType": "Abstract"
+      }
+    ],
+    "doi": "10.22004/ag.econ.284864",
+    "formats": [],
+    "fundingReferences": [],
+    "geoLocations": [],
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.22004/ag.econ.284864",
+        "identifierType": "DOI"
+      }
+    ],
+    "isActive": true,
+    "language": "eng",
+    "metadataVersion": 1,
     "publicationYear": 2017,
+    "published": "2017",
+    "publisher": "Unknown",
+    "reason": null,
+    "registered": "2019-08-24T07:46:47.000Z",
+    "relatedIdentifiers": [],
+    "rightsList": [],
+    "schemaVersion": null,
+    "sizes": [],
+    "source": "mds",
+    "state": "findable",
     "subjects": [
       {
         "subject": "Land Economics/Use"
@@ -52,48 +75,24 @@
         "subjectScheme": "keyword"
       }
     ],
-    "contributors": [],
-    "dates": [
+    "titles": [
       {
-        "date": "2017",
-        "dateType": "Issued"
+        "title": "Irrigation Policies under Rapid Industrialization and Labor Migration: Lessons from Japan, China and India"
       }
     ],
-    "language": "eng",
     "types": {
-      "ris": "RPRT",
       "bibtex": "article",
       "citeproc": "article-journal",
-      "schemaOrg": "ScholarlyArticle",
       "resourceType": "Text",
-      "resourceTypeGeneral": "Text"
+      "resourceTypeGeneral": "Text",
+      "ris": "RPRT",
+      "schemaOrg": "ScholarlyArticle"
     },
-    "relatedIdentifiers": [],
-    "sizes": [],
-    "formats": [],
-    "version": null,
-    "rightsList": [],
-    "descriptions": [
-      {
-        "description": "International society recognizes that the scarcity of fresh water is increasing and farming sectors suffer from lack of irrigation water. However, if we look at this issue with a framework of relative factor endowment, a different view will arise. In emerging states with rapid industrialization and labor migration, labor scarcity increases at a faster pace than that of irrigation water. Using the historical review of Japan’s irrigation policies as well as the case studies of India and China, this paper shows that the introduction of policies which do not reflect the actual relative resource scarcity may mislead the development path. We argue that under increasing relative labor scarcity it is important to realize the substitution of capital for labor for surface irrigation system management and that the substitution needs public support because the service of surface irrigation system has some externalities. Through this argument, this paper also intends to shed the light back to the role of the state for local resource management which seems to be unfairly undervalued since the boom of community participatory approach in the 1980s.",
-        "descriptionType": "Abstract"
-      }
-    ],
-    "geoLocations": [],
-    "fundingReferences": [],
+    "updated": "2019-08-25T09:38:33.000Z",
     "url": "https://ageconsearch.umn.edu/record/284864",
-    "contentUrl": null,
-    "metadataVersion": 1,
-    "schemaVersion": null,
-    "source": "mds",
-    "isActive": true,
-    "state": "findable",
-    "reason": null,
-    "created": "2019-08-24T07:46:47.000Z",
-    "registered": "2019-08-24T07:46:47.000Z",
-    "published": "2017",
-    "updated": "2019-08-25T09:38:33.000Z"
+    "version": null
   },
+  "id": "10.22004/ag.econ.284864",
   "relationships": {
     "client": {
       "data": {
@@ -101,5 +100,6 @@
         "type": "clients"
       }
     }
-  }
+  },
+  "type": "dois"
 }
diff --git a/python/tests/files/datacite/datacite_doc_09.json b/python/tests/files/datacite/datacite_doc_09.json
index d09af545..d6617d0d 100644
--- a/python/tests/files/datacite/datacite_doc_09.json
+++ b/python/tests/files/datacite/datacite_doc_09.json
@@ -1,8 +1,46 @@
 {
-  "id": "10.2314/gbv:880813733",
-  "type": "dois",
   "attributes": {
+    "container": {},
+    "contentUrl": null,
+    "contributors": [
+      {
+        "affiliation": [],
+        "contributorType": "HostingInstitution",
+        "name": "TIB-Technische Informationsbibliothek Universitätsbibliothek Hannover",
+        "nameIdentifiers": [],
+        "nameType": "Organizational"
+      },
+      {
+        "affiliation": [],
+        "contributorType": "DataManager",
+        "name": "Technische Informationsbibliothek (TIB)",
+        "nameIdentifiers": []
+      }
+    ],
+    "created": "2017-02-25T00:00:18.000Z",
+    "creators": [
+      {
+        "affiliation": [],
+        "familyName": "Kirstaedter",
+        "givenName": "Nils",
+        "name": "Kirstaedter, Nils",
+        "nameIdentifiers": [],
+        "nameType": "Personal"
+      }
+    ],
+    "dates": [
+      {
+        "date": "2016",
+        "dateType": "Issued"
+      }
+    ],
+    "descriptions": [],
     "doi": "10.2314/gbv:880813733",
+    "formats": [
+      "application/pdf"
+    ],
+    "fundingReferences": [],
+    "geoLocations": [],
     "identifiers": [
       {
         "identifier": "https://doi.org/10.2314/gbv:880813733",
@@ -29,32 +67,22 @@
         "identifierType": "ftx-id"
       }
     ],
-    "creators": [
-      {
-        "name": "Kirstaedter, Nils",
-        "nameType": "Personal",
-        "givenName": "Nils",
-        "familyName": "Kirstaedter",
-        "affiliation": [],
-        "nameIdentifiers": []
-      }
-    ],
-    "titles": [
-      {
-        "title": "BrightLas : TP3.3. Module für Direktdiodenstrahlquellen bis 4kW und Untersuchungen zur Leistungsskalierung (Diodemodul) : zum Verbundvorhaben Direktdiodenlaseranlagen und -systeme (VP3) im Förderschwerpunkt innovative regionale Wachstumskerne, BMBF : Abschlussbericht"
-      },
-      {
-        "title": "Module für Direktdiodenstrahlquellen bis 4kW und Untersuchungen zur Leistungsskalierung (Diodemodul)",
-        "titleType": "AlternativeTitle"
-      },
-      {
-        "title": "Direktdiodenlaseranlagen und -systeme (VP3)",
-        "titleType": "AlternativeTitle"
-      }
-    ],
-    "publisher": "[Lumics GmbH]",
-    "container": {},
+    "isActive": true,
+    "language": "de",
+    "metadataVersion": 9,
     "publicationYear": 2016,
+    "published": "2016",
+    "publisher": "[Lumics GmbH]",
+    "reason": null,
+    "registered": "2017-02-25T00:00:19.000Z",
+    "relatedIdentifiers": [],
+    "rightsList": [],
+    "schemaVersion": "http://datacite.org/schema/kernel-4",
+    "sizes": [
+      "1 Online-Ressource (10 Seiten, 1,40 MB)"
+    ],
+    "source": "mds",
+    "state": "findable",
     "subjects": [
       {
         "subject": "Direktdiodenlasersysteme"
@@ -64,61 +92,32 @@
         "subjectScheme": "linsearch"
       }
     ],
-    "contributors": [
+    "titles": [
       {
-        "name": "TIB-Technische Informationsbibliothek Universitätsbibliothek Hannover",
-        "nameType": "Organizational",
-        "affiliation": [],
-        "contributorType": "HostingInstitution",
-        "nameIdentifiers": []
+        "title": "BrightLas : TP3.3. Module für Direktdiodenstrahlquellen bis 4kW und Untersuchungen zur Leistungsskalierung (Diodemodul) : zum Verbundvorhaben Direktdiodenlaseranlagen und -systeme (VP3) im Förderschwerpunkt innovative regionale Wachstumskerne, BMBF : Abschlussbericht"
       },
       {
-        "name": "Technische Informationsbibliothek (TIB)",
-        "affiliation": [],
-        "contributorType": "DataManager",
-        "nameIdentifiers": []
-      }
-    ],
-    "dates": [
+        "title": "Module für Direktdiodenstrahlquellen bis 4kW und Untersuchungen zur Leistungsskalierung (Diodemodul)",
+        "titleType": "AlternativeTitle"
+      },
       {
-        "date": "2016",
-        "dateType": "Issued"
+        "title": "Direktdiodenlaseranlagen und -systeme (VP3)",
+        "titleType": "AlternativeTitle"
       }
     ],
-    "language": "de",
     "types": {
-      "ris": "RPRT",
       "bibtex": "article",
       "citeproc": "report",
-      "schemaOrg": "ScholarlyArticle",
       "resourceType": "Report",
-      "resourceTypeGeneral": "Text"
+      "resourceTypeGeneral": "Text",
+      "ris": "RPRT",
+      "schemaOrg": "ScholarlyArticle"
     },
-    "relatedIdentifiers": [],
-    "sizes": [
-      "1 Online-Ressource (10 Seiten, 1,40 MB)"
-    ],
-    "formats": [
-      "application/pdf"
-    ],
-    "version": "1.0",
-    "rightsList": [],
-    "descriptions": [],
-    "geoLocations": [],
-    "fundingReferences": [],
+    "updated": "2019-08-03T05:53:51.000Z",
     "url": "https://www.tib.eu/suchen/id/TIBKAT:880813733/",
-    "contentUrl": null,
-    "metadataVersion": 9,
-    "schemaVersion": "http://datacite.org/schema/kernel-4",
-    "source": "mds",
-    "isActive": true,
-    "state": "findable",
-    "reason": null,
-    "created": "2017-02-25T00:00:18.000Z",
-    "registered": "2017-02-25T00:00:19.000Z",
-    "published": "2016",
-    "updated": "2019-08-03T05:53:51.000Z"
+    "version": "1.0"
   },
+  "id": "10.2314/gbv:880813733",
   "relationships": {
     "client": {
       "data": {
@@ -126,5 +125,6 @@
         "type": "clients"
       }
     }
-  }
+  },
+  "type": "dois"
 }
diff --git a/python/tests/files/datacite/datacite_doc_10.json b/python/tests/files/datacite/datacite_doc_10.json
index d40fc272..154242cb 100644
--- a/python/tests/files/datacite/datacite_doc_10.json
+++ b/python/tests/files/datacite/datacite_doc_10.json
@@ -1,28 +1,50 @@
 {
-  "id": "10.25549/wpacards-m6171",
-  "type": "dois",
   "attributes": {
-    "doi": "10.25549/wpacards-m6171",
-    "identifiers": [
+    "container": {},
+    "contentUrl": null,
+    "contributors": [],
+    "created": "2018-09-09T08:32:09.000Z",
+    "creators": [
       {
-        "identifier": "https://doi.org/10.25549/wpacards-m6171",
-        "identifierType": "DOI"
+        "affiliation": [],
+        "name": "Unknown"
       }
     ],
-    "creators": [
+    "dates": [
       {
-        "name": "Unknown",
-        "affiliation": []
+        "date": "2012",
+        "dateType": "Issued"
       }
     ],
-    "titles": [
+    "descriptions": [
       {
-        "title": "WPA household census for 210 E VERNON, Los Angeles"
+        "descriptionType": "Abstract"
       }
     ],
-    "publisher": "University of Southern California Digital Library (USC.DL)",
-    "container": {},
+    "doi": "10.25549/wpacards-m6171",
+    "formats": [],
+    "fundingReferences": [],
+    "geoLocations": [],
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.25549/wpacards-m6171",
+        "identifierType": "DOI"
+      }
+    ],
+    "isActive": true,
+    "language": "eng",
+    "metadataVersion": 0,
     "publicationYear": 2012,
+    "published": "2012",
+    "publisher": "University of Southern California Digital Library (USC.DL)",
+    "reason": null,
+    "registered": "2018-09-09T08:33:10.000Z",
+    "relatedIdentifiers": [],
+    "rightsList": [],
+    "schemaVersion": "http://datacite.org/schema/kernel-4",
+    "sizes": [],
+    "source": "mds",
+    "state": "findable",
     "subjects": [
       {
         "subject": "housing areas"
@@ -31,47 +53,24 @@
         "subject": "Dwellings"
       }
     ],
-    "contributors": [],
-    "dates": [
+    "titles": [
       {
-        "date": "2012",
-        "dateType": "Issued"
+        "title": "WPA household census for 210 E VERNON, Los Angeles"
       }
     ],
-    "language": "eng",
     "types": {
-      "ris": "DATA",
       "bibtex": "misc",
       "citeproc": "dataset",
-      "schemaOrg": "Dataset",
       "resourceType": "Dataset",
-      "resourceTypeGeneral": "Dataset"
+      "resourceTypeGeneral": "Dataset",
+      "ris": "DATA",
+      "schemaOrg": "Dataset"
     },
-    "relatedIdentifiers": [],
-    "sizes": [],
-    "formats": [],
-    "version": null,
-    "rightsList": [],
-    "descriptions": [
-      {
-        "descriptionType": "Abstract"
-      }
-    ],
-    "geoLocations": [],
-    "fundingReferences": [],
+    "updated": "2019-08-02T20:03:32.000Z",
     "url": "http://digitallibrary.usc.edu/cdm/ref/collection/p15799coll8/id/2608",
-    "contentUrl": null,
-    "metadataVersion": 0,
-    "schemaVersion": "http://datacite.org/schema/kernel-4",
-    "source": "mds",
-    "isActive": true,
-    "state": "findable",
-    "reason": null,
-    "created": "2018-09-09T08:32:09.000Z",
-    "registered": "2018-09-09T08:33:10.000Z",
-    "published": "2012",
-    "updated": "2019-08-02T20:03:32.000Z"
+    "version": null
   },
+  "id": "10.25549/wpacards-m6171",
   "relationships": {
     "client": {
       "data": {
@@ -79,5 +78,6 @@
         "type": "clients"
       }
     }
-  }
+  },
+  "type": "dois"
 }
diff --git a/python/tests/files/datacite/datacite_doc_11.json b/python/tests/files/datacite/datacite_doc_11.json
index 50fe8363..80194762 100644
--- a/python/tests/files/datacite/datacite_doc_11.json
+++ b/python/tests/files/datacite/datacite_doc_11.json
@@ -1,30 +1,15 @@
 {
-  "id": "10.3932/ethz-a-000055869",
-  "type": "dois",
   "attributes": {
-    "doi": "10.3932/ethz-a-000055869",
-    "identifiers": [
-      {
-        "identifier": "https://doi.org/10.3932/ethz-a-000055869",
-        "identifierType": "DOI"
-      }
-    ],
+    "container": {},
+    "contentUrl": null,
+    "contributors": [],
+    "created": "2019-03-04T23:56:42.000Z",
     "creators": [
       {
-        "name": "Comet Photo AG (Zürich)",
-        "affiliation": []
-      }
-    ],
-    "titles": [
-      {
-        "title": "N1 bei Safenwil"
+        "affiliation": [],
+        "name": "Comet Photo AG (Zürich)"
       }
     ],
-    "publisher": "ETH-Bibliothek Zürich, Bildarchiv",
-    "container": {},
-    "publicationYear": 1965,
-    "subjects": [],
-    "contributors": [],
     "dates": [
       {
         "date": "1965",
@@ -35,21 +20,6 @@
         "dateType": "Issued"
       }
     ],
-    "language": "de",
-    "types": {
-      "ris": "FIGURE",
-      "bibtex": "misc",
-      "citeproc": "graphic",
-      "schemaOrg": "ImageObject",
-      "resourceTypeGeneral": "Image"
-    },
-    "relatedIdentifiers": [],
-    "sizes": [],
-    "formats": [
-      "TIFF-Bild"
-    ],
-    "version": null,
-    "rightsList": [],
     "descriptions": [
       {
         "description": "Download und Nutzung frei",
@@ -60,21 +30,50 @@
         "descriptionType": "Other"
       }
     ],
-    "geoLocations": [],
+    "doi": "10.3932/ethz-a-000055869",
+    "formats": [
+      "TIFF-Bild"
+    ],
     "fundingReferences": [],
-    "url": "http://ba.e-pics.ethz.ch/link.jsp?id=44861",
-    "contentUrl": null,
+    "geoLocations": [],
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.3932/ethz-a-000055869",
+        "identifierType": "DOI"
+      }
+    ],
+    "isActive": true,
+    "language": "de",
     "metadataVersion": 6,
+    "publicationYear": 1965,
+    "published": "1965",
+    "publisher": "ETH-Bibliothek Zürich, Bildarchiv",
+    "reason": null,
+    "registered": "2019-07-30T13:17:45.000Z",
+    "relatedIdentifiers": [],
+    "rightsList": [],
     "schemaVersion": "http://datacite.org/schema/kernel-3",
+    "sizes": [],
     "source": "mds",
-    "isActive": true,
     "state": "findable",
-    "reason": null,
-    "created": "2019-03-04T23:56:42.000Z",
-    "registered": "2019-07-30T13:17:45.000Z",
-    "published": "1965",
-    "updated": "2019-08-02T22:08:26.000Z"
+    "subjects": [],
+    "titles": [
+      {
+        "title": "N1 bei Safenwil"
+      }
+    ],
+    "types": {
+      "bibtex": "misc",
+      "citeproc": "graphic",
+      "resourceTypeGeneral": "Image",
+      "ris": "FIGURE",
+      "schemaOrg": "ImageObject"
+    },
+    "updated": "2019-08-02T22:08:26.000Z",
+    "url": "http://ba.e-pics.ethz.ch/link.jsp?id=44861",
+    "version": null
   },
+  "id": "10.3932/ethz-a-000055869",
   "relationships": {
     "client": {
       "data": {
@@ -82,5 +81,6 @@
         "type": "clients"
       }
     }
-  }
+  },
+  "type": "dois"
 }
diff --git a/python/tests/files/datacite/datacite_doc_12.json b/python/tests/files/datacite/datacite_doc_12.json
index 31c0f0ca..642011d5 100644
--- a/python/tests/files/datacite/datacite_doc_12.json
+++ b/python/tests/files/datacite/datacite_doc_12.json
@@ -1,58 +1,43 @@
 {
-  "id": "10.5167/uzh-171449",
-  "type": "dois",
   "attributes": {
-    "doi": "10.5167/uzh-171449",
-    "identifiers": [
-      {
-        "identifier": "https://doi.org/10.5167/uzh-171449",
-        "identifierType": "DOI"
-      }
-    ],
+    "container": {},
+    "contentUrl": null,
+    "contributors": [],
+    "created": "2019-06-27T01:01:35.000Z",
     "creators": [
       {
-        "name": "Spanias, Charalampos",
-        "nameType": "Personal",
-        "givenName": "Charalampos",
-        "familyName": "Spanias",
         "affiliation": [],
-        "nameIdentifiers": []
+        "familyName": "Spanias",
+        "givenName": "Charalampos",
+        "name": "Spanias, Charalampos",
+        "nameIdentifiers": [],
+        "nameType": "Personal"
       },
       {
-        "name": "Nikolaidis, Pantelis T",
-        "nameType": "Personal",
-        "givenName": "Pantelis T",
-        "familyName": "Nikolaidis",
         "affiliation": [],
-        "nameIdentifiers": []
+        "familyName": "Nikolaidis",
+        "givenName": "Pantelis T",
+        "name": "Nikolaidis, Pantelis T",
+        "nameIdentifiers": [],
+        "nameType": "Personal"
       },
       {
-        "name": "Rosemann, Thomas",
-        "nameType": "Personal",
-        "givenName": "Thomas",
-        "familyName": "Rosemann",
         "affiliation": [],
-        "nameIdentifiers": []
+        "familyName": "Rosemann",
+        "givenName": "Thomas",
+        "name": "Rosemann, Thomas",
+        "nameIdentifiers": [],
+        "nameType": "Personal"
       },
       {
-        "name": "Knechtle, Beat",
-        "nameType": "Personal",
-        "givenName": "Beat",
-        "familyName": "Knechtle",
         "affiliation": [],
-        "nameIdentifiers": []
-      }
-    ],
-    "titles": [
-      {
-        "title": "Anthropometric and Physiological Profile of Mixed Martial Art Athletes: A Brief Review"
+        "familyName": "Knechtle",
+        "givenName": "Beat",
+        "name": "Knechtle, Beat",
+        "nameIdentifiers": [],
+        "nameType": "Personal"
       }
     ],
-    "publisher": "MDPI Publishing",
-    "container": {},
-    "publicationYear": 2019,
-    "subjects": [],
-    "contributors": [],
     "dates": [
       {
         "date": "2019-06-14",
@@ -63,35 +48,49 @@
         "dateType": "Issued"
       }
     ],
-    "language": null,
-    "types": {
-      "ris": "RPRT",
-      "bibtex": "article",
-      "citeproc": "article-journal",
-      "schemaOrg": "ScholarlyArticle",
-      "resourceTypeGeneral": "Text"
-    },
-    "relatedIdentifiers": [],
-    "sizes": [],
-    "formats": [],
-    "version": null,
-    "rightsList": [],
     "descriptions": [],
-    "geoLocations": [],
+    "doi": "10.5167/uzh-171449",
+    "formats": [],
     "fundingReferences": [],
-    "url": "https://www.zora.uzh.ch/id/eprint/171449",
-    "contentUrl": null,
+    "geoLocations": [],
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.5167/uzh-171449",
+        "identifierType": "DOI"
+      }
+    ],
+    "isActive": true,
+    "language": null,
     "metadataVersion": 0,
+    "publicationYear": 2019,
+    "published": "2019",
+    "publisher": "MDPI Publishing",
+    "reason": null,
+    "registered": "2019-06-27T01:01:36.000Z",
+    "relatedIdentifiers": [],
+    "rightsList": [],
     "schemaVersion": null,
+    "sizes": [],
     "source": "mds",
-    "isActive": true,
     "state": "findable",
-    "reason": null,
-    "created": "2019-06-27T01:01:35.000Z",
-    "registered": "2019-06-27T01:01:36.000Z",
-    "published": "2019",
-    "updated": "2019-09-26T16:44:24.000Z"
+    "subjects": [],
+    "titles": [
+      {
+        "title": "Anthropometric and Physiological Profile of Mixed Martial Art Athletes: A Brief Review"
+      }
+    ],
+    "types": {
+      "bibtex": "article",
+      "citeproc": "article-journal",
+      "resourceTypeGeneral": "Text",
+      "ris": "RPRT",
+      "schemaOrg": "ScholarlyArticle"
+    },
+    "updated": "2019-09-26T16:44:24.000Z",
+    "url": "https://www.zora.uzh.ch/id/eprint/171449",
+    "version": null
   },
+  "id": "10.5167/uzh-171449",
   "relationships": {
     "client": {
       "data": {
@@ -99,5 +98,6 @@
         "type": "clients"
       }
     }
-  }
+  },
+  "type": "dois"
 }
diff --git a/python/tests/files/datacite/datacite_doc_13.json b/python/tests/files/datacite/datacite_doc_13.json
index ff6eb229..0cada273 100644
--- a/python/tests/files/datacite/datacite_doc_13.json
+++ b/python/tests/files/datacite/datacite_doc_13.json
@@ -1,37 +1,22 @@
 {
-  "id": "10.5169/seals-314104",
-  "type": "dois",
   "attributes": {
-    "doi": "10.5169/seals-314104",
-    "identifiers": [
-      {
-        "identifier": "https://doi.org/10.5169/seals-314104",
-        "identifierType": "DOI"
-      }
-    ],
+    "container": {},
+    "contentUrl": null,
+    "contributors": [],
+    "created": "2013-03-22T14:02:08.000Z",
     "creators": [
       {
-        "name": "O.M.",
-        "affiliation": []
+        "affiliation": [],
+        "name": "O.M."
       },
       {
-        "name": "Hiltbrunner, Hermann",
-        "nameType": "Personal",
-        "givenName": "Hermann",
+        "affiliation": [],
         "familyName": "Hiltbrunner",
-        "affiliation": []
-      }
-    ],
-    "titles": [
-      {
-        "title": "[Müssen wir des Glücks uns schämen?]"
+        "givenName": "Hermann",
+        "name": "Hiltbrunner, Hermann",
+        "nameType": "Personal"
       }
     ],
-    "publisher": "Buchdruckerei Büchler & Co.",
-    "container": {},
-    "publicationYear": 1940,
-    "subjects": [],
-    "contributors": [],
     "dates": [
       {
         "date": "1940-10-05",
@@ -42,39 +27,53 @@
         "dateType": "Issued"
       }
     ],
-    "language": null,
-    "types": {
-      "ris": "JOUR",
-      "bibtex": "article",
-      "citeproc": "article-journal",
-      "schemaOrg": "ScholarlyArticle",
-      "resourceType": "Journal Article",
-      "resourceTypeGeneral": "Text"
-    },
-    "relatedIdentifiers": [],
-    "sizes": [],
+    "descriptions": [],
+    "doi": "10.5169/seals-314104",
     "formats": [
       "text/html",
       "application/pdf"
     ],
-    "version": null,
-    "rightsList": [],
-    "descriptions": [],
-    "geoLocations": [],
     "fundingReferences": [],
-    "url": "https://www.e-periodica.ch/digbib/view?pid=sle-001:1940-1941:45::13",
-    "contentUrl": null,
+    "geoLocations": [],
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.5169/seals-314104",
+        "identifierType": "DOI"
+      }
+    ],
+    "isActive": true,
+    "language": null,
     "metadataVersion": 17,
+    "publicationYear": 1940,
+    "published": "1940",
+    "publisher": "Buchdruckerei Büchler & Co.",
+    "reason": null,
+    "registered": "2013-03-22T13:58:11.000Z",
+    "relatedIdentifiers": [],
+    "rightsList": [],
     "schemaVersion": "http://datacite.org/schema/kernel-3",
+    "sizes": [],
     "source": null,
-    "isActive": true,
     "state": "findable",
-    "reason": null,
-    "created": "2013-03-22T14:02:08.000Z",
-    "registered": "2013-03-22T13:58:11.000Z",
-    "published": "1940",
-    "updated": "2019-08-02T02:22:55.000Z"
+    "subjects": [],
+    "titles": [
+      {
+        "title": "[Müssen wir des Glücks uns schämen?]"
+      }
+    ],
+    "types": {
+      "bibtex": "article",
+      "citeproc": "article-journal",
+      "resourceType": "Journal Article",
+      "resourceTypeGeneral": "Text",
+      "ris": "JOUR",
+      "schemaOrg": "ScholarlyArticle"
+    },
+    "updated": "2019-08-02T02:22:55.000Z",
+    "url": "https://www.e-periodica.ch/digbib/view?pid=sle-001:1940-1941:45::13",
+    "version": null
   },
+  "id": "10.5169/seals-314104",
   "relationships": {
     "client": {
       "data": {
@@ -82,5 +81,6 @@
         "type": "clients"
       }
     }
-  }
+  },
+  "type": "dois"
 }
diff --git a/python/tests/files/datacite/datacite_doc_14.json b/python/tests/files/datacite/datacite_doc_14.json
index b1e1ebf2..c0911819 100644
--- a/python/tests/files/datacite/datacite_doc_14.json
+++ b/python/tests/files/datacite/datacite_doc_14.json
@@ -1,84 +1,119 @@
 {
-  "id": "10.5517/cc7gns3",
-  "type": "dois",
   "attributes": {
-    "doi": "10.5517/cc7gns3",
-    "identifiers": [
-      {
-        "identifier": "https://doi.org/10.5517/cc7gns3",
-        "identifierType": "DOI"
-      },
-      {
-        "identifier": "222635",
-        "identifierType": "CCDC"
-      }
-    ],
+    "container": {},
+    "contentUrl": null,
+    "contributors": [],
+    "created": "2014-03-18T07:28:28.000Z",
     "creators": [
       {
-        "name": "Stulz, E.",
-        "nameType": "Personal",
-        "givenName": "E.",
+        "affiliation": [],
         "familyName": "Stulz",
-        "affiliation": []
+        "givenName": "E.",
+        "name": "Stulz, E.",
+        "nameType": "Personal"
       },
       {
-        "name": "Scott, S.M.",
-        "nameType": "Personal",
-        "givenName": "S.M.",
+        "affiliation": [],
         "familyName": "Scott",
-        "affiliation": []
+        "givenName": "S.M.",
+        "name": "Scott, S.M.",
+        "nameType": "Personal"
       },
       {
-        "name": "Ng, Yiu-Fai",
-        "nameType": "Personal",
-        "givenName": "Yiu-Fai",
+        "affiliation": [],
         "familyName": "Ng",
-        "affiliation": []
+        "givenName": "Yiu-Fai",
+        "name": "Ng, Yiu-Fai",
+        "nameType": "Personal"
       },
       {
-        "name": "Bond, A.D.",
-        "nameType": "Personal",
-        "givenName": "A.D.",
+        "affiliation": [],
         "familyName": "Bond",
-        "affiliation": []
+        "givenName": "A.D.",
+        "name": "Bond, A.D.",
+        "nameType": "Personal"
       },
       {
-        "name": "Teat, S.J.",
-        "nameType": "Personal",
-        "givenName": "S.J.",
+        "affiliation": [],
         "familyName": "Teat",
-        "affiliation": []
+        "givenName": "S.J.",
+        "name": "Teat, S.J.",
+        "nameType": "Personal"
       },
       {
-        "name": "Darling, S.L.",
-        "nameType": "Personal",
-        "givenName": "S.L.",
+        "affiliation": [],
         "familyName": "Darling",
-        "affiliation": []
+        "givenName": "S.L.",
+        "name": "Darling, S.L.",
+        "nameType": "Personal"
       },
       {
-        "name": "Feeder, N.",
-        "nameType": "Personal",
-        "givenName": "N.",
+        "affiliation": [],
         "familyName": "Feeder",
-        "affiliation": []
+        "givenName": "N.",
+        "name": "Feeder, N.",
+        "nameType": "Personal"
       },
       {
-        "name": "Sanders, J.K.M.",
-        "nameType": "Personal",
-        "givenName": "J.K.M.",
+        "affiliation": [],
         "familyName": "Sanders",
-        "affiliation": []
+        "givenName": "J.K.M.",
+        "name": "Sanders, J.K.M.",
+        "nameType": "Personal"
       }
     ],
-    "titles": [
+    "dates": [
       {
-        "title": "CCDC 222635: Experimental Crystal Structure Determination"
+        "date": "2004",
+        "dateType": "Issued"
       }
     ],
-    "publisher": "Cambridge Crystallographic Data Centre",
-    "container": {},
+    "descriptions": [
+      {
+        "description": "Related Article: E.Stulz, S.M.Scott, Yiu-Fai Ng, A.D.Bond, S.J.Teat, S.L.Darling, N.Feeder, J.K.M.Sanders|2003|Inorg.Chem.|42|6564|doi:10.1021/ic034699w",
+        "descriptionType": "Other"
+      },
+      {
+        "description": "An entry from the Cambridge Structural Database, the world’s repository for small molecule crystal structures. The entry contains experimental data from a crystal diffraction study. The deposited dataset for this entry is freely available from the CCDC and typically includes 3D coordinates, cell parameters, space group, experimental conditions and quality measures.",
+        "descriptionType": "Abstract"
+      }
+    ],
+    "doi": "10.5517/cc7gns3",
+    "formats": [
+      "CIF"
+    ],
+    "fundingReferences": [],
+    "geoLocations": [],
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.5517/cc7gns3",
+        "identifierType": "DOI"
+      },
+      {
+        "identifier": "222635",
+        "identifierType": "CCDC"
+      }
+    ],
+    "isActive": true,
+    "language": "eng",
+    "metadataVersion": 2,
     "publicationYear": 2004,
+    "published": "2004",
+    "publisher": "Cambridge Crystallographic Data Centre",
+    "reason": null,
+    "registered": "2014-03-18T07:28:29.000Z",
+    "relatedIdentifiers": [
+      {
+        "relatedIdentifier": "10.1021/ic034699w",
+        "relatedIdentifierType": "DOI",
+        "relationType": "IsSupplementTo"
+      }
+    ],
+    "rightsList": [],
+    "schemaVersion": "http://datacite.org/schema/kernel-3",
+    "sizes": [],
+    "source": null,
+    "state": "findable",
     "subjects": [
       {
         "subject": "Crystal Structure"
@@ -102,59 +137,23 @@
         "subject": "bis(mu~2~-5-(3,5-Di-t-butylphenyl)-15-(4-(2-(diphenylphosphino)ethynyl)phenyl)-2,8,12,18-tetrahexyl-3,7,13,17-tetramethylporphyrinato)-(5,15-bis(3,5-di-t-butylphenyl)-2,8,12,18-tetraethyl-3,7,13,17-tetramethylporphyrinato)-di-nickel-ruthenium chloroform solvate"
       }
     ],
-    "contributors": [],
-    "dates": [
+    "titles": [
       {
-        "date": "2004",
-        "dateType": "Issued"
+        "title": "CCDC 222635: Experimental Crystal Structure Determination"
       }
     ],
-    "language": "eng",
     "types": {
-      "ris": "DATA",
       "bibtex": "misc",
       "citeproc": "dataset",
-      "schemaOrg": "Dataset",
-      "resourceTypeGeneral": "Dataset"
+      "resourceTypeGeneral": "Dataset",
+      "ris": "DATA",
+      "schemaOrg": "Dataset"
     },
-    "relatedIdentifiers": [
-      {
-        "relationType": "IsSupplementTo",
-        "relatedIdentifier": "10.1021/ic034699w",
-        "relatedIdentifierType": "DOI"
-      }
-    ],
-    "sizes": [],
-    "formats": [
-      "CIF"
-    ],
-    "version": null,
-    "rightsList": [],
-    "descriptions": [
-      {
-        "description": "Related Article: E.Stulz, S.M.Scott, Yiu-Fai Ng, A.D.Bond, S.J.Teat, S.L.Darling, N.Feeder, J.K.M.Sanders|2003|Inorg.Chem.|42|6564|doi:10.1021/ic034699w",
-        "descriptionType": "Other"
-      },
-      {
-        "description": "An entry from the Cambridge Structural Database, the world’s repository for small molecule crystal structures. The entry contains experimental data from a crystal diffraction study. The deposited dataset for this entry is freely available from the CCDC and typically includes 3D coordinates, cell parameters, space group, experimental conditions and quality measures.",
-        "descriptionType": "Abstract"
-      }
-    ],
-    "geoLocations": [],
-    "fundingReferences": [],
+    "updated": "2019-08-02T03:38:32.000Z",
     "url": "http://www.ccdc.cam.ac.uk/services/structure_request?id=doi:10.5517/cc7gns3&sid=DataCite",
-    "contentUrl": null,
-    "metadataVersion": 2,
-    "schemaVersion": "http://datacite.org/schema/kernel-3",
-    "source": null,
-    "isActive": true,
-    "state": "findable",
-    "reason": null,
-    "created": "2014-03-18T07:28:28.000Z",
-    "registered": "2014-03-18T07:28:29.000Z",
-    "published": "2004",
-    "updated": "2019-08-02T03:38:32.000Z"
+    "version": null
   },
+  "id": "10.5517/cc7gns3",
   "relationships": {
     "client": {
       "data": {
@@ -162,5 +161,6 @@
         "type": "clients"
       }
     }
-  }
+  },
+  "type": "dois"
 }
diff --git a/python/tests/files/datacite/datacite_doc_15.json b/python/tests/files/datacite/datacite_doc_15.json
index 5b4ee8ec..8dc67267 100644
--- a/python/tests/files/datacite/datacite_doc_15.json
+++ b/python/tests/files/datacite/datacite_doc_15.json
@@ -1,8 +1,29 @@
 {
-  "id": "10.6073/pasta/95296d8416aae24f3d39b4ecb27f0b28",
-  "type": "dois",
   "attributes": {
+    "container": {},
+    "contentUrl": null,
+    "contributors": [],
+    "created": "2017-02-01T18:20:04.000Z",
+    "creators": [
+      {
+        "affiliation": [],
+        "familyName": "Richardson",
+        "givenName": "David",
+        "name": "Richardson, David",
+        "nameType": "Personal"
+      }
+    ],
+    "dates": [
+      {
+        "date": "2017",
+        "dateType": "Issued"
+      }
+    ],
+    "descriptions": [],
     "doi": "10.6073/pasta/95296d8416aae24f3d39b4ecb27f0b28",
+    "formats": [],
+    "fundingReferences": [],
+    "geoLocations": [],
     "identifiers": [
       {
         "identifier": "https://doi.org/10.6073/pasta/95296d8416aae24f3d39b4ecb27f0b28",
@@ -13,61 +34,39 @@
         "identifierType": "URL"
       }
     ],
-    "creators": [
-      {
-        "name": "Richardson, David",
-        "nameType": "Personal",
-        "givenName": "David",
-        "familyName": "Richardson",
-        "affiliation": []
-      }
-    ],
-    "titles": [
-      {
-        "title": "Parramore Island of the Virginia Coast Reserve Permanent Plot Resurvey: Tree data 1997"
-      }
-    ],
-    "publisher": "Environmental Data Initiative",
-    "container": {},
+    "isActive": true,
+    "language": null,
+    "metadataVersion": 1,
     "publicationYear": 2017,
+    "published": "2017",
+    "publisher": "Environmental Data Initiative",
+    "reason": null,
+    "registered": "2017-02-01T18:20:05.000Z",
+    "relatedIdentifiers": [],
+    "rightsList": [],
+    "schemaVersion": "http://datacite.org/schema/kernel-2.2",
+    "sizes": [],
+    "source": null,
+    "state": "findable",
     "subjects": [],
-    "contributors": [],
-    "dates": [
+    "titles": [
       {
-        "date": "2017",
-        "dateType": "Issued"
+        "title": "Parramore Island of the Virginia Coast Reserve Permanent Plot Resurvey: Tree data 1997"
       }
     ],
-    "language": null,
     "types": {
-      "ris": "DATA",
       "bibtex": "misc",
       "citeproc": "dataset",
-      "schemaOrg": "Dataset",
       "resourceType": "dataPackage",
-      "resourceTypeGeneral": "Dataset"
+      "resourceTypeGeneral": "Dataset",
+      "ris": "DATA",
+      "schemaOrg": "Dataset"
     },
-    "relatedIdentifiers": [],
-    "sizes": [],
-    "formats": [],
-    "version": null,
-    "rightsList": [],
-    "descriptions": [],
-    "geoLocations": [],
-    "fundingReferences": [],
+    "updated": "2019-08-02T14:16:49.000Z",
     "url": "https://portal.lternet.edu/nis/mapbrowse?packageid=knb-lter-vcr.102.16",
-    "contentUrl": null,
-    "metadataVersion": 1,
-    "schemaVersion": "http://datacite.org/schema/kernel-2.2",
-    "source": null,
-    "isActive": true,
-    "state": "findable",
-    "reason": null,
-    "created": "2017-02-01T18:20:04.000Z",
-    "registered": "2017-02-01T18:20:05.000Z",
-    "published": "2017",
-    "updated": "2019-08-02T14:16:49.000Z"
+    "version": null
   },
+  "id": "10.6073/pasta/95296d8416aae24f3d39b4ecb27f0b28",
   "relationships": {
     "client": {
       "data": {
@@ -75,5 +74,6 @@
         "type": "clients"
       }
     }
-  }
+  },
+  "type": "dois"
 }
diff --git a/python/tests/files/datacite/datacite_doc_16.json b/python/tests/files/datacite/datacite_doc_16.json
index 5af7fbe1..72ad59ac 100644
--- a/python/tests/files/datacite/datacite_doc_16.json
+++ b/python/tests/files/datacite/datacite_doc_16.json
@@ -1,74 +1,73 @@
 {
-  "id": "10.6084/m9.figshare.1282478",
-  "type": "dois",
   "attributes": {
-    "doi": "10.6084/m9.figshare.1282478",
-    "identifiers": [
-      {
-        "identifier": "https://doi.org/10.6084/m9.figshare.1282478",
-        "identifierType": "DOI"
-      }
-    ],
+    "container": {},
+    "contentUrl": null,
+    "contributors": [],
+    "created": "2014-12-31T15:38:16.000Z",
     "creators": [
       {
-        "name": "Sochi, Taha",
-        "nameType": "Personal",
-        "givenName": "Taha",
+        "affiliation": [],
         "familyName": "Sochi",
-        "affiliation": []
-      }
-    ],
-    "titles": [
-      {
-        "title": "Testing the Connectivity of Networks"
+        "givenName": "Taha",
+        "name": "Sochi, Taha",
+        "nameType": "Personal"
       }
     ],
-    "publisher": "Figshare",
-    "container": {},
-    "publicationYear": 2014,
-    "subjects": [],
-    "contributors": [],
     "dates": [
       {
         "date": "2014",
         "dateType": "Issued"
       }
     ],
+    "descriptions": [],
+    "doi": "10.6084/m9.figshare.1282478",
+    "formats": [],
+    "fundingReferences": [],
+    "geoLocations": [],
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.6084/m9.figshare.1282478",
+        "identifierType": "DOI"
+      }
+    ],
+    "isActive": true,
     "language": null,
-    "types": {
-      "ris": "DATA",
-      "bibtex": "misc",
-      "citeproc": "dataset",
-      "schemaOrg": "Dataset",
-      "resourceType": "Paper",
-      "resourceTypeGeneral": "Dataset"
-    },
+    "metadataVersion": 0,
+    "publicationYear": 2014,
+    "published": "2014",
+    "publisher": "Figshare",
+    "reason": null,
+    "registered": "2014-12-31T15:38:18.000Z",
     "relatedIdentifiers": [],
-    "sizes": [],
-    "formats": [],
-    "version": null,
     "rightsList": [
       {
         "rights": "CC-BY",
         "rightsUri": "http://creativecommons.org/licenses/by/3.0/us"
       }
     ],
-    "descriptions": [],
-    "geoLocations": [],
-    "fundingReferences": [],
-    "url": "http://figshare.com/articles/Testing_the_Connectivity_of_Networks/1282478",
-    "contentUrl": null,
-    "metadataVersion": 0,
     "schemaVersion": "http://datacite.org/schema/kernel-3",
+    "sizes": [],
     "source": null,
-    "isActive": true,
     "state": "findable",
-    "reason": null,
-    "created": "2014-12-31T15:38:16.000Z",
-    "registered": "2014-12-31T15:38:18.000Z",
-    "published": "2014",
-    "updated": "2019-08-02T04:52:11.000Z"
+    "subjects": [],
+    "titles": [
+      {
+        "title": "Testing the Connectivity of Networks"
+      }
+    ],
+    "types": {
+      "bibtex": "misc",
+      "citeproc": "dataset",
+      "resourceType": "Paper",
+      "resourceTypeGeneral": "Dataset",
+      "ris": "DATA",
+      "schemaOrg": "Dataset"
+    },
+    "updated": "2019-08-02T04:52:11.000Z",
+    "url": "http://figshare.com/articles/Testing_the_Connectivity_of_Networks/1282478",
+    "version": null
   },
+  "id": "10.6084/m9.figshare.1282478",
   "relationships": {
     "client": {
       "data": {
@@ -76,5 +75,6 @@
         "type": "clients"
       }
     }
-  }
+  },
+  "type": "dois"
 }
diff --git a/python/tests/files/datacite/datacite_doc_17.json b/python/tests/files/datacite/datacite_doc_17.json
index f1363a61..93ec715e 100644
--- a/python/tests/files/datacite/datacite_doc_17.json
+++ b/python/tests/files/datacite/datacite_doc_17.json
@@ -1,66 +1,65 @@
 {
-  "id": "10.7910/dvn/tsqfwc/yytj22",
-  "type": "dois",
   "attributes": {
-    "doi": "10.7910/dvn/tsqfwc/yytj22",
-    "identifiers": [
+    "container": {},
+    "contentUrl": null,
+    "contributors": [],
+    "created": "2018-08-22T17:36:10.000Z",
+    "creators": [
       {
-        "identifier": "https://doi.org/10.7910/dvn/tsqfwc/yytj22",
-        "identifierType": "DOI"
+        "affiliation": [],
+        "name": "Di Giovanna, Antonino Paolo (University Of Florence)",
+        "nameType": "Personal"
       }
     ],
-    "creators": [
+    "dates": [
       {
-        "name": "Di Giovanna, Antonino Paolo (University Of Florence)",
-        "nameType": "Personal",
-        "affiliation": []
+        "date": "2018",
+        "dateType": "Issued"
       }
     ],
-    "titles": [
+    "descriptions": [],
+    "doi": "10.7910/dvn/tsqfwc/yytj22",
+    "formats": [],
+    "fundingReferences": [],
+    "geoLocations": [],
+    "identifiers": [
       {
-        "title": "gel_BSA-FITC_Markov_segmntation0343.tif"
+        "identifier": "https://doi.org/10.7910/dvn/tsqfwc/yytj22",
+        "identifierType": "DOI"
       }
     ],
-    "publisher": "Harvard Dataverse",
-    "container": {},
+    "isActive": true,
+    "language": null,
+    "metadataVersion": 0,
     "publicationYear": 2018,
+    "published": "2018",
+    "publisher": "Harvard Dataverse",
+    "reason": null,
+    "registered": "2018-08-22T17:37:30.000Z",
+    "relatedIdentifiers": [],
+    "rightsList": [],
+    "schemaVersion": "http://datacite.org/schema/kernel-4",
+    "sizes": [],
+    "source": "mds",
+    "state": "findable",
     "subjects": [],
-    "contributors": [],
-    "dates": [
+    "titles": [
       {
-        "date": "2018",
-        "dateType": "Issued"
+        "title": "gel_BSA-FITC_Markov_segmntation0343.tif"
       }
     ],
-    "language": null,
     "types": {
-      "ris": "DATA",
       "bibtex": "misc",
       "citeproc": "dataset",
-      "schemaOrg": "Dataset",
-      "resourceTypeGeneral": "Dataset"
+      "resourceTypeGeneral": "Dataset",
+      "ris": "DATA",
+      "schemaOrg": "Dataset"
     },
-    "relatedIdentifiers": [],
-    "sizes": [],
-    "formats": [],
-    "version": null,
-    "rightsList": [],
-    "descriptions": [],
-    "geoLocations": [],
-    "fundingReferences": [],
+    "updated": "2019-08-02T19:43:20.000Z",
     "url": "https://dataverse.harvard.edu/file.xhtml?persistentId=doi:10.7910/DVN/TSQFWC/YYTJ22",
-    "contentUrl": null,
-    "metadataVersion": 0,
-    "schemaVersion": "http://datacite.org/schema/kernel-4",
-    "source": "mds",
-    "isActive": true,
-    "state": "findable",
-    "reason": null,
-    "created": "2018-08-22T17:36:10.000Z",
-    "registered": "2018-08-22T17:37:30.000Z",
-    "published": "2018",
-    "updated": "2019-08-02T19:43:20.000Z"
+    "version": null
   },
+  "id": "10.7910/dvn/tsqfwc/yytj22",
   "relationships": {
     "client": {
       "data": {
@@ -68,5 +67,6 @@
         "type": "clients"
       }
     }
-  }
+  },
+  "type": "dois"
 }
diff --git a/python/tests/files/datacite/datacite_doc_18.json b/python/tests/files/datacite/datacite_doc_18.json
index f6bc81a6..b5c41b68 100644
--- a/python/tests/files/datacite/datacite_doc_18.json
+++ b/python/tests/files/datacite/datacite_doc_18.json
@@ -1,31 +1,16 @@
 {
-  "id": "10.7916/d81z522m",
-  "type": "dois",
   "attributes": {
-    "doi": "10.7916/d81z522m",
-    "identifiers": [
-      {
-        "identifier": "https://doi.org/10.7916/d81z522m",
-        "identifierType": "DOI"
-      }
-    ],
+    "container": {},
+    "contentUrl": null,
+    "contributors": [],
+    "created": "2017-11-29T02:15:31.000Z",
     "creators": [
       {
-        "name": "(:Unav)",
         "affiliation": [],
+        "name": "(:Unav)",
         "nameIdentifiers": []
       }
     ],
-    "titles": [
-      {
-        "title": "Eastern questionnaire, answer sheet for Interviewee 53215, page 064"
-      }
-    ],
-    "publisher": "Columbia University",
-    "container": {},
-    "publicationYear": 2017,
-    "subjects": [],
-    "contributors": [],
     "dates": [
       {
         "date": "2017-08-21",
@@ -40,34 +25,48 @@
         "dateType": "Issued"
       }
     ],
+    "descriptions": [],
+    "doi": "10.7916/d81z522m",
+    "formats": [],
+    "fundingReferences": [],
+    "geoLocations": [],
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.7916/d81z522m",
+        "identifierType": "DOI"
+      }
+    ],
+    "isActive": true,
     "language": null,
+    "metadataVersion": 2,
+    "publicationYear": 2017,
+    "published": "2017",
+    "publisher": "Columbia University",
+    "reason": null,
+    "registered": "2017-11-29T02:15:32.000Z",
+    "relatedIdentifiers": [],
+    "rightsList": [],
+    "schemaVersion": "http://datacite.org/schema/kernel-3",
+    "sizes": [],
+    "source": "ez",
+    "state": "findable",
+    "subjects": [],
+    "titles": [
+      {
+        "title": "Eastern questionnaire, answer sheet for Interviewee 53215, page 064"
+      }
+    ],
     "types": {
-      "ris": "GEN",
       "bibtex": "misc",
       "citeproc": "article",
+      "ris": "GEN",
       "schemaOrg": "CreativeWork"
     },
-    "relatedIdentifiers": [],
-    "sizes": [],
-    "formats": [],
-    "version": null,
-    "rightsList": [],
-    "descriptions": [],
-    "geoLocations": [],
-    "fundingReferences": [],
+    "updated": "2019-08-04T13:17:58.000Z",
     "url": "https://dlc.library.columbia.edu/lcaaj/cul:k3j9kd52d6",
-    "contentUrl": null,
-    "metadataVersion": 2,
-    "schemaVersion": "http://datacite.org/schema/kernel-3",
-    "source": "ez",
-    "isActive": true,
-    "state": "findable",
-    "reason": null,
-    "created": "2017-11-29T02:15:31.000Z",
-    "registered": "2017-11-29T02:15:32.000Z",
-    "published": "2017",
-    "updated": "2019-08-04T13:17:58.000Z"
+    "version": null
   },
+  "id": "10.7916/d81z522m",
   "relationships": {
     "client": {
       "data": {
@@ -75,5 +74,6 @@
         "type": "clients"
       }
     }
-  }
+  },
+  "type": "dois"
 }
diff --git a/python/tests/files/datacite/datacite_doc_19.json b/python/tests/files/datacite/datacite_doc_19.json
index c0bc25ba..9fbe7372 100644
--- a/python/tests/files/datacite/datacite_doc_19.json
+++ b/python/tests/files/datacite/datacite_doc_19.json
@@ -1,31 +1,16 @@
 {
-  "id": "10.7916/d86x0cg1",
-  "type": "dois",
   "attributes": {
-    "doi": "10.7916/d86x0cg1",
-    "identifiers": [
-      {
-        "identifier": "https://doi.org/10.7916/d86x0cg1",
-        "identifierType": "DOI"
-      }
-    ],
+    "container": {},
+    "contentUrl": null,
+    "contributors": [],
+    "created": "2017-11-29T09:29:33.000Z",
     "creators": [
       {
-        "name": "(:Unav)",
         "affiliation": [],
+        "name": "(:Unav)",
         "nameIdentifiers": []
       }
     ],
-    "titles": [
-      {
-        "title": "Eastern questionnaire, answer sheet for Interviewee 55236, page 092"
-      }
-    ],
-    "publisher": "Columbia University",
-    "container": {},
-    "publicationYear": 2017,
-    "subjects": [],
-    "contributors": [],
     "dates": [
       {
         "date": "2017-08-24",
@@ -40,34 +25,48 @@
         "dateType": "Issued"
       }
     ],
+    "descriptions": [],
+    "doi": "10.7916/d86x0cg1",
+    "formats": [],
+    "fundingReferences": [],
+    "geoLocations": [],
+    "identifiers": [
+      {
+        "identifier": "https://doi.org/10.7916/d86x0cg1",
+        "identifierType": "DOI"
+      }
+    ],
+    "isActive": true,
     "language": null,
+    "metadataVersion": 3,
+    "publicationYear": 2017,
+    "published": "2017",
+    "publisher": "Columbia University",
+    "reason": null,
+    "registered": "2017-11-29T09:29:34.000Z",
+    "relatedIdentifiers": [],
+    "rightsList": [],
+    "schemaVersion": "http://datacite.org/schema/kernel-3",
+    "sizes": [],
+    "source": "ez",
+    "state": "findable",
+    "subjects": [],
+    "titles": [
+      {
+        "title": "Eastern questionnaire, answer sheet for Interviewee 55236, page 092"
+      }
+    ],
     "types": {
-      "ris": "GEN",
       "bibtex": "misc",
       "citeproc": "article",
+      "ris": "GEN",
       "schemaOrg": "CreativeWork"
     },
-    "relatedIdentifiers": [],
-    "sizes": [],
-    "formats": [],
-    "version": null,
-    "rightsList": [],
-    "descriptions": [],
-    "geoLocations": [],
-    "fundingReferences": [],
+    "updated": "2019-08-04T23:43:40.000Z",
     "url": "https://dlc.library.columbia.edu/lcaaj/cul:44j0zpc98s",
-    "contentUrl": null,
-    "metadataVersion": 3,
-    "schemaVersion": "http://datacite.org/schema/kernel-3",
-    "source": "ez",
-    "isActive": true,
-    "state": "findable",
-    "reason": null,
-    "created": "2017-11-29T09:29:33.000Z",
-    "registered": "2017-11-29T09:29:34.000Z",
-    "published": "2017",
-    "updated": "2019-08-04T23:43:40.000Z"
+    "version": null
   },
+  "id": "10.7916/d86x0cg1",
   "relationships": {
     "client": {
       "data": {
@@ -75,5 +74,6 @@
         "type": "clients"
       }
     }
-  }
+  },
+  "type": "dois"
 }
diff --git a/python/tests/files/datacite/datacite_doc_20.json b/python/tests/files/datacite/datacite_doc_20.json
index cc6cc1fb..7126ee37 100644
--- a/python/tests/files/datacite/datacite_doc_20.json
+++ b/python/tests/files/datacite/datacite_doc_20.json
@@ -1,19 +1,12 @@
 {
   "attributes": {
-    "doi": "10.7916/d86x0cg1",
     "creators": [
       {
-        "name": "(:Unav)",
         "affiliation": [],
+        "name": "(:Unav)",
         "nameIdentifiers": []
       }
     ],
-    "titles": [
-      {
-        "title": "<h1>Eastern questionnaire</h1>"
-      }
-    ],
-    "publicationYear": 2017,
     "dates": [
       {
         "date": "2017-08-24",
@@ -28,14 +21,21 @@
         "dateType": "Issued"
       }
     ],
+    "doi": "10.7916/d86x0cg1",
+    "isActive": true,
     "language": null,
+    "publicationYear": 2017,
+    "state": "findable",
+    "titles": [
+      {
+        "title": "<h1>Eastern questionnaire</h1>"
+      }
+    ],
     "types": {
-      "ris": "GEN",
       "bibtex": "misc",
       "citeproc": "article",
+      "ris": "GEN",
       "schemaOrg": "CreativeWork"
-    },
-    "isActive": true,
-    "state": "findable"
+    }
   }
 }
diff --git a/python/tests/files/datacite/datacite_doc_21.json b/python/tests/files/datacite/datacite_doc_21.json
index 04b196a6..248879c2 100644
--- a/python/tests/files/datacite/datacite_doc_21.json
+++ b/python/tests/files/datacite/datacite_doc_21.json
@@ -1,26 +1,12 @@
 {
   "attributes": {
-    "doi": "10.7916/d86x0cg1",
     "creators": [
       {
-        "name": "(:Unav)",
         "affiliation": [],
+        "name": "(:Unav)",
         "nameIdentifiers": []
       }
     ],
-    "titles": [
-      {
-        "title": "ABC"
-      }
-    ],
-    "publicationYear": 2017,
-    "language": "GERMAN",
-    "types": {
-      "ris": "GEN",
-      "bibtex": "misc",
-      "citeproc": "article",
-      "schemaOrg": "CreativeWork"
-    },
     "dates": [
       {
         "date": "2017-08-24",
@@ -35,7 +21,21 @@
         "dateType": "Issued"
       }
     ],
+    "doi": "10.7916/d86x0cg1",
     "isActive": true,
-    "state": "findable"
+    "language": "GERMAN",
+    "publicationYear": 2017,
+    "state": "findable",
+    "titles": [
+      {
+        "title": "ABC"
+      }
+    ],
+    "types": {
+      "bibtex": "misc",
+      "citeproc": "article",
+      "ris": "GEN",
+      "schemaOrg": "CreativeWork"
+    }
   }
 }
diff --git a/python/tests/files/datacite/datacite_doc_22.json b/python/tests/files/datacite/datacite_doc_22.json
index 365b1361..0f7c5e57 100644
--- a/python/tests/files/datacite/datacite_doc_22.json
+++ b/python/tests/files/datacite/datacite_doc_22.json
@@ -1,28 +1,14 @@
 {
   "attributes": {
-    "doi": "10.7916/d86x0cg1",
     "creators": [
       {
-        "name": "Anton Welch",
         "affiliation": [
           "Department of pataphysics"
         ],
+        "name": "Anton Welch",
         "nameIdentifiers": []
       }
     ],
-    "titles": [
-      {
-        "title": "ABC"
-      }
-    ],
-    "publicationYear": 2017,
-    "language": "GERMAN",
-    "types": {
-      "ris": "GEN",
-      "bibtex": "misc",
-      "citeproc": "article",
-      "schemaOrg": "CreativeWork"
-    },
     "dates": [
       {
         "date": "2017-08-24",
@@ -37,7 +23,21 @@
         "dateType": "Issued"
       }
     ],
+    "doi": "10.7916/d86x0cg1",
     "isActive": true,
-    "state": "findable"
+    "language": "GERMAN",
+    "publicationYear": 2017,
+    "state": "findable",
+    "titles": [
+      {
+        "title": "ABC"
+      }
+    ],
+    "types": {
+      "bibtex": "misc",
+      "citeproc": "article",
+      "ris": "GEN",
+      "schemaOrg": "CreativeWork"
+    }
   }
 }
diff --git a/python/tests/files/datacite/datacite_doc_23.json b/python/tests/files/datacite/datacite_doc_23.json
index 1dcdfc27..b755f1a5 100644
--- a/python/tests/files/datacite/datacite_doc_23.json
+++ b/python/tests/files/datacite/datacite_doc_23.json
@@ -1,28 +1,14 @@
 {
   "attributes": {
-    "doi": "10.7916/d86x0cg1–xxx",
     "creators": [
       {
-        "name": "Anton Welch",
         "affiliation": [
           "Department of pataphysics"
         ],
+        "name": "Anton Welch",
         "nameIdentifiers": []
       }
     ],
-    "titles": [
-      {
-        "title": "ABC"
-      }
-    ],
-    "publicationYear": 2017,
-    "language": "GERMAN",
-    "types": {
-      "ris": "GEN",
-      "bibtex": "misc",
-      "citeproc": "article",
-      "schemaOrg": "CreativeWork"
-    },
     "dates": [
       {
         "date": "2017-08-24",
@@ -37,7 +23,21 @@
         "dateType": "Issued"
       }
     ],
+    "doi": "10.7916/d86x0cg1–xxx",
     "isActive": true,
-    "state": "findable"
+    "language": "GERMAN",
+    "publicationYear": 2017,
+    "state": "findable",
+    "titles": [
+      {
+        "title": "ABC"
+      }
+    ],
+    "types": {
+      "bibtex": "misc",
+      "citeproc": "article",
+      "ris": "GEN",
+      "schemaOrg": "CreativeWork"
+    }
   }
 }
diff --git a/python/tests/files/datacite/datacite_doc_24.json b/python/tests/files/datacite/datacite_doc_24.json
index 4ea6945f..4023055b 100644
--- a/python/tests/files/datacite/datacite_doc_24.json
+++ b/python/tests/files/datacite/datacite_doc_24.json
@@ -1,32 +1,14 @@
 {
   "attributes": {
-    "doi": "10.7916/d86x0cg1",
     "creators": [
       {
-        "name": "Anton Welch",
         "affiliation": [
           "Department of pataphysics"
         ],
+        "name": "Anton Welch",
         "nameIdentifiers": []
       }
     ],
-    "titles": [
-      {
-        "title": "ABC"
-      },
-      {
-        "title": "DEF",
-        "titleType": "Subtitle"
-      }
-    ],
-    "publicationYear": 2016,
-    "language": "DE-CH",
-    "types": {
-      "ris": "GEN",
-      "bibtex": "misc",
-      "citeproc": "article",
-      "schemaOrg": "CreativeWork"
-    },
     "dates": [
       {
         "date": "2017-08-24",
@@ -41,7 +23,25 @@
         "dateType": "Issued"
       }
     ],
+    "doi": "10.7916/d86x0cg1",
     "isActive": true,
-    "state": "findable"
+    "language": "DE-CH",
+    "publicationYear": 2016,
+    "state": "findable",
+    "titles": [
+      {
+        "title": "ABC"
+      },
+      {
+        "title": "DEF",
+        "titleType": "Subtitle"
+      }
+    ],
+    "types": {
+      "bibtex": "misc",
+      "citeproc": "article",
+      "ris": "GEN",
+      "schemaOrg": "CreativeWork"
+    }
   }
 }
diff --git a/python/tests/files/datacite/datacite_doc_25.json b/python/tests/files/datacite/datacite_doc_25.json
index 60cd0ab7..2b219728 100644
--- a/python/tests/files/datacite/datacite_doc_25.json
+++ b/python/tests/files/datacite/datacite_doc_25.json
@@ -1,32 +1,14 @@
 {
   "attributes": {
-    "doi": "10.7916/d86x0cg1",
     "creators": [
       {
-        "name": "Anton Welch",
         "affiliation": [
           "Department of pataphysics"
         ],
+        "name": "Anton Welch",
         "nameIdentifiers": []
       }
     ],
-    "titles": [
-      {
-        "title": "Additional file 123: ABC"
-      },
-      {
-        "title": "DEF",
-        "titleType": "Subtitle"
-      }
-    ],
-    "publicationYear": 2016,
-    "language": "DE-CH",
-    "types": {
-      "ris": "GEN",
-      "bibtex": "misc",
-      "citeproc": "article",
-      "schemaOrg": "CreativeWork"
-    },
     "dates": [
       {
         "date": "2017-08-24",
@@ -41,7 +23,25 @@
         "dateType": "Issued"
       }
     ],
+    "doi": "10.7916/d86x0cg1",
     "isActive": true,
-    "state": "findable"
+    "language": "DE-CH",
+    "publicationYear": 2016,
+    "state": "findable",
+    "titles": [
+      {
+        "title": "Additional file 123: ABC"
+      },
+      {
+        "title": "DEF",
+        "titleType": "Subtitle"
+      }
+    ],
+    "types": {
+      "bibtex": "misc",
+      "citeproc": "article",
+      "ris": "GEN",
+      "schemaOrg": "CreativeWork"
+    }
   }
 }
diff --git a/python/tests/files/datacite/datacite_doc_26.json b/python/tests/files/datacite/datacite_doc_26.json
index c2abb1b2..36fa565d 100644
--- a/python/tests/files/datacite/datacite_doc_26.json
+++ b/python/tests/files/datacite/datacite_doc_26.json
@@ -1,25 +1,43 @@
 {
   "attributes": {
-    "doi": "10.7916/d86x0cg1",
+    "contributors": [
+      {
+        "affiliation": [],
+        "contributorType": "Editor",
+        "familyName": "Wemmer",
+        "givenName": "David",
+        "name": "Wemmer, David",
+        "nameType": "Personal"
+      }
+    ],
     "creators": [
       {
-        "name": "Anton Welch",
         "affiliation": [
           "Department of pataphysics"
         ],
+        "name": "Anton Welch",
         "nameIdentifiers": []
       }
     ],
-    "contributors": [
+    "dates": [
       {
-        "name": "Wemmer, David",
-        "nameType": "Personal",
-        "givenName": "David",
-        "familyName": "Wemmer",
-        "affiliation": [],
-        "contributorType": "Editor"
+        "date": "2017-08-24",
+        "dateType": "Created"
+      },
+      {
+        "date": "2019-08-04",
+        "dateType": "Updated"
+      },
+      {
+        "date": "2017",
+        "dateType": "Issued"
       }
     ],
+    "doi": "10.7916/d86x0cg1",
+    "isActive": true,
+    "language": "DE-CH",
+    "publicationYear": 2016,
+    "state": "findable",
     "titles": [
       {
         "title": "Additional file 123: ABC"
@@ -29,29 +47,11 @@
         "titleType": "Subtitle"
       }
     ],
-    "publicationYear": 2016,
-    "language": "DE-CH",
     "types": {
-      "ris": "GEN",
       "bibtex": "misc",
       "citeproc": "article",
+      "ris": "GEN",
       "schemaOrg": "CreativeWork"
-    },
-    "dates": [
-      {
-        "date": "2017-08-24",
-        "dateType": "Created"
-      },
-      {
-        "date": "2019-08-04",
-        "dateType": "Updated"
-      },
-      {
-        "date": "2017",
-        "dateType": "Issued"
-      }
-    ],
-    "isActive": true,
-    "state": "findable"
+    }
   }
 }
diff --git a/python/tests/files/datacite/datacite_result_00.json b/python/tests/files/datacite/datacite_result_00.json
index 0a84e7bd..89450f9d 100644
--- a/python/tests/files/datacite/datacite_result_00.json
+++ b/python/tests/files/datacite/datacite_result_00.json
@@ -1,4 +1,24 @@
 {
+  "abstracts": [],
+  "contribs": [
+    {
+      "given_name": "Qian-Jin",
+      "index": 0,
+      "raw_name": "Qian-Jin Li",
+      "role": "author",
+      "surname": "Li"
+    },
+    {
+      "given_name": "Chun-Long",
+      "index": 1,
+      "raw_name": "Chun-Long Yang",
+      "role": "author",
+      "surname": "Yang"
+    }
+  ],
+  "ext_ids": {
+    "doi": "10.1007/s10870-008-9413-z"
+  },
   "extra": {
     "container_name": "Journal of Chemical Crystallography",
     "datacite": {
@@ -7,86 +27,66 @@
           "rightsUri": "http://www.springer.com/tdm"
         }
       ],
+      "metadataVersion": 1,
       "relations": [
         {
-          "relationType": "IsPartOf",
           "relatedIdentifier": "1074-1542",
-          "resourceTypeGeneral": "Collection",
-          "relatedIdentifierType": "ISSN"
+          "relatedIdentifierType": "ISSN",
+          "relationType": "IsPartOf",
+          "resourceTypeGeneral": "Collection"
         }
       ],
       "resourceType": "JournalArticle",
       "resourceTypeGeneral": "Text",
-      "schemaVersion": "http://datacite.org/schema/kernel-4",
-      "metadataVersion": 1
+      "schemaVersion": "http://datacite.org/schema/kernel-4"
     },
     "release_month": 5
   },
-  "title": "Synthesis and Crystal Structure of a Compound with Two Conformational Isomers: N-(2-methylbenzoyl)-N′-(4-nitrophenyl)thiourea",
-  "release_type": "article-journal",
-  "release_stage": "published",
-  "release_date": "2019-05-31",
-  "release_year": 2019,
-  "ext_ids": {
-    "doi": "10.1007/s10870-008-9413-z"
-  },
-  "volume": "38",
   "issue": "12",
   "pages": "927-930",
   "publisher": "Springer Science and Business Media LLC",
-  "contribs": [
-    {
-      "index": 0,
-      "raw_name": "Qian-Jin Li",
-      "given_name": "Qian-Jin",
-      "surname": "Li",
-      "role": "author"
-    },
-    {
-      "index": 1,
-      "raw_name": "Chun-Long Yang",
-      "given_name": "Chun-Long",
-      "surname": "Yang",
-      "role": "author"
-    }
-  ],
   "refs": [
     {
-      "index": 0,
       "extra": {
         "doi": "10.1016/j.bmcl.2005.09.033"
-      }
+      },
+      "index": 0
     },
     {
-      "index": 1,
       "extra": {
         "doi": "10.1016/s0022-1139(02)00330-5"
-      }
+      },
+      "index": 1
     },
     {
-      "index": 2,
       "extra": {
         "doi": "10.1016/s0010-8545(01)00337-x"
-      }
+      },
+      "index": 2
     },
     {
-      "index": 3,
       "extra": {
         "doi": "10.1016/j.tetlet.2005.06.135"
-      }
+      },
+      "index": 3
     },
     {
-      "index": 4,
       "extra": {
         "doi": "10.1039/p298700000s1"
-      }
+      },
+      "index": 4
     },
     {
-      "index": 5,
       "extra": {
         "doi": "10.1002/anie.199515551"
-      }
+      },
+      "index": 5
     }
   ],
-  "abstracts": []
+  "release_date": "2019-05-31",
+  "release_stage": "published",
+  "release_type": "article-journal",
+  "release_year": 2019,
+  "title": "Synthesis and Crystal Structure of a Compound with Two Conformational Isomers: N-(2-methylbenzoyl)-N′-(4-nitrophenyl)thiourea",
+  "volume": "38"
 }
diff --git a/python/tests/files/datacite/datacite_result_01.json b/python/tests/files/datacite/datacite_result_01.json
index 956357b8..9fc62db4 100644
--- a/python/tests/files/datacite/datacite_result_01.json
+++ b/python/tests/files/datacite/datacite_result_01.json
@@ -1,4 +1,17 @@
 {
+  "abstracts": [],
+  "contribs": [
+    {
+      "given_name": "G.",
+      "index": 0,
+      "raw_name": "G. Dargenty",
+      "role": "author",
+      "surname": "Dargenty"
+    }
+  ],
+  "ext_ids": {
+    "doi": "10.11588/diglit.25558.39"
+  },
   "extra": {
     "datacite": {
       "license": [
@@ -13,24 +26,11 @@
       "schemaVersion": "http://datacite.org/schema/kernel-4"
     }
   },
-  "title": "Ferdinand Gaillard, [1]: né à Paris le 16 janvier 1834, mort à Paris le 19 janvier 1887",
-  "release_type": "article-journal",
-  "release_stage": "published",
-  "release_year": 1887,
-  "ext_ids": {
-    "doi": "10.11588/diglit.25558.39"
-  },
-  "publisher": "University Library Heidelberg",
   "language": "fr",
-  "contribs": [
-    {
-      "index": 0,
-      "raw_name": "G. Dargenty",
-      "given_name": "G.",
-      "surname": "Dargenty",
-      "role": "author"
-    }
-  ],
+  "publisher": "University Library Heidelberg",
   "refs": [],
-  "abstracts": []
+  "release_stage": "published",
+  "release_type": "article-journal",
+  "release_year": 1887,
+  "title": "Ferdinand Gaillard, [1]: né à Paris le 16 janvier 1834, mort à Paris le 19 janvier 1887"
 }
diff --git a/python/tests/files/datacite/datacite_result_02.json b/python/tests/files/datacite/datacite_result_02.json
index 322baf59..d6b9556f 100644
--- a/python/tests/files/datacite/datacite_result_02.json
+++ b/python/tests/files/datacite/datacite_result_02.json
@@ -1,4 +1,17 @@
 {
+  "abstracts": [],
+  "contribs": [
+    {
+      "given_name": "Albert",
+      "index": 0,
+      "raw_name": "Albert Weyersberg",
+      "role": "author",
+      "surname": "Weyersberg"
+    }
+  ],
+  "ext_ids": {
+    "doi": "10.11588/diglit.37715.57"
+  },
   "extra": {
     "datacite": {
       "license": [
@@ -17,24 +30,11 @@
       "schemaVersion": "http://datacite.org/schema/kernel-4"
     }
   },
-  "title": "Solinger Schwertschmiede-Familien, [4]",
-  "release_type": "article-journal",
-  "release_stage": "published",
-  "release_year": 1897,
-  "ext_ids": {
-    "doi": "10.11588/diglit.37715.57"
-  },
-  "publisher": "University Library Heidelberg",
   "language": "de",
-  "contribs": [
-    {
-      "index": 0,
-      "raw_name": "Albert Weyersberg",
-      "given_name": "Albert",
-      "surname": "Weyersberg",
-      "role": "author"
-    }
-  ],
+  "publisher": "University Library Heidelberg",
   "refs": [],
-  "abstracts": []
+  "release_stage": "published",
+  "release_type": "article-journal",
+  "release_year": 1897,
+  "title": "Solinger Schwertschmiede-Familien, [4]"
 }
diff --git a/python/tests/files/datacite/datacite_result_03.json b/python/tests/files/datacite/datacite_result_03.json
index 41d8d4cd..6aa65aee 100644
--- a/python/tests/files/datacite/datacite_result_03.json
+++ b/python/tests/files/datacite/datacite_result_03.json
@@ -1,16 +1,5 @@
 {
-  "extra": {
-    "datacite": {
-      "schemaVersion": "http://datacite.org/schema/kernel-3"
-    }
-  },
-  "title": "midterm ah30903",
-  "release_type": "article",
-  "release_year": 2016,
-  "ext_ids": {
-    "doi": "10.13140/rg.2.2.30434.53446"
-  },
-  "language": "ms",
+  "abstracts": [],
   "contribs": [
     {
       "index": 0,
@@ -18,6 +7,17 @@
       "role": "author"
     }
   ],
+  "ext_ids": {
+    "doi": "10.13140/rg.2.2.30434.53446"
+  },
+  "extra": {
+    "datacite": {
+      "schemaVersion": "http://datacite.org/schema/kernel-3"
+    }
+  },
+  "language": "ms",
   "refs": [],
-  "abstracts": []
+  "release_type": "article",
+  "release_year": 2016,
+  "title": "midterm ah30903"
 }
diff --git a/python/tests/files/datacite/datacite_result_04.json b/python/tests/files/datacite/datacite_result_04.json
index 0976e40e..571c3f64 100644
--- a/python/tests/files/datacite/datacite_result_04.json
+++ b/python/tests/files/datacite/datacite_result_04.json
@@ -1,4 +1,23 @@
 {
+  "abstracts": [
+    {
+      "content": "Let A be an abelian category, I the full subcategory of A consisting of injective objects of A, and K(A) the category whose objects are cochain complexes of elements of A, and whose morphisms are homotopy classes of cochain maps.  In (5), lemma 4.6., p. 42, R. Hartshorne has proved that, under certain conditions, a cochain complex X˙ ε. |KA)| can be embedded in a complex I˙ ε. |K(I)| in such a way that I˙ has the same cohomology as X˙.  In Chapter I we show that the construction given in the two first parts of Hartshorne's Lemma is natural i.e. there exists a functor  J : K(A) → K(I) and a natural transformation [formula omitted]  (where E : K(I) → K(A) is the embedding functor) such that [formula omitted] is  injective and induces isomorphism in cohomology. The question whether the construction given in the third part of the lemma is functorial is still open.  We also prove that J is left adjoint to E, so that K(I) is a reflective subcategory of K(A).  In the special case where A is a category [formula omitted] of left A-modules, and [formula omitted] the category of cochain complexes in [formula omitted] and cochain maps (not homotopy classes), we prove the existence of a functor [formula omitted]  In Chapter II we study the natural homomorphism [formula omitted]   where A, B are rings, and M, L, N modules or chain complexes. In particular we give several sufficient conditions under which v is an isomorphism, or induces isomorphism in homology.  In the appendix we give a detailed proof of Hartshorne's Lemma. We think that this is useful, as no complete proof is, to our knowledge, to be found in the literature.",
+      "lang": "en",
+      "mimetype": "text/plain"
+    }
+  ],
+  "contribs": [
+    {
+      "given_name": "Marc Andre",
+      "index": 0,
+      "raw_name": "Marc Andre Nicollerat",
+      "role": "author",
+      "surname": "Nicollerat"
+    }
+  ],
+  "ext_ids": {
+    "doi": "10.14288/1.0080520"
+  },
   "extra": {
     "datacite": {
       "metadataVersion": 5,
@@ -7,30 +26,11 @@
       "schemaVersion": "http://datacite.org/schema/kernel-3"
     }
   },
-  "title": "On chain maps inducing isomorphisms in homology",
-  "release_type": "article-journal",
-  "release_stage": "published",
-  "release_year": 1973,
-  "ext_ids": {
-    "doi": "10.14288/1.0080520"
-  },
-  "publisher": "University of British Columbia",
   "language": "en",
-  "contribs": [
-    {
-      "index": 0,
-      "raw_name": "Marc Andre Nicollerat",
-      "given_name": "Marc Andre",
-      "surname": "Nicollerat",
-      "role": "author"
-    }
-  ],
+  "publisher": "University of British Columbia",
   "refs": [],
-  "abstracts": [
-    {
-      "content": "Let A be an abelian category, I the full subcategory of A consisting of injective objects of A, and K(A) the category whose objects are cochain complexes of elements of A, and whose morphisms are homotopy classes of cochain maps.  In (5), lemma 4.6., p. 42, R. Hartshorne has proved that, under certain conditions, a cochain complex X˙ ε. |KA)| can be embedded in a complex I˙ ε. |K(I)| in such a way that I˙ has the same cohomology as X˙.  In Chapter I we show that the construction given in the two first parts of Hartshorne's Lemma is natural i.e. there exists a functor  J : K(A) → K(I) and a natural transformation [formula omitted]  (where E : K(I) → K(A) is the embedding functor) such that [formula omitted] is  injective and induces isomorphism in cohomology. The question whether the construction given in the third part of the lemma is functorial is still open.  We also prove that J is left adjoint to E, so that K(I) is a reflective subcategory of K(A).  In the special case where A is a category [formula omitted] of left A-modules, and [formula omitted] the category of cochain complexes in [formula omitted] and cochain maps (not homotopy classes), we prove the existence of a functor [formula omitted]  In Chapter II we study the natural homomorphism [formula omitted]   where A, B are rings, and M, L, N modules or chain complexes. In particular we give several sufficient conditions under which v is an isomorphism, or induces isomorphism in homology.  In the appendix we give a detailed proof of Hartshorne's Lemma. We think that this is useful, as no complete proof is, to our knowledge, to be found in the literature.",
-      "mimetype": "text/plain",
-      "lang": "en"
-    }
-  ]
+  "release_stage": "published",
+  "release_type": "article-journal",
+  "release_year": 1973,
+  "title": "On chain maps inducing isomorphisms in homology"
 }
diff --git a/python/tests/files/datacite/datacite_result_05.json b/python/tests/files/datacite/datacite_result_05.json
index c4e5418d..5b7b4ed2 100644
--- a/python/tests/files/datacite/datacite_result_05.json
+++ b/python/tests/files/datacite/datacite_result_05.json
@@ -1,528 +1,508 @@
 {
-  "extra": {
-    "datacite": {
-      "license": [
-        {
-          "rights": "Attribution-NonCommercial (CC BY-NC)",
-          "rightsUri": "http://creativecommons.org/licenses/by-nc/4.0"
-        }
-      ],
-      "metadataVersion": 1,
-      "resourceType": "Dataset/UNITE Species Hypothesis",
-      "resourceTypeGeneral": "Dataset",
-      "schemaVersion": "http://datacite.org/schema/kernel-3"
-    },
-    "release_month": 10
-  },
-  "title": "SH409843.07FU",
-  "subtitle": "Gomphales",
-  "release_type": "dataset",
-  "release_stage": "published",
-  "release_date": "2014-10-05",
-  "release_year": 2014,
-  "ext_ids": {
-    "doi": "10.15156/bio/sh409843.07fu"
-  },
-  "publisher": "UNITE Community",
-  "language": "en",
-  "license_slug": "CC-BY-NC",
+  "abstracts": [
+    {
+      "content": "UNITE provides a unified way for delimiting, identifying, communicating, and working with DNA-based Species Hypotheses (SH). All fungal ITS sequences in the international nucleotide sequence databases are clustered to approximately the species level by applying a set of dynamic distance values (<0.5 - 3.0%). All species hypotheses are given a unique, stable name in the form of a DOI, and their taxonomic and ecological annotations are verified through distributed, web-based third-party annotation efforts. SHs are connected to a taxon name and its classification as far as possible (phylum, class, order, etc.) by taking into account identifications for all sequences in the SH. An automatically or manually designated sequence is chosen to represent each such SH. These sequences are released (https://unite.ut.ee/repository.php) for use by the scientific community in, for example, local sequence similarity searches and next-generation sequencing analysis pipelines. The system and the data are updated automatically as the number of public fungal ITS sequences grows.",
+      "lang": "en",
+      "mimetype": "text/plain"
+    }
+  ],
   "contribs": [
     {
+      "given_name": "Urmas",
       "index": 0,
       "raw_name": "Urmas Kõljalg",
-      "given_name": "Urmas",
-      "surname": "Kõljalg",
-      "role": "author"
+      "role": "author",
+      "surname": "Kõljalg"
     },
     {
+      "given_name": "Kessy",
       "index": 1,
       "raw_name": "Kessy Abarenkov",
-      "given_name": "Kessy",
-      "surname": "Abarenkov",
-      "role": "author"
+      "role": "author",
+      "surname": "Abarenkov"
     },
     {
+      "given_name": "R. Henrik",
       "index": 2,
       "raw_name": "R. Henrik Nilsson",
-      "given_name": "R. Henrik",
-      "surname": "Nilsson",
-      "role": "author"
+      "role": "author",
+      "surname": "Nilsson"
     },
     {
+      "given_name": "Karl-Henrik",
       "index": 3,
       "raw_name": "Karl-Henrik Larsson",
-      "given_name": "Karl-Henrik",
-      "surname": "Larsson",
-      "role": "author"
+      "role": "author",
+      "surname": "Larsson"
     },
     {
+      "given_name": "Anders Bjørnsgard",
       "index": 4,
       "raw_name": "Anders Bjørnsgard Aas",
-      "given_name": "Anders Bjørnsgard",
-      "surname": "Aas",
-      "role": "author"
+      "role": "author",
+      "surname": "Aas"
     },
     {
+      "given_name": "Rachel",
       "index": 5,
       "raw_name": "Rachel Adams",
-      "given_name": "Rachel",
-      "surname": "Adams",
-      "role": "author"
+      "role": "author",
+      "surname": "Adams"
     },
     {
+      "given_name": "Artur",
       "index": 6,
       "raw_name": "Artur Alves",
-      "given_name": "Artur",
-      "surname": "Alves",
-      "role": "author"
+      "role": "author",
+      "surname": "Alves"
     },
     {
+      "given_name": "Joseph F.",
       "index": 7,
       "raw_name": "Joseph F. Ammirati",
-      "given_name": "Joseph F.",
-      "surname": "Ammirati",
-      "role": "author"
+      "role": "author",
+      "surname": "Ammirati"
     },
     {
+      "given_name": "A. Elizabeth",
       "index": 8,
       "raw_name": "A. Elizabeth Arnold",
-      "given_name": "A. Elizabeth",
-      "surname": "Arnold",
-      "role": "author"
+      "role": "author",
+      "surname": "Arnold"
     },
     {
+      "given_name": "Mohammad",
       "index": 9,
       "raw_name": "Mohammad Bahram",
-      "given_name": "Mohammad",
-      "surname": "Bahram",
-      "role": "author"
+      "role": "author",
+      "surname": "Bahram"
     },
     {
+      "given_name": "Johan",
       "index": 10,
       "raw_name": "Johan Bengtsson-Palme",
-      "given_name": "Johan",
-      "surname": "Bengtsson-Palme",
-      "role": "author"
+      "role": "author",
+      "surname": "Bengtsson-Palme"
     },
     {
+      "given_name": "Anna",
       "index": 11,
       "raw_name": "Anna Berlin",
-      "given_name": "Anna",
-      "surname": "Berlin",
-      "role": "author"
+      "role": "author",
+      "surname": "Berlin"
     },
     {
+      "given_name": "Synnøve",
       "index": 12,
       "raw_name": "Synnøve Botnen",
-      "given_name": "Synnøve",
-      "surname": "Botnen",
-      "role": "author"
+      "role": "author",
+      "surname": "Botnen"
     },
     {
+      "given_name": "Sarah",
       "index": 13,
       "raw_name": "Sarah Bourlat",
-      "given_name": "Sarah",
-      "surname": "Bourlat",
-      "role": "author"
+      "role": "author",
+      "surname": "Bourlat"
     },
     {
+      "given_name": "Tanya",
       "index": 14,
       "raw_name": "Tanya Cheeke",
-      "given_name": "Tanya",
-      "surname": "Cheeke",
-      "role": "author"
+      "role": "author",
+      "surname": "Cheeke"
     },
     {
+      "given_name": "Bálint",
       "index": 15,
       "raw_name": "Bálint Dima",
-      "given_name": "Bálint",
-      "surname": "Dima",
-      "role": "author"
+      "role": "author",
+      "surname": "Dima"
     },
     {
+      "given_name": "Rein",
       "index": 16,
       "raw_name": "Rein Drenkhan",
-      "given_name": "Rein",
-      "surname": "Drenkhan",
-      "role": "author"
+      "role": "author",
+      "surname": "Drenkhan"
     },
     {
+      "given_name": "Camila",
       "index": 17,
       "raw_name": "Camila Duarte",
-      "given_name": "Camila",
-      "surname": "Duarte",
-      "role": "author"
+      "role": "author",
+      "surname": "Duarte"
     },
     {
+      "given_name": "Margarita",
       "index": 18,
       "raw_name": "Margarita Dueñas",
-      "given_name": "Margarita",
-      "surname": "Dueñas",
-      "role": "author"
+      "role": "author",
+      "surname": "Dueñas"
     },
     {
+      "given_name": "Ursula",
       "index": 19,
       "raw_name": "Ursula Eberhardt",
-      "given_name": "Ursula",
-      "surname": "Eberhardt",
-      "role": "author"
+      "role": "author",
+      "surname": "Eberhardt"
     },
     {
+      "given_name": "Hanna",
       "index": 20,
       "raw_name": "Hanna Friberg",
-      "given_name": "Hanna",
-      "surname": "Friberg",
-      "role": "author"
+      "role": "author",
+      "surname": "Friberg"
     },
     {
+      "given_name": "Tobias G.",
       "index": 21,
       "raw_name": "Tobias G. Frøslev",
-      "given_name": "Tobias G.",
-      "surname": "Frøslev",
-      "role": "author"
+      "role": "author",
+      "surname": "Frøslev"
     },
     {
+      "given_name": "Sigisfredo",
       "index": 22,
       "raw_name": "Sigisfredo Garnica",
-      "given_name": "Sigisfredo",
-      "surname": "Garnica",
-      "role": "author"
+      "role": "author",
+      "surname": "Garnica"
     },
     {
+      "given_name": "József",
       "index": 23,
       "raw_name": "József Geml",
-      "given_name": "József",
-      "surname": "Geml",
-      "role": "author"
+      "role": "author",
+      "surname": "Geml"
     },
     {
+      "given_name": "Masoomeh",
       "index": 24,
       "raw_name": "Masoomeh Ghobad-Nejhad",
-      "given_name": "Masoomeh",
-      "surname": "Ghobad-Nejhad",
-      "role": "author"
+      "role": "author",
+      "surname": "Ghobad-Nejhad"
     },
     {
+      "given_name": "Tine",
       "index": 25,
       "raw_name": "Tine Grebenc",
-      "given_name": "Tine",
-      "surname": "Grebenc",
-      "role": "author"
+      "role": "author",
+      "surname": "Grebenc"
     },
     {
+      "given_name": "Gareth W.",
       "index": 26,
       "raw_name": "Gareth W. Griffith",
-      "given_name": "Gareth W.",
-      "surname": "Griffith",
-      "role": "author"
+      "role": "author",
+      "surname": "Griffith"
     },
     {
+      "given_name": "Felix",
       "index": 27,
       "raw_name": "Felix Hampe",
-      "given_name": "Felix",
-      "surname": "Hampe",
-      "role": "author"
+      "role": "author",
+      "surname": "Hampe"
     },
     {
+      "given_name": "Peter",
       "index": 28,
       "raw_name": "Peter Kennedy",
-      "given_name": "Peter",
-      "surname": "Kennedy",
-      "role": "author"
+      "role": "author",
+      "surname": "Kennedy"
     },
     {
+      "given_name": "Maryia",
       "index": 29,
       "raw_name": "Maryia Khomich",
-      "given_name": "Maryia",
-      "surname": "Khomich",
-      "role": "author"
+      "role": "author",
+      "surname": "Khomich"
     },
     {
+      "given_name": "Petr",
       "index": 30,
       "raw_name": "Petr Kohout",
-      "given_name": "Petr",
-      "surname": "Kohout",
-      "role": "author"
+      "role": "author",
+      "surname": "Kohout"
     },
     {
+      "given_name": "Anu",
       "index": 31,
       "raw_name": "Anu Kollom",
-      "given_name": "Anu",
-      "surname": "Kollom",
-      "role": "author"
+      "role": "author",
+      "surname": "Kollom"
     },
     {
+      "given_name": "Ellen",
       "index": 32,
       "raw_name": "Ellen Larsson",
-      "given_name": "Ellen",
-      "surname": "Larsson",
-      "role": "author"
+      "role": "author",
+      "surname": "Larsson"
     },
     {
+      "given_name": "Irinyi",
       "index": 33,
       "raw_name": "Irinyi Laszlo",
-      "given_name": "Irinyi",
-      "surname": "Laszlo",
-      "role": "author"
+      "role": "author",
+      "surname": "Laszlo"
     },
     {
+      "given_name": "Steven",
       "index": 34,
       "raw_name": "Steven Leavitt",
-      "given_name": "Steven",
-      "surname": "Leavitt",
-      "role": "author"
+      "role": "author",
+      "surname": "Leavitt"
     },
     {
+      "given_name": "Kare",
       "index": 35,
       "raw_name": "Kare Liimatainen",
-      "given_name": "Kare",
-      "surname": "Liimatainen",
-      "role": "author"
+      "role": "author",
+      "surname": "Liimatainen"
     },
     {
+      "given_name": "Björn",
       "index": 36,
       "raw_name": "Björn Lindahl",
-      "given_name": "Björn",
-      "surname": "Lindahl",
-      "role": "author"
+      "role": "author",
+      "surname": "Lindahl"
     },
     {
+      "given_name": "Deborah J.",
       "index": 37,
       "raw_name": "Deborah J. Lodge",
-      "given_name": "Deborah J.",
-      "surname": "Lodge",
-      "role": "author"
+      "role": "author",
+      "surname": "Lodge"
     },
     {
+      "given_name": "Helge Thorsten",
       "index": 38,
       "raw_name": "Helge Thorsten Lumbsch",
-      "given_name": "Helge Thorsten",
-      "surname": "Lumbsch",
-      "role": "author"
+      "role": "author",
+      "surname": "Lumbsch"
     },
     {
+      "given_name": "María Paz",
       "index": 39,
       "raw_name": "María Paz Martín Esteban",
-      "given_name": "María Paz",
-      "surname": "Martín Esteban",
-      "role": "author"
+      "role": "author",
+      "surname": "Martín Esteban"
     },
     {
+      "given_name": "Wieland",
       "index": 40,
       "raw_name": "Wieland Meyer",
-      "given_name": "Wieland",
-      "surname": "Meyer",
-      "role": "author"
+      "role": "author",
+      "surname": "Meyer"
     },
     {
+      "given_name": "Otto",
       "index": 41,
       "raw_name": "Otto Miettinen",
-      "given_name": "Otto",
-      "surname": "Miettinen",
-      "role": "author"
+      "role": "author",
+      "surname": "Miettinen"
     },
     {
+      "given_name": "Nhu",
       "index": 42,
       "raw_name": "Nhu Nguyen",
-      "given_name": "Nhu",
-      "surname": "Nguyen",
-      "role": "author"
+      "role": "author",
+      "surname": "Nguyen"
     },
     {
+      "given_name": "Tuula",
       "index": 43,
       "raw_name": "Tuula Niskanen",
-      "given_name": "Tuula",
-      "surname": "Niskanen",
-      "role": "author"
+      "role": "author",
+      "surname": "Niskanen"
     },
     {
+      "given_name": "Ryoko",
       "index": 44,
       "raw_name": "Ryoko Oono",
-      "given_name": "Ryoko",
-      "surname": "Oono",
-      "role": "author"
+      "role": "author",
+      "surname": "Oono"
     },
     {
+      "given_name": "Maarja",
       "index": 45,
       "raw_name": "Maarja Öpik",
-      "given_name": "Maarja",
-      "surname": "Öpik",
-      "role": "author"
+      "role": "author",
+      "surname": "Öpik"
     },
     {
+      "given_name": "Alexander",
       "index": 46,
       "raw_name": "Alexander Ordynets",
-      "given_name": "Alexander",
-      "surname": "Ordynets",
-      "role": "author"
+      "role": "author",
+      "surname": "Ordynets"
     },
     {
+      "given_name": "Julia",
       "index": 47,
       "raw_name": "Julia Pawłowska",
-      "given_name": "Julia",
-      "surname": "Pawłowska",
-      "role": "author"
+      "role": "author",
+      "surname": "Pawłowska"
     },
     {
+      "given_name": "Ursula",
       "index": 48,
       "raw_name": "Ursula Peintner",
-      "given_name": "Ursula",
-      "surname": "Peintner",
-      "role": "author"
+      "role": "author",
+      "surname": "Peintner"
     },
     {
+      "given_name": "Olinto Liparini",
       "index": 49,
       "raw_name": "Olinto Liparini Pereira",
-      "given_name": "Olinto Liparini",
-      "surname": "Pereira",
-      "role": "author"
+      "role": "author",
+      "surname": "Pereira"
     },
     {
+      "given_name": "Danilo Batista",
       "index": 50,
       "raw_name": "Danilo Batista Pinho",
-      "given_name": "Danilo Batista",
-      "surname": "Pinho",
-      "role": "author"
+      "role": "author",
+      "surname": "Pinho"
     },
     {
+      "given_name": "Kadri",
       "index": 51,
       "raw_name": "Kadri Põldmaa",
-      "given_name": "Kadri",
-      "surname": "Põldmaa",
-      "role": "author"
+      "role": "author",
+      "surname": "Põldmaa"
     },
     {
+      "given_name": "Kadri",
       "index": 52,
       "raw_name": "Kadri Runnel",
-      "given_name": "Kadri",
-      "surname": "Runnel",
-      "role": "author"
+      "role": "author",
+      "surname": "Runnel"
     },
     {
+      "given_name": "Martin",
       "index": 53,
       "raw_name": "Martin Ryberg",
-      "given_name": "Martin",
-      "surname": "Ryberg",
-      "role": "author"
+      "role": "author",
+      "surname": "Ryberg"
     },
     {
+      "given_name": "Irja",
       "index": 54,
       "raw_name": "Irja Saar",
-      "given_name": "Irja",
-      "surname": "Saar",
-      "role": "author"
+      "role": "author",
+      "surname": "Saar"
     },
     {
+      "given_name": "Kemal",
       "index": 55,
       "raw_name": "Kemal Sanli",
-      "given_name": "Kemal",
-      "surname": "Sanli",
-      "role": "author"
+      "role": "author",
+      "surname": "Sanli"
     },
     {
+      "given_name": "James",
       "index": 56,
       "raw_name": "James Scott",
-      "given_name": "James",
-      "surname": "Scott",
-      "role": "author"
+      "role": "author",
+      "surname": "Scott"
     },
     {
+      "given_name": "Viacheslav",
       "index": 57,
       "raw_name": "Viacheslav Spirin",
-      "given_name": "Viacheslav",
-      "surname": "Spirin",
-      "role": "author"
+      "role": "author",
+      "surname": "Spirin"
     },
     {
+      "given_name": "Ave",
       "index": 58,
       "raw_name": "Ave Suija",
-      "given_name": "Ave",
-      "surname": "Suija",
-      "role": "author"
+      "role": "author",
+      "surname": "Suija"
     },
     {
+      "given_name": "Sten",
       "index": 59,
       "raw_name": "Sten Svantesson",
-      "given_name": "Sten",
-      "surname": "Svantesson",
-      "role": "author"
+      "role": "author",
+      "surname": "Svantesson"
     },
     {
+      "given_name": "Mariusz",
       "index": 60,
       "raw_name": "Mariusz Tadych",
-      "given_name": "Mariusz",
-      "surname": "Tadych",
-      "role": "author"
+      "role": "author",
+      "surname": "Tadych"
     },
     {
+      "given_name": "Susumu",
       "index": 61,
       "raw_name": "Susumu Takamatsu",
-      "given_name": "Susumu",
-      "surname": "Takamatsu",
-      "role": "author"
+      "role": "author",
+      "surname": "Takamatsu"
     },
     {
+      "given_name": "Heidi",
       "index": 62,
       "raw_name": "Heidi Tamm",
-      "given_name": "Heidi",
-      "surname": "Tamm",
-      "role": "author"
+      "role": "author",
+      "surname": "Tamm"
     },
     {
+      "given_name": "AFS.",
       "index": 63,
       "raw_name": "AFS. Taylor",
-      "given_name": "AFS.",
-      "surname": "Taylor",
-      "role": "author"
+      "role": "author",
+      "surname": "Taylor"
     },
     {
+      "given_name": "Leho",
       "index": 64,
       "raw_name": "Leho Tedersoo",
-      "given_name": "Leho",
-      "surname": "Tedersoo",
-      "role": "author"
+      "role": "author",
+      "surname": "Tedersoo"
     },
     {
+      "given_name": "M.T.",
       "index": 65,
       "raw_name": "M.T. Telleria",
-      "given_name": "M.T.",
-      "surname": "Telleria",
-      "role": "author"
+      "role": "author",
+      "surname": "Telleria"
     },
     {
+      "given_name": "Dhanushka",
       "index": 66,
       "raw_name": "Dhanushka Udayanga",
-      "given_name": "Dhanushka",
-      "surname": "Udayanga",
-      "role": "author"
+      "role": "author",
+      "surname": "Udayanga"
     },
     {
+      "given_name": "Martin",
       "index": 67,
       "raw_name": "Martin Unterseher",
-      "given_name": "Martin",
-      "surname": "Unterseher",
-      "role": "author"
+      "role": "author",
+      "surname": "Unterseher"
     },
     {
+      "given_name": "Sergey",
       "index": 68,
       "raw_name": "Sergey Volobuev",
-      "given_name": "Sergey",
-      "surname": "Volobuev",
-      "role": "author"
+      "role": "author",
+      "surname": "Volobuev"
     },
     {
+      "given_name": "Michael",
       "index": 69,
       "raw_name": "Michael Weiss",
-      "given_name": "Michael",
-      "surname": "Weiss",
-      "role": "author"
+      "role": "author",
+      "surname": "Weiss"
     },
     {
+      "given_name": "Christian",
       "index": 70,
       "raw_name": "Christian Wurzbacher",
-      "given_name": "Christian",
-      "surname": "Wurzbacher",
-      "role": "author"
+      "role": "author",
+      "surname": "Wurzbacher"
     },
     {
       "raw_name": "Kessy Abarenkov"
@@ -531,12 +511,32 @@
       "raw_name": "NHM UT-University Of Tartu; Natural History Museum And Botanic Garden"
     }
   ],
+  "ext_ids": {
+    "doi": "10.15156/bio/sh409843.07fu"
+  },
+  "extra": {
+    "datacite": {
+      "license": [
+        {
+          "rights": "Attribution-NonCommercial (CC BY-NC)",
+          "rightsUri": "http://creativecommons.org/licenses/by-nc/4.0"
+        }
+      ],
+      "metadataVersion": 1,
+      "resourceType": "Dataset/UNITE Species Hypothesis",
+      "resourceTypeGeneral": "Dataset",
+      "schemaVersion": "http://datacite.org/schema/kernel-3"
+    },
+    "release_month": 10
+  },
+  "language": "en",
+  "license_slug": "CC-BY-NC",
+  "publisher": "UNITE Community",
   "refs": [],
-  "abstracts": [
-    {
-      "content": "UNITE provides a unified way for delimiting, identifying, communicating, and working with DNA-based Species Hypotheses (SH). All fungal ITS sequences in the international nucleotide sequence databases are clustered to approximately the species level by applying a set of dynamic distance values (<0.5 - 3.0%). All species hypotheses are given a unique, stable name in the form of a DOI, and their taxonomic and ecological annotations are verified through distributed, web-based third-party annotation efforts. SHs are connected to a taxon name and its classification as far as possible (phylum, class, order, etc.) by taking into account identifications for all sequences in the SH. An automatically or manually designated sequence is chosen to represent each such SH. These sequences are released (https://unite.ut.ee/repository.php) for use by the scientific community in, for example, local sequence similarity searches and next-generation sequencing analysis pipelines. The system and the data are updated automatically as the number of public fungal ITS sequences grows.",
-      "mimetype": "text/plain",
-      "lang": "en"
-    }
-  ]
+  "release_date": "2014-10-05",
+  "release_stage": "published",
+  "release_type": "dataset",
+  "release_year": 2014,
+  "subtitle": "Gomphales",
+  "title": "SH409843.07FU"
 }
diff --git a/python/tests/files/datacite/datacite_result_06.json b/python/tests/files/datacite/datacite_result_06.json
index 18880100..4f6cae94 100644
--- a/python/tests/files/datacite/datacite_result_06.json
+++ b/python/tests/files/datacite/datacite_result_06.json
@@ -1,4 +1,15 @@
 {
+  "abstracts": [],
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "Crispijn De Passe (Der Ältere) (1564-1637)",
+      "role": "author"
+    }
+  ],
+  "ext_ids": {
+    "doi": "10.16903/ethz-grs-d_006220"
+  },
   "extra": {
     "datacite": {
       "license": [
@@ -11,19 +22,8 @@
       "schemaVersion": "http://datacite.org/schema/kernel-3"
     }
   },
-  "title": "Der Eifer (Sedulitas), Blatt 7 der Folge \"Die Tugenden\"",
+  "refs": [],
   "release_type": "article",
   "release_year": 1590,
-  "ext_ids": {
-    "doi": "10.16903/ethz-grs-d_006220"
-  },
-  "contribs": [
-    {
-      "index": 0,
-      "raw_name": "Crispijn De Passe (Der Ältere) (1564-1637)",
-      "role": "author"
-    }
-  ],
-  "refs": [],
-  "abstracts": []
+  "title": "Der Eifer (Sedulitas), Blatt 7 der Folge \"Die Tugenden\""
 }
diff --git a/python/tests/files/datacite/datacite_result_07.json b/python/tests/files/datacite/datacite_result_07.json
index 23b63d50..2f500925 100644
--- a/python/tests/files/datacite/datacite_result_07.json
+++ b/python/tests/files/datacite/datacite_result_07.json
@@ -1,6 +1,46 @@
 {
+  "abstracts": [
+    {
+      "content": "The purpose of the ISEC concept is to provide a high-efficient heat pump system for hot water production. The ISEC concept uses two storage tanks for the water, one discharged and one charged. Hot water for the industrial process is tapped from the charged tank, while the other tank is charging. Charging is done by circulating the water in the tank through the condenser of a heat pump several times and thereby gradually heating the water. The charging is done with a higher mass flow rate than the discharging to reach several circulations of the water during the time frame of one discharging. This result in a lower condensing temperature than if the water was heated in one step. Two test setups were built, one to test the performance of the heat pump gradually heating the water and one to investigate the stratification in the storage tanks. Furthermore, a dynamic model of the system was implemented in Dymola, and validated by the use of test data from the two experimental setups. This paper shows that there is a good consistency between the model and the experimental tests.",
+      "lang": "en",
+      "mimetype": "text/plain"
+    }
+  ],
+  "contribs": [
+    {
+      "given_name": "E.",
+      "index": 0,
+      "raw_name": "E. ROTHUIZEN",
+      "role": "author",
+      "surname": "ROTHUIZEN"
+    },
+    {
+      "given_name": "B.",
+      "index": 1,
+      "raw_name": "B. ELMEGAARD",
+      "role": "author",
+      "surname": "ELMEGAARD"
+    },
+    {
+      "given_name": "B.",
+      "index": 2,
+      "raw_name": "B. MARKUSSEN W.",
+      "role": "author",
+      "surname": "MARKUSSEN W."
+    },
+    {
+      "index": 3,
+      "raw_name": "Et Al.",
+      "role": "author"
+    }
+  ],
+  "ext_ids": {
+    "doi": "10.18462/iir.icr.2015.0926"
+  },
   "extra": {
     "datacite": {
+      "resourceType": "Dataset",
+      "resourceTypeGeneral": "Dataset",
       "subjects": [
         {
           "subject": "HEAT PUMP"
@@ -23,54 +63,14 @@
         {
           "subject": "MODEL"
         }
-      ],
-      "resourceType": "Dataset",
-      "resourceTypeGeneral": "Dataset"
+      ]
     }
   },
-  "title": "High efficient heat pump system using storage tanks to increase cop by means of the ISEC concept. 1: model validation.",
-  "release_type": "dataset",
-  "release_stage": "published",
-  "release_year": 2015,
-  "ext_ids": {
-    "doi": "10.18462/iir.icr.2015.0926"
-  },
-  "publisher": "International Institute of Refrigeration (IIR)",
   "language": "en",
-  "contribs": [
-    {
-      "index": 0,
-      "raw_name": "E. ROTHUIZEN",
-      "given_name": "E.",
-      "surname": "ROTHUIZEN",
-      "role": "author"
-    },
-    {
-      "index": 1,
-      "raw_name": "B. ELMEGAARD",
-      "given_name": "B.",
-      "surname": "ELMEGAARD",
-      "role": "author"
-    },
-    {
-      "index": 2,
-      "raw_name": "B. MARKUSSEN W.",
-      "given_name": "B.",
-      "surname": "MARKUSSEN W.",
-      "role": "author"
-    },
-    {
-      "index": 3,
-      "raw_name": "Et Al.",
-      "role": "author"
-    }
-  ],
+  "publisher": "International Institute of Refrigeration (IIR)",
   "refs": [],
-  "abstracts": [
-    {
-      "content": "The purpose of the ISEC concept is to provide a high-efficient heat pump system for hot water production. The ISEC concept uses two storage tanks for the water, one discharged and one charged. Hot water for the industrial process is tapped from the charged tank, while the other tank is charging. Charging is done by circulating the water in the tank through the condenser of a heat pump several times and thereby gradually heating the water. The charging is done with a higher mass flow rate than the discharging to reach several circulations of the water during the time frame of one discharging. This result in a lower condensing temperature than if the water was heated in one step. Two test setups were built, one to test the performance of the heat pump gradually heating the water and one to investigate the stratification in the storage tanks. Furthermore, a dynamic model of the system was implemented in Dymola, and validated by the use of test data from the two experimental setups. This paper shows that there is a good consistency between the model and the experimental tests.",
-      "mimetype": "text/plain",
-      "lang": "en"
-    }
-  ]
+  "release_stage": "published",
+  "release_type": "dataset",
+  "release_year": 2015,
+  "title": "High efficient heat pump system using storage tanks to increase cop by means of the ISEC concept. 1: model validation."
 }
diff --git a/python/tests/files/datacite/datacite_result_08.json b/python/tests/files/datacite/datacite_result_08.json
index ff942d0a..70237280 100644
--- a/python/tests/files/datacite/datacite_result_08.json
+++ b/python/tests/files/datacite/datacite_result_08.json
@@ -1,6 +1,35 @@
 {
+  "abstracts": [
+    {
+      "content": "International society recognizes that the scarcity of fresh water is increasing and farming sectors suffer from lack of irrigation water. However, if we look at this issue with a framework of relative factor endowment, a different view will arise. In emerging states with rapid industrialization and labor migration, labor scarcity increases at a faster pace than that of irrigation water. Using the historical review of Japan's irrigation policies as well as the case studies of India and China, this paper shows that the introduction of policies which do not reflect the actual relative resource scarcity may mislead the development path. We argue that under increasing relative labor scarcity it is important to realize the substitution of capital for labor for surface irrigation system management and that the substitution needs public support because the service of surface irrigation system has some externalities. Through this argument, this paper also intends to shed the light back to the role of the state for local resource management which seems to be unfairly undervalued since the boom of community participatory approach in the 1980s.",
+      "lang": "en",
+      "mimetype": "text/plain"
+    }
+  ],
+  "contribs": [
+    {
+      "given_name": "Kei",
+      "index": 0,
+      "raw_name": "Kei Kajisa",
+      "role": "author",
+      "surname": "Kajisa"
+    },
+    {
+      "given_name": "Kei",
+      "index": 1,
+      "raw_name": "Kei Kajisa",
+      "role": "author",
+      "surname": "Kajisa"
+    }
+  ],
+  "ext_ids": {
+    "doi": "10.22004/ag.econ.284864"
+  },
   "extra": {
     "datacite": {
+      "metadataVersion": 1,
+      "resourceType": "Text",
+      "resourceTypeGeneral": "Text",
       "subjects": [
         {
           "subject": "Land Economics/Use"
@@ -17,41 +46,12 @@
           "subject": "collective action",
           "subjectScheme": "keyword"
         }
-      ],
-      "metadataVersion": 1,
-      "resourceType": "Text",
-      "resourceTypeGeneral": "Text"
+      ]
     }
   },
-  "title": "Irrigation Policies under Rapid Industrialization and Labor Migration: Lessons from Japan, China and India",
-  "release_type": "article-journal",
-  "release_year": 2017,
-  "ext_ids": {
-    "doi": "10.22004/ag.econ.284864"
-  },
   "language": "en",
-  "contribs": [
-    {
-      "index": 0,
-      "raw_name": "Kei Kajisa",
-      "given_name": "Kei",
-      "surname": "Kajisa",
-      "role": "author"
-    },
-    {
-      "index": 1,
-      "raw_name": "Kei Kajisa",
-      "given_name": "Kei",
-      "surname": "Kajisa",
-      "role": "author"
-    }
-  ],
   "refs": [],
-  "abstracts": [
-    {
-      "content": "International society recognizes that the scarcity of fresh water is increasing and farming sectors suffer from lack of irrigation water. However, if we look at this issue with a framework of relative factor endowment, a different view will arise. In emerging states with rapid industrialization and labor migration, labor scarcity increases at a faster pace than that of irrigation water. Using the historical review of Japan's irrigation policies as well as the case studies of India and China, this paper shows that the introduction of policies which do not reflect the actual relative resource scarcity may mislead the development path. We argue that under increasing relative labor scarcity it is important to realize the substitution of capital for labor for surface irrigation system management and that the substitution needs public support because the service of surface irrigation system has some externalities. Through this argument, this paper also intends to shed the light back to the role of the state for local resource management which seems to be unfairly undervalued since the boom of community participatory approach in the 1980s.",
-      "mimetype": "text/plain",
-      "lang": "en"
-    }
-  ]
+  "release_type": "article-journal",
+  "release_year": 2017,
+  "title": "Irrigation Policies under Rapid Industrialization and Labor Migration: Lessons from Japan, China and India"
 }
diff --git a/python/tests/files/datacite/datacite_result_09.json b/python/tests/files/datacite/datacite_result_09.json
index c93dc769..79571360 100644
--- a/python/tests/files/datacite/datacite_result_09.json
+++ b/python/tests/files/datacite/datacite_result_09.json
@@ -1,37 +1,12 @@
 {
-  "extra": {
-    "datacite": {
-      "subjects": [
-        {
-          "subject": "Direktdiodenlasersysteme"
-        },
-        {
-          "subject": "Physics",
-          "subjectScheme": "linsearch"
-        }
-      ],
-      "metadataVersion": 9,
-      "resourceType": "Report",
-      "resourceTypeGeneral": "Text",
-      "schemaVersion": "http://datacite.org/schema/kernel-4"
-    }
-  },
-  "title": "BrightLas : TP3.3. Module für Direktdiodenstrahlquellen bis 4kW und Untersuchungen zur Leistungsskalierung (Diodemodul) : zum Verbundvorhaben Direktdiodenlaseranlagen und -systeme (VP3) im Förderschwerpunkt innovative regionale Wachstumskerne, BMBF : Abschlussbericht",
-  "release_type": "report",
-  "release_stage": "published",
-  "release_year": 2016,
-  "ext_ids": {
-    "doi": "10.2314/gbv:880813733"
-  },
-  "publisher": "[Lumics GmbH]",
-  "language": "de",
+  "abstracts": [],
   "contribs": [
     {
+      "given_name": "Nils",
       "index": 0,
       "raw_name": "Nils Kirstaedter",
-      "given_name": "Nils",
-      "surname": "Kirstaedter",
-      "role": "author"
+      "role": "author",
+      "surname": "Kirstaedter"
     },
     {
       "extra": {
@@ -39,13 +14,38 @@
       }
     },
     {
-      "raw_name": "Technische Informationsbibliothek (TIB)",
       "extra": {
         "type": "DataManager"
-      }
+      },
+      "raw_name": "Technische Informationsbibliothek (TIB)"
     }
   ],
+  "ext_ids": {
+    "doi": "10.2314/gbv:880813733"
+  },
+  "extra": {
+    "datacite": {
+      "metadataVersion": 9,
+      "resourceType": "Report",
+      "resourceTypeGeneral": "Text",
+      "schemaVersion": "http://datacite.org/schema/kernel-4",
+      "subjects": [
+        {
+          "subject": "Direktdiodenlasersysteme"
+        },
+        {
+          "subject": "Physics",
+          "subjectScheme": "linsearch"
+        }
+      ]
+    }
+  },
+  "language": "de",
+  "publisher": "[Lumics GmbH]",
   "refs": [],
-  "abstracts": [],
+  "release_stage": "published",
+  "release_type": "report",
+  "release_year": 2016,
+  "title": "BrightLas : TP3.3. Module für Direktdiodenstrahlquellen bis 4kW und Untersuchungen zur Leistungsskalierung (Diodemodul) : zum Verbundvorhaben Direktdiodenlaseranlagen und -systeme (VP3) im Förderschwerpunkt innovative regionale Wachstumskerne, BMBF : Abschlussbericht",
   "version": "1.0"
 }
diff --git a/python/tests/files/datacite/datacite_result_10.json b/python/tests/files/datacite/datacite_result_10.json
index 8dea8957..1d39feb0 100644
--- a/python/tests/files/datacite/datacite_result_10.json
+++ b/python/tests/files/datacite/datacite_result_10.json
@@ -1,6 +1,20 @@
 {
+  "abstracts": [],
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "Unknown",
+      "role": "author"
+    }
+  ],
+  "ext_ids": {
+    "doi": "10.25549/wpacards-m6171"
+  },
   "extra": {
     "datacite": {
+      "resourceType": "Dataset",
+      "resourceTypeGeneral": "Dataset",
+      "schemaVersion": "http://datacite.org/schema/kernel-4",
       "subjects": [
         {
           "subject": "housing areas"
@@ -8,28 +22,14 @@
         {
           "subject": "Dwellings"
         }
-      ],
-      "resourceType": "Dataset",
-      "resourceTypeGeneral": "Dataset",
-      "schemaVersion": "http://datacite.org/schema/kernel-4"
+      ]
     }
   },
-  "title": "WPA household census for 210 E VERNON, Los Angeles",
-  "release_type": "dataset",
-  "release_stage": "published",
-  "release_year": 2012,
-  "ext_ids": {
-    "doi": "10.25549/wpacards-m6171"
-  },
-  "publisher": "University of Southern California Digital Library (USC.DL)",
   "language": "en",
-  "contribs": [
-    {
-      "index": 0,
-      "raw_name": "Unknown",
-      "role": "author"
-    }
-  ],
+  "publisher": "University of Southern California Digital Library (USC.DL)",
   "refs": [],
-  "abstracts": []
+  "release_stage": "published",
+  "release_type": "dataset",
+  "release_year": 2012,
+  "title": "WPA household census for 210 E VERNON, Los Angeles"
 }
diff --git a/python/tests/files/datacite/datacite_result_11.json b/python/tests/files/datacite/datacite_result_11.json
index 944ca718..761a99c9 100644
--- a/python/tests/files/datacite/datacite_result_11.json
+++ b/python/tests/files/datacite/datacite_result_11.json
@@ -1,4 +1,15 @@
 {
+  "abstracts": [],
+  "contribs": [
+    {
+      "index": 0,
+      "raw_name": "Comet Photo AG (Zürich)",
+      "role": "author"
+    }
+  ],
+  "ext_ids": {
+    "doi": "10.3932/ethz-a-000055869"
+  },
   "extra": {
     "datacite": {
       "metadataVersion": 6,
@@ -6,22 +17,11 @@
       "schemaVersion": "http://datacite.org/schema/kernel-3"
     }
   },
-  "title": "N1 bei Safenwil",
-  "release_type": "graphic",
-  "release_stage": "published",
-  "release_year": 1965,
-  "ext_ids": {
-    "doi": "10.3932/ethz-a-000055869"
-  },
-  "publisher": "ETH-Bibliothek Zürich, Bildarchiv",
   "language": "de",
-  "contribs": [
-    {
-      "index": 0,
-      "raw_name": "Comet Photo AG (Zürich)",
-      "role": "author"
-    }
-  ],
+  "publisher": "ETH-Bibliothek Zürich, Bildarchiv",
   "refs": [],
-  "abstracts": []
+  "release_stage": "published",
+  "release_type": "graphic",
+  "release_year": 1965,
+  "title": "N1 bei Safenwil"
 }
diff --git a/python/tests/files/datacite/datacite_result_12.json b/python/tests/files/datacite/datacite_result_12.json
index 6977ecea..4e966d6c 100644
--- a/python/tests/files/datacite/datacite_result_12.json
+++ b/python/tests/files/datacite/datacite_result_12.json
@@ -1,49 +1,49 @@
 {
-  "extra": {
-    "datacite": {
-      "resourceTypeGeneral": "Text"
-    },
-    "release_month": 6
-  },
-  "title": "Anthropometric and Physiological Profile of Mixed Martial Art Athletes: A Brief Review",
-  "release_type": "article-journal",
-  "release_stage": "published",
-  "release_date": "2019-06-14",
-  "release_year": 2019,
-  "ext_ids": {
-    "doi": "10.5167/uzh-171449"
-  },
-  "publisher": "MDPI Publishing",
+  "abstracts": [],
   "contribs": [
     {
+      "given_name": "Charalampos",
       "index": 0,
       "raw_name": "Charalampos Spanias",
-      "given_name": "Charalampos",
-      "surname": "Spanias",
-      "role": "author"
+      "role": "author",
+      "surname": "Spanias"
     },
     {
+      "given_name": "Pantelis T",
       "index": 1,
       "raw_name": "Pantelis T Nikolaidis",
-      "given_name": "Pantelis T",
-      "surname": "Nikolaidis",
-      "role": "author"
+      "role": "author",
+      "surname": "Nikolaidis"
     },
     {
+      "given_name": "Thomas",
       "index": 2,
       "raw_name": "Thomas Rosemann",
-      "given_name": "Thomas",
-      "surname": "Rosemann",
-      "role": "author"
+      "role": "author",
+      "surname": "Rosemann"
     },
     {
+      "given_name": "Beat",
       "index": 3,
       "raw_name": "Beat Knechtle",
-      "given_name": "Beat",
-      "surname": "Knechtle",
-      "role": "author"
+      "role": "author",
+      "surname": "Knechtle"
     }
   ],
+  "ext_ids": {
+    "doi": "10.5167/uzh-171449"
+  },
+  "extra": {
+    "datacite": {
+      "resourceTypeGeneral": "Text"
+    },
+    "release_month": 6
+  },
+  "publisher": "MDPI Publishing",
   "refs": [],
-  "abstracts": []
+  "release_date": "2019-06-14",
+  "release_stage": "published",
+  "release_type": "article-journal",
+  "release_year": 2019,
+  "title": "Anthropometric and Physiological Profile of Mixed Martial Art Athletes: A Brief Review"
 }
diff --git a/python/tests/files/datacite/datacite_result_13.json b/python/tests/files/datacite/datacite_result_13.json
index 91126c5a..923f2ea8 100644
--- a/python/tests/files/datacite/datacite_result_13.json
+++ b/python/tests/files/datacite/datacite_result_13.json
@@ -1,22 +1,5 @@
 {
-  "extra": {
-    "datacite": {
-      "metadataVersion": 17,
-      "resourceType": "Journal Article",
-      "resourceTypeGeneral": "Text",
-      "schemaVersion": "http://datacite.org/schema/kernel-3"
-    },
-    "release_month": 10
-  },
-  "title": "[Müssen wir des Glücks uns schämen?]",
-  "release_type": "article-journal",
-  "release_stage": "published",
-  "release_date": "1940-10-05",
-  "release_year": 1940,
-  "ext_ids": {
-    "doi": "10.5169/seals-314104"
-  },
-  "publisher": "Buchdruckerei Büchler & Co.",
+  "abstracts": [],
   "contribs": [
     {
       "index": 0,
@@ -24,13 +7,30 @@
       "role": "author"
     },
     {
+      "given_name": "Hermann",
       "index": 1,
       "raw_name": "Hermann Hiltbrunner",
-      "given_name": "Hermann",
-      "surname": "Hiltbrunner",
-      "role": "author"
+      "role": "author",
+      "surname": "Hiltbrunner"
     }
   ],
+  "ext_ids": {
+    "doi": "10.5169/seals-314104"
+  },
+  "extra": {
+    "datacite": {
+      "metadataVersion": 17,
+      "resourceType": "Journal Article",
+      "resourceTypeGeneral": "Text",
+      "schemaVersion": "http://datacite.org/schema/kernel-3"
+    },
+    "release_month": 10
+  },
+  "publisher": "Buchdruckerei Büchler & Co.",
   "refs": [],
-  "abstracts": []
+  "release_date": "1940-10-05",
+  "release_stage": "published",
+  "release_type": "article-journal",
+  "release_year": 1940,
+  "title": "[Müssen wir des Glücks uns schämen?]"
 }
diff --git a/python/tests/files/datacite/datacite_result_14.json b/python/tests/files/datacite/datacite_result_14.json
index 20f6bfd4..2ce68d29 100644
--- a/python/tests/files/datacite/datacite_result_14.json
+++ b/python/tests/files/datacite/datacite_result_14.json
@@ -1,114 +1,114 @@
 {
-  "extra": {
-    "datacite": {
-      "subjects": [
-        {
-          "subject": "Crystal Structure"
-        },
-        {
-          "subject": "Experimental 3D Coordinates"
-        },
-        {
-          "subject": "Crystal System"
-        },
-        {
-          "subject": "Space Group"
-        },
-        {
-          "subject": "Cell Parameters"
-        },
-        {
-          "subject": "Crystallography"
-        },
-        {
-          "subject": "bis(mu~2~-5-(3,5-Di-t-butylphenyl)-15-(4-(2-(diphenylphosphino)ethynyl)phenyl)-2,8,12,18-tetrahexyl-3,7,13,17-tetramethylporphyrinato)-(5,15-bis(3,5-di-t-butylphenyl)-2,8,12,18-tetraethyl-3,7,13,17-tetramethylporphyrinato)-di-nickel-ruthenium chloroform solvate"
-        }
-      ],
-      "relations": [
-        {
-          "relationType": "IsSupplementTo",
-          "relatedIdentifier": "10.1021/ic034699w",
-          "relatedIdentifierType": "DOI"
-        }
-      ],
-      "metadataVersion": 2,
-      "resourceTypeGeneral": "Dataset",
-      "schemaVersion": "http://datacite.org/schema/kernel-3"
+  "abstracts": [
+    {
+      "content": "An entry from the Cambridge Structural Database, the world's repository for small molecule crystal structures. The entry contains experimental data from a crystal diffraction study. The deposited dataset for this entry is freely available from the CCDC and typically includes 3D coordinates, cell parameters, space group, experimental conditions and quality measures.",
+      "lang": "en",
+      "mimetype": "text/plain"
     }
-  },
-  "title": "CCDC 222635: Experimental Crystal Structure Determination",
-  "release_type": "entry",
-  "release_stage": "published",
-  "release_year": 2004,
-  "ext_ids": {
-    "doi": "10.5517/cc7gns3"
-  },
-  "publisher": "Cambridge Crystallographic Data Centre",
-  "language": "en",
+  ],
   "contribs": [
     {
+      "given_name": "E.",
       "index": 0,
       "raw_name": "E. Stulz",
-      "given_name": "E.",
-      "surname": "Stulz",
-      "role": "author"
+      "role": "author",
+      "surname": "Stulz"
     },
     {
+      "given_name": "S.M.",
       "index": 1,
       "raw_name": "S.M. Scott",
-      "given_name": "S.M.",
-      "surname": "Scott",
-      "role": "author"
+      "role": "author",
+      "surname": "Scott"
     },
     {
+      "given_name": "Yiu-Fai",
       "index": 2,
       "raw_name": "Yiu-Fai Ng",
-      "given_name": "Yiu-Fai",
-      "surname": "Ng",
-      "role": "author"
+      "role": "author",
+      "surname": "Ng"
     },
     {
+      "given_name": "A.D.",
       "index": 3,
       "raw_name": "A.D. Bond",
-      "given_name": "A.D.",
-      "surname": "Bond",
-      "role": "author"
+      "role": "author",
+      "surname": "Bond"
     },
     {
+      "given_name": "S.J.",
       "index": 4,
       "raw_name": "S.J. Teat",
-      "given_name": "S.J.",
-      "surname": "Teat",
-      "role": "author"
+      "role": "author",
+      "surname": "Teat"
     },
     {
+      "given_name": "S.L.",
       "index": 5,
       "raw_name": "S.L. Darling",
-      "given_name": "S.L.",
-      "surname": "Darling",
-      "role": "author"
+      "role": "author",
+      "surname": "Darling"
     },
     {
+      "given_name": "N.",
       "index": 6,
       "raw_name": "N. Feeder",
-      "given_name": "N.",
-      "surname": "Feeder",
-      "role": "author"
+      "role": "author",
+      "surname": "Feeder"
     },
     {
+      "given_name": "J.K.M.",
       "index": 7,
       "raw_name": "J.K.M. Sanders",
-      "given_name": "J.K.M.",
-      "surname": "Sanders",
-      "role": "author"
+      "role": "author",
+      "surname": "Sanders"
     }
   ],
-  "refs": [],
-  "abstracts": [
-    {
-      "content": "An entry from the Cambridge Structural Database, the world's repository for small molecule crystal structures. The entry contains experimental data from a crystal diffraction study. The deposited dataset for this entry is freely available from the CCDC and typically includes 3D coordinates, cell parameters, space group, experimental conditions and quality measures.",
-      "mimetype": "text/plain",
-      "lang": "en"
+  "ext_ids": {
+    "doi": "10.5517/cc7gns3"
+  },
+  "extra": {
+    "datacite": {
+      "metadataVersion": 2,
+      "relations": [
+        {
+          "relatedIdentifier": "10.1021/ic034699w",
+          "relatedIdentifierType": "DOI",
+          "relationType": "IsSupplementTo"
+        }
+      ],
+      "resourceTypeGeneral": "Dataset",
+      "schemaVersion": "http://datacite.org/schema/kernel-3",
+      "subjects": [
+        {
+          "subject": "Crystal Structure"
+        },
+        {
+          "subject": "Experimental 3D Coordinates"
+        },
+        {
+          "subject": "Crystal System"
+        },
+        {
+          "subject": "Space Group"
+        },
+        {
+          "subject": "Cell Parameters"
+        },
+        {
+          "subject": "Crystallography"
+        },
+        {
+          "subject": "bis(mu~2~-5-(3,5-Di-t-butylphenyl)-15-(4-(2-(diphenylphosphino)ethynyl)phenyl)-2,8,12,18-tetrahexyl-3,7,13,17-tetramethylporphyrinato)-(5,15-bis(3,5-di-t-butylphenyl)-2,8,12,18-tetraethyl-3,7,13,17-tetramethylporphyrinato)-di-nickel-ruthenium chloroform solvate"
+        }
+      ]
     }
-  ]
+  },
+  "language": "en",
+  "publisher": "Cambridge Crystallographic Data Centre",
+  "refs": [],
+  "release_stage": "published",
+  "release_type": "entry",
+  "release_year": 2004,
+  "title": "CCDC 222635: Experimental Crystal Structure Determination"
 }
diff --git a/python/tests/files/datacite/datacite_result_15.json b/python/tests/files/datacite/datacite_result_15.json
index 3a03dfb6..5e7180c4 100644
--- a/python/tests/files/datacite/datacite_result_15.json
+++ b/python/tests/files/datacite/datacite_result_15.json
@@ -1,4 +1,17 @@
 {
+  "abstracts": [],
+  "contribs": [
+    {
+      "given_name": "David",
+      "index": 0,
+      "raw_name": "David Richardson",
+      "role": "author",
+      "surname": "Richardson"
+    }
+  ],
+  "ext_ids": {
+    "doi": "10.6073/pasta/95296d8416aae24f3d39b4ecb27f0b28"
+  },
   "extra": {
     "datacite": {
       "metadataVersion": 1,
@@ -7,23 +20,10 @@
       "schemaVersion": "http://datacite.org/schema/kernel-2.2"
     }
   },
-  "title": "Parramore Island of the Virginia Coast Reserve Permanent Plot Resurvey: Tree data 1997",
-  "release_type": "dataset",
-  "release_stage": "published",
-  "release_year": 2017,
-  "ext_ids": {
-    "doi": "10.6073/pasta/95296d8416aae24f3d39b4ecb27f0b28"
-  },
   "publisher": "Environmental Data Initiative",
-  "contribs": [
-    {
-      "index": 0,
-      "raw_name": "David Richardson",
-      "given_name": "David",
-      "surname": "Richardson",
-      "role": "author"
-    }
-  ],
   "refs": [],
-  "abstracts": []
+  "release_stage": "published",
+  "release_type": "dataset",
+  "release_year": 2017,
+  "title": "Parramore Island of the Virginia Coast Reserve Permanent Plot Resurvey: Tree data 1997"
 }
diff --git a/python/tests/files/datacite/datacite_result_16.json b/python/tests/files/datacite/datacite_result_16.json
index 8cf762b6..dc9d18af 100644
--- a/python/tests/files/datacite/datacite_result_16.json
+++ b/python/tests/files/datacite/datacite_result_16.json
@@ -1,4 +1,17 @@
 {
+  "abstracts": [],
+  "contribs": [
+    {
+      "given_name": "Taha",
+      "index": 0,
+      "raw_name": "Taha Sochi",
+      "role": "author",
+      "surname": "Sochi"
+    }
+  ],
+  "ext_ids": {
+    "doi": "10.6084/m9.figshare.1282478"
+  },
   "extra": {
     "datacite": {
       "license": [
@@ -12,23 +25,10 @@
       "schemaVersion": "http://datacite.org/schema/kernel-3"
     }
   },
-  "title": "Testing the Connectivity of Networks",
-  "release_type": "dataset",
-  "release_stage": "published",
-  "release_year": 2014,
-  "ext_ids": {
-    "doi": "10.6084/m9.figshare.1282478"
-  },
   "publisher": "Figshare",
-  "contribs": [
-    {
-      "index": 0,
-      "raw_name": "Taha Sochi",
-      "given_name": "Taha",
-      "surname": "Sochi",
-      "role": "author"
-    }
-  ],
   "refs": [],
-  "abstracts": []
+  "release_stage": "published",
+  "release_type": "dataset",
+  "release_year": 2014,
+  "title": "Testing the Connectivity of Networks"
 }
diff --git a/python/tests/files/datacite/datacite_result_17.json b/python/tests/files/datacite/datacite_result_17.json
index 6e8c4e34..0f768179 100644
--- a/python/tests/files/datacite/datacite_result_17.json
+++ b/python/tests/files/datacite/datacite_result_17.json
@@ -1,18 +1,5 @@
 {
-  "extra": {
-    "datacite": {
-      "resourceTypeGeneral": "Dataset",
-      "schemaVersion": "http://datacite.org/schema/kernel-4"
-    }
-  },
-  "title": "gel_BSA-FITC_Markov_segmntation0343.tif",
-  "release_type": "dataset",
-  "release_stage": "published",
-  "release_year": 2018,
-  "ext_ids": {
-    "doi": "10.7910/dvn/tsqfwc/yytj22"
-  },
-  "publisher": "Harvard Dataverse",
+  "abstracts": [],
   "contribs": [
     {
       "index": 0,
@@ -20,6 +7,19 @@
       "role": "author"
     }
   ],
+  "ext_ids": {
+    "doi": "10.7910/dvn/tsqfwc/yytj22"
+  },
+  "extra": {
+    "datacite": {
+      "resourceTypeGeneral": "Dataset",
+      "schemaVersion": "http://datacite.org/schema/kernel-4"
+    }
+  },
+  "publisher": "Harvard Dataverse",
   "refs": [],
-  "abstracts": []
+  "release_stage": "published",
+  "release_type": "dataset",
+  "release_year": 2018,
+  "title": "gel_BSA-FITC_Markov_segmntation0343.tif"
 }
diff --git a/python/tests/files/datacite/datacite_result_18.json b/python/tests/files/datacite/datacite_result_18.json
index 6e69bad2..7f2d2792 100644
--- a/python/tests/files/datacite/datacite_result_18.json
+++ b/python/tests/files/datacite/datacite_result_18.json
@@ -1,4 +1,9 @@
 {
+  "abstracts": [],
+  "contribs": [],
+  "ext_ids": {
+    "doi": "10.7916/d81z522m"
+  },
   "extra": {
     "datacite": {
       "metadataVersion": 2,
@@ -6,16 +11,11 @@
     },
     "release_month": 8
   },
-  "title": "Eastern questionnaire, answer sheet for Interviewee 53215, page 064",
-  "release_type": "article",
-  "release_stage": "published",
-  "release_date": "2017-08-21",
-  "release_year": 2017,
-  "ext_ids": {
-    "doi": "10.7916/d81z522m"
-  },
   "publisher": "Columbia University",
-  "contribs": [],
   "refs": [],
-  "abstracts": []
+  "release_date": "2017-08-21",
+  "release_stage": "published",
+  "release_type": "article",
+  "release_year": 2017,
+  "title": "Eastern questionnaire, answer sheet for Interviewee 53215, page 064"
 }
diff --git a/python/tests/files/datacite/datacite_result_19.json b/python/tests/files/datacite/datacite_result_19.json
index 2f2f217e..4ff00a56 100644
--- a/python/tests/files/datacite/datacite_result_19.json
+++ b/python/tests/files/datacite/datacite_result_19.json
@@ -1,4 +1,9 @@
 {
+  "abstracts": [],
+  "contribs": [],
+  "ext_ids": {
+    "doi": "10.7916/d86x0cg1"
+  },
   "extra": {
     "datacite": {
       "metadataVersion": 3,
@@ -6,16 +11,11 @@
     },
     "release_month": 8
   },
-  "title": "Eastern questionnaire, answer sheet for Interviewee 55236, page 092",
-  "release_type": "article",
-  "release_stage": "published",
-  "release_date": "2017-08-24",
-  "release_year": 2017,
-  "ext_ids": {
-    "doi": "10.7916/d86x0cg1"
-  },
   "publisher": "Columbia University",
-  "contribs": [],
   "refs": [],
-  "abstracts": []
+  "release_date": "2017-08-24",
+  "release_stage": "published",
+  "release_type": "article",
+  "release_year": 2017,
+  "title": "Eastern questionnaire, answer sheet for Interviewee 55236, page 092"
 }
diff --git a/python/tests/files/datacite/datacite_result_20.json b/python/tests/files/datacite/datacite_result_20.json
index 0f99e2a2..5a6d3473 100644
--- a/python/tests/files/datacite/datacite_result_20.json
+++ b/python/tests/files/datacite/datacite_result_20.json
@@ -1,17 +1,17 @@
 {
+  "abstracts": [],
+  "contribs": [],
+  "ext_ids": {
+    "doi": "10.7916/d86x0cg1"
+  },
   "extra": {
     "datacite": {},
     "release_month": 8
   },
-  "title": "<h1>Eastern questionnaire</h1>",
-  "release_type": "article",
-  "release_stage": "published",
+  "refs": [],
   "release_date": "2017-08-24",
+  "release_stage": "published",
+  "release_type": "article",
   "release_year": 2017,
-  "ext_ids": {
-    "doi": "10.7916/d86x0cg1"
-  },
-  "contribs": [],
-  "refs": [],
-  "abstracts": []
+  "title": "<h1>Eastern questionnaire</h1>"
 }
diff --git a/python/tests/files/datacite/datacite_result_21.json b/python/tests/files/datacite/datacite_result_21.json
index 3dfcf1bf..54c22538 100644
--- a/python/tests/files/datacite/datacite_result_21.json
+++ b/python/tests/files/datacite/datacite_result_21.json
@@ -1,18 +1,18 @@
 {
+  "abstracts": [],
+  "contribs": [],
+  "ext_ids": {
+    "doi": "10.7916/d86x0cg1"
+  },
   "extra": {
     "datacite": {},
     "release_month": 8
   },
-  "title": "ABC",
-  "release_type": "article",
-  "release_stage": "published",
-  "release_date": "2017-08-24",
-  "release_year": 2017,
-  "ext_ids": {
-    "doi": "10.7916/d86x0cg1"
-  },
   "language": "de",
-  "contribs": [],
   "refs": [],
-  "abstracts": []
+  "release_date": "2017-08-24",
+  "release_stage": "published",
+  "release_type": "article",
+  "release_year": 2017,
+  "title": "ABC"
 }
diff --git a/python/tests/files/datacite/datacite_result_22.json b/python/tests/files/datacite/datacite_result_22.json
index bd88c358..913fbbb6 100644
--- a/python/tests/files/datacite/datacite_result_22.json
+++ b/python/tests/files/datacite/datacite_result_22.json
@@ -1,25 +1,25 @@
 {
-  "extra": {
-    "datacite": {},
-    "release_month": 8
-  },
-  "title": "ABC",
-  "release_type": "article",
-  "release_stage": "published",
-  "release_date": "2017-08-24",
-  "release_year": 2017,
-  "ext_ids": {
-    "doi": "10.7916/d86x0cg1"
-  },
-  "language": "de",
+  "abstracts": [],
   "contribs": [
     {
       "index": 0,
+      "raw_affiliation": "Department of pataphysics",
       "raw_name": "Anton Welch",
-      "role": "author",
-      "raw_affiliation": "Department of pataphysics"
+      "role": "author"
     }
   ],
+  "ext_ids": {
+    "doi": "10.7916/d86x0cg1"
+  },
+  "extra": {
+    "datacite": {},
+    "release_month": 8
+  },
+  "language": "de",
   "refs": [],
-  "abstracts": []
+  "release_date": "2017-08-24",
+  "release_stage": "published",
+  "release_type": "article",
+  "release_year": 2017,
+  "title": "ABC"
 }
diff --git a/python/tests/files/datacite/datacite_result_23.json b/python/tests/files/datacite/datacite_result_23.json
index e82925af..0ac6a06d 100644
--- a/python/tests/files/datacite/datacite_result_23.json
+++ b/python/tests/files/datacite/datacite_result_23.json
@@ -1,25 +1,25 @@
 {
-  "extra": {
-    "datacite": {},
-    "release_month": 8
-  },
-  "title": "ABC",
-  "release_type": "article",
-  "release_stage": "published",
-  "release_date": "2017-08-24",
-  "release_year": 2017,
-  "ext_ids": {
-    "doi": "10.7916/d86x0cg1-xxx"
-  },
-  "language": "de",
+  "abstracts": [],
   "contribs": [
     {
       "index": 0,
+      "raw_affiliation": "Department of pataphysics",
       "raw_name": "Anton Welch",
-      "role": "author",
-      "raw_affiliation": "Department of pataphysics"
+      "role": "author"
     }
   ],
+  "ext_ids": {
+    "doi": "10.7916/d86x0cg1-xxx"
+  },
+  "extra": {
+    "datacite": {},
+    "release_month": 8
+  },
+  "language": "de",
   "refs": [],
-  "abstracts": []
+  "release_date": "2017-08-24",
+  "release_stage": "published",
+  "release_type": "article",
+  "release_year": 2017,
+  "title": "ABC"
 }
diff --git a/python/tests/files/datacite/datacite_result_24.json b/python/tests/files/datacite/datacite_result_24.json
index 2d95d300..cd9898f9 100644
--- a/python/tests/files/datacite/datacite_result_24.json
+++ b/python/tests/files/datacite/datacite_result_24.json
@@ -1,25 +1,25 @@
 {
-  "extra": {
-    "datacite": {},
-    "release_month": 8
-  },
-  "title": "ABC",
-  "subtitle": "DEF",
-  "release_type": "article",
-  "release_stage": "published",
-  "release_date": "2017-08-24",
-  "release_year": 2017,
-  "ext_ids": {
-    "doi": "10.7916/d86x0cg1"
-  },
+  "abstracts": [],
   "contribs": [
     {
       "index": 0,
+      "raw_affiliation": "Department of pataphysics",
       "raw_name": "Anton Welch",
-      "role": "author",
-      "raw_affiliation": "Department of pataphysics"
+      "role": "author"
     }
   ],
+  "ext_ids": {
+    "doi": "10.7916/d86x0cg1"
+  },
+  "extra": {
+    "datacite": {},
+    "release_month": 8
+  },
   "refs": [],
-  "abstracts": []
+  "release_date": "2017-08-24",
+  "release_stage": "published",
+  "release_type": "article",
+  "release_year": 2017,
+  "subtitle": "DEF",
+  "title": "ABC"
 }
diff --git a/python/tests/files/datacite/datacite_result_25.json b/python/tests/files/datacite/datacite_result_25.json
index aad6d17e..6a29e8de 100644
--- a/python/tests/files/datacite/datacite_result_25.json
+++ b/python/tests/files/datacite/datacite_result_25.json
@@ -1,25 +1,25 @@
 {
-  "extra": {
-    "datacite": {},
-    "release_month": 8
-  },
-  "title": "Additional file 123: ABC",
-  "subtitle": "DEF",
-  "release_type": "stub",
-  "release_stage": "published",
-  "release_date": "2017-08-24",
-  "release_year": 2017,
-  "ext_ids": {
-    "doi": "10.7916/d86x0cg1"
-  },
+  "abstracts": [],
   "contribs": [
     {
       "index": 0,
+      "raw_affiliation": "Department of pataphysics",
       "raw_name": "Anton Welch",
-      "role": "author",
-      "raw_affiliation": "Department of pataphysics"
+      "role": "author"
     }
   ],
+  "ext_ids": {
+    "doi": "10.7916/d86x0cg1"
+  },
+  "extra": {
+    "datacite": {},
+    "release_month": 8
+  },
   "refs": [],
-  "abstracts": []
+  "release_date": "2017-08-24",
+  "release_stage": "published",
+  "release_type": "stub",
+  "release_year": 2017,
+  "subtitle": "DEF",
+  "title": "Additional file 123: ABC"
 }
diff --git a/python/tests/files/datacite/datacite_result_26.json b/python/tests/files/datacite/datacite_result_26.json
index 8d26197c..267eb9c2 100644
--- a/python/tests/files/datacite/datacite_result_26.json
+++ b/python/tests/files/datacite/datacite_result_26.json
@@ -1,31 +1,33 @@
 {
-  "extra": {
-    "datacite": {},
-    "release_month": 8
-  },
-  "title": "Additional file 123: ABC",
-  "subtitle": "DEF",
-  "release_type": "stub",
-  "release_stage": "published",
-  "release_date": "2017-08-24",
-  "release_year": 2017,
-  "ext_ids": {
-    "doi": "10.7916/d86x0cg1"
-  },
+  "abstracts": [],
   "contribs": [
     {
       "index": 0,
+      "raw_affiliation": "Department of pataphysics",
       "raw_name": "Anton Welch",
-      "role": "author",
-      "raw_affiliation": "Department of pataphysics"
+      "role": "author"
     },
-      {
-        "extra": {"type": "Editor"},
-        "raw_name": "David Wemmer",
-        "given_name": "David",
-        "surname": "Wemmer"
-      }
+    {
+      "extra": {
+        "type": "Editor"
+      },
+      "given_name": "David",
+      "raw_name": "David Wemmer",
+      "surname": "Wemmer"
+    }
   ],
+  "ext_ids": {
+    "doi": "10.7916/d86x0cg1"
+  },
+  "extra": {
+    "datacite": {},
+    "release_month": 8
+  },
   "refs": [],
-  "abstracts": []
+  "release_date": "2017-08-24",
+  "release_stage": "published",
+  "release_type": "stub",
+  "release_year": 2017,
+  "subtitle": "DEF",
+  "title": "Additional file 123: ABC"
 }
-- 
cgit v1.2.3