diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/fatcat_tools/importers/datacite.py | 16 | ||||
| -rw-r--r-- | python/tests/files/datacite/datacite_doc_33.json | 62 | ||||
| -rw-r--r-- | python/tests/files/datacite/datacite_result_05.json | 3 | ||||
| -rw-r--r-- | python/tests/files/datacite/datacite_result_08.json | 7 | ||||
| -rw-r--r-- | python/tests/files/datacite/datacite_result_33.json | 31 | ||||
| -rw-r--r-- | python/tests/import_datacite.py | 2 | 
6 files changed, 110 insertions, 11 deletions
| diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index 434a2941..66ec2023 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -298,6 +298,9 @@ class DataciteImporter(EntityImporter):          contribs = self.parse_datacite_creators(creators, doi=doi) + self.parse_datacite_creators(contributors, role=None, set_index=False, doi=doi) +        # Address duplicated author names; use raw_name string comparison; refs #59. +        contribs = unique_contributors(contribs) +          # Title, may come with "attributes.titles[].titleType", like          # "AlternativeTitle", "Other", "Subtitle", "TranslatedTitle"          titles = attributes.get('titles', []) or [] @@ -823,6 +826,19 @@ class DataciteImporter(EntityImporter):          return contribs +def unique_contributors(contribs): +    """ +    Given a list of ReleaseContrib items, return a list of unique +    ReleaseContribs, refs GH #59. +    """ +    unique_names, unique_contribs = set(), [] +    for rc in contribs: +        if rc.raw_name and rc.raw_name in unique_names: +            continue +        unique_names.add(rc.raw_name) +        unique_contribs.append(rc) +    return unique_contribs +  def lookup_license_slug(raw):      """      Resolve a variety of strings into a some pseudo-canonical form, e.g. diff --git a/python/tests/files/datacite/datacite_doc_33.json b/python/tests/files/datacite/datacite_doc_33.json new file mode 100644 index 00000000..571d1220 --- /dev/null +++ b/python/tests/files/datacite/datacite_doc_33.json @@ -0,0 +1,62 @@ +{ +  "id": "10.17912/micropub.biology.000143", +  "type": "dois", +  "attributes": { +    "doi": "10.17912/micropub.biology.000143", +    "identifiers": null, +    "creators": [ +      { +        "name": "ABC News", +        "givenName": "", +        "familyName": "", +        "affiliation": [], +        "role": "author" +      } +    ], +    "titles": [ +      { +        "title": "Sample" +      } +    ], +    "publisher": "microPublication Biology", +    "publicationYear": 2019, +    "types": { +      "resourceTypeGeneral": "DataPaper" +    }, +    "relatedIdentifiers": [], +    "sizes": [], +    "formats": [], +    "version": null, +    "rightsList": [], +    "descriptions": [ +      { +        "description": 1234567890, +        "descriptionType": "Abstract" +      } +    ], +    "geoLocations": [], +    "fundingReferences": [], +    "url": "https://www.micropublication.org/journals/biology/micropub.biology.000143", +    "created": "2019-08-19T14:43:08.000Z", +    "registered": "2019-08-19T14:43:09.000Z", +    "published": "2019", +    "updated": "2019-11-09T12:32:02.000Z", +    "contributors": [ +      { +        "name": "ABC News", +        "givenName": "", +        "familyName": "", +        "affiliation": [], +        "role": "" +      } +    ] +  }, +  "relationships": { +    "client": { +      "data": { +        "id": "caltech.micropub", +        "type": "clients" +      } +    } +  } +} diff --git a/python/tests/files/datacite/datacite_result_05.json b/python/tests/files/datacite/datacite_result_05.json index 79c2a8fb..d634490d 100644 --- a/python/tests/files/datacite/datacite_result_05.json +++ b/python/tests/files/datacite/datacite_result_05.json @@ -505,9 +505,6 @@        "surname": "Wurzbacher"      },      { -      "raw_name": "Kessy Abarenkov" -    }, -    {        "raw_name": "NHM UT-University Of Tartu; Natural History Museum And Botanic Garden"      }    ], diff --git a/python/tests/files/datacite/datacite_result_08.json b/python/tests/files/datacite/datacite_result_08.json index 70237280..5a46ef50 100644 --- a/python/tests/files/datacite/datacite_result_08.json +++ b/python/tests/files/datacite/datacite_result_08.json @@ -13,13 +13,6 @@        "raw_name": "Kei Kajisa",        "role": "author",        "surname": "Kajisa" -    }, -    { -      "given_name": "Kei", -      "index": 1, -      "raw_name": "Kei Kajisa", -      "role": "author", -      "surname": "Kajisa"      }    ],    "ext_ids": { diff --git a/python/tests/files/datacite/datacite_result_33.json b/python/tests/files/datacite/datacite_result_33.json new file mode 100644 index 00000000..bcb72469 --- /dev/null +++ b/python/tests/files/datacite/datacite_result_33.json @@ -0,0 +1,31 @@ +{ +  "abstracts": [ +    { +      "content": "1234567890", +      "mimetype": "text/plain" +    } +  ], +  "contribs": [ +    { +      "given_name": "", +      "surname": "", +      "index": 0, +      "raw_name": "ABC News", +      "role": "author" +    } +  ], +  "ext_ids": { +    "doi": "10.17912/micropub.biology.000143" +  }, +  "extra": { +    "datacite": { +      "resourceTypeGeneral": "DataPaper" +    }, +    "container_name": "microPublication Biology" +  }, +  "refs": [], +  "release_stage": "published", +  "release_year": 2019, +  "publisher": "microPublication Biology", +  "title": "Sample" +} diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py index 20c1eaf8..1472b8ea 100644 --- a/python/tests/import_datacite.py +++ b/python/tests/import_datacite.py @@ -288,7 +288,7 @@ def test_datacite_conversions(datacite_importer):      for now.      """      datacite_importer.debug = True -    for i in range(33): +    for i in range(34):          src = 'tests/files/datacite/datacite_doc_{0:02d}.json'.format(i)          dst = 'tests/files/datacite/datacite_result_{0:02d}.json'.format(i)          with open(src, 'r') as f: | 
