diff options
| author | Martin Czygan <martin.czygan@gmail.com> | 2020-04-20 20:52:10 +0200 | 
|---|---|---|
| committer | Martin Czygan <martin.czygan@gmail.com> | 2020-04-20 20:52:10 +0200 | 
| commit | 7c6febf20c84dd4f5778e1fb02369456f7dad344 (patch) | |
| tree | 851f86e3a5097f7bc3a6df8171dfb08d17fe1820 /python | |
| parent | e749b576bdb9bbec3bcef7e4e389e7fbbf916c84 (diff) | |
| download | fatcat-7c6febf20c84dd4f5778e1fb02369456f7dad344.tar.gz fatcat-7c6febf20c84dd4f5778e1fb02369456f7dad344.zip | |
datacite: fix a raw name constraint violation
Previously, a contrib could be added without a raw name. One example
is a name consisting only of whitespace.
This fix adds a final check for this case: the name is stripped, and the contrib is skipped if nothing remains.
Diffstat (limited to 'python')
| -rw-r--r-- | python/fatcat_tools/importers/datacite.py | 8 | ||||
| -rw-r--r-- | python/tests/files/datacite/datacite_doc_31.json | 53 | ||||
| -rw-r--r-- | python/tests/files/datacite/datacite_result_31.json | 24 | ||||
| -rw-r--r-- | python/tests/import_datacite.py | 2 | 
4 files changed, 86 insertions, 1 deletions
| diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index 81f00876..244984f5 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -758,6 +758,14 @@ class DataciteImporter(EntityImporter):                      given_name = clean(given_name)                  if surname:                      surname = clean(surname) + +                # Perform a final assertion that name does not reduce to zero +                # (e.g. whitespace only name). +                if name: +                    name = name.strip() +                if not name: +                    continue +                  if raw_affiliation == '':                      continue diff --git a/python/tests/files/datacite/datacite_doc_31.json b/python/tests/files/datacite/datacite_doc_31.json new file mode 100644 index 00000000..83af3e4d --- /dev/null +++ b/python/tests/files/datacite/datacite_doc_31.json @@ -0,0 +1,53 @@ +{ +  "id": "10.17912/micropub.biology.000143", +  "type": "dois", +  "attributes": { +    "doi": "10.17912/micropub.biology.000143", +    "identifiers": null, +    "creators": [ +      { +        "raw_name": " ", +        "givenName": "", +        "familyName": "", +        "affiliation": [], +        "role": "author" +      } +    ], +    "titles": [ +      { +        "title": "Sample" +      } +    ], +    "publisher": "microPublication Biology", +    "publicationYear": 2019, +    "types": { +      "resourceTypeGeneral": "DataPaper" +    }, +    "relatedIdentifiers": [], +    "sizes": [], +    "formats": [], +    "version": null, +    "rightsList": [], +    "descriptions": [ +      { +        "description": "Biological liquid-liquid phase separation", +        "descriptionType": "Abstract" +      } +    ], +    "geoLocations": [], +    "fundingReferences": [], +    "url": "https://www.micropublication.org/journals/biology/micropub.biology.000143", +    "created": 
"2019-08-19T14:43:08.000Z", +    "registered": "2019-08-19T14:43:09.000Z", +    "published": "2019", +    "updated": "2019-11-09T12:32:02.000Z" +  }, +  "relationships": { +    "client": { +      "data": { +        "id": "caltech.micropub", +        "type": "clients" +      } +    } +  } +} diff --git a/python/tests/files/datacite/datacite_result_31.json b/python/tests/files/datacite/datacite_result_31.json new file mode 100644 index 00000000..193104b0 --- /dev/null +++ b/python/tests/files/datacite/datacite_result_31.json @@ -0,0 +1,24 @@ +{ +  "abstracts": [ +    { +      "content": "Biological liquid-liquid phase separation", +      "lang": "fr", +      "mimetype": "text/plain" +    } +  ], +  "contribs": [], +  "ext_ids": { +    "doi": "10.17912/micropub.biology.000143" +  }, +  "extra": { +    "datacite": { +      "resourceTypeGeneral": "DataPaper" +    }, +    "container_name": "microPublication Biology" +  }, +  "refs": [], +  "release_stage": "published", +  "release_year": 2019, +  "publisher": "microPublication Biology", +  "title": "Sample" +} diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py index 15650375..7fdd8230 100644 --- a/python/tests/import_datacite.py +++ b/python/tests/import_datacite.py @@ -287,7 +287,7 @@ def test_datacite_conversions(datacite_importer):      for now.      """      datacite_importer.debug = True -    for i in range(31): +    for i in range(32):          src = 'tests/files/datacite/datacite_doc_{0:02d}.json'.format(i)          dst = 'tests/files/datacite/datacite_result_{0:02d}.json'.format(i)          with open(src, 'r') as f: | 
