diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/fatcat_tools/importers/datacite.py | 17 | ||||
| -rw-r--r-- | python/tests/files/datacite/datacite_doc_27.json | 60 | ||||
| -rw-r--r-- | python/tests/files/datacite/datacite_doc_28.json | 60 | ||||
| -rw-r--r-- | python/tests/files/datacite/datacite_result_27.json | 33 | ||||
| -rw-r--r-- | python/tests/files/datacite/datacite_result_28.json | 33 | ||||
| -rw-r--r-- | python/tests/import_datacite.py | 2 | 
6 files changed, 202 insertions, 3 deletions
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index 2f1e17d1..4128b3ca 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -468,11 +468,24 @@ class DataciteImporter(EntityImporter):          for desc in descs:              if not desc.get('descriptionType') == 'Abstract':                  continue -            if len(desc.get('description', '') or '') < 10: -                continue + +            # Description maybe a string or list.              text = desc.get('description', '') +            if not text: +                continue +            if isinstance(text, list): +                try: +                    text = "\n".join(text) +                except TypeError as err: +                    continue # Bail out, if it is not a list of strings. + +            # Limit length. +            if len(text) < 10: +                continue              if len(text) > MAX_ABSTRACT_LENGTH:                  text = text[:MAX_ABSTRACT_LENGTH] + " [...]" + +            # Detect language.              lang = None              try:                  lang = langdetect.detect(text) diff --git a/python/tests/files/datacite/datacite_doc_27.json b/python/tests/files/datacite/datacite_doc_27.json new file mode 100644 index 00000000..ff9c00f4 --- /dev/null +++ b/python/tests/files/datacite/datacite_doc_27.json @@ -0,0 +1,60 @@ +{ +  "attributes": { +    "contributors": [ +      { +        "affiliation": [], +        "contributorType": "Editor", +        "familyName": "Wemmer", +        "givenName": "David", +        "name": "Wemmer, David", +        "nameType": "Personal" +      } +    ], +    "creators": [ +      { +        "affiliation": [ +          "Department of pataphysics" +        ], +        "name": "Anton Welch", +        "nameIdentifiers": [] +      } +    ], +    "dates": [ +      { +        "date": "2017-08-24", +        "dateType": "Created" +      }, +      { +        "date": "2019-08-04", +        "dateType": "Updated" +      }, +      { +        "date": "2017", +        "dateType": "Issued" +      } +    ], +    "descriptions": [ +        {"description": "Hello World", "descriptionType": "Abstract"} +    ], +    "doi": "10.7916/d86x0cg1", +    "isActive": true, +    "language": "DE-CH", +    "publicationYear": 2016, +    "state": "findable", +    "titles": [ +      { +        "title": "Additional file 123: ABC" +      }, +      { +        "title": "DEF", +        "titleType": "Subtitle" +      } +    ], +    "types": { +      "bibtex": "misc", +      "citeproc": "article", +      "ris": "GEN", +      "schemaOrg": "CreativeWork" +    } +  } +} diff --git a/python/tests/files/datacite/datacite_doc_28.json b/python/tests/files/datacite/datacite_doc_28.json new file mode 100644 index 00000000..0d03ecc8 --- /dev/null +++ b/python/tests/files/datacite/datacite_doc_28.json @@ -0,0 +1,60 @@ +{ +  "attributes": { +    "contributors": [ +      { +        "affiliation": [], +        "contributorType": "Editor", +        "familyName": "Wemmer", +        "givenName": "David", +        "name": "Wemmer, David", +        "nameType": "Personal" +      } +    ], +    "creators": [ +      { +        "affiliation": [ +          "Department of pataphysics" +        ], +        "name": "Anton Welch", +        "nameIdentifiers": [] +      } +    ], +    "dates": [ +      { +        "date": "2017-08-24", +        "dateType": "Created" +      }, +      { +        "date": "2019-08-04", +        "dateType": "Updated" +      }, +      { +        "date": "2017", +        "dateType": "Issued" +      } +    ], +    "descriptions": [ +        {"description": ["Hello", "World"], "descriptionType": "Abstract"} +    ], +    "doi": "10.7916/d86x0cg1", +    "isActive": true, +    "language": "DE-CH", +    "publicationYear": 2016, +    "state": "findable", +    "titles": [ +      { +        "title": "Additional file 123: ABC" +      }, +      { +        "title": "DEF", +        "titleType": "Subtitle" +      } +    ], +    "types": { +      "bibtex": "misc", +      "citeproc": "article", +      "ris": "GEN", +      "schemaOrg": "CreativeWork" +    } +  } +} diff --git a/python/tests/files/datacite/datacite_result_27.json b/python/tests/files/datacite/datacite_result_27.json new file mode 100644 index 00000000..3d033e6a --- /dev/null +++ b/python/tests/files/datacite/datacite_result_27.json @@ -0,0 +1,33 @@ +{ +  "abstracts": [{"content": "Hello World", "lang": "en", "mimetype": "text/plain"}], +  "contribs": [ +    { +      "index": 0, +      "raw_affiliation": "Department of pataphysics", +      "raw_name": "Anton Welch", +      "role": "author" +    }, +    { +      "extra": { +        "type": "Editor" +      }, +      "given_name": "David", +      "raw_name": "David Wemmer", +      "surname": "Wemmer" +    } +  ], +  "ext_ids": { +    "doi": "10.7916/d86x0cg1" +  }, +  "extra": { +    "datacite": {}, +    "release_month": 8 +  }, +  "refs": [], +  "release_date": "2017-08-24", +  "release_stage": "published", +  "release_type": "stub", +  "release_year": 2017, +  "subtitle": "DEF", +  "title": "Additional file 123: ABC" +} diff --git a/python/tests/files/datacite/datacite_result_28.json b/python/tests/files/datacite/datacite_result_28.json new file mode 100644 index 00000000..84bed9c8 --- /dev/null +++ b/python/tests/files/datacite/datacite_result_28.json @@ -0,0 +1,33 @@ +{ +  "abstracts": [{"content": "Hello\nWorld", "lang": "en", "mimetype": "text/plain"}], +  "contribs": [ +    { +      "index": 0, +      "raw_affiliation": "Department of pataphysics", +      "raw_name": "Anton Welch", +      "role": "author" +    }, +    { +      "extra": { +        "type": "Editor" +      }, +      "given_name": "David", +      "raw_name": "David Wemmer", +      "surname": "Wemmer" +    } +  ], +  "ext_ids": { +    "doi": "10.7916/d86x0cg1" +  }, +  "extra": { +    "datacite": {}, +    "release_month": 8 +  }, +  "refs": [], +  "release_date": "2017-08-24", +  "release_stage": "published", +  "release_type": "stub", +  "release_year": 2017, +  "subtitle": "DEF", +  "title": "Additional file 123: ABC" +} diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py index 89671cd2..a7d514ea 100644 --- a/python/tests/import_datacite.py +++ b/python/tests/import_datacite.py @@ -287,7 +287,7 @@ def test_datacite_conversions(datacite_importer):      for now.      """      datacite_importer.debug = True -    for i in range(27): +    for i in range(29):          src = 'tests/files/datacite/datacite_doc_{0:02d}.json'.format(i)          dst = 'tests/files/datacite/datacite_result_{0:02d}.json'.format(i)          print('testing mapping from {} => {}'.format(src, dst))  | 
