diff options
| author | Martin Czygan <martin.czygan@gmail.com> | 2020-01-09 15:36:09 +0100 | 
|---|---|---|
| committer | Martin Czygan <martin.czygan@gmail.com> | 2020-01-09 15:36:09 +0100 | 
| commit | 6a7591103c7b7d985ad22199138af9378de697f4 (patch) | |
| tree | 8834991b7d70676972f7bc4f3e347d3051a2e82d /python/fatcat_tools | |
| parent | ffd2597d5e962e3f3a2ea23c66a135bb737b2390 (diff) | |
| download | fatcat-6a7591103c7b7d985ad22199138af9378de697f4.tar.gz fatcat-6a7591103c7b7d985ad22199138af9378de697f4.zip | |
datacite: abstracts may be strings or list of strings
Diffstat (limited to 'python/fatcat_tools')
| -rw-r--r-- | python/fatcat_tools/importers/datacite.py | 17 | 
1 file changed, 15 insertions, 2 deletions
```diff
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 2f1e17d1..4128b3ca 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -468,11 +468,24 @@ class DataciteImporter(EntityImporter):
         for desc in descs:
             if not desc.get('descriptionType') == 'Abstract':
                 continue
-            if len(desc.get('description', '') or '') < 10:
-                continue
+
+            # Description maybe a string or list.
             text = desc.get('description', '')
+            if not text:
+                continue
+            if isinstance(text, list):
+                try:
+                    text = "\n".join(text)
+                except TypeError as err:
+                    continue # Bail out, if it is not a list of strings.
+
+            # Limit length.
+            if len(text) < 10:
+                continue
             if len(text) > MAX_ABSTRACT_LENGTH:
                 text = text[:MAX_ABSTRACT_LENGTH] + " [...]"
+
+            # Detect language.
             lang = None
             try:
                 lang = langdetect.detect(text)
```
