From 6a7591103c7b7d985ad22199138af9378de697f4 Mon Sep 17 00:00:00 2001
From: Martin Czygan
Date: Thu, 9 Jan 2020 15:36:09 +0100
Subject: datacite: abstracts may be strings or list of strings

---
 python/fatcat_tools/importers/datacite.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'python/fatcat_tools/importers')

diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 2f1e17d1..4128b3ca 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -468,11 +468,24 @@ class DataciteImporter(EntityImporter):
         for desc in descs:
             if not desc.get('descriptionType') == 'Abstract':
                 continue
-            if len(desc.get('description', '') or '') < 10:
-                continue
+
+            # Description maybe a string or list.
             text = desc.get('description', '')
+            if not text:
+                continue
+            if isinstance(text, list):
+                try:
+                    text = "\n".join(text)
+                except TypeError as err:
+                    continue # Bail out, if it is not a list of strings.
+
+            # Limit length.
+            if len(text) < 10:
+                continue
             if len(text) > MAX_ABSTRACT_LENGTH:
                 text = text[:MAX_ABSTRACT_LENGTH] + " [...]"
+
+            # Detect language.
             lang = None
             try:
                 lang = langdetect.detect(text)
-- 
cgit v1.2.3