summary refs log tree commit diff stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2020-01-09 15:36:09 +0100
committer Martin Czygan <martin.czygan@gmail.com> 2020-01-09 15:36:09 +0100
commit 6a7591103c7b7d985ad22199138af9378de697f4 (patch)
tree 8834991b7d70676972f7bc4f3e347d3051a2e82d /python/fatcat_tools
parent ffd2597d5e962e3f3a2ea23c66a135bb737b2390 (diff)
download fatcat-6a7591103c7b7d985ad22199138af9378de697f4.tar.gz
fatcat-6a7591103c7b7d985ad22199138af9378de697f4.zip
datacite: abstracts may be strings or list of strings
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/importers/datacite.py 17
1 files changed, 15 insertions, 2 deletions
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 2f1e17d1..4128b3ca 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -468,11 +468,24 @@ class DataciteImporter(EntityImporter):
for desc in descs:
if not desc.get('descriptionType') == 'Abstract':
continue
- if len(desc.get('description', '') or '') < 10:
- continue
+
+ # Description may be a string or a list of strings.
text = desc.get('description', '')
+ if not text:
+ continue
+ if isinstance(text, list):
+ try:
+ text = "\n".join(text)
+ except TypeError as err:
+ continue # Bail out, if it is not a list of strings.
+
+ # Limit length.
+ if len(text) < 10:
+ continue
if len(text) > MAX_ABSTRACT_LENGTH:
text = text[:MAX_ABSTRACT_LENGTH] + " [...]"
+
+ # Detect language.
lang = None
try:
lang = langdetect.detect(text)