summary refs log tree commit diff stats
path: root/python/fatcat_tools/importers/datacite.py
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2019-12-26 17:25:09 +0100
committer Martin Czygan <martin.czygan@gmail.com> 2019-12-28 23:07:32 +0100
commit 097fa7660c60e6c52ac2adbdd82fe64c122b1e42 (patch)
tree 586ddf2fe3ef027cb9adca6122454baf79c5d42e /python/fatcat_tools/importers/datacite.py
parent a57919b05d8b1f24041713e85b7fa4322c0591c6 (diff)
download fatcat-097fa7660c60e6c52ac2adbdd82fe64c122b1e42.tar.gz
fatcat-097fa7660c60e6c52ac2adbdd82fe64c122b1e42.zip
datacite: limit abstract length
Diffstat (limited to 'python/fatcat_tools/importers/datacite.py')
-rw-r--r-- python/fatcat_tools/importers/datacite.py 6
1 files changed, 6 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 26520164..66f812e2 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -16,6 +16,10 @@ import sqlite3
import sys
from fatcat_tools.transforms import entity_to_dict
+
+# Cutoff length for abstracts.
+MAX_ABSTRACT_LENGTH = 2048
+
# https://guide.fatcat.wiki/entity_container.html#container_type-vocabulary
CONTAINER_TYPE_MAP = {
'Journal': 'journal',
@@ -450,6 +454,8 @@ class DataciteImporter(EntityImporter):
if len(desc.get('description', '')) < 10:
continue
text = desc.get('description')
+ if len(text) > MAX_ABSTRACT_LENGTH:
+ text = text[:MAX_ABSTRACT_LENGTH] + " [...]"
lang = None
if self.lang_detect:
try: