From bdc4347acbbdb9f58b7c3abc2578a488de3d0a85 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Fri, 1 Oct 2021 16:56:59 +0200 Subject: datacite: skip empty abstracts Do not add abstracts for which `clean` returns an empty string; an abstract with empty content violates the constraint `either abstract_sha1 or content is required` --- python/fatcat_tools/importers/datacite.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'python/fatcat_tools') diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index 703dbc27..eb49596f 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -549,10 +549,13 @@ class DataciteImporter(EntityImporter): lang = langdetect.detect(text) except (langdetect.lang_detect_exception.LangDetectException, TypeError) as err: print('[{}] language detection failed with {} on {}'.format(doi, err, text), file=sys.stderr) + abstract_text = clean(text) + if not abstract_text: + continue abstracts.append( fatcat_openapi_client.ReleaseAbstract( mimetype="text/plain", - content=clean(text), + content=abstract_text, lang=lang, )) -- cgit v1.2.3