summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
author: bnewbold <bnewbold@archive.org> 2021-10-02 00:55:56 +0000
committer: bnewbold <bnewbold@archive.org> 2021-10-02 00:55:56 +0000
commit: 491722e00548888e24fba6ec87d7fefa92e3538b (patch)
tree: 1b8c4ba4c23edb299fef488c346b7d2565bb9834 /python/fatcat_tools
parent: 519c7e77cf3a54b9620adef07fedac9b37a5f9f2 (diff)
parent: bdc4347acbbdb9f58b7c3abc2578a488de3d0a85 (diff)
download: fatcat-491722e00548888e24fba6ec87d7fefa92e3538b.tar.gz
fatcat-491722e00548888e24fba6ec87d7fefa92e3538b.zip
Merge branch 'martin-datacite-emtpy-abstract-sentry-94639' into 'master'
datacite: skip empty abstracts

See merge request webgroup/fatcat!119
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/importers/datacite.py | 5
1 files changed, 4 insertions, 1 deletions
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 703dbc27..eb49596f 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -549,10 +549,13 @@ class DataciteImporter(EntityImporter):
lang = langdetect.detect(text)
except (langdetect.lang_detect_exception.LangDetectException, TypeError) as err:
print('[{}] language detection failed with {} on {}'.format(doi, err, text), file=sys.stderr)
+ abstract_text = clean(text)
+ if not abstract_text:
+ continue
abstracts.append(
fatcat_openapi_client.ReleaseAbstract(
mimetype="text/plain",
- content=clean(text),
+ content=abstract_text,
lang=lang,
))