diff options
author | bnewbold <bnewbold@archive.org> | 2021-10-02 00:55:56 +0000 |
---|---|---|
committer | bnewbold <bnewbold@archive.org> | 2021-10-02 00:55:56 +0000 |
commit | 491722e00548888e24fba6ec87d7fefa92e3538b (patch) | |
tree | 1b8c4ba4c23edb299fef488c346b7d2565bb9834 /python/fatcat_tools/importers | |
parent | 519c7e77cf3a54b9620adef07fedac9b37a5f9f2 (diff) | |
parent | bdc4347acbbdb9f58b7c3abc2578a488de3d0a85 (diff) | |
download | fatcat-491722e00548888e24fba6ec87d7fefa92e3538b.tar.gz fatcat-491722e00548888e24fba6ec87d7fefa92e3538b.zip |
Merge branch 'martin-datacite-emtpy-abstract-sentry-94639' into 'master'
datacite: skip empty abstracts
See merge request webgroup/fatcat!119
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- | python/fatcat_tools/importers/datacite.py | 5 |
1 file changed, 4 insertions, 1 deletion
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 703dbc27..eb49596f 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -549,10 +549,13 @@ class DataciteImporter(EntityImporter):
     lang = langdetect.detect(text)
 except (langdetect.lang_detect_exception.LangDetectException, TypeError) as err:
     print('[{}] language detection failed with {} on {}'.format(doi, err, text), file=sys.stderr)
+abstract_text = clean(text)
+if not abstract_text:
+    continue
 abstracts.append(
     fatcat_openapi_client.ReleaseAbstract(
         mimetype="text/plain",
-        content=clean(text),
+        content=abstract_text,
         lang=lang,
     ))