diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2021-10-01 16:56:59 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2021-10-01 16:56:59 +0200 |
commit | bdc4347acbbdb9f58b7c3abc2578a488de3d0a85 (patch) | |
tree | 1b8c4ba4c23edb299fef488c346b7d2565bb9834 /python/fatcat_tools | |
parent | 519c7e77cf3a54b9620adef07fedac9b37a5f9f2 (diff) | |
download | fatcat-bdc4347acbbdb9f58b7c3abc2578a488de3d0a85.tar.gz fatcat-bdc4347acbbdb9f58b7c3abc2578a488de3d0a85.zip |
datacite: skip empty abstracts
Do not add an abstract when `clean` reduces its text to the empty string; an
empty abstract violates the constraint `either abstract_sha1 or content is required`.
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- | python/fatcat_tools/importers/datacite.py | 5 |
1 file changed, 4 insertions, 1 deletion
```diff
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 703dbc27..eb49596f 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -549,10 +549,13 @@ class DataciteImporter(EntityImporter):
                 lang = langdetect.detect(text)
             except (langdetect.lang_detect_exception.LangDetectException, TypeError) as err:
                 print('[{}] language detection failed with {} on {}'.format(doi, err, text), file=sys.stderr)
+            abstract_text = clean(text)
+            if not abstract_text:
+                continue
             abstracts.append(
                 fatcat_openapi_client.ReleaseAbstract(
                     mimetype="text/plain",
-                    content=clean(text),
+                    content=abstract_text,
                     lang=lang,
                 ))
 
```