summary refs log tree commit diff stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2021-10-01 16:56:59 +0200
committer Martin Czygan <martin.czygan@gmail.com> 2021-10-01 16:56:59 +0200
commit bdc4347acbbdb9f58b7c3abc2578a488de3d0a85 (patch)
tree 1b8c4ba4c23edb299fef488c346b7d2565bb9834 /python/fatcat_tools
parent 519c7e77cf3a54b9620adef07fedac9b37a5f9f2 (diff)
download fatcat-bdc4347acbbdb9f58b7c3abc2578a488de3d0a85.tar.gz
fatcat-bdc4347acbbdb9f58b7c3abc2578a488de3d0a85.zip
datacite: skip empty abstracts
Do not add abstracts for which `clean` returns the empty string; an abstract with empty content violates the constraint `either abstract_sha1 or content is required`.
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/importers/datacite.py 5
1 file changed, 4 insertions, 1 deletion
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 703dbc27..eb49596f 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -549,10 +549,13 @@ class DataciteImporter(EntityImporter):
lang = langdetect.detect(text)
except (langdetect.lang_detect_exception.LangDetectException, TypeError) as err:
print('[{}] language detection failed with {} on {}'.format(doi, err, text), file=sys.stderr)
+ abstract_text = clean(text)
+ if not abstract_text:
+ continue
abstracts.append(
fatcat_openapi_client.ReleaseAbstract(
mimetype="text/plain",
- content=clean(text),
+ content=abstract_text,
lang=lang,
))