aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2019-01-28 16:58:18 -0800
committer Bryan Newbold <bnewbold@robocracy.org> 2019-01-28 16:59:29 -0800
commit b1bfcce57d50bf74e3797d6a676c4138f5a252be (patch)
tree e3b1a396235c787fc8ee2d0f87a5a983b0ec345b /python
parent 7dcf4f206b1fbea9081481594c8f6a88a7e847e1 (diff)
download fatcat-b1bfcce57d50bf74e3797d6a676c4138f5a252be.tar.gz
fatcat-b1bfcce57d50bf74e3797d6a676c4138f5a252be.zip
enforce title len>1 for release imports
Diffstat (limited to 'python')
-rw-r--r-- python/fatcat_tools/importers/crossref.py 3
-rw-r--r-- python/fatcat_tools/importers/grobid_metadata.py 6
2 files changed, 8 insertions, 1 deletions
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index fbf30a32..c163a534 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -340,6 +340,9 @@ class CrossrefImporter(EntityImporter):
original_title = clean(obj.get('original-title')[0], force_xml=True)
if obj.get('title'):
title = clean(obj.get('title')[0], force_xml=True)
+ if not title or len(title) < 2:
+ # title can't be just a single character
+ return None
if extra_crossref:
extra['crossref'] = extra_crossref
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index bc09ec8f..84771083 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -144,8 +144,12 @@ class GrobidMetadataImporter(EntityImporter):
if not extra:
extra = None
+ title = clean(obj['title'], force_xml=True)
+ if not title or len(title) < 2:
+ return None
+
re = fatcat_client.ReleaseEntity(
- title=clean(obj['title'], force_xml=True),
+ title=title,
release_type="article-journal",
release_date=release_date,
release_year=release_year,