From b1bfcce57d50bf74e3797d6a676c4138f5a252be Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 28 Jan 2019 16:58:18 -0800 Subject: enforce title len>1 for release imports --- python/fatcat_tools/importers/crossref.py | 3 +++ python/fatcat_tools/importers/grobid_metadata.py | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'python') diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index fbf30a32..c163a534 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -340,6 +340,9 @@ class CrossrefImporter(EntityImporter): original_title = clean(obj.get('original-title')[0], force_xml=True) if obj.get('title'): title = clean(obj.get('title')[0], force_xml=True) + if not title or len(title) < 2: + # title can't be just a single character + return None if extra_crossref: extra['crossref'] = extra_crossref diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index bc09ec8f..84771083 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -144,8 +144,12 @@ class GrobidMetadataImporter(EntityImporter): if not extra: extra = None + title = clean(obj['title'], force_xml=True) + if not title or len(title) < 2: + return None + re = fatcat_client.ReleaseEntity( - title=clean(obj['title'], force_xml=True), + title=title, release_type="article-journal", release_date=release_date, release_year=release_year, -- cgit v1.2.3