diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-01-28 16:58:18 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-01-28 16:59:29 -0800 |
commit | b1bfcce57d50bf74e3797d6a676c4138f5a252be (patch) | |
tree | e3b1a396235c787fc8ee2d0f87a5a983b0ec345b | |
parent | 7dcf4f206b1fbea9081481594c8f6a88a7e847e1 (diff) | |
download | fatcat-b1bfcce57d50bf74e3797d6a676c4138f5a252be.tar.gz fatcat-b1bfcce57d50bf74e3797d6a676c4138f5a252be.zip |
enforce title len>1 for release imports
-rw-r--r-- | python/fatcat_tools/importers/crossref.py | 3 | ||||
-rw-r--r-- | python/fatcat_tools/importers/grobid_metadata.py | 6 |
2 files changed, 8 insertions, 1 deletion
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index fbf30a32..c163a534 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -340,6 +340,9 @@ class CrossrefImporter(EntityImporter): original_title = clean(obj.get('original-title')[0], force_xml=True) if obj.get('title'): title = clean(obj.get('title')[0], force_xml=True) + if not title or len(title) < 2: + # title can't be just a single character + return None if extra_crossref: extra['crossref'] = extra_crossref diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index bc09ec8f..84771083 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -144,8 +144,12 @@ class GrobidMetadataImporter(EntityImporter): if not extra: extra = None + title = clean(obj['title'], force_xml=True) + if not title or len(title) < 2: + return None + re = fatcat_client.ReleaseEntity( - title=clean(obj['title'], force_xml=True), + title=title, release_type="article-journal", release_date=release_date, release_year=release_year, |