diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2019-01-28 16:58:18 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-01-28 16:59:29 -0800 | 
| commit | b1bfcce57d50bf74e3797d6a676c4138f5a252be (patch) | |
| tree | e3b1a396235c787fc8ee2d0f87a5a983b0ec345b /python/fatcat_tools | |
| parent | 7dcf4f206b1fbea9081481594c8f6a88a7e847e1 (diff) | |
| download | fatcat-b1bfcce57d50bf74e3797d6a676c4138f5a252be.tar.gz fatcat-b1bfcce57d50bf74e3797d6a676c4138f5a252be.zip | |
enforce title len>1 for release imports
Diffstat (limited to 'python/fatcat_tools')
| -rw-r--r-- | python/fatcat_tools/importers/crossref.py | 3 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/grobid_metadata.py | 6 | 
2 files changed, 8 insertions, 1 deletion
```diff
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index fbf30a32..c163a534 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -340,6 +340,9 @@ class CrossrefImporter(EntityImporter):
             original_title = clean(obj.get('original-title')[0], force_xml=True)
         if obj.get('title'):
             title = clean(obj.get('title')[0], force_xml=True)
+        if not title or len(title) < 2:
+            # title can't be just a single character
+            return None
 
         if extra_crossref:
             extra['crossref'] = extra_crossref
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index bc09ec8f..84771083 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -144,8 +144,12 @@ class GrobidMetadataImporter(EntityImporter):
         if not extra:
             extra = None
 
+        title = clean(obj['title'], force_xml=True)
+        if not title or len(title) < 2:
+            return None
+
         re = fatcat_client.ReleaseEntity(
-            title=clean(obj['title'], force_xml=True),
+            title=title,
             release_type="article-journal",
             release_date=release_date,
             release_year=release_year,
```
