diff options
Diffstat (limited to 'fatcat_scholar')
-rw-r--r-- | fatcat_scholar/schema.py | 4 |
1 files changed, 4 insertions, 0 deletions
diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py
index 0494ed7..8b09ab3 100644
--- a/fatcat_scholar/schema.py
+++ b/fatcat_scholar/schema.py
@@ -239,6 +239,10 @@ def scrub_text(raw: str, mimetype: str = None) -> Optional[str]:
             text = text[len(prefix) :]
             break
 
+    # single word? not "text". eg, random URLs
+    if len(text.split()) <= 1:
+        return None
+
     if not text:
         return None
     return text