diff options
author | Ellen Spertus <ellen.spertus@gmail.com> | 2018-08-09 11:30:44 -0700 |
---|---|---|
committer | Ellen Spertus <ellen.spertus@gmail.com> | 2018-08-09 11:30:44 -0700 |
commit | 6d64c5d4e1527c7277527132efa858def2589486 (patch) | |
tree | 56b34fceae5e4e3e0c476dbc5c6fbf2673c60b04 /scalding/src/main | |
parent | ccfeb71ef2a25a479c083051acc0ebb7436e421b (diff) | |
download | sandcrawler-6d64c5d4e1527c7277527132efa858def2589486.tar.gz sandcrawler-6d64c5d4e1527c7277527132efa858def2589486.zip |
Added test for null argument to titleToSlug()
Diffstat (limited to 'scalding/src/main')
-rw-r--r-- | scalding/src/main/scala/sandcrawler/Scorable.scala | 13 |
1 file changed, 9 insertions, 4 deletions
```diff
diff --git a/scalding/src/main/scala/sandcrawler/Scorable.scala b/scalding/src/main/scala/sandcrawler/Scorable.scala
index 736c175..ce4fdca 100644
--- a/scalding/src/main/scala/sandcrawler/Scorable.scala
+++ b/scalding/src/main/scala/sandcrawler/Scorable.scala
@@ -45,12 +45,17 @@ object Scorable {
   }
 
   def titleToSlug(title : String) : String = {
-    val slug = StringUtilities.removePunctuation(
-      StringUtilities.removeAccents(title).split(":")(0).toLowerCase())
-    if (slug.isEmpty) {
+    if (title == null || title.isEmpty) {
       NoSlug
     } else {
-      slug
+      val unaccented = StringUtilities.removeAccents(title)
+      // Remove punctuation after splitting on colon.
+      val slug = StringUtilities.removePunctuation((unaccented.split(":")(0).toLowerCase()))
+      if (slug.isEmpty || slug == null) {
+        NoSlug
+      } else {
+        slug
+      }
     }
   }
 
```