From 6d64c5d4e1527c7277527132efa858def2589486 Mon Sep 17 00:00:00 2001
From: Ellen Spertus
Date: Thu, 9 Aug 2018 11:30:44 -0700
Subject: Added test for null argument to titleToSlug()

---
 scalding/src/main/scala/sandcrawler/Scorable.scala     | 13 +++++++++----
 scalding/src/test/scala/sandcrawler/ScorableTest.scala |  4 ++++
 2 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'scalding')

diff --git a/scalding/src/main/scala/sandcrawler/Scorable.scala b/scalding/src/main/scala/sandcrawler/Scorable.scala
index 736c175..ce4fdca 100644
--- a/scalding/src/main/scala/sandcrawler/Scorable.scala
+++ b/scalding/src/main/scala/sandcrawler/Scorable.scala
@@ -45,12 +45,17 @@ object Scorable {
   }
 
   def titleToSlug(title : String) : String = {
-    val slug = StringUtilities.removePunctuation(
-      StringUtilities.removeAccents(title).split(":")(0).toLowerCase())
-    if (slug.isEmpty) {
+    if (title == null || title.isEmpty) {
       NoSlug
     } else {
-      slug
+      val unaccented = StringUtilities.removeAccents(title)
+      // Remove punctuation after splitting on colon.
+      val slug = StringUtilities.removePunctuation((unaccented.split(":")(0).toLowerCase()))
+      if (slug.isEmpty || slug == null) {
+        NoSlug
+      } else {
+        slug
+      }
     }
   }
 
diff --git a/scalding/src/test/scala/sandcrawler/ScorableTest.scala b/scalding/src/test/scala/sandcrawler/ScorableTest.scala
index 713a7e5..40801a0 100644
--- a/scalding/src/test/scala/sandcrawler/ScorableTest.scala
+++ b/scalding/src/test/scala/sandcrawler/ScorableTest.scala
@@ -71,6 +71,10 @@ class ScorableTest extends FlatSpec with Matchers {
     Scorable.titleToSlug("") shouldBe Scorable.NoSlug
   }
 
+  it should "return Scorable.NoSlug if given null" in {
+    Scorable.titleToSlug(null) shouldBe Scorable.NoSlug
+  }
+
   "titleToSlug()" should "strip punctuation" in {
     Scorable.titleToSlug("HELLO!:the:re") shouldBe "hello"
     Scorable.titleToSlug("a:b:c") shouldBe "a"
-- 
cgit v1.2.3