diff options
| author | Ellen Spertus <ellen.spertus@gmail.com> | 2018-08-09 11:30:44 -0700 | 
|---|---|---|
| committer | Ellen Spertus <ellen.spertus@gmail.com> | 2018-08-09 11:30:44 -0700 | 
| commit | 6d64c5d4e1527c7277527132efa858def2589486 (patch) | |
| tree | 56b34fceae5e4e3e0c476dbc5c6fbf2673c60b04 /scalding/src | |
| parent | ccfeb71ef2a25a479c083051acc0ebb7436e421b (diff) | |
| download | sandcrawler-6d64c5d4e1527c7277527132efa858def2589486.tar.gz sandcrawler-6d64c5d4e1527c7277527132efa858def2589486.zip | |
Added test for null argument to titleToSlug()
Diffstat (limited to 'scalding/src')
| -rw-r--r-- | scalding/src/main/scala/sandcrawler/Scorable.scala | 13 | ||||
| -rw-r--r-- | scalding/src/test/scala/sandcrawler/ScorableTest.scala | 4 | 
2 files changed, 13 insertions, 4 deletions
```diff
diff --git a/scalding/src/main/scala/sandcrawler/Scorable.scala b/scalding/src/main/scala/sandcrawler/Scorable.scala
index 736c175..ce4fdca 100644
--- a/scalding/src/main/scala/sandcrawler/Scorable.scala
+++ b/scalding/src/main/scala/sandcrawler/Scorable.scala
@@ -45,12 +45,17 @@ object Scorable {
   }
 
   def titleToSlug(title : String) : String = {
-    val slug = StringUtilities.removePunctuation(
-      StringUtilities.removeAccents(title).split(":")(0).toLowerCase())
-    if (slug.isEmpty) {
+    if (title == null || title.isEmpty) {
       NoSlug
     } else {
-      slug
+      val unaccented = StringUtilities.removeAccents(title)
+      // Remove punctuation after splitting on colon.
+      val slug = StringUtilities.removePunctuation((unaccented.split(":")(0).toLowerCase()))
+      if (slug.isEmpty || slug == null) {
+        NoSlug
+      } else {
+        slug
+      }
     }
   }
 
diff --git a/scalding/src/test/scala/sandcrawler/ScorableTest.scala b/scalding/src/test/scala/sandcrawler/ScorableTest.scala
index 713a7e5..40801a0 100644
--- a/scalding/src/test/scala/sandcrawler/ScorableTest.scala
+++ b/scalding/src/test/scala/sandcrawler/ScorableTest.scala
@@ -71,6 +71,10 @@ class ScorableTest extends FlatSpec with Matchers {
     Scorable.titleToSlug("") shouldBe Scorable.NoSlug
   }
 
+  it should "return Scorable.NoSlug if given null" in {
+    Scorable.titleToSlug(null) shouldBe Scorable.NoSlug
+  }
+
   "titleToSlug()" should "strip punctuation" in {
     Scorable.titleToSlug("HELLO!:the:re") shouldBe "hello"
     Scorable.titleToSlug("a:b:c") shouldBe "a"
```
