about summary refs log tree commit diff stats
path: root/scalding/src
diff options
context:
space:
mode:
author Ellen Spertus <ellen.spertus@gmail.com> 2018-08-09 11:30:44 -0700
committer Ellen Spertus <ellen.spertus@gmail.com> 2018-08-09 11:30:44 -0700
commit 6d64c5d4e1527c7277527132efa858def2589486 (patch)
tree 56b34fceae5e4e3e0c476dbc5c6fbf2673c60b04 /scalding/src
parent ccfeb71ef2a25a479c083051acc0ebb7436e421b (diff)
download sandcrawler-6d64c5d4e1527c7277527132efa858def2589486.tar.gz
sandcrawler-6d64c5d4e1527c7277527132efa858def2589486.zip
Added test for null argument to titleToSlug()
Diffstat (limited to 'scalding/src')
-rw-r--r-- scalding/src/main/scala/sandcrawler/Scorable.scala 13
-rw-r--r-- scalding/src/test/scala/sandcrawler/ScorableTest.scala 4
2 files changed, 13 insertions, 4 deletions
diff --git a/scalding/src/main/scala/sandcrawler/Scorable.scala b/scalding/src/main/scala/sandcrawler/Scorable.scala
index 736c175..ce4fdca 100644
--- a/scalding/src/main/scala/sandcrawler/Scorable.scala
+++ b/scalding/src/main/scala/sandcrawler/Scorable.scala
@@ -45,12 +45,17 @@ object Scorable {
}
def titleToSlug(title : String) : String = {
- val slug = StringUtilities.removePunctuation(
- StringUtilities.removeAccents(title).split(":")(0).toLowerCase())
- if (slug.isEmpty) {
+ if (title == null || title.isEmpty) {
NoSlug
} else {
- slug
+ val unaccented = StringUtilities.removeAccents(title)
+ // Remove punctuation after splitting on colon.
+ val slug = StringUtilities.removePunctuation((unaccented.split(":")(0).toLowerCase()))
+ if (slug.isEmpty || slug == null) {
+ NoSlug
+ } else {
+ slug
+ }
}
}
diff --git a/scalding/src/test/scala/sandcrawler/ScorableTest.scala b/scalding/src/test/scala/sandcrawler/ScorableTest.scala
index 713a7e5..40801a0 100644
--- a/scalding/src/test/scala/sandcrawler/ScorableTest.scala
+++ b/scalding/src/test/scala/sandcrawler/ScorableTest.scala
@@ -71,6 +71,10 @@ class ScorableTest extends FlatSpec with Matchers {
Scorable.titleToSlug("") shouldBe Scorable.NoSlug
}
+ it should "return Scorable.NoSlug if given null" in {
+ Scorable.titleToSlug(null) shouldBe Scorable.NoSlug
+ }
+
"titleToSlug()" should "strip punctuation" in {
Scorable.titleToSlug("HELLO!:the:re") shouldBe "hello"
Scorable.titleToSlug("a:b:c") shouldBe "a"