aboutsummaryrefslogtreecommitdiffstats
path: root/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2018-08-15 22:43:33 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2018-08-15 22:43:33 -0700
commit: 96ea0ddd06ee4a7c11c7d5def976749ab3675878 (patch)
tree: 279382cc39355475c8a93f5ca3efcfb05b26fa57 /scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
parent: 2277c2f793a007fa3a347af23fca35f4a3eafeef (diff)
download: sandcrawler-96ea0ddd06ee4a7c11c7d5def976749ab3675878.tar.gz
          sandcrawler-96ea0ddd06ee4a7c11c7d5def976749ab3675878.zip
change slugification behavior to not split on colon
Diffstat (limited to 'scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala')
-rw-r--r-- scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala 14
1 files changed, 7 insertions, 7 deletions
diff --git a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
index 0acf0b8..80d92aa 100644
--- a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
+++ b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
@@ -14,7 +14,7 @@ class ScorableFeaturesTest extends FlatSpec with Matchers {
}
"mapToSlug()" should "extract the parts of titles before a colon" in {
- titleToSlug("HELLO:there") shouldBe "hello"
+ titleToSlug("HELLO:there") shouldBe "hellothere"
}
it should "extract an entire colon-less string" in {
@@ -30,8 +30,8 @@ class ScorableFeaturesTest extends FlatSpec with Matchers {
}
it should "strip punctuation" in {
- titleToSlug("HELLO!:the:re") shouldBe "hello"
- titleToSlug("a:b:c") shouldBe "a"
+ titleToSlug("HELLO!:the:re") shouldBe "hellothere"
+ titleToSlug("a:b:c") shouldBe "abc"
titleToSlug(
"If you're happy and you know it, clap your hands!") shouldBe "ifyourehappyandyouknowitclapyourhands"
titleToSlug(":;\"\'") shouldBe Scorable.NoSlug
@@ -44,14 +44,14 @@ class ScorableFeaturesTest extends FlatSpec with Matchers {
}
it should "strip special characters" in {
- titleToSlug(":;!',|\"\'`.#?!-@*/\\=+~%$^{}()[]<>-_…") shouldBe Scorable.NoSlug
- // TODO: titleToSlug("©™₨№") shouldBe Scorable.NoSlug
+ titleToSlug(":;!',|\"\'`.#?!-@*/\\=+~%$^{}()[]<>-_") shouldBe Scorable.NoSlug
+ // TODO: titleToSlug("©™₨№…") shouldBe Scorable.NoSlug
// TODO: titleToSlug("πµΣσ") shouldBe Scorable.NoSlug
}
it should "remove whitespace" in {
- titleToSlug("foo bar : baz ::") shouldBe "foobar"
- titleToSlug("\na\t:b:c") shouldBe "a"
+ titleToSlug("foo bar : baz ::") shouldBe "foobarbaz"
+ titleToSlug("\na\t:b:c") shouldBe "abc"
titleToSlug("\n \t \r ") shouldBe Scorable.NoSlug
}
}