From 4ca3d5088520d219eccbc5921928c5b67d8e998a Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Wed, 15 Aug 2018 20:23:12 -0700
Subject: scorable: test for more punctuation removal

---
 scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'scalding/src')

diff --git a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
index 5ffc305..fd01c91 100644
--- a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
+++ b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
@@ -34,10 +34,18 @@ class ScorableFeaturesTest extends FlatSpec with Matchers {
     titleToSlug("a:b:c") shouldBe "a"
     titleToSlug(
       "If you're happy and you know it, clap your hands!") shouldBe "ifyourehappyandyouknowitclapyourhands"
+    titleToSlug(":;\"\'") shouldBe Scorable.NoSlug
+  }
+
+  it should "strip special characters" in {
+    titleToSlug(":;!',|\"\'`.#?!-@*/\\=+~%$^{}()[]<>-_…") shouldBe Scorable.NoSlug
+    // TODO: titleToSlug("©™₨№") shouldBe Scorable.NoSlug
+    // TODO: titleToSlug("πµΣσ") shouldBe Scorable.NoSlug
   }
 
   it should "remove whitespace" in {
     titleToSlug("foo bar : baz ::") shouldBe "foobar"
     titleToSlug("\na\t:b:c") shouldBe "a"
+    titleToSlug("\n \t \r ") shouldBe Scorable.NoSlug
   }
 }
-- 
cgit v1.2.3