about summary refs log tree commit diff stats
path: root/scalding
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2018-08-15 20:23:12 -0700
committer Bryan Newbold <bnewbold@archive.org> 2018-08-15 20:23:12 -0700
commit 4ca3d5088520d219eccbc5921928c5b67d8e998a (patch)
tree ff783658cd20b7e59a7e21f164a4acb8525f8b9e /scalding
parent a3bf1d47fac53b818a8118020adced6c54be7cba (diff)
download sandcrawler-4ca3d5088520d219eccbc5921928c5b67d8e998a.tar.gz
sandcrawler-4ca3d5088520d219eccbc5921928c5b67d8e998a.zip
scorable: test for more punctuation removal
Diffstat (limited to 'scalding')
-rw-r--r-- scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala 8
1 files changed, 8 insertions, 0 deletions
diff --git a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
index 5ffc305..fd01c91 100644
--- a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
+++ b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
@@ -34,10 +34,18 @@ class ScorableFeaturesTest extends FlatSpec with Matchers {
titleToSlug("a:b:c") shouldBe "a"
titleToSlug(
"If you're happy and you know it, clap your hands!") shouldBe "ifyourehappyandyouknowitclapyourhands"
+ titleToSlug(":;\"\'") shouldBe Scorable.NoSlug
+ }
+
+ it should "strip special characters" in {
+ titleToSlug(":;!',|\"\'`.#?!-@*/\\=+~%$^{}()[]<>-_…") shouldBe Scorable.NoSlug
+ // TODO: titleToSlug("©™₨№") shouldBe Scorable.NoSlug
+ // TODO: titleToSlug("πµΣσ") shouldBe Scorable.NoSlug
}
it should "remove whitespace" in {
titleToSlug("foo bar : baz ::") shouldBe "foobar"
titleToSlug("\na\t:b:c") shouldBe "a"
+ titleToSlug("\n \t \r ") shouldBe Scorable.NoSlug
}
}