diff options
author | Bryan Newbold <bnewbold@archive.org> | 2018-08-15 20:23:12 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2018-08-15 20:23:12 -0700 |
commit | 4ca3d5088520d219eccbc5921928c5b67d8e998a (patch) | |
tree | ff783658cd20b7e59a7e21f164a4acb8525f8b9e /scalding/src/test/scala | |
parent | a3bf1d47fac53b818a8118020adced6c54be7cba (diff) | |
download | sandcrawler-4ca3d5088520d219eccbc5921928c5b67d8e998a.tar.gz sandcrawler-4ca3d5088520d219eccbc5921928c5b67d8e998a.zip |
scorable: test for more punctuation removal
Diffstat (limited to 'scalding/src/test/scala')
-rw-r--r-- | scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala | 8 |
1 file changed, 8 insertions, 0 deletions
```diff
diff --git a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
index 5ffc305..fd01c91 100644
--- a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
+++ b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
@@ -34,10 +34,18 @@ class ScorableFeaturesTest extends FlatSpec with Matchers {
     titleToSlug("a:b:c") shouldBe "a"
     titleToSlug(
       "If you're happy and you know it, clap your hands!") shouldBe "ifyourehappyandyouknowitclapyourhands"
+    titleToSlug(":;\"\'") shouldBe Scorable.NoSlug
+  }
+
+  it should "strip special characters" in {
+    titleToSlug(":;!',|\"\'`.#?!-@*/\\=+~%$^{}()[]<>-_…") shouldBe Scorable.NoSlug
+    // TODO: titleToSlug("©™₨№") shouldBe Scorable.NoSlug
+    // TODO: titleToSlug("πµΣσ") shouldBe Scorable.NoSlug
   }
 
   it should "remove whitespace" in {
     titleToSlug("foo bar : baz ::") shouldBe "foobar"
     titleToSlug("\na\t:b:c") shouldBe "a"
+    titleToSlug("\n \t \r ") shouldBe Scorable.NoSlug
   }
 }
```