diff options
| author | Bryan Newbold <bnewbold@archive.org> | 2018-08-15 20:23:12 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@archive.org> | 2018-08-15 20:23:12 -0700 | 
| commit | 4ca3d5088520d219eccbc5921928c5b67d8e998a (patch) | |
| tree | ff783658cd20b7e59a7e21f164a4acb8525f8b9e /scalding/src/test | |
| parent | a3bf1d47fac53b818a8118020adced6c54be7cba (diff) | |
| download | sandcrawler-4ca3d5088520d219eccbc5921928c5b67d8e998a.tar.gz sandcrawler-4ca3d5088520d219eccbc5921928c5b67d8e998a.zip | |
scorable: test for more punctuation removal
Diffstat (limited to 'scalding/src/test')
| -rw-r--r-- | scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala | 8 | 
1 file changed, 8 insertions, 0 deletions
```diff
diff --git a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
index 5ffc305..fd01c91 100644
--- a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
+++ b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
@@ -34,10 +34,18 @@ class ScorableFeaturesTest extends FlatSpec with Matchers {
     titleToSlug("a:b:c") shouldBe "a"
     titleToSlug(
       "If you're happy and you know it, clap your hands!") shouldBe "ifyourehappyandyouknowitclapyourhands"
+    titleToSlug(":;\"\'") shouldBe Scorable.NoSlug
+  }
+
+  it should "strip special characters" in {
+    titleToSlug(":;!',|\"\'`.#?!-@*/\\=+~%$^{}()[]<>-_…") shouldBe Scorable.NoSlug
+    // TODO: titleToSlug("©™₨№") shouldBe Scorable.NoSlug
+    // TODO: titleToSlug("πµΣσ") shouldBe Scorable.NoSlug
   }
 
   it should "remove whitespace" in {
     titleToSlug("foo bar : baz ::") shouldBe "foobar"
     titleToSlug("\na\t:b:c") shouldBe "a"
+    titleToSlug("\n \t \r  ") shouldBe Scorable.NoSlug
   }
 }
```
