aboutsummaryrefslogtreecommitdiffstats
path: root/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
diff options
context:
space:
mode:
Diffstat (limited to 'scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala')
-rw-r--r--scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala6
1 files changed, 6 insertions, 0 deletions
diff --git a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
index fd01c91..0acf0b8 100644
--- a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
+++ b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
@@ -37,6 +37,12 @@ class ScorableFeaturesTest extends FlatSpec with Matchers {
titleToSlug(":;\"\'") shouldBe Scorable.NoSlug
}
+ it should "filter stub titles" in {
+ titleToSlug("abstract") shouldBe Scorable.NoSlug
+ titleToSlug("title!") shouldBe Scorable.NoSlug
+ titleToSlug("a real title which is not on blacklist") shouldBe "arealtitlewhichisnotonblacklist"
+ }
+
it should "strip special characters" in {
titleToSlug(":;!',|\"\'`.#?!-@*/\\=+~%$^{}()[]<>-_…") shouldBe Scorable.NoSlug
// TODO: titleToSlug("©™₨№") shouldBe Scorable.NoSlug