about | summary | refs | log | tree | commit | diff | stats
path: root/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
diff options
context:
space:
mode:
Diffstat (limited to 'scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala')
-rw-r--r-- scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala 45
1 files changed, 21 insertions, 24 deletions
diff --git a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
index 450c169..3f6b87c 100644
--- a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
+++ b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
@@ -8,60 +8,57 @@ import org.scalatest._
// scalastyle:off null
class ScorableFeaturesTest extends FlatSpec with Matchers {
-
- private def titleToSlug(s : String) : String = {
- ScorableFeatures.create(title = s).toSlug
- }
-
"toMapFeatures()" should "work with gnarly inputs" in {
ScorableFeatures.create(title = null).toMapFeatures
ScorableFeatures.create(title = "something", doi = null, sha1 = null, year = 123).toMapFeatures
}
+ private def titleToSlug(s : String) : Option[String] = ScorableFeatures.create(title = s).toSlug
+
"mapToSlug()" should "extract the parts of titles before a colon" in {
- titleToSlug("HELLO:there") shouldBe "hellothere"
+ titleToSlug("HELLO:there") shouldBe (Some("hellothere"))
}
it should "extract an entire colon-less string" in {
- titleToSlug("hello THERE") shouldBe "hellothere"
+ titleToSlug("hello THERE") shouldBe (Some("hellothere"))
}
it should "return Scorable.NoSlug if given empty string" in {
- titleToSlug("") shouldBe Scorable.NoSlug
+ titleToSlug("") shouldBe (None)
}
it should "return Scorable.NoSlug if given null" in {
- titleToSlug(null) shouldBe Scorable.NoSlug
+ titleToSlug(null) shouldBe (None)
}
it should "strip punctuation" in {
- titleToSlug("HELLO!:the:re") shouldBe "hellothere"
- titleToSlug("a:b:cdefgh") shouldBe "abcdefgh"
+ titleToSlug("HELLO!:the:re") shouldBe Some("hellothere")
+ titleToSlug("a:b:cdefgh") shouldBe Some("abcdefgh")
titleToSlug(
- "If you're happy and you know it, clap your hands!") shouldBe "ifyourehappyandyouknowitclapyourhands"
- titleToSlug(":;\"\'") shouldBe Scorable.NoSlug
+ "If you're happy and you know it, clap your hands!") shouldBe Some("ifyourehappyandyouknowitclapyourhands")
+ titleToSlug(":;\"\'") shouldBe (None)
}
it should "filter stub titles" in {
- titleToSlug("abstract") shouldBe Scorable.NoSlug
- titleToSlug("title!") shouldBe Scorable.NoSlug
- titleToSlug("a real title which is not on blacklist") shouldBe "arealtitlewhichisnotonblacklist"
+ titleToSlug("abstract") shouldBe (None)
+ titleToSlug("title!") shouldBe (None)
+ titleToSlug("a real title which is not on blacklist") shouldBe Some("arealtitlewhichisnotonblacklist")
}
it should "strip special characters" in {
- titleToSlug(":;!',|\"\'`.#?!-@*/\\=+~%$^{}()[]<>-_’·“”‘’“”«»「」¿–±§ʿ") shouldBe Scorable.NoSlug
- // TODO: titleToSlug("©™₨№…") shouldBe Scorable.NoSlug
- // TODO: titleToSlug("πµΣσ") shouldBe Scorable.NoSlug
+ titleToSlug(":;!',|\"\'`.#?!-@*/\\=+~%$^{}()[]<>-_’·“”‘’“”«»「」¿–±§ʿ") shouldBe (None)
+ // TODO: titleToSlug("©™₨№…") shouldBe (None)
+ // TODO: titleToSlug("πµΣσ") shouldBe (None)
}
it should "remove whitespace" in {
- titleToSlug("foo bar : baz ::") shouldBe "foobarbaz"
- titleToSlug("\na\t:b:cdefghi") shouldBe "abcdefghi"
- titleToSlug("\n \t \r ") shouldBe Scorable.NoSlug
+ titleToSlug("foo bar : baz ::") shouldBe Some("foobarbaz")
+ titleToSlug("\na\t:b:cdefghi") shouldBe Some("abcdefghi")
+ titleToSlug("\n \t \r ") shouldBe (None)
}
it should "skip very short slugs" in {
- titleToSlug("short") shouldBe Scorable.NoSlug
- titleToSlug("a longer, more in depth title") shouldBe "alongermoreindepthtitle"
+ titleToSlug("short") shouldBe (None)
+ titleToSlug("a longer, more in depth title") shouldBe Some("alongermoreindepthtitle")
}
}