about summary refs log tree commit diff stats
path: root/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
diff options
context:
space:
mode:
Diffstat (limited to 'scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala')
-rw-r--r-- scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala 13
1 files changed, 9 insertions, 4 deletions
diff --git a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
index 80d92aa..d742384 100644
--- a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
+++ b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
@@ -1,16 +1,21 @@
package sandcrawler
+import java.io.InputStream
+
+import scala.io.Source
+
import org.scalatest._
// scalastyle:off null
class ScorableFeaturesTest extends FlatSpec with Matchers {
+
private def titleToSlug(s : String) : String = {
- new ScorableFeatures(title = s).toSlug
+ ScorableFeatures.create(title = s).toSlug
}
"toMapFeatures()" should "work with gnarly inputs" in {
- new ScorableFeatures(title = null).toMapFeatures
- new ScorableFeatures(title = "something", doi = null, sha1 = null, year = 123).toMapFeatures
+ ScorableFeatures.create(title = null).toMapFeatures
+ ScorableFeatures.create(title = "something", doi = null, sha1 = null, year = 123).toMapFeatures
}
"mapToSlug()" should "extract the parts of titles before a colon" in {
@@ -44,7 +49,7 @@ class ScorableFeaturesTest extends FlatSpec with Matchers {
}
it should "strip special characters" in {
- titleToSlug(":;!',|\"\'`.#?!-@*/\\=+~%$^{}()[]<>-_") shouldBe Scorable.NoSlug
+ titleToSlug(":;!',|\"\'`.#?!-@*/\\=+~%$^{}()[]<>-_’·“”‘’“”«»「」") shouldBe Scorable.NoSlug
// TODO: titleToSlug("©™₨№…") shouldBe Scorable.NoSlug
// TODO: titleToSlug("πµΣσ") shouldBe Scorable.NoSlug
}