diff options
Diffstat (limited to 'scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala')
-rw-r--r-- | scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala | 13 |
1 files changed, 9 insertions, 4 deletions
diff --git a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
index 80d92aa..d742384 100644
--- a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
+++ b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
@@ -1,16 +1,21 @@
 package sandcrawler
 
+import java.io.InputStream
+
+import scala.io.Source
+
 import org.scalatest._
 
 // scalastyle:off null
 class ScorableFeaturesTest extends FlatSpec with Matchers {
+
   private def titleToSlug(s : String) : String = {
-    new ScorableFeatures(title = s).toSlug
+    ScorableFeatures.create(title = s).toSlug
   }
 
   "toMapFeatures()" should "work with gnarly inputs" in {
-    new ScorableFeatures(title = null).toMapFeatures
-    new ScorableFeatures(title = "something", doi = null, sha1 = null, year = 123).toMapFeatures
+    ScorableFeatures.create(title = null).toMapFeatures
+    ScorableFeatures.create(title = "something", doi = null, sha1 = null, year = 123).toMapFeatures
   }
 
   "mapToSlug()" should "extract the parts of titles before a colon" in {
@@ -44,7 +49,7 @@ class ScorableFeaturesTest extends FlatSpec with Matchers {
   }
 
   it should "strip special characters" in {
-    titleToSlug(":;!',|\"\'`.#?!-@*/\\=+~%$^{}()[]<>-_") shouldBe Scorable.NoSlug
+    titleToSlug(":;!',|\"\'`.#?!-@*/\\=+~%$^{}()[]<>-_’·“”‘’“”«»「」") shouldBe Scorable.NoSlug
     // TODO: titleToSlug("©™₨№…") shouldBe Scorable.NoSlug
     // TODO: titleToSlug("πµΣσ") shouldBe Scorable.NoSlug
   }