From f73796fe22d96e1d5ad559ffcb8dfe8fc10b3c20 Mon Sep 17 00:00:00 2001
From: Ellen Spertus
Date: Mon, 20 Aug 2018 15:16:43 -0700
Subject: Reads blacklist from file.

---
 .../scala/sandcrawler/ScorableFeaturesTest.scala | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala')

diff --git a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
index 80d92aa..a9a90ec 100644
--- a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
+++ b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
@@ -1,9 +1,30 @@
 package sandcrawler
 
+import java.io.InputStream
+
+import scala.io.Source
+
 import org.scalatest._
 
 // scalastyle:off null
 class ScorableFeaturesTest extends FlatSpec with Matchers {
+
+  // TODO: Remove this when we're convinced that our file-reading code
+  // works. (I'm already convinced. --Ellen)
+  "read slugs" should "work" in {
+    val SlugBlacklist = Set( "abbreviations", "abstract", "acknowledgements",
+      "article", "authorreply", "authorsreply", "bookreview", "bookreviews",
+      "casereport", "commentary", "commentaryon", "commenton", "commentto",
+      "contents", "correspondence", "dedication", "editorialadvisoryboard",
+      "focus", "hypothesis", "inbrief", "introduction", "introductiontotheissue",
+      "lettertotheeditor", "listofabbreviations", "note", "overview", "preface",
+      "references", "results", "review", "reviewarticle", "summary", "title",
+      "name")
+
+    ScorableFeatures.SlugBlacklist.size shouldBe SlugBlacklist.size
+    for (s <- ScorableFeatures.SlugBlacklist) SlugBlacklist should contain (s)
+  }
+
   private def titleToSlug(s : String) : String = {
     new ScorableFeatures(title = s).toSlug
   }
-- 
cgit v1.2.3