aboutsummaryrefslogtreecommitdiffstats
path: root/scalding/src/test
diff options
context:
space:
mode:
author Ellen Spertus <ellen.spertus@gmail.com> 2018-08-20 15:16:43 -0700
committer Ellen Spertus <ellen.spertus@gmail.com> 2018-08-20 15:16:43 -0700
commit f73796fe22d96e1d5ad559ffcb8dfe8fc10b3c20 (patch)
tree 454c1becaba40ec9ea6d5c8a349dee050d3ed03d /scalding/src/test
parent af0fa6edf3c21ac38a8ab4e0fb425e5471e6c3b6 (diff)
download sandcrawler-f73796fe22d96e1d5ad559ffcb8dfe8fc10b3c20.tar.gz
sandcrawler-f73796fe22d96e1d5ad559ffcb8dfe8fc10b3c20.zip
Reads blacklist from file.
Diffstat (limited to 'scalding/src/test')
-rw-r--r-- scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala 21
-rw-r--r-- scalding/src/test/scala/sandcrawler/ScoreJobTest.scala 1
2 files changed, 22 insertions, 0 deletions
diff --git a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
index 80d92aa..a9a90ec 100644
--- a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
+++ b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala
@@ -1,9 +1,30 @@
package sandcrawler
+import java.io.InputStream
+
+import scala.io.Source
+
import org.scalatest._
// scalastyle:off null
class ScorableFeaturesTest extends FlatSpec with Matchers {
+
+ // TODO: Remove this when we're convinced that our file-reading code
+ // works. (I'm already convinced. --Ellen)
+ "read slugs" should "work" in {
+ val SlugBlacklist = Set( "abbreviations", "abstract", "acknowledgements",
+ "article", "authorreply", "authorsreply", "bookreview", "bookreviews",
+ "casereport", "commentary", "commentaryon", "commenton", "commentto",
+ "contents", "correspondence", "dedication", "editorialadvisoryboard",
+ "focus", "hypothesis", "inbrief", "introduction", "introductiontotheissue",
+ "lettertotheeditor", "listofabbreviations", "note", "overview", "preface",
+ "references", "results", "review", "reviewarticle", "summary", "title",
+ "name")
+
+ ScorableFeatures.SlugBlacklist.size shouldBe SlugBlacklist.size
+ for (s <- ScorableFeatures.SlugBlacklist) SlugBlacklist should contain (s)
+ }
+
private def titleToSlug(s : String) : String = {
new ScorableFeatures(title = s).toSlug
}
diff --git a/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala b/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala
index f92ba31..5516869 100644
--- a/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala
+++ b/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala
@@ -162,6 +162,7 @@ class ScoreJobTest extends FlatSpec with Matchers {
.map { l => new Tuple(l.map(s => {new ImmutableBytesWritable(s)}):_*) }
// Add example of lines without GROBID data
+ // scalastyle:off null
val SampleData = SampleDataHead :+ new Tuple(
new ImmutableBytesWritable(Bytes.toBytes("sha1:35985C3YNNEGH5WAG5ZAA88888888888")), null, null)