diff options
Diffstat (limited to 'scalding/src/test')
3 files changed, 16 insertions, 14 deletions
diff --git a/scalding/src/test/scala/sandcrawler/HBaseStatusCountTest.scala b/scalding/src/test/scala/sandcrawler/HBaseStatusCountTest.scala index 0da0b9c..3291670 100644 --- a/scalding/src/test/scala/sandcrawler/HBaseStatusCountTest.scala +++ b/scalding/src/test/scala/sandcrawler/HBaseStatusCountTest.scala @@ -29,17 +29,17 @@ class HBaseStatusCountTest extends FunSpec with TupleConversions { val statusType1Bytes = Bytes.toBytes(statusType1) val statusType2Bytes = Bytes.toBytes(statusType2) - val sampleData : List[List[Array[Byte]]] = List( - // TODO(bnewbold): now to express a null (empty value) in this list? - List(Bytes.toBytes("sha1:K2DKSSVTXWPRMFDTWSTCQW3RVWRIOV3Q"), statusType1Bytes), - List(Bytes.toBytes("sha1:C3YNNEGH5WAG5ZAAXWAEBNXJWT6CZ3WU"), statusType1Bytes), - List(Bytes.toBytes("sha1:SDKUVHC3YNNEGH5WAG5ZAAXWAEBNX4WT"), statusType2Bytes), - List(Bytes.toBytes("sha1:35985C3YNNEGH5WAG5ZAAXWAEBNXJW56"), statusType2Bytes), - List(Bytes.toBytes("sha1:885C3YNNEGH5WAG5ZAAXWA8BNXJWT6CZ"), statusType2Bytes), - List(Bytes.toBytes("sha1:00904C3YNNEGH5WAG5ZA9XWAEBNXJWT6"), statusType2Bytes), - List(Bytes.toBytes("sha1:249C3YNNEGH5WAG5ZAAXWAEBNXJWT6CZ"), statusType1Bytes), - List(Bytes.toBytes("sha1:095893C3YNNEGH5WAG5ZAAXWAEBNXJWT"), statusType2Bytes) - ) + // TODO(bnewbold): now to express a null (empty value) in this list? + val sampleData : List[List[Array[Byte]]] = List( + ("sha1:K2DKSSVTXWPRMFDTWSTCQW3RVWRIOV3Q", statusType1Bytes), + ("sha1:C3YNNEGH5WAG5ZAAXWAEBNXJWT6CZ3WU", statusType1Bytes), + ("sha1:SDKUVHC3YNNEGH5WAG5ZAAXWAEBNX4WT", statusType2Bytes), + ("sha1:35985C3YNNEGH5WAG5ZAAXWAEBNXJW56", statusType2Bytes), + ("sha1:885C3YNNEGH5WAG5ZAAXWA8BNXJWT6CZ", statusType2Bytes), + ("sha1:00904C3YNNEGH5WAG5ZA9XWAEBNXJWT6", statusType2Bytes), + ("sha1:249C3YNNEGH5WAG5ZAAXWAEBNXJWT6CZ", statusType1Bytes), + ("sha1:095893C3YNNEGH5WAG5ZAAXWAEBNXJWT", statusType2Bytes)) + .map(pair => List(Bytes.toBytes(pair._1), pair._2)) val statusType1Count = sampleData.count(lst => lst(1) == statusType1Bytes) val statusType2Count = sampleData.count(lst => lst(1) == statusType2Bytes) diff --git a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala index a9a90ec..5a22ef8 100644 --- a/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala +++ b/scalding/src/test/scala/sandcrawler/ScorableFeaturesTest.scala @@ -26,12 +26,12 @@ class ScorableFeaturesTest extends FlatSpec with Matchers { } private def titleToSlug(s : String) : String = { - new ScorableFeatures(title = s).toSlug + ScorableFeatures.create(title = s).toSlug } "toMapFeatures()" should "work with gnarly inputs" in { - new ScorableFeatures(title = null).toMapFeatures - new ScorableFeatures(title = "something", doi = null, sha1 = null, year = 123).toMapFeatures + ScorableFeatures.create(title = null).toMapFeatures + ScorableFeatures.create(title = "something", doi = null, sha1 = null, year = 123).toMapFeatures } "mapToSlug()" should "extract the parts of titles before a colon" in { diff --git a/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala b/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala index 5516869..00e4659 100644 --- a/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala +++ b/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala @@ -161,10 +161,12 @@ class ScoreJobTest extends FlatSpec with Matchers { .map { case (sha, json, status) => List(Bytes.toBytes(sha), Bytes.toBytes(json), Bytes.toBytes(status)) } .map { l => new Tuple(l.map(s => {new ImmutableBytesWritable(s)}):_*) } + // scalastyle:off null // Add example of lines without GROBID data // scalastyle:off null val SampleData = SampleDataHead :+ new Tuple( new ImmutableBytesWritable(Bytes.toBytes("sha1:35985C3YNNEGH5WAG5ZAA88888888888")), null, null) + // scalastyle:on null JobTest("sandcrawler.ScoreJob") .arg("test", "") |