From 70350899dda973cdf7a5cfdd941ae80319254587 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 15 Aug 2018 22:05:59 -0700 Subject: handle null status_code lines --- scalding/src/test/scala/sandcrawler/ScoreJobTest.scala | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'scalding/src/test/scala') diff --git a/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala b/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala index f68ee1d..54ae801 100644 --- a/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala +++ b/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala @@ -155,10 +155,15 @@ class ScoreJobTest extends FlatSpec with Matchers { val Bad : Long = 400 val StatusCodes = List(Ok, Ok, Ok, Bad, Ok, Bad) - val SampleData : List[List[Array[Byte]]] = (Sha1Strings, JsonStrings, StatusCodes) + val SampleDataHead : List[Tuple] = (Sha1Strings, JsonStrings, StatusCodes) .zipped .toList .map { case (sha, json, status) => List(Bytes.toBytes(sha), Bytes.toBytes(json), Bytes.toBytes(status)) } + .map { l => new Tuple(l.map(s => {new ImmutableBytesWritable(s)}):_*) } + + // Add example of lines without GROBID data + val SampleData = SampleDataHead :+ new Tuple( + new ImmutableBytesWritable(Bytes.toBytes("sha1:35985C3YNNEGH5WAG5ZAA88888888888")), null, null) JobTest("sandcrawler.ScoreJob") .arg("test", "") @@ -168,8 +173,7 @@ class ScoreJobTest extends FlatSpec with Matchers { .arg("zookeeper-hosts", testHost) .arg("crossref-input", input) .arg("debug", "true") - .source[Tuple](GrobidScorable.getHBaseSource(testTable, testHost), - SampleData.map(l => new Tuple(l.map(s => {new ImmutableBytesWritable(s)}):_*))) + .source[Tuple](GrobidScorable.getHBaseSource(testTable, testHost), SampleData) .source(TextLine(input), List( 0 -> CrossrefStrings(0), 1 -> CrossrefStrings(1), -- cgit v1.2.3