about | summary | refs | log | tree | commit | diff | stats
path: root/scalding/src/test
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2018-08-15 22:05:59 -0700
committer Bryan Newbold <bnewbold@archive.org> 2018-08-15 22:05:59 -0700
commit 70350899dda973cdf7a5cfdd941ae80319254587 (patch)
tree 19e444d4037da3124ed9c09ffeb5e8ac1ff6769a /scalding/src/test
parent c3c2760fb388059a9942a61965b79c42bc03f11b (diff)
download sandcrawler-70350899dda973cdf7a5cfdd941ae80319254587.tar.gz
sandcrawler-70350899dda973cdf7a5cfdd941ae80319254587.zip
handle null status_code lines
Diffstat (limited to 'scalding/src/test')
-rw-r--r-- scalding/src/test/scala/sandcrawler/ScoreJobTest.scala 10
1 files changed, 7 insertions, 3 deletions
diff --git a/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala b/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala
index f68ee1d..54ae801 100644
--- a/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala
+++ b/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala
@@ -155,10 +155,15 @@ class ScoreJobTest extends FlatSpec with Matchers {
val Bad : Long = 400
val StatusCodes = List(Ok, Ok, Ok, Bad, Ok, Bad)
- val SampleData : List[List[Array[Byte]]] = (Sha1Strings, JsonStrings, StatusCodes)
+ val SampleDataHead : List[Tuple] = (Sha1Strings, JsonStrings, StatusCodes)
.zipped
.toList
.map { case (sha, json, status) => List(Bytes.toBytes(sha), Bytes.toBytes(json), Bytes.toBytes(status)) }
+ .map { l => new Tuple(l.map(s => {new ImmutableBytesWritable(s)}):_*) }
+
+ // Add example of lines without GROBID data
+ val SampleData = SampleDataHead :+ new Tuple(
+ new ImmutableBytesWritable(Bytes.toBytes("sha1:35985C3YNNEGH5WAG5ZAA88888888888")), null, null)
JobTest("sandcrawler.ScoreJob")
.arg("test", "")
@@ -168,8 +173,7 @@ class ScoreJobTest extends FlatSpec with Matchers {
.arg("zookeeper-hosts", testHost)
.arg("crossref-input", input)
.arg("debug", "true")
- .source[Tuple](GrobidScorable.getHBaseSource(testTable, testHost),
- SampleData.map(l => new Tuple(l.map(s => {new ImmutableBytesWritable(s)}):_*)))
+ .source[Tuple](GrobidScorable.getHBaseSource(testTable, testHost), SampleData)
.source(TextLine(input), List(
0 -> CrossrefStrings(0),
1 -> CrossrefStrings(1),