about | summary | refs | log | tree | commit | diff | stats
path: root/scalding
diff options
context:
space:
mode:
author: Ellen Spertus <ellen.spertus@gmail.com> 2018-08-22 12:56:06 -0700
committer: Ellen Spertus <ellen.spertus@gmail.com> 2018-08-22 12:56:06 -0700
commit: 9cc24a40509f62b789ff1fa97913bef32589a288 (patch)
tree: a41465ab235b6a896844062dbd2971ccd45c5fc4 /scalding
parent: f54e47ace6dd041e78e10ee25573c6ad3de808eb (diff)
download: sandcrawler-9cc24a40509f62b789ff1fa97913bef32589a288.tar.gz
sandcrawler-9cc24a40509f62b789ff1fa97913bef32589a288.zip
Added more tests of GrobidScorable.keepRecord
Diffstat (limited to 'scalding')
-rw-r--r-- scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala 5
1 file changed, 5 insertions, 0 deletions
diff --git a/scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala b/scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala
index 620998e..6c45cc5 100644
--- a/scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala
+++ b/scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala
@@ -58,6 +58,7 @@ class GrobidScorableTest extends FlatSpec with Matchers {
}
"""
val GrobidStringWithGoodTitle = GrobidString.replace("<<TITLE>>", "Dummy Example File")
+ val GrobidStringWithMaximumTitle = GrobidString.replace("<<TITLE>>", "T" * Scorable.MaxTitleLength)
val GrobidStringWithExcessiveTitle = GrobidString.replace("<<TITLE>>", "T" * Scorable.MaxTitleLength + "0")
val GrobidStringWithNullTitle = GrobidString.replace("\"<<TITLE>>\"", "null")
val GrobidStringWithoutTitle = GrobidString.replace("title", "nottitle")
@@ -97,6 +98,10 @@ class GrobidScorableTest extends FlatSpec with Matchers {
GrobidScorable.keepRecord(GrobidStringWithGoodTitle) shouldBe true
}
+ it should "return true for valid JSON with a title of maximum permitted length" in {
+ GrobidScorable.keepRecord(GrobidStringWithMaximumTitle) shouldBe true
+ }
+
it should "return false for valid JSON with excessively long title" in {
GrobidScorable.keepRecord(GrobidStringWithExcessiveTitle) shouldBe false
}