aboutsummaryrefslogtreecommitdiffstats
path: root/scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala
diff options
context:
space:
mode:
Diffstat (limited to 'scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala')
-rw-r--r--scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala31
1 files changed, 29 insertions, 2 deletions
diff --git a/scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala b/scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala
index 661824b..620998e 100644
--- a/scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala
+++ b/scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala
@@ -57,7 +57,9 @@ class GrobidScorableTest extends FlatSpec with Matchers {
"annex": null
}
"""
- val GrobidStringWithTitle = GrobidString.replace("<<TITLE>>", "Dummy Example File")
+ val GrobidStringWithGoodTitle = GrobidString.replace("<<TITLE>>", "Dummy Example File")
+ val GrobidStringWithExcessiveTitle = GrobidString.replace("<<TITLE>>", "T" * Scorable.MaxTitleLength + "0")
+ val GrobidStringWithNullTitle = GrobidString.replace("\"<<TITLE>>\"", "null")
val GrobidStringWithoutTitle = GrobidString.replace("title", "nottitle")
val MalformedGrobidString = GrobidString.replace("}", "")
val Key = "Dummy Key"
@@ -69,13 +71,18 @@ class GrobidScorableTest extends FlatSpec with Matchers {
result.slug shouldBe Scorable.NoSlug
}
+ it should "handle null title" in {
+ val result = GrobidScorable.jsonToMapFeatures(Key, GrobidStringWithNullTitle)
+ result.slug shouldBe Scorable.NoSlug
+ }
+
it should "handle missing title" in {
val result = GrobidScorable.jsonToMapFeatures(Key, GrobidStringWithoutTitle)
result.slug shouldBe Scorable.NoSlug
}
it should "handle valid input" in {
- val result = GrobidScorable.jsonToMapFeatures(Key, GrobidStringWithTitle)
+ val result = GrobidScorable.jsonToMapFeatures(Key, GrobidStringWithGoodTitle)
result.slug shouldBe "dummyexamplefile"
Scorable.jsonToMap(result.json) match {
case None => fail()
@@ -85,4 +92,24 @@ class GrobidScorableTest extends FlatSpec with Matchers {
}
}
}
+
+ "GrobidScorable.keepRecord()" should "return true for valid JSON with title" in {
+ GrobidScorable.keepRecord(GrobidStringWithGoodTitle) shouldBe true
+ }
+
+ it should "return false for valid JSON with excessively long title" in {
+ GrobidScorable.keepRecord(GrobidStringWithExcessiveTitle) shouldBe false
+ }
+
+ it should "return false for valid JSON with null title" in {
+ GrobidScorable.keepRecord(GrobidStringWithNullTitle) shouldBe false
+ }
+
+ it should "return false for valid JSON with no title" in {
+ GrobidScorable.keepRecord(GrobidStringWithoutTitle) shouldBe false
+ }
+
+ it should "return false for invalid JSON" in {
+ GrobidScorable.keepRecord(GrobidStringWithoutTitle) shouldBe false
+ }
}