diff options
author | Ellen Spertus <ellen.spertus@gmail.com> | 2018-08-22 13:17:49 -0700 |
---|---|---|
committer | Ellen Spertus <ellen.spertus@gmail.com> | 2018-08-22 13:17:49 -0700 |
commit | 9fb9a35b15ed9b553ad4f938dc4e636e5d91ac33 (patch) | |
tree | 16680eb1bed750fac2980bda70ced41df28d98c0 /scalding/src/test | |
parent | 9cc24a40509f62b789ff1fa97913bef32589a288 (diff) | |
download | sandcrawler-9fb9a35b15ed9b553ad4f938dc4e636e5d91ac33.tar.gz sandcrawler-9fb9a35b15ed9b553ad4f938dc4e636e5d91ac33.zip |
Added title-length filtering to CrossrefScorable.
Diffstat (limited to 'scalding/src/test')
-rw-r--r-- | scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala | 36 |
1 files changed, 34 insertions, 2 deletions
diff --git a/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala b/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala index 1789d1a..3d18a21 100644 --- a/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala +++ b/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala @@ -66,7 +66,10 @@ class CrossrefScorableTest extends FlatSpec with Matchers { } """ // scalastyle:on - val CrossrefStringWithTitle = CrossrefString.replace("<<TITLE>>", "Some Title") + val CrossrefStringWithGoodTitle = CrossrefString.replace("<<TITLE>>", "Some Title") + val CrossrefStringWithMaximumTitle = CrossrefString.replace("<<TITLE>>", "T" * Scorable.MaxTitleLength) + val CrossrefStringWithExcessiveTitle = CrossrefString.replace("<<TITLE>>", "T" * Scorable.MaxTitleLength + "0") + val CrossrefStringWithNullTitle = CrossrefString.replace("\"<<TITLE>>\"", "null") val CrossrefStringWithEmptyTitle = CrossrefString.replace("<<TITLE>>", "") val CrossrefStringWithoutTitle = CrossrefString.replace("title", "nottitle") val MalformedCrossrefString = CrossrefString.replace("}", "") @@ -82,13 +85,18 @@ class CrossrefScorableTest extends FlatSpec with Matchers { result.slug shouldBe Scorable.NoSlug } + it should "handle null title" in { + val result = CrossrefScorable.jsonToMapFeatures(CrossrefStringWithNullTitle) + result.slug shouldBe Scorable.NoSlug + } + it should "handle empty title" in { val result = CrossrefScorable.jsonToMapFeatures(CrossrefStringWithEmptyTitle) result.slug shouldBe Scorable.NoSlug } it should "handle valid input" in { - val result = CrossrefScorable.jsonToMapFeatures(CrossrefStringWithTitle) + val result = CrossrefScorable.jsonToMapFeatures(CrossrefStringWithGoodTitle) result.slug shouldBe "sometitle" Scorable.jsonToMap(result.json) match { case None => fail() @@ -97,4 +105,28 @@ class CrossrefScorableTest extends FlatSpec with Matchers { } } } + + "CrossrefScorable.keepRecord()" should "return true for valid JSON with title" in { + CrossrefScorable.keepRecord(CrossrefStringWithGoodTitle) shouldBe true + } + + it should "return true for valid JSON with a title of maximum permitted length" in { + CrossrefScorable.keepRecord(CrossrefStringWithMaximumTitle) shouldBe true + } + + it should "return false for valid JSON with excessively long title" in { + CrossrefScorable.keepRecord(CrossrefStringWithExcessiveTitle) shouldBe false + } + + it should "return false for valid JSON with null title" in { + CrossrefScorable.keepRecord(CrossrefStringWithNullTitle) shouldBe false + } + + it should "return false for valid JSON with no title" in { + CrossrefScorable.keepRecord(CrossrefStringWithoutTitle) shouldBe false + } + + it should "return false for invalid JSON" in { + CrossrefScorable.keepRecord(CrossrefStringWithoutTitle) shouldBe false + } } |