diff options
author | Ellen Spertus <ellen.spertus@gmail.com> | 2018-08-12 19:12:32 -0700 |
---|---|---|
committer | Ellen Spertus <ellen.spertus@gmail.com> | 2018-08-12 19:12:32 -0700 |
commit | 5615428921a45ba6a2fb005b255a28dcbb83b13f (patch) | |
tree | 4c085076194ecdbad63c62194711d5baf657f60f /scalding/src/test | |
parent | 05c0213547f29842bbae6faaf77e983a364d4a2e (diff) | |
download | sandcrawler-5615428921a45ba6a2fb005b255a28dcbb83b13f.tar.gz sandcrawler-5615428921a45ba6a2fb005b255a28dcbb83b13f.zip |
Snapshot before changing Scorable to find bug.
Diffstat (limited to 'scalding/src/test')
3 files changed, 23 insertions, 17 deletions
diff --git a/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala b/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala index 1c35d66..dc6f347 100644 --- a/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala +++ b/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala @@ -66,23 +66,23 @@ class CrossrefScorableTest extends FlatSpec with Matchers { val MalformedCrossrefString = CrossrefString.replace("}", "") // Unit tests - "simplifyJson()" should "return None for bad JSON" in { - CrossrefScorable.simplifyJson("") shouldBe None - CrossrefScorable.simplifyJson(MalformedCrossrefString) shouldBe None + "CrossrefScorable.jsonToMapFeatures()" should "handle invalid JSON" in { + val result = CrossrefScorable.jsonToMapFeatures(MalformedCrossrefString) + result.slug shouldBe Scorable.NoSlug } - it should "return None for JSON lacking title" in { - CrossrefScorable.simplifyJson(CrossrefStringWithoutTitle) shouldBe None + it should "handle missing title" in { + val result = CrossrefScorable.jsonToMapFeatures(CrossrefStringWithoutTitle) + result.slug shouldBe Scorable.NoSlug } - it should "return appropriate result for valid JSON" in { - CrossrefScorable.simplifyJson(CrossrefStringWithTitle) match { - case None => fail("None unexpectedly returned by simplifyJson") + it should "handle valid input" in { + val result = CrossrefScorable.jsonToMapFeatures(CrossrefStringWithTitle) + result.slug shouldBe "dummyexamplefile" + Scorable.jsonToMap(result.json) match { + case None => fail() case Some(map) => { - Scorable.isScorableMap(map) shouldBe true - map.size shouldBe 1 - map.keys should contain ("title") - map("title") shouldBe "SomeTitle" + map("title").asInstanceOf[String] shouldBe "Dummy Example File" } } } diff --git a/scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala b/scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala index 3fcd856..4b958b9 100644 --- a/scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala +++ b/scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala @@ -77,6 +77,7 @@ class GrobidScorableTest extends FlatSpec with Matchers { Scorable.jsonToMap(result.json) match { case None => fail() case Some(map) => { + map should contain key "title" map("title").asInstanceOf[String] shouldBe "Dummy Example File" } } diff --git a/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala b/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala index 8acb454..8436817 100644 --- a/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala +++ b/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala @@ -149,11 +149,16 @@ class ScoreJobTest extends FlatSpec with Matchers { 2 -> CrossrefString.replace("<<TITLE>>", "Title 1: TNG 3").replace("<<DOI>>", "DOI-0.75"), 3 -> CrossrefString.replace("<<TITLE>>", "Title 2: Rebooted").replace("<<DOI>>", "DOI-1"))) .sink[(String, Int, String, String)](TypedTsv[(String, Int, String, String)](output)) { - // Grobid titles: - // "Title 1", "Title 2: TNG", "Title 3: The Sequel" - // crossref slugs: - // "Title 1: TNG", "Title 1: TNG 2", "Title 1: TNG 3", "Title 2 Rebooted" - // Join should have 3 "Title 1" slugs and 1 "Title 2" slug + // Grobid titles and slugs (in parentheses): + // Title 1 (title1) + // Title 2: TNG (title2) + // Title 3: The Sequel (title3) + // crossref titles and slugs (in parentheses): + // Title 1: TNG (title1) + // Title 1: TNG 2 (title1) + // Title 1: TNG 3 (title1) + // Title 2 Rebooted (title2rebooted) + // Join should have 3 "title1" slugs and 1 "title2" slug outputBuffer => "The pipeline" should "return a 4-element list" in { outputBuffer should have length 4 |