diff options
Diffstat (limited to 'scalding/src/test/scala')
3 files changed, 23 insertions, 17 deletions
| diff --git a/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala b/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala index 1c35d66..dc6f347 100644 --- a/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala +++ b/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala @@ -66,23 +66,23 @@ class CrossrefScorableTest extends FlatSpec with Matchers {    val MalformedCrossrefString = CrossrefString.replace("}", "")    // Unit tests -  "simplifyJson()" should "return None for bad JSON" in { -    CrossrefScorable.simplifyJson("") shouldBe None -    CrossrefScorable.simplifyJson(MalformedCrossrefString) shouldBe None +  "CrossrefScorable.jsonToMapFeatures()" should "handle invalid JSON" in { +    val result = CrossrefScorable.jsonToMapFeatures(MalformedCrossrefString)  +    result.slug shouldBe Scorable.NoSlug    } -  it should "return None for JSON lacking title" in { -    CrossrefScorable.simplifyJson(CrossrefStringWithoutTitle) shouldBe None +  it should "handle missing title" in { +    val result = CrossrefScorable.jsonToMapFeatures(CrossrefStringWithoutTitle) +    result.slug shouldBe Scorable.NoSlug    } -  it should "return appropriate result for valid JSON" in { -    CrossrefScorable.simplifyJson(CrossrefStringWithTitle) match { -      case None => fail("None unexpectedly returned by simplifyJson") +  it should "handle valid input" in { +    val result = CrossrefScorable.jsonToMapFeatures(CrossrefStringWithTitle) +    result.slug shouldBe "dummyexamplefile" +    Scorable.jsonToMap(result.json) match { +      case None => fail()        case Some(map) => { -        Scorable.isScorableMap(map) shouldBe true -        map.size shouldBe 1 -        map.keys should contain ("title") -        map("title") shouldBe "SomeTitle" +        map("title").asInstanceOf[String] shouldBe "Dummy Example File"        }      }    } diff --git a/scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala b/scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala index 3fcd856..4b958b9 100644 --- a/scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala +++ b/scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala @@ -77,6 +77,7 @@ class GrobidScorableTest extends FlatSpec with Matchers {      Scorable.jsonToMap(result.json) match {        case None => fail()        case Some(map) => { +        map should contain key "title"          map("title").asInstanceOf[String] shouldBe "Dummy Example File"        }      } diff --git a/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala b/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala index 8acb454..8436817 100644 --- a/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala +++ b/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala @@ -149,11 +149,16 @@ class ScoreJobTest extends FlatSpec with Matchers {        2 -> CrossrefString.replace("<<TITLE>>", "Title 1: TNG 3").replace("<<DOI>>", "DOI-0.75"),        3 -> CrossrefString.replace("<<TITLE>>", "Title 2: Rebooted").replace("<<DOI>>", "DOI-1")))      .sink[(String, Int, String, String)](TypedTsv[(String, Int, String, String)](output)) { -      // Grobid titles:  -      //   "Title 1", "Title 2: TNG", "Title 3: The Sequel" -      // crossref slugs:  -      //   "Title 1: TNG", "Title 1: TNG 2", "Title 1: TNG 3", "Title 2 Rebooted" -      // Join should have 3 "Title  1" slugs and 1 "Title 2" slug +      // Grobid titles and slugs (in parentheses):  +      //   Title 1                       (title1) +      //   Title 2: TNG                  (title2) +      //   Title 3: The Sequel           (title3) +      // crossref titles and slugs (in parentheses): +      //   Title 1: TNG                  (title1) +      //   Title 1: TNG 2                (title1) +      //   Title 1: TNG 3                (title1) +      //   Title 2 Rebooted              (title2rebooted) +      // Join should have 3 "title1" slugs and 1 "title2" slug        outputBuffer =>         "The pipeline" should "return a 4-element list" in {          outputBuffer should have length 4 | 
