From 6ea7b7fdb9330e69afbbe2d2afe3e6b8c83fb4fb Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 23 Aug 2018 17:50:43 -0700 Subject: author parsing (and year, for crossref) --- scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala | 6 +++++- scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'scalding/src/test') diff --git a/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala b/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala index 3d18a21..ac7cc70 100644 --- a/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala +++ b/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala @@ -64,7 +64,7 @@ class CrossrefScorableTest extends FlatSpec with Matchers { "issn-type" : [ { "value" : "0987-7983", "type" : "print" } ], "subject" : [ "Pediatrics, Perinatology, and Child Health" ] } -""" +""".replace("<<DOI>>", "10.123/aBc") // scalastyle:on val CrossrefStringWithGoodTitle = CrossrefString.replace("<<TITLE>>", "Some Title") val CrossrefStringWithMaximumTitle = CrossrefString.replace("<<TITLE>>", "T" * Scorable.MaxTitleLength) @@ -102,6 +102,10 @@ class CrossrefScorableTest extends FlatSpec with Matchers { case None => fail() case Some(map) => { map("title").asInstanceOf[String] shouldBe "Some Title" + map("doi").asInstanceOf[String] shouldBe "10.123/abc" + // TODO: full name? not just a string? 
+ map("authors").asInstanceOf[List[String]] shouldBe List("Gaier") + map("year").asInstanceOf[Double].toInt shouldBe 2002 } } } diff --git a/scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala b/scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala index 6c45cc5..119cf90 100644 --- a/scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala +++ b/scalding/src/test/scala/sandcrawler/GrobidScorableTest.scala @@ -90,6 +90,7 @@ class GrobidScorableTest extends FlatSpec with Matchers { case Some(map) => { map should contain key "title" map("title").asInstanceOf[String] shouldBe "Dummy Example File" + map("authors").asInstanceOf[List[String]] shouldBe List("Brewster Kahle", "J Doe") } } } -- cgit v1.2.3