From 715a35715609d8cbacff53dd5c7c1715c53a55f8 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 23 Aug 2018 19:22:11 -0700 Subject: require crossref works to have at least one author (for matching) --- scalding/src/main/scala/sandcrawler/CrossrefScorable.scala | 2 +- scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'scalding') diff --git a/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala b/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala index babb4f9..baa1ca9 100644 --- a/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala +++ b/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala @@ -90,7 +90,7 @@ object CrossrefScorable { val doi = Scorable.getString(map, "DOI") val authors: List[String] = mapToAuthorList(map) val year: Int = mapToYear(map).getOrElse(0) - if (doi.isEmpty || doi == null) { + if (doi.isEmpty || doi == null || authors.length == 0) { MapFeatures(Scorable.NoSlug, json) } else { val sf : ScorableFeatures = ScorableFeatures.create(title=title, authors=authors, doi=doi.toLowerCase(), year=year) diff --git a/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala b/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala index ac7cc70..0cb12ee 100644 --- a/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala +++ b/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala @@ -73,6 +73,7 @@ class CrossrefScorableTest extends FlatSpec with Matchers { val CrossrefStringWithEmptyTitle = CrossrefString.replace("<<TITLE>>", "") val CrossrefStringWithoutTitle = CrossrefString.replace("title", "nottitle") val MalformedCrossrefString = CrossrefString.replace("}", "") + val CrossrefStringWithNoAuthors = CrossrefString.replace("<<TITLE>>", "Some Valid Title").replace("author", "no-author") // Unit tests "CrossrefScorable.jsonToMapFeatures()" should "handle invalid JSON" in { @@ -95,6 +96,11 @@ class CrossrefScorableTest 
extends FlatSpec with Matchers { result.slug shouldBe Scorable.NoSlug } + it should "handle missing authors" in { + val result = CrossrefScorable.jsonToMapFeatures(CrossrefStringWithNoAuthors) + result.slug shouldBe Scorable.NoSlug + } + it should "handle valid input" in { val result = CrossrefScorable.jsonToMapFeatures(CrossrefStringWithGoodTitle) result.slug shouldBe "sometitle" -- cgit v1.2.3