diff options
| author | Bryan Newbold <bnewbold@archive.org> | 2018-08-23 19:22:11 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@archive.org> | 2018-08-23 19:36:23 -0700 | 
| commit | 715a35715609d8cbacff53dd5c7c1715c53a55f8 (patch) | |
| tree | 93d1534c7cb67e4cc7f98e0ee19a6e368e7df3a8 | |
| parent | 6ea7b7fdb9330e69afbbe2d2afe3e6b8c83fb4fb (diff) | |
| download | sandcrawler-715a35715609d8cbacff53dd5c7c1715c53a55f8.tar.gz sandcrawler-715a35715609d8cbacff53dd5c7c1715c53a55f8.zip | |
require crossref works to have at least one author (for matching)
| -rw-r--r-- | scalding/src/main/scala/sandcrawler/CrossrefScorable.scala | 2 | ||||
| -rw-r--r-- | scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala | 6 | 
2 files changed, 7 insertions, 1 deletion
```diff
diff --git a/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala b/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala
index babb4f9..baa1ca9 100644
--- a/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala
+++ b/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala
@@ -90,7 +90,7 @@ object CrossrefScorable {
             val doi = Scorable.getString(map, "DOI")
             val authors: List[String] = mapToAuthorList(map)
             val year: Int = mapToYear(map).getOrElse(0)
-            if (doi.isEmpty || doi == null) {
+            if (doi.isEmpty || doi == null || authors.length == 0) {
               MapFeatures(Scorable.NoSlug, json)
             } else {
               val sf : ScorableFeatures = ScorableFeatures.create(title=title, authors=authors, doi=doi.toLowerCase(), year=year)
diff --git a/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala b/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala
index ac7cc70..0cb12ee 100644
--- a/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala
+++ b/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala
@@ -73,6 +73,7 @@ class CrossrefScorableTest extends FlatSpec with Matchers {
   val CrossrefStringWithEmptyTitle = CrossrefString.replace("<<TITLE>>", "")
   val CrossrefStringWithoutTitle = CrossrefString.replace("title", "nottitle")
   val MalformedCrossrefString = CrossrefString.replace("}", "")
+  val CrossrefStringWithNoAuthors = CrossrefString.replace("<<TITLE>>", "Some Valid Title").replace("author", "no-author")
 
   // Unit tests
   "CrossrefScorable.jsonToMapFeatures()" should "handle invalid JSON" in {
@@ -95,6 +96,11 @@ class CrossrefScorableTest extends FlatSpec with Matchers {
     result.slug shouldBe Scorable.NoSlug
   }
 
+  it should "handle missing authors" in {
+    val result = CrossrefScorable.jsonToMapFeatures(CrossrefStringWithNoAuthors)
+    result.slug shouldBe Scorable.NoSlug
+  }
+
   it should "handle valid input" in {
     val result = CrossrefScorable.jsonToMapFeatures(CrossrefStringWithGoodTitle)
     result.slug shouldBe "sometitle"
```
