aboutsummaryrefslogtreecommitdiffstats
path: root/scalding
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2018-08-23 19:22:11 -0700
committer Bryan Newbold <bnewbold@archive.org> 2018-08-23 19:36:23 -0700
commit 715a35715609d8cbacff53dd5c7c1715c53a55f8 (patch)
tree 93d1534c7cb67e4cc7f98e0ee19a6e368e7df3a8 /scalding
parent 6ea7b7fdb9330e69afbbe2d2afe3e6b8c83fb4fb (diff)
download sandcrawler-715a35715609d8cbacff53dd5c7c1715c53a55f8.tar.gz
sandcrawler-715a35715609d8cbacff53dd5c7c1715c53a55f8.zip
require crossref works to have at least one author (for matching)
Diffstat (limited to 'scalding')
-rw-r--r-- scalding/src/main/scala/sandcrawler/CrossrefScorable.scala 2
-rw-r--r-- scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala 6
2 files changed, 7 insertions, 1 deletions
diff --git a/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala b/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala
index babb4f9..baa1ca9 100644
--- a/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala
+++ b/scalding/src/main/scala/sandcrawler/CrossrefScorable.scala
@@ -90,7 +90,7 @@ object CrossrefScorable {
val doi = Scorable.getString(map, "DOI")
val authors: List[String] = mapToAuthorList(map)
val year: Int = mapToYear(map).getOrElse(0)
- if (doi.isEmpty || doi == null) {
+ if (doi.isEmpty || doi == null || authors.length == 0) {
MapFeatures(Scorable.NoSlug, json)
} else {
val sf : ScorableFeatures = ScorableFeatures.create(title=title, authors=authors, doi=doi.toLowerCase(), year=year)
diff --git a/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala b/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala
index ac7cc70..0cb12ee 100644
--- a/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala
+++ b/scalding/src/test/scala/sandcrawler/CrossrefScorableTest.scala
@@ -73,6 +73,7 @@ class CrossrefScorableTest extends FlatSpec with Matchers {
val CrossrefStringWithEmptyTitle = CrossrefString.replace("<<TITLE>>", "")
val CrossrefStringWithoutTitle = CrossrefString.replace("title", "nottitle")
val MalformedCrossrefString = CrossrefString.replace("}", "")
+ val CrossrefStringWithNoAuthors = CrossrefString.replace("<<TITLE>>", "Some Valid Title").replace("author", "no-author")
// Unit tests
"CrossrefScorable.jsonToMapFeatures()" should "handle invalid JSON" in {
@@ -95,6 +96,11 @@ class CrossrefScorableTest extends FlatSpec with Matchers {
result.slug shouldBe Scorable.NoSlug
}
+ it should "handle missing authors" in {
+ val result = CrossrefScorable.jsonToMapFeatures(CrossrefStringWithNoAuthors)
+ result.slug shouldBe Scorable.NoSlug
+ }
+
it should "handle valid input" in {
val result = CrossrefScorable.jsonToMapFeatures(CrossrefStringWithGoodTitle)
result.slug shouldBe "sometitle"