aboutsummaryrefslogtreecommitdiffstats
path: root/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala
diff options
context:
space:
mode:
authorEllen Spertus <ellen.spertus@gmail.com>2018-08-12 19:12:32 -0700
committerEllen Spertus <ellen.spertus@gmail.com>2018-08-12 19:12:32 -0700
commit5615428921a45ba6a2fb005b255a28dcbb83b13f (patch)
tree4c085076194ecdbad63c62194711d5baf657f60f /scalding/src/test/scala/sandcrawler/ScoreJobTest.scala
parent05c0213547f29842bbae6faaf77e983a364d4a2e (diff)
downloadsandcrawler-5615428921a45ba6a2fb005b255a28dcbb83b13f.tar.gz
sandcrawler-5615428921a45ba6a2fb005b255a28dcbb83b13f.zip
Snapshot before changing Scorable to find bug.
Diffstat (limited to 'scalding/src/test/scala/sandcrawler/ScoreJobTest.scala')
-rw-r--r--scalding/src/test/scala/sandcrawler/ScoreJobTest.scala15
1 files changed, 10 insertions, 5 deletions
diff --git a/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala b/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala
index 8acb454..8436817 100644
--- a/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala
+++ b/scalding/src/test/scala/sandcrawler/ScoreJobTest.scala
@@ -149,11 +149,16 @@ class ScoreJobTest extends FlatSpec with Matchers {
2 -> CrossrefString.replace("<<TITLE>>", "Title 1: TNG 3").replace("<<DOI>>", "DOI-0.75"),
3 -> CrossrefString.replace("<<TITLE>>", "Title 2: Rebooted").replace("<<DOI>>", "DOI-1")))
.sink[(String, Int, String, String)](TypedTsv[(String, Int, String, String)](output)) {
- // Grobid titles:
- // "Title 1", "Title 2: TNG", "Title 3: The Sequel"
- // crossref slugs:
- // "Title 1: TNG", "Title 1: TNG 2", "Title 1: TNG 3", "Title 2 Rebooted"
- // Join should have 3 "Title 1" slugs and 1 "Title 2" slug
+ // Grobid titles and slugs (in parentheses):
+ // Title 1 (title1)
+ // Title 2: TNG (title2)
+ // Title 3: The Sequel (title3)
+ // crossref titles and slugs (in parentheses):
+ // Title 1: TNG (title1)
+ // Title 1: TNG 2 (title1)
+ // Title 1: TNG 3 (title1)
+ // Title 2: Rebooted (title2)
+ // Join should have 3 "title1" slugs and 1 "title2" slug
outputBuffer =>
"The pipeline" should "return a 4-element list" in {
outputBuffer should have length 4