aboutsummaryrefslogtreecommitdiffstats
path: root/scalding/src/main/scala/sandcrawler/ScoreJob.scala
diff options
context:
space:
mode:
Diffstat (limited to 'scalding/src/main/scala/sandcrawler/ScoreJob.scala')
-rw-r--r-- scalding/src/main/scala/sandcrawler/ScoreJob.scala 16
1 files changed, 0 insertions, 16 deletions
diff --git a/scalding/src/main/scala/sandcrawler/ScoreJob.scala b/scalding/src/main/scala/sandcrawler/ScoreJob.scala
index 107f504..ccb9b76 100644
--- a/scalding/src/main/scala/sandcrawler/ScoreJob.scala
+++ b/scalding/src/main/scala/sandcrawler/ScoreJob.scala
@@ -12,10 +12,6 @@ class ScoreJob(args: Args) extends JobBase(args) {
val grobidRowCount = Stat("grobid-rows-filtered", "sandcrawler")
val crossrefRowCount = Stat("crossref-rows-filtered", "sandcrawler")
val joinedRowCount = Stat("joined-rows", "sandcrawler")
- /* TODO:
- val uniqueDoiCount = Stat("unique-doi-count", "sandcrawler")
- val uniqueSha1Count = Stat("unique-sha1-count", "sandcrawler")
- */
val grobidScorable : Scorable = new GrobidScorable()
val crossrefScorable : Scorable = new CrossrefScorable()
@@ -36,18 +32,6 @@ class ScoreJob(args: Args) extends JobBase(args) {
.addTrap(TypedTsv(args("output") + ".trapped"))
.join(crossrefPipe)
- /* TODO:
- // Reduces to count unique SHA1 and DOI
- joinedPipe
- .map { case (_, (grobidFeatures, _)) => grobidFeatures.sha }
- .distinct
- .map { _ => uniqueSha1Count.inc }
- joinedPipe
- .map { case (_, (_, crossrefFeatures)) => crossrefFeatures.doi }
- .distinct
- .map { _ => uniqueDoiCount.inc }
- */
-
// TypedTsv doesn't work over case classes.
joinedPipe
.map { case (slug, (grobidFeatures, crossrefFeatures)) =>