diff options
Diffstat (limited to 'scalding/src/main/scala/sandcrawler/ScoreJob.scala')
-rw-r--r-- | scalding/src/main/scala/sandcrawler/ScoreJob.scala | 16 |
1 files changed, 0 insertions, 16 deletions
diff --git a/scalding/src/main/scala/sandcrawler/ScoreJob.scala b/scalding/src/main/scala/sandcrawler/ScoreJob.scala
index 107f504..ccb9b76 100644
--- a/scalding/src/main/scala/sandcrawler/ScoreJob.scala
+++ b/scalding/src/main/scala/sandcrawler/ScoreJob.scala
@@ -12,10 +12,6 @@ class ScoreJob(args: Args) extends JobBase(args) {
   val grobidRowCount = Stat("grobid-rows-filtered", "sandcrawler")
   val crossrefRowCount = Stat("crossref-rows-filtered", "sandcrawler")
   val joinedRowCount = Stat("joined-rows", "sandcrawler")
-  /* TODO:
-  val uniqueDoiCount = Stat("unique-doi-count", "sandcrawler")
-  val uniqueSha1Count = Stat("unique-sha1-count", "sandcrawler")
-  */
 
   val grobidScorable : Scorable = new GrobidScorable()
   val crossrefScorable : Scorable = new CrossrefScorable()
@@ -36,18 +32,6 @@ class ScoreJob(args: Args) extends JobBase(args) {
     .addTrap(TypedTsv(args("output") + ".trapped"))
     .join(crossrefPipe)
 
-  /* TODO:
-  // Reduces to count unique SHA1 and DOI
-  joinedPipe
-    .map { case (_, (grobidFeatures, _)) => grobidFeatures.sha }
-    .distinct
-    .map { _ => uniqueSha1Count.inc }
-  joinedPipe
-    .map { case (_, (_, crossrefFeatures)) => crossrefFeatures.doi }
-    .distinct
-    .map { _ => uniqueDoiCount.inc }
-  */
-
   // TypedTsv doesn't work over case classes.
   joinedPipe
     .map { case (slug, (grobidFeatures, crossrefFeatures)) =>