diff options
| author | Bryan Newbold <bnewbold@archive.org> | 2018-08-24 18:07:11 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@archive.org> | 2018-08-24 18:07:11 -0700 | 
| commit | b4c7b7fdc2537f4defb742e2b7b9de4524adf174 (patch) | |
| tree | dbab9a3ac495c8353ac26c823f2ebdb17b6b65fe /scalding/src/main/scala | |
| parent | cb563e890a8544fef1367d7b0f556cda6f0daca0 (diff) | |
| download | sandcrawler-b4c7b7fdc2537f4defb742e2b7b9de4524adf174.tar.gz sandcrawler-b4c7b7fdc2537f4defb742e2b7b9de4524adf174.zip  | |
rename UnGrobidedDumpJob to DumpUnGrobidedJob
Diffstat (limited to 'scalding/src/main/scala')
| -rw-r--r-- | scalding/src/main/scala/sandcrawler/DumpUnGrobidedJob.scala (renamed from scalding/src/main/scala/sandcrawler/UnGrobidedDumpJob.scala) | 10 | 
1 files changed, 5 insertions, 5 deletions
diff --git a/scalding/src/main/scala/sandcrawler/UnGrobidedDumpJob.scala b/scalding/src/main/scala/sandcrawler/DumpUnGrobidedJob.scala index 0ce9167..7fd3ce0 100644 --- a/scalding/src/main/scala/sandcrawler/UnGrobidedDumpJob.scala +++ b/scalding/src/main/scala/sandcrawler/DumpUnGrobidedJob.scala @@ -15,19 +15,19 @@ import parallelai.spyglass.hbase.HBaseSource  // full CDX metadata, and dumps to a TSV for later extraction by the  // "extraction-ungrobided" job.  // -// Does the same horrible join thing that UnGrobidedDumpJob does. -class UnGrobidedDumpJob(args: Args) extends JobBase(args) with HBasePipeConversions { +// Does the same horrible join thing that DumpUnGrobidedJob does. +class DumpUnGrobidedJob(args: Args) extends JobBase(args) with HBasePipeConversions {    val output = args("output") -  val allKeys : TypedPipe[(String,String,String,String)] = UnGrobidedDumpJob.getHBaseKeySource( +  val allKeys : TypedPipe[(String,String,String,String)] = DumpUnGrobidedJob.getHBaseKeySource(      args("hbase-table"),      args("zookeeper-hosts"))      .read      .fromBytesWritable('key, 'c, 'mime, 'cdx)      .toTypedPipe[(String,String,String,String)]('key, 'c, 'mime, 'cdx) -  val existingKeys : TypedPipe[(String,Boolean)] = UnGrobidedDumpJob.getHBaseColSource( +  val existingKeys : TypedPipe[(String,Boolean)] = DumpUnGrobidedJob.getHBaseColSource(      args("hbase-table"),      args("zookeeper-hosts"))      .read @@ -46,7 +46,7 @@ class UnGrobidedDumpJob(args: Args) extends JobBase(args) with HBasePipeConversi  } -object UnGrobidedDumpJob { +object DumpUnGrobidedJob {    // eg, "wbgrp-journal-extract-0-qa",7 "mtrcs-zk1.us.archive.org:2181"    def getHBaseColSource(hbaseTable: String, zookeeperHosts: String) : HBaseSource = {  | 
