aboutsummaryrefslogtreecommitdiffstats
path: root/please
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2018-08-24 12:19:09 -0700
committer Bryan Newbold <bnewbold@archive.org> 2018-08-24 12:19:09 -0700
commit f50d4e081f7994a167c4974ee9d3f6e1f8eae478 (patch)
tree 00cf69ffe345f7766e7477cb9b1f5f7448b4e4fb /please
parent 344531eb6a5cdd4ea15e4d82050368c5af0eafee (diff)
parent 5340caad7b39ad29bba77d2a3e486db7a6b1977b (diff)
download sandcrawler-f50d4e081f7994a167c4974ee9d3f6e1f8eae478.tar.gz
sandcrawler-f50d4e081f7994a167c4974ee9d3f6e1f8eae478.zip
Merge branch 'bnewbold-match-quality'
Manually resolved merge conflict in: please
Diffstat (limited to 'please')
-rwxr-xr-x please 28
1 files changed, 28 insertions, 0 deletions
diff --git a/please b/please
index b32dd79..7671515 100755
--- a/please
+++ b/please
@@ -215,6 +215,24 @@ def run_colcount(args):
env=args.env)
subprocess.call(cmd, shell=True)
+def run_matchbenchmark(args):
+ if args.rebuild:
+ rebuild_scalding()
+ print("Starting matchbenchmark job...")
+ cmd = """./pig/deps/hadoop/bin/hadoop jar \
+ scalding/target/scala-2.11/sandcrawler-assembly-0.2.0-SNAPSHOT.jar \
+ com.twitter.scalding.Tool \
+ sandcrawler.MatchBenchmarkJob \
+ --local \
+ --app.conf.path scalding/ia_cluster.conf \
+ --left-bibjson {left_bibjson} \
+ --right-bibjson {right_bibjson} \
+ --output {output}""".format(
+ output=args.output,
+ left_bibjson=args.left_bibjson,
+ right_bibjson=args.right_bibjson)
+ subprocess.call(cmd, shell=True)
+
def main():
parser = argparse.ArgumentParser()
@@ -264,12 +282,22 @@ def main():
sub_colcount.add_argument('column',
help="column name to use in count")
+ sub_matchbenchmark = subparsers.add_parser('match-benchmark')
+ sub_matchbenchmark.set_defaults(func=run_matchbenchmark)
+ sub_matchbenchmark.add_argument('left_bibjson',
+ help="First bibjson file")
+ sub_matchbenchmark.add_argument('right_bibjson',
+ help="Second bibjson file")
+ sub_matchbenchmark.add_argument('output',
+ help="where to write output")
+
args = parser.parse_args()
if not args.__dict__.get("func"):
print("tell me what to do! (try --help)")
sys.exit(-1)
if not (args.prod or args.qa) or (args.prod and args.qa):
print("must pass one of --prod or --qa")
+ sys.exit(-1)
if args.prod:
args.env = "prod"
if args.qa: