From 746870a10215549c25a16529eabaeb199a3b9228 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Sun, 15 Jul 2018 22:49:07 +0000
Subject: update please helpers to provide hbase+zk config

---
 please | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'please')

diff --git a/please b/please
index 2d4cae8..a244b80 100755
--- a/please
+++ b/please
@@ -13,6 +13,7 @@ from datetime import datetime
 
 HDFS_DIR = "hdfs:///user/bnewbold/sandcrawler"
 HBASE_HOST = "wbgrp-svc263.us.archive.org"
+ZOOKEEPER_HOSTS = "mtrcs-zk1.us.archive.org:2181"
 GROBID_URI = "http://wbgrp-svc096.us.archive.org:8070"
 
 def rebuild_python():
@@ -86,7 +87,12 @@ def run_rowcount(args):
         com.twitter.scalding.Tool sandcrawler.HBaseRowCountJob \
         --hdfs \
         --app.conf.path scalding/ia_cluster.conf \
-        --output {}""".format(output)
+        --hbase-table wbgrp-journal-extract-0-{env} \
+        --zookeeper-hosts {zookeeper_hosts} \
+        --output {output}""".format(
+            output=output,
+            zookeeper_hosts=ZOOKEEPER_HOSTS,
+            env=args.env)
     subprocess.call(cmd, shell=True)
 
 def run_statuscount(args):
@@ -102,7 +108,12 @@ def run_statuscount(args):
         com.twitter.scalding.Tool sandcrawler.HBaseStatusCountJob \
         --hdfs \
         --app.conf.path scalding/ia_cluster.conf \
-        --output {}""".format(output)
+        --hbase-table wbgrp-journal-extract-0-{env} \
+        --zookeeper-hosts {zookeeper_hosts} \
+        --output {output}""".format(
+            output=output,
+            zookeeper_hosts=ZOOKEEPER_HOSTS,
+            env=args.env)
     subprocess.call(cmd, shell=True)
 
 def main():
-- 
cgit v1.2.3