diff options
author | Bryan Newbold <bnewbold@archive.org> | 2018-07-15 22:49:07 +0000 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2018-07-15 22:49:07 +0000 |
commit | 746870a10215549c25a16529eabaeb199a3b9228 (patch) | |
tree | 2d91dd8f2b153738d6f1370b4623b6d816bceba5 | |
parent | f955fef045d3c78e351a639546a27ab0a53fc0aa (diff) | |
download | sandcrawler-746870a10215549c25a16529eabaeb199a3b9228.tar.gz sandcrawler-746870a10215549c25a16529eabaeb199a3b9228.zip |
update please helpers to provide hbase+zk config
-rwxr-xr-x | please | 15 |
1 files changed, 13 insertions, 2 deletions
```diff
@@ -13,6 +13,7 @@ from datetime import datetime
 
 HDFS_DIR = "hdfs:///user/bnewbold/sandcrawler"
 HBASE_HOST = "wbgrp-svc263.us.archive.org"
+ZOOKEEPER_HOSTS = "mtrcs-zk1.us.archive.org:2181"
 GROBID_URI = "http://wbgrp-svc096.us.archive.org:8070"
 
 def rebuild_python():
@@ -86,7 +87,12 @@ def run_rowcount(args):
         com.twitter.scalding.Tool sandcrawler.HBaseRowCountJob \
         --hdfs \
         --app.conf.path scalding/ia_cluster.conf \
-        --output {}""".format(output)
+        --hbase-table wbgrp-journal-extract-0-{env} \
+        --zookeeper-hosts {zookeeper_hosts} \
+        --output {output}""".format(
+            output=output,
+            zookeeper_hosts=ZOOKEEPER_HOSTS,
+            env=args.env)
     subprocess.call(cmd, shell=True)
 
 def run_statuscount(args):
@@ -102,7 +108,12 @@ def run_statuscount(args):
         com.twitter.scalding.Tool sandcrawler.HBaseStatusCountJob \
         --hdfs \
         --app.conf.path scalding/ia_cluster.conf \
-        --output {}""".format(output)
+        --hbase-table wbgrp-journal-extract-0-{env} \
+        --zookeeper-hosts {zookeeper_hosts} \
+        --output {output}""".format(
+            output=output,
+            zookeeper_hosts=ZOOKEEPER_HOSTS,
+            env=args.env)
     subprocess.call(cmd, shell=True)
 
 def main():
```