about | summary | refs | log | tree | commit | diff | stats
path: root/please
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2018-06-15 01:22:07 +0000
committer: Bryan Newbold <bnewbold@archive.org> 2018-06-15 01:22:07 +0000
commit: 08afd4330a83b526a5cf3612d07b1168ec5e4fa2 (patch)
tree: 4dc4d0b935ad8db8cb261291a46838aba50ddfe6 /please
parent: c23ccd1f2d03ad65ee83b8eca8c407d12ecd54e1 (diff)
download: sandcrawler-08afd4330a83b526a5cf3612d07b1168ec5e4fa2.tar.gz
download: sandcrawler-08afd4330a83b526a5cf3612d07b1168ec5e4fa2.zip
please: split out rebuild steps
Diffstat (limited to 'please')
-rwxr-xr-x please 21
1 files changed, 18 insertions, 3 deletions
diff --git a/please b/please
index 688a159..1ea751f 100755
--- a/please
+++ b/please
@@ -14,16 +14,28 @@ from datetime import datetime
HDFS_DIR = "hdfs:///user/bnewbold/sandcrawler"
HBASE_HOST = "wbgrp-svc263.us.archive.org"
+def rebuild_python():
+ print("Rebuilding python venv...")
+ cmd = """cd mapreduce;
+ export PIPENV_VENV_IN_PROJECT=1;
+ pipenv install --deploy
+ tar -czf venv-current.tar.gz -C .venv ."""
+ subprocess.call(cmd, shell=True)
+
+def rebuild_scalding():
+ print("Rebuilding scalding jar...")
+ cmd = """cd scalding; sbt assembly"""
+ subprocess.call(cmd, shell=True)
+
def run_backfill(args):
+ if args.rebuild:
+ rebuild_python()
print("Starting backfill job...")
output = "{}/output-{}/{}-backfill".format(
HDFS_DIR,
args.env,
datetime.strftime(datetime.now(), "%Y-%m-%d-%H%M.%S"))
cmd = """cd mapreduce;
- export PIPENV_VENV_IN_PROJECT=1;
- pipenv install --deploy
- tar -czf venv-current.tar.gz -C .venv .
pipenv run ./backfill_hbase_from_cdx.py \
--hbase-host {hbase_host} \
--hbase-table wbgrp-journal-extract-0-{env} \
@@ -58,6 +70,9 @@ def main():
parser.add_argument('--qa',
help="run against qa HBase table",
action='store_true')
+ parser.add_argument('--rebuild',
+ help="rebuild whatever artifact gets sent",
+ action='store_true')
subparsers = parser.add_subparsers()
sub_backfill = subparsers.add_parser('backfill')