about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2019-09-25 17:54:56 -0700
committer Bryan Newbold <bnewbold@archive.org> 2019-09-25 17:54:56 -0700
commit 353dc0c2954d9f834fcccb49558728e326abca5b (patch)
tree c3a1562455000855a2fb153d1fcea7f6fc7f1f87
parent d7830b4a5aad0a59a588e98798711f0e694d50d6 (diff)
download sandcrawler-353dc0c2954d9f834fcccb49558728e326abca5b.tar.gz
sandcrawler-353dc0c2954d9f834fcccb49558728e326abca5b.zip
point 'please' to python_hadoop
-rwxr-xr-x please 8
1 files changed, 4 insertions, 4 deletions
diff --git a/please b/please
index 0bfc0b2..10fa843 100755
--- a/please
+++ b/please
@@ -22,7 +22,7 @@ GROBID_URI = "http://wbgrp-svc096.us.archive.org:8070"
def rebuild_python():
print("Rebuilding python venv...")
- cmd = """cd python;
+ cmd = """cd python_hadoop;
export PIPENV_VENV_IN_PROJECT=1;
pipenv install --deploy
tar -czf venv-current.tar.gz -C .venv ."""
@@ -41,7 +41,7 @@ def run_backfill(args):
HDFS_DIR,
args.env,
datetime.strftime(datetime.now(), "%Y-%m-%d-%H%M.%S"))
- cmd = """cd python;
+ cmd = """cd python_hadoop;
pipenv run ./backfill_hbase_from_cdx.py \
--hbase-host {hbase_host} \
--hbase-table wbgrp-journal-extract-0-{env} \
@@ -61,7 +61,7 @@ def run_extract(args):
HDFS_DIR,
args.env,
datetime.strftime(datetime.now(), "%Y-%m-%d-%H%M.%S"))
- cmd = """cd python;
+ cmd = """cd python_hadoop;
pipenv run ./extraction_cdx_grobid.py \
--hbase-host {hbase_host} \
--hbase-table wbgrp-journal-extract-0-{env} \
@@ -89,7 +89,7 @@ def run_extract_ungrobided(args):
HDFS_DIR,
args.env,
datetime.strftime(datetime.now(), "%Y-%m-%d-%H%M.%S"))
- cmd = """cd python;
+ cmd = """cd python_hadoop;
pipenv run ./extraction_ungrobided.py \
--hbase-host {hbase_host} \
--hbase-table wbgrp-journal-extract-0-{env} \