diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-09-25 17:54:56 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-09-25 17:54:56 -0700 |
commit | 353dc0c2954d9f834fcccb49558728e326abca5b (patch) | |
tree | c3a1562455000855a2fb153d1fcea7f6fc7f1f87 | |
parent | d7830b4a5aad0a59a588e98798711f0e694d50d6 (diff) | |
download | sandcrawler-353dc0c2954d9f834fcccb49558728e326abca5b.tar.gz sandcrawler-353dc0c2954d9f834fcccb49558728e326abca5b.zip |
point 'please' to python_hadoop
-rwxr-xr-x | please | 8 |
1 file changed, 4 insertions, 4 deletions
@@ -22,7 +22,7 @@ GROBID_URI = "http://wbgrp-svc096.us.archive.org:8070"
 
 def rebuild_python():
     print("Rebuilding python venv...")
-    cmd = """cd python;
+    cmd = """cd python_hadoop;
         export PIPENV_VENV_IN_PROJECT=1;
         pipenv install --deploy
         tar -czf venv-current.tar.gz -C .venv ."""
@@ -41,7 +41,7 @@ def run_backfill(args):
         HDFS_DIR,
         args.env,
         datetime.strftime(datetime.now(), "%Y-%m-%d-%H%M.%S"))
-    cmd = """cd python;
+    cmd = """cd python_hadoop;
         pipenv run ./backfill_hbase_from_cdx.py \
             --hbase-host {hbase_host} \
             --hbase-table wbgrp-journal-extract-0-{env} \
@@ -61,7 +61,7 @@ def run_extract(args):
         HDFS_DIR,
         args.env,
         datetime.strftime(datetime.now(), "%Y-%m-%d-%H%M.%S"))
-    cmd = """cd python;
+    cmd = """cd python_hadoop;
         pipenv run ./extraction_cdx_grobid.py \
             --hbase-host {hbase_host} \
             --hbase-table wbgrp-journal-extract-0-{env} \
@@ -89,7 +89,7 @@ def run_extract_ungrobided(args):
         HDFS_DIR,
         args.env,
         datetime.strftime(datetime.now(), "%Y-%m-%d-%H%M.%S"))
-    cmd = """cd python;
+    cmd = """cd python_hadoop;
         pipenv run ./extraction_ungrobided.py \
             --hbase-host {hbase_host} \
             --hbase-table wbgrp-journal-extract-0-{env} \