aboutsummaryrefslogtreecommitdiffstats
path: root/backfill/README.md
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2018-03-30 22:53:03 -0700
committer Bryan Newbold <bnewbold@archive.org> 2018-03-30 22:53:03 -0700
commit 31d5a1ebdfe2f4638ae1e5ec87ff228eef9114f5 (patch)
tree 995bb427a75d0e62c6796dc026c92e63ed410389 /backfill/README.md
parent bb38ea065758a719331803b4adf875f2d75a1c3d (diff)
download sandcrawler-31d5a1ebdfe2f4638ae1e5ec87ff228eef9114f5.tar.gz
sandcrawler-31d5a1ebdfe2f4638ae1e5ec87ff228eef9114f5.zip
backfill: sha1 prefix, cluster example
Diffstat (limited to 'backfill/README.md')
-rw-r--r-- backfill/README.md 8
1 files changed, 6 insertions, 2 deletions
diff --git a/backfill/README.md b/backfill/README.md
index 90b4ba7..6af8f33 100644
--- a/backfill/README.md
+++ b/backfill/README.md
@@ -3,6 +3,10 @@ Run tests:
pipenv run python -m pytest
-Run locally on a file:
+An example actually connecting to HBase from a local machine, with thrift
+running on a devbox:
+
+ ./backfill_hbase_from_cdx.py tests/files/example.cdx \
+ --hbase-table wbgrp-journal-extract-0-qa \
+ --hbase-host bnewbold-dev.us.archive.org
- ./backfill_hbase_from_cdx.py tests/files/example.cdx