about summary refs log tree commit diff stats
path: root/backfill/tests
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2018-03-30 22:53:03 -0700
committer Bryan Newbold <bnewbold@archive.org> 2018-03-30 22:53:03 -0700
commit 31d5a1ebdfe2f4638ae1e5ec87ff228eef9114f5 (patch)
tree 995bb427a75d0e62c6796dc026c92e63ed410389 /backfill/tests
parent bb38ea065758a719331803b4adf875f2d75a1c3d (diff)
download sandcrawler-31d5a1ebdfe2f4638ae1e5ec87ff228eef9114f5.tar.gz
sandcrawler-31d5a1ebdfe2f4638ae1e5ec87ff228eef9114f5.zip
backfill: sha1 prefix, cluster example
Diffstat (limited to 'backfill/tests')
-rw-r--r-- backfill/tests/test_backfill_hbase_from_cdx.py 11
1 files changed, 7 insertions, 4 deletions
diff --git a/backfill/tests/test_backfill_hbase_from_cdx.py b/backfill/tests/test_backfill_hbase_from_cdx.py
index d8277be..9af5b05 100644
--- a/backfill/tests/test_backfill_hbase_from_cdx.py
+++ b/backfill/tests/test_backfill_hbase_from_cdx.py
@@ -1,3 +1,6 @@
+"""
+TODO: could probably refactor to use unittest.mock.patch('happybase')
+"""
import io
import json
@@ -33,13 +36,13 @@ com,pbworks,educ333b)/robots.txt 20170705063311 http://educ333b.pbworks.com/robo
assert job.hb_table.row(b'1') == {}
# HTTP 301
- assert job.hb_table.row(b'3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ') == {}
+ assert job.hb_table.row(b'sha1:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ') == {}
# valid
- assert job.hb_table.row(b'MPCXVWMUTRUGFP36SLPHKDLY6NGU4S3J') != {}
+ assert job.hb_table.row(b'sha1:MPCXVWMUTRUGFP36SLPHKDLY6NGU4S3J') != {}
# text/plain
- assert job.hb_table.row(b'6VAUYENMOU2SK2OWNRPDD6WTQTECGZAD') == {}
+ assert job.hb_table.row(b'sha1:6VAUYENMOU2SK2OWNRPDD6WTQTECGZAD') == {}
- row = job.hb_table.row(b'MPCXVWMUTRUGFP36SLPHKDLY6NGU4S3J')
+ row = job.hb_table.row(b'sha1:MPCXVWMUTRUGFP36SLPHKDLY6NGU4S3J')
assert row[b'file:mime'] == b"application/pdf"
file_cdx = json.loads(row[b'file:cdx'].decode('utf-8'))