diff options
author | Bryan Newbold <bnewbold@archive.org> | 2018-03-30 22:53:03 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2018-03-30 22:53:03 -0700 |
commit | 31d5a1ebdfe2f4638ae1e5ec87ff228eef9114f5 (patch) | |
tree | 995bb427a75d0e62c6796dc026c92e63ed410389 /backfill/tests | |
parent | bb38ea065758a719331803b4adf875f2d75a1c3d (diff) | |
download | sandcrawler-31d5a1ebdfe2f4638ae1e5ec87ff228eef9114f5.tar.gz sandcrawler-31d5a1ebdfe2f4638ae1e5ec87ff228eef9114f5.zip |
backfill: sha1 prefix, cluster example
Diffstat (limited to 'backfill/tests')
-rw-r--r-- | backfill/tests/test_backfill_hbase_from_cdx.py | 11 |
1 file changed, 7 insertions, 4 deletions
diff --git a/backfill/tests/test_backfill_hbase_from_cdx.py b/backfill/tests/test_backfill_hbase_from_cdx.py index d8277be..9af5b05 100644 --- a/backfill/tests/test_backfill_hbase_from_cdx.py +++ b/backfill/tests/test_backfill_hbase_from_cdx.py @@ -1,3 +1,6 @@ +""" +TODO: could probably refactor to use unittest.mock.patch('happybase') +""" import io import json @@ -33,13 +36,13 @@ com,pbworks,educ333b)/robots.txt 20170705063311 http://educ333b.pbworks.com/robo assert job.hb_table.row(b'1') == {} # HTTP 301 - assert job.hb_table.row(b'3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ') == {} + assert job.hb_table.row(b'sha1:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ') == {} # valid - assert job.hb_table.row(b'MPCXVWMUTRUGFP36SLPHKDLY6NGU4S3J') != {} + assert job.hb_table.row(b'sha1:MPCXVWMUTRUGFP36SLPHKDLY6NGU4S3J') != {} # text/plain - assert job.hb_table.row(b'6VAUYENMOU2SK2OWNRPDD6WTQTECGZAD') == {} + assert job.hb_table.row(b'sha1:6VAUYENMOU2SK2OWNRPDD6WTQTECGZAD') == {} - row = job.hb_table.row(b'MPCXVWMUTRUGFP36SLPHKDLY6NGU4S3J') + row = job.hb_table.row(b'sha1:MPCXVWMUTRUGFP36SLPHKDLY6NGU4S3J') assert row[b'file:mime'] == b"application/pdf" file_cdx = json.loads(row[b'file:cdx'].decode('utf-8')) |