aboutsummaryrefslogtreecommitdiffstats
path: root/backfill
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2018-04-04 11:48:48 -0700
committerBryan Newbold <bnewbold@archive.org>2018-04-04 11:48:48 -0700
commitb948cddc7fe7000fd119af4fa130b9c24da46472 (patch)
treeaaef8b33ea54e2be21e5b8cfb15d72b545267e58 /backfill
parent9186783c632b63e7d7c3cd8168139718fba378e9 (diff)
downloadsandcrawler-b948cddc7fe7000fd119af4fa130b9c24da46472.tar.gz
sandcrawler-b948cddc7fe7000fd119af4fa130b9c24da46472.zip
trivial whitespace
Diffstat (limited to 'backfill')
-rwxr-xr-xbackfill/backfill_hbase_from_cdx.py2
-rw-r--r--backfill/tests/test_backfill_hbase_from_cdx.py1
2 files changed, 2 insertions, 1 deletions
diff --git a/backfill/backfill_hbase_from_cdx.py b/backfill/backfill_hbase_from_cdx.py
index e6596d5..fe37bd5 100755
--- a/backfill/backfill_hbase_from_cdx.py
+++ b/backfill/backfill_hbase_from_cdx.py
@@ -120,7 +120,7 @@ def test_transform_line():
class MRCDXBackfillHBase(MRJob):
- # CDX lines in
+ # CDX lines in; JSON status out
INPUT_PROTOCOL = mrjob.protocol.RawValueProtocol
OUTPUT_PROTOCOL = mrjob.protocol.JSONValueProtocol
diff --git a/backfill/tests/test_backfill_hbase_from_cdx.py b/backfill/tests/test_backfill_hbase_from_cdx.py
index 9af5b05..1a13e5b 100644
--- a/backfill/tests/test_backfill_hbase_from_cdx.py
+++ b/backfill/tests/test_backfill_hbase_from_cdx.py
@@ -23,6 +23,7 @@ def job():
job = MRCDXBackfillHBase(['--no-conf', '-'], hb_table=table)
return job
+
def test_some_lines(job):
raw = io.BytesIO(b"""