about | summary | refs | log | tree | commit | diff | stats
path: root/mapreduce/tests
diff options
context:
space:
mode:
Diffstat (limited to 'mapreduce/tests')
-rw-r--r-- mapreduce/tests/test_backfill_hbase_from_cdx.py 5
-rw-r--r-- mapreduce/tests/test_extraction_cdx_grobid.py 6
2 files changed, 7 insertions, 4 deletions
diff --git a/mapreduce/tests/test_backfill_hbase_from_cdx.py b/mapreduce/tests/test_backfill_hbase_from_cdx.py
index 2dbbc25..070662b 100644
--- a/mapreduce/tests/test_backfill_hbase_from_cdx.py
+++ b/mapreduce/tests/test_backfill_hbase_from_cdx.py
@@ -15,12 +15,13 @@ def job():
Note: this mock only seems to work with job.run_mapper(), not job.run();
the latter results in a separate instantiation without the mock?
"""
+ job = MRCDXBackfillHBase(['--no-conf', '-'])
+
conn = happybase_mock.Connection()
conn.create_table('wbgrp-journal-extract-test',
{'file': {}, 'grobid0': {}, 'f': {}})
- table = conn.table('wbgrp-journal-extract-test')
+ job.hb_table = conn.table('wbgrp-journal-extract-test')
- job = MRCDXBackfillHBase(['--no-conf', '-'], hb_table=table)
return job
diff --git a/mapreduce/tests/test_extraction_cdx_grobid.py b/mapreduce/tests/test_extraction_cdx_grobid.py
index 46a89aa..02d2b41 100644
--- a/mapreduce/tests/test_extraction_cdx_grobid.py
+++ b/mapreduce/tests/test_extraction_cdx_grobid.py
@@ -18,14 +18,16 @@ def job():
Note: this mock only seems to work with job.run_mapper(), not job.run();
the latter results in a separate instantiation without the mock?
"""
+ job = MRExtractCdxGrobid(['--no-conf', '-'])
+
conn = happybase_mock.Connection()
conn.create_table('wbgrp-journal-extract-test',
{'file': {}, 'grobid0': {}, 'f': {}})
- table = conn.table('wbgrp-journal-extract-test')
+ job.hb_table = conn.table('wbgrp-journal-extract-test')
- job = MRExtractCdxGrobid(['--no-conf', '-'], hb_table=table)
return job
+
@mock.patch('extraction_cdx_grobid.MRExtractCdxGrobid.fetch_warc_content', return_value=(FAKE_PDF_BYTES, None))
@responses.activate
def test_mapper_lines(mock_fetch, job):