diff options
Diffstat (limited to 'pig/tests/pighelper.py')
| -rw-r--r-- | pig/tests/pighelper.py | 69 | 
1 files changed, 69 insertions, 0 deletions
"""
A helper class for locally testing Pig scripts.

author: Bryan Newbold <bnewbold@archive.org>
"""
import os
import shutil
import subprocess
import tempfile
import unittest

# Nothing in this module uses nose. The wildcard import is kept only so that
# test modules which pick up nose's assert helpers through this module keep
# working; it is optional so that a missing nose install does not prevent the
# helper itself from loading.
try:
    from nose.tools import *  # noqa: F401,F403
except ImportError:
    pass


class PigTestHelper(unittest.TestCase):
    """Base TestCase for running Pig scripts in local mode.

    setUpClass() stores two class attributes:
      _pigpath: path of the Pig launcher, relative to the working directory
      _base:    argument list prefix shared by every Pig invocation

    Every test gets its own scratch directory in self._tmpdir, created in
    setUp() and removed (with its contents) in tearDown().
    """

    @classmethod
    def setUpClass(cls):
        """Build the base Pig command line and skip the class if Pig won't run.

        Raises:
            unittest.SkipTest: if the launcher is missing, cannot be executed,
                or `pig -version` exits with a non-zero status.
        """
        cls._pigpath = "./deps/pig/bin/pig"
        cls._base = [
            cls._pigpath,
            '-x', 'local',
            '-log4jconf', 'pig_log4j.properties',
            '-stop_on_failure',
        ]

        # Check that pig is functioning. A missing or non-executable launcher
        # makes subprocess raise OSError rather than return an exit status;
        # that is still "failed to find Pig", so it must skip, not error.
        try:
            status = subprocess.call(cls._base + ['-version'])
        except OSError:
            status = None
        if status != 0:
            raise unittest.SkipTest("Failed to find and run Pig")

        # Classpath?
        # os.path.join("pig-0.12.0-cdh5.0.1", "pig.jar"),
        # os.path.join("pig-0.12.0-cdh5.0.1", "lib", "*"),
        # "hadoop-2.3.0-cdh5.0.1"

    def setUp(self):
        """Create a fresh scratch directory for this test."""
        self._tmpdir = tempfile.mkdtemp()

    def tearDown(self):
        """Remove this test's scratch directory and everything inside it."""
        # run_pig() points OUTPUT inside the scratch directory, so it is
        # normally non-empty by now; os.rmdir() would raise OSError on it.
        shutil.rmtree(self._tmpdir)

    def run_pig_raw(self, params):
        """Low-level variant with params appended directly to the base command.

        Args:
            params: list of extra command-line arguments for Pig.

        Returns:
            The subprocess.CompletedProcess of the finished run.

        Raises:
            subprocess.CalledProcessError: if Pig exits with a non-zero status.
            subprocess.TimeoutExpired: if Pig runs longer than 20 seconds.
        """
        return subprocess.run(
            self._base + params,
            timeout=20.0,
            check=True)

    def run_pig(self, script_path, in_file, **kwargs):
        """Convenience helper around run_pig_raw().

        INPUT parameter is set to in_file.
        OUTPUT parameter is set to a random path inside the scratch directory.
        Any keyword args are passed as additional `-p KEY=VALUE` parameters.

        Returns:
            The path that was passed to the script as OUTPUT.
        """
        pargs = []
        for key, value in kwargs.items():
            pargs.extend(['-p', '{}={}'.format(key, value)])

        # mktemp() only picks an unused name; it does not create anything.
        # NOTE(review): presumably needed because the script's output location
        # must not exist before Pig runs -- confirm. The usual mktemp() race is
        # not a concern here because the parent is a private mkdtemp() dir.
        out_file = tempfile.mktemp(dir=self._tmpdir)
        params = [
            '-f', script_path,
            '-p', 'INPUT={}'.format(in_file),
            '-p', 'OUTPUT={}'.format(out_file),
        ] + pargs
        self.run_pig_raw(params)
        return out_file
