diff options
| -rwxr-xr-x | pig/fetch_deps.sh | 16 | ||||
| -rw-r--r-- | pig/pig_log4j.properties | 7 | ||||
| -rw-r--r-- | pig/tests/log4j.properties | 9 | ||||
| -rw-r--r-- | pig/tests/pig.properties | 2 | ||||
| -rw-r--r-- | pig/tests/pighelper.py | 21 | 
5 files changed, 32 insertions, 23 deletions
diff --git a/pig/fetch_deps.sh b/pig/fetch_deps.sh index 529f8f7..7bebe06 100755 --- a/pig/fetch_deps.sh +++ b/pig/fetch_deps.sh @@ -2,18 +2,18 @@  set -euo pipefail -# If you change this, also update tests/pighelper.py -PIG_VERSION="0.12.0-cdh5.0.1" +#PIG_VERSION="0.12.0-cdh5.2.0" +# Using more recent version to work around snappy classpath problem +PIG_VERSION="0.17.0" +JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:bin/java::")  mkdir -p deps/  cd deps/ -wget -c https://archive.cloudera.com/cdh5/cdh/5/pig-${PIG_VERSION}.tar.gz + +# Fetch Pig +#wget -c https://archive.cloudera.com/cdh5/cdh/5/pig-${PIG_VERSION}.tar.gz +wget -c http://mirror.metrocast.net/apache/pig/pig-${PIG_VERSION}/pig-${PIG_VERSION}.tar.gz  tar xvf pig-${PIG_VERSION}.tar.gz  ln -fs pig-${PIG_VERSION} pig -cd pig -ln -fs pig-${PIG_VERSION}.jar pig.jar -cd .. - -JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:bin/java::")  ./pig/bin/pig -x local -version diff --git a/pig/pig_log4j.properties b/pig/pig_log4j.properties deleted file mode 100644 index a64a19b..0000000 --- a/pig/pig_log4j.properties +++ /dev/null @@ -1,7 +0,0 @@ -log4j.rootLogger=WARN, A1 -log4j.appender.A1=org.apache.log4j.ConsoleAppender -log4j.appender.A1.layout=org.apache.log4j.PatternLayout -log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n - -log4j.logger.org.apache.pig=WARN, A1 -log4j.logger.org.apache.hadoop = WARN, A1 diff --git a/pig/tests/log4j.properties b/pig/tests/log4j.properties new file mode 100644 index 0000000..e094722 --- /dev/null +++ b/pig/tests/log4j.properties @@ -0,0 +1,9 @@ +log4j.rootLogger=WARN, stdout +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.Target=System.out +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n + +# With these enabled, get "log4j:ERROR Attempted to append to closed appender named [stdout]" +#log4j.logger.org.apache.pig=WARN, stdout +#log4j.logger.org.apache.hadoop = WARN, stdout diff --git a/pig/tests/pig.properties b/pig/tests/pig.properties new file mode 100644 index 0000000..082b936 --- /dev/null +++ b/pig/tests/pig.properties @@ -0,0 +1,2 @@ +log4jconf=./tests/log4j.properties +stop.on.failure=true diff --git a/pig/tests/pighelper.py b/pig/tests/pighelper.py index cee074d..3786a07 100644 --- a/pig/tests/pighelper.py +++ b/pig/tests/pighelper.py @@ -1,9 +1,17 @@  """  A helper class for locally testing Pig scripts. +Include `PigTestHelper` and extend in your test classes, call `self.run_pig()` +with your script and example input file, then look at the output (at returned +path) to check for validity. + +TODO: squelch pig stdout going to console, presumably because of subprocess.run +behavior +  author: Bryan Newbold <bnewbold@archive.org>  """  import os +import shutil  import tempfile  import unittest  import subprocess @@ -16,30 +24,26 @@ class PigTestHelper(unittest.TestCase):      def setUpClass(cls):          cls._pigpath= "./deps/pig/bin/pig" +        cls._classpath = "./deps/hadoop/share/hadoop/common/lib"          cls._base = [cls._pigpath,              '-x', 'local', -            '-log4jconf', 'pig_log4j.properties', -            '-stop_on_failure'] +            '-P', './tests/pig.properties']          # Check that pig is functioning          if subprocess.call(cls._base + ['-version']) != 0:              raise unittest.SkipTest("Failed to find and run Pig") -        # Classpath? -        # os.path.join("pig-0.12.0-cdh5.0.1", "pig.jar"), -        # os.path.join("pig-0.12.0-cdh5.0.1", "lib", "*"), -        # "hadoop-2.3.0-cdh5.0.1" -      def setUp(self):          self._tmpdir = tempfile.mkdtemp()      def tearDown(self): -        os.rmdir(self._tmpdir) +        shutil.rmtree(self._tmpdir)      def run_pig_raw(self, params):          """Low-level variant with params appended directly. Returns          CompletedProcess, raises an error if return value isn't succes""" +        print("Running: {}".format(' '.join(self._base + params)))          retval = subprocess.run(self._base + params,              timeout=20.0,              check=True) @@ -67,3 +71,4 @@ class PigTestHelper(unittest.TestCase):          self.run_pig_raw(params)          return out_file +    # TODO: helper to verify that output matches an expected file  | 
