diff options
author | Bryan Newbold <bnewbold@archive.org> | 2018-03-29 23:25:34 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2018-03-29 23:25:34 -0700 |
commit | b67d08495cf8caa3fa52cf8f8f80389250685218 (patch) | |
tree | 2221804285c124703d7d1a7df057eec347bea61b | |
parent | 2d85e1cce20bea15595a70f2d1fb303e95ca5d0f (diff) | |
download | sandcrawler-b67d08495cf8caa3fa52cf8f8f80389250685218.tar.gz sandcrawler-b67d08495cf8caa3fa52cf8f8f80389250685218.zip |
basically working pig test
-rwxr-xr-x | pig/fetch_deps.sh | 16 | ||||
-rw-r--r-- | pig/pig_log4j.properties | 7 | ||||
-rw-r--r-- | pig/tests/log4j.properties | 9 | ||||
-rw-r--r-- | pig/tests/pig.properties | 2 | ||||
-rw-r--r-- | pig/tests/pighelper.py | 21 |
5 files changed, 32 insertions, 23 deletions
diff --git a/pig/fetch_deps.sh b/pig/fetch_deps.sh index 529f8f7..7bebe06 100755 --- a/pig/fetch_deps.sh +++ b/pig/fetch_deps.sh @@ -2,18 +2,18 @@ set -euo pipefail -# If you change this, also update tests/pighelper.py -PIG_VERSION="0.12.0-cdh5.0.1" +#PIG_VERSION="0.12.0-cdh5.2.0" +# Using more recent version to work around snappy classpath problem +PIG_VERSION="0.17.0" +JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:bin/java::") mkdir -p deps/ cd deps/ -wget -c https://archive.cloudera.com/cdh5/cdh/5/pig-${PIG_VERSION}.tar.gz + +# Fetch Pig +#wget -c https://archive.cloudera.com/cdh5/cdh/5/pig-${PIG_VERSION}.tar.gz +wget -c http://mirror.metrocast.net/apache/pig/pig-${PIG_VERSION}/pig-${PIG_VERSION}.tar.gz tar xvf pig-${PIG_VERSION}.tar.gz ln -fs pig-${PIG_VERSION} pig -cd pig -ln -fs pig-${PIG_VERSION}.jar pig.jar -cd .. - -JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:bin/java::") ./pig/bin/pig -x local -version diff --git a/pig/pig_log4j.properties b/pig/pig_log4j.properties deleted file mode 100644 index a64a19b..0000000 --- a/pig/pig_log4j.properties +++ /dev/null @@ -1,7 +0,0 @@ -log4j.rootLogger=WARN, A1 -log4j.appender.A1=org.apache.log4j.ConsoleAppender -log4j.appender.A1.layout=org.apache.log4j.PatternLayout -log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n - -log4j.logger.org.apache.pig=WARN, A1 -log4j.logger.org.apache.hadoop = WARN, A1 diff --git a/pig/tests/log4j.properties b/pig/tests/log4j.properties new file mode 100644 index 0000000..e094722 --- /dev/null +++ b/pig/tests/log4j.properties @@ -0,0 +1,9 @@ +log4j.rootLogger=WARN, stdout +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.Target=System.out +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n + +# With these enabled, get "log4j:ERROR Attempted to append to closed appender named [stdout]" +#log4j.logger.org.apache.pig=WARN, stdout 
+#log4j.logger.org.apache.hadoop = WARN, stdout diff --git a/pig/tests/pig.properties b/pig/tests/pig.properties new file mode 100644 index 0000000..082b936 --- /dev/null +++ b/pig/tests/pig.properties @@ -0,0 +1,2 @@ +log4jconf=./tests/log4j.properties +stop.on.failure=true diff --git a/pig/tests/pighelper.py b/pig/tests/pighelper.py index cee074d..3786a07 100644 --- a/pig/tests/pighelper.py +++ b/pig/tests/pighelper.py @@ -1,9 +1,17 @@ """ A helper class for locally testing Pig scripts. +Include `PigTestHelper` and extend in your test classes, call `self.run_pig()` +with your script and example input file, then look at the output (at returned +path) to check for validity. + +TODO: squelch pig stdout going to console, presumably because of subprocess.run +behavior + author: Bryan Newbold <bnewbold@archive.org> """ import os +import shutil import tempfile import unittest import subprocess @@ -16,30 +24,26 @@ class PigTestHelper(unittest.TestCase): def setUpClass(cls): cls._pigpath= "./deps/pig/bin/pig" + cls._classpath = "./deps/hadoop/share/hadoop/common/lib" cls._base = [cls._pigpath, '-x', 'local', - '-log4jconf', 'pig_log4j.properties', - '-stop_on_failure'] + '-P', './tests/pig.properties'] # Check that pig is functioning if subprocess.call(cls._base + ['-version']) != 0: raise unittest.SkipTest("Failed to find and run Pig") - # Classpath? - # os.path.join("pig-0.12.0-cdh5.0.1", "pig.jar"), - # os.path.join("pig-0.12.0-cdh5.0.1", "lib", "*"), - # "hadoop-2.3.0-cdh5.0.1" - def setUp(self): self._tmpdir = tempfile.mkdtemp() def tearDown(self): - os.rmdir(self._tmpdir) + shutil.rmtree(self._tmpdir) def run_pig_raw(self, params): """Low-level variant with params appended directly. 
Returns CompletedProcess, raises an error if return value isn't success""" + print("Running: {}".format(' '.join(self._base + params))) retval = subprocess.run(self._base + params, timeout=20.0, check=True) @@ -67,3 +71,4 @@ class PigTestHelper(unittest.TestCase): self.run_pig_raw(params) return out_file + # TODO: helper to verify that output matches an expected file |