about | summary | refs | log | tree | commit | diff | stats
path: root/pig
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2018-03-29 23:25:34 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2018-03-29 23:25:34 -0700
commit: b67d08495cf8caa3fa52cf8f8f80389250685218 (patch)
tree: 2221804285c124703d7d1a7df057eec347bea61b /pig
parent: 2d85e1cce20bea15595a70f2d1fb303e95ca5d0f (diff)
download: sandcrawler-b67d08495cf8caa3fa52cf8f8f80389250685218.tar.gz
          sandcrawler-b67d08495cf8caa3fa52cf8f8f80389250685218.zip
basically working pig test
Diffstat (limited to 'pig')
-rwxr-xr-x  pig/fetch_deps.sh          16
-rw-r--r--  pig/pig_log4j.properties    7
-rw-r--r--  pig/tests/log4j.properties  9
-rw-r--r--  pig/tests/pig.properties    2
-rw-r--r--  pig/tests/pighelper.py     21
5 files changed, 32 insertions, 23 deletions
diff --git a/pig/fetch_deps.sh b/pig/fetch_deps.sh
index 529f8f7..7bebe06 100755
--- a/pig/fetch_deps.sh
+++ b/pig/fetch_deps.sh
@@ -2,18 +2,18 @@
set -euo pipefail
-# If you change this, also update tests/pighelper.py
-PIG_VERSION="0.12.0-cdh5.0.1"
+#PIG_VERSION="0.12.0-cdh5.2.0"
+# Using more recent version to work around snappy classpath problem
+PIG_VERSION="0.17.0"
+JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:bin/java::")
mkdir -p deps/
cd deps/
-wget -c https://archive.cloudera.com/cdh5/cdh/5/pig-${PIG_VERSION}.tar.gz
+
+# Fetch Pig
+#wget -c https://archive.cloudera.com/cdh5/cdh/5/pig-${PIG_VERSION}.tar.gz
+wget -c http://mirror.metrocast.net/apache/pig/pig-${PIG_VERSION}/pig-${PIG_VERSION}.tar.gz
tar xvf pig-${PIG_VERSION}.tar.gz
ln -fs pig-${PIG_VERSION} pig
-cd pig
-ln -fs pig-${PIG_VERSION}.jar pig.jar
-cd ..
-
-JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:bin/java::")
./pig/bin/pig -x local -version
diff --git a/pig/pig_log4j.properties b/pig/pig_log4j.properties
deleted file mode 100644
index a64a19b..0000000
--- a/pig/pig_log4j.properties
+++ /dev/null
@@ -1,7 +0,0 @@
-log4j.rootLogger=WARN, A1
-log4j.appender.A1=org.apache.log4j.ConsoleAppender
-log4j.appender.A1.layout=org.apache.log4j.PatternLayout
-log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
-
-log4j.logger.org.apache.pig=WARN, A1
-log4j.logger.org.apache.hadoop = WARN, A1
diff --git a/pig/tests/log4j.properties b/pig/tests/log4j.properties
new file mode 100644
index 0000000..e094722
--- /dev/null
+++ b/pig/tests/log4j.properties
@@ -0,0 +1,9 @@
+log4j.rootLogger=WARN, stdout
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.Target=System.out
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
+
+# With these enabled, get "log4j:ERROR Attempted to append to closed appender named [stdout]"
+#log4j.logger.org.apache.pig=WARN, stdout
+#log4j.logger.org.apache.hadoop = WARN, stdout
diff --git a/pig/tests/pig.properties b/pig/tests/pig.properties
new file mode 100644
index 0000000..082b936
--- /dev/null
+++ b/pig/tests/pig.properties
@@ -0,0 +1,2 @@
+log4jconf=./tests/log4j.properties
+stop.on.failure=true
diff --git a/pig/tests/pighelper.py b/pig/tests/pighelper.py
index cee074d..3786a07 100644
--- a/pig/tests/pighelper.py
+++ b/pig/tests/pighelper.py
@@ -1,9 +1,17 @@
"""
A helper class for locally testing Pig scripts.
+Include `PigTestHelper` and extend in your test classes, call `self.run_pig()`
+with your script and example input file, then look at the output (at returned
+path) to check for validity.
+
+TODO: squelch pig stdout going to console, presumably because of subprocess.run
+behavior
+
author: Bryan Newbold <bnewbold@archive.org>
"""
import os
+import shutil
import tempfile
import unittest
import subprocess
@@ -16,30 +24,26 @@ class PigTestHelper(unittest.TestCase):
def setUpClass(cls):
cls._pigpath= "./deps/pig/bin/pig"
+ cls._classpath = "./deps/hadoop/share/hadoop/common/lib"
cls._base = [cls._pigpath,
'-x', 'local',
- '-log4jconf', 'pig_log4j.properties',
- '-stop_on_failure']
+ '-P', './tests/pig.properties']
# Check that pig is functioning
if subprocess.call(cls._base + ['-version']) != 0:
raise unittest.SkipTest("Failed to find and run Pig")
- # Classpath?
- # os.path.join("pig-0.12.0-cdh5.0.1", "pig.jar"),
- # os.path.join("pig-0.12.0-cdh5.0.1", "lib", "*"),
- # "hadoop-2.3.0-cdh5.0.1"
-
def setUp(self):
self._tmpdir = tempfile.mkdtemp()
def tearDown(self):
- os.rmdir(self._tmpdir)
+ shutil.rmtree(self._tmpdir)
def run_pig_raw(self, params):
"""Low-level variant with params appended directly. Returns
CompletedProcess, raises an error if return value isn't succes"""
+ print("Running: {}".format(' '.join(self._base + params)))
retval = subprocess.run(self._base + params,
timeout=20.0,
check=True)
@@ -67,3 +71,4 @@ class PigTestHelper(unittest.TestCase):
self.run_pig_raw(params)
return out_file
+ # TODO: helper to verify that output matches an expected file