aboutsummaryrefslogtreecommitdiffstats
path: root/pig/tests
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2018-03-29 21:50:06 -0700
committerBryan Newbold <bnewbold@archive.org>2018-03-29 21:50:06 -0700
commit67e0a765749a4754ed353fe30c8e771d136322a4 (patch)
tree1e8d656ecc6f4830e5a3e787ba099f871a8137fa /pig/tests
parentcb1582c44a000983a2150679c51b1baf22c09778 (diff)
downloadsandcrawler-67e0a765749a4754ed353fe30c8e771d136322a4.tar.gz
sandcrawler-67e0a765749a4754ed353fe30c8e771d136322a4.zip
import WIP on pig test setup
Diffstat (limited to 'pig/tests')
-rw-r--r--pig/tests/test_filter_cdx.py35
1 files changed, 35 insertions, 0 deletions
diff --git a/pig/tests/test_filter_cdx.py b/pig/tests/test_filter_cdx.py
new file mode 100644
index 0000000..83f88bb
--- /dev/null
+++ b/pig/tests/test_filter_cdx.py
@@ -0,0 +1,35 @@
+
+"""
+TODO: abstract this setup into a base test class/template:
+
+1. Needs deps downloaded and installed and env configured (bash? .env? makefile?)
+2. In test, create tempdir for output. Print helpful info on every run
+3. Run pig locally, inspect output files
+"""
+
+import os
+import unittest
+from nose.tools import *
+from pigpy.hadoop import Hadoop
+
+
+class TestFilterCDX(unittest.TestCase):
+ # WIP test case: runs the filter-cdx-ps.pig script through the pigpy Hadoop wrapper.
+ def setUp(self):
+ # Build the Hadoop helper from relative paths to the pig and hadoop CDH directories.
+ classpaths = [
+ os.path.join("pig-0.12.0-cdh5.0.1", "pig.jar"),
+ os.path.join("pig-0.12.0-cdh5.0.1", "lib", "*"),
+ ]
+
+ local_home = os.path.join("hadoop-2.3.0-cdh5.0.1")
+ # NOTE(review): presumably the base URI that job paths resolve against -- confirm in pigpy.
+ name_node = "file:///test/files"
+
+ self.hadoop = Hadoop(local_home, name_node, classpaths)
+
+ def test_thing(self):
+ # Runs the pig job, then copies its report to a local file; no assertions on the output yet.
+ self.hadoop.run_pig_job("filter-cdx-ps.pig")
+ self.hadoop.copyToLocal("/reports/output.csv", "output.csv")
+