diff options
author | Bryan Newbold <bnewbold@archive.org> | 2018-03-29 21:50:06 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2018-03-29 21:50:06 -0700 |
commit | 67e0a765749a4754ed353fe30c8e771d136322a4 (patch) | |
tree | 1e8d656ecc6f4830e5a3e787ba099f871a8137fa /pig/tests | |
parent | cb1582c44a000983a2150679c51b1baf22c09778 (diff) | |
download | sandcrawler-67e0a765749a4754ed353fe30c8e771d136322a4.tar.gz sandcrawler-67e0a765749a4754ed353fe30c8e771d136322a4.zip |
import WIP on pig test setup
Diffstat (limited to 'pig/tests')
-rw-r--r-- | pig/tests/test_filter_cdx.py | 35 |
1 files changed, 35 insertions, 0 deletions
# Contents of pig/tests/test_filter_cdx.py (new file, mode 100644), unfolded
# from the collapsed one-line diff.
"""
Abstract into a base test class/template:

1. Needs deps downloaded and installed and env configured (bash? .env? makefile?)
2. In test, create tempdir for output. Print helpful info on every run
3. Run pig locally, inspect output files
"""

import os
import shutil
import tempfile
import unittest
from nose.tools import *
from pigpy.hadoop import Hadoop


class TestFilterCDX(unittest.TestCase):
    """Run the filter-cdx-ps.pig script through pigpy's Hadoop wrapper.

    The Pig and Hadoop distributions are referenced by relative path, so the
    tests are expected to be started from the directory that contains them.
    """

    def setUp(self):
        """Create a per-test scratch directory and the Hadoop wrapper."""
        # Each test gets a fresh output directory so a leftover file from an
        # earlier run can never be mistaken for this run's result.
        self.output_dir = tempfile.mkdtemp(prefix="test_filter_cdx_")
        # addCleanup callbacks run even when setUp itself fails later on.
        self.addCleanup(shutil.rmtree, self.output_dir, ignore_errors=True)

        pig_home = "pig-0.12.0-cdh5.0.1"
        classpaths = [
            os.path.join(pig_home, "pig.jar"),
            os.path.join(pig_home, "lib", "*"),
        ]
        local_home = "hadoop-2.3.0-cdh5.0.1"
        name_node = "file:///test/files"

        self.hadoop = Hadoop(local_home, name_node, classpaths)

    def test_thing(self):
        """Run the Pig job and check that its report can be copied locally."""
        local_output = os.path.join(self.output_dir, "output.csv")

        self.hadoop.run_pig_job("filter-cdx-ps.pig")
        self.hadoop.copyToLocal("/reports/output.csv", local_output)

        # NOTE(review): a Pig STORE usually produces a directory of part
        # files rather than a single file, so only existence is checked
        # here -- confirm against filter-cdx-ps.pig before tightening this.
        self.assertTrue(
            os.path.exists(local_output),
            "expected job output at %s" % local_output,
        )