diff options
author | Bryan Newbold <bnewbold@archive.org> | 2018-03-30 12:49:49 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2018-03-30 12:49:49 -0700 |
commit | 719538b753423ac301e686886282695150192e47 (patch) | |
tree | 9a3e7447bcc34ee92cecde1430ad2648db73483c /pig | |
parent | 1ee37422c649aeb11a3f0bbe873b9c479992a5aa (diff) | |
download | sandcrawler-719538b753423ac301e686886282695150192e47.tar.gz sandcrawler-719538b753423ac301e686886282695150192e47.zip |
clean up pig test stuff
Diffstat (limited to 'pig')
-rw-r--r-- | pig/Pipfile | 3 | ||||
-rw-r--r-- | pig/Pipfile.lock | 54 | ||||
-rw-r--r-- | pig/README.md | 35 | ||||
-rw-r--r-- | pig/pytest.ini | 3 | ||||
-rw-r--r-- | pig/tests/pighelper.py | 5 | ||||
-rw-r--r-- | pig/tests/test_filter_cdx.py | 33 |
6 files changed, 71 insertions, 62 deletions
diff --git a/pig/Pipfile b/pig/Pipfile index dbdef21..a1f2c0c 100644 --- a/pig/Pipfile +++ b/pig/Pipfile @@ -11,8 +11,7 @@ name = "pypi" [packages] -pigpy = "*" -nose = "*" +pytest = "*" [requires] diff --git a/pig/Pipfile.lock b/pig/Pipfile.lock index 3ac834f..c1416ae 100644 --- a/pig/Pipfile.lock +++ b/pig/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "6591f4ebe3f7ad124ef9bae389cd9c11b2fe1d6936bf65c4cec64cb016ef0661" + "sha256": "159fc52f40ac4dd9b9cf92cb4f2cd7c0e5b9cc5fb667aabf4c69cb43cf147671" }, "host-environment-markers": { "implementation_name": "cpython", @@ -29,19 +29,55 @@ ] }, "default": { - "nose": { + "attrs": { "hashes": [ - "sha256:dadcddc0aefbf99eea214e0f1232b94f2fa9bd98fa8353711dacb112bfcbbb2a", - "sha256:9ff7c6cc443f8c51994b34a667bbcf45afd6d945be7477b52e97516fd17c53ac", - "sha256:f1bffef9cbc82628f6e7d7b40d7e255aefaa1adb6a1b1d26c69a8b79e6208a98" + "sha256:a17a9573a6f475c99b551c0e0a812707ddda1ec9653bed04c13841404ed6f450", + "sha256:1c7960ccfd6a005cd9f7ba884e6316b5e430a3f1a6c37c5f87d8b43f83b54ec9" ], - "version": "==1.3.7" + "version": "==17.4.0" }, - "pigpy": { + "funcsigs": { "hashes": [ - "sha256:89f91f07b95a2f84dda28159f8479209d50498d3aef7ff96f653345cbec09c96" + "sha256:330cc27ccbf7f1e992e69fef78261dc7c6569012cf397db8d3de0234e6c937ca", + "sha256:a7bb0f2cf3a3fd1ab2732cb49eba4252c2af4240442415b4abce3b87022a8f50" ], - "version": "==0.7" + "markers": "python_version < '3.0'", + "version": "==1.0.2" + }, + "more-itertools": { + "hashes": [ + "sha256:11a625025954c20145b37ff6309cd54e39ca94f72f6bb9576d1195db6fa2442e", + "sha256:0dd8f72eeab0d2c3bd489025bb2f6a1b8342f9b198f6fc37b52d15cfa4531fea", + "sha256:c9ce7eccdcb901a2c75d326ea134e0886abfbea5f93e91cc95de9507c0816c44" + ], + "version": "==4.1.0" + }, + "pluggy": { + "hashes": [ + "sha256:7f8ae7f5bdf75671a718d2daf0a64b7885f74510bcd98b1a0bb420eb9a9d0cff" + ], + "version": "==0.6.0" + }, + "py": { + "hashes": [ + "sha256:983f77f3331356039fdd792e9220b7b8ee1aa6bd2b25f567a963ff1de5a64f6a", + 
"sha256:29c9fab495d7528e80ba1e343b958684f4ace687327e6f789a94bf3d1915f881" + ], + "version": "==1.5.3" + }, + "pytest": { + "hashes": [ + "sha256:6266f87ab64692112e5477eba395cfedda53b1933ccd29478e671e73b420c19c", + "sha256:fae491d1874f199537fd5872b5e1f0e74a009b979df9d53d1553fd03da1703e1" + ], + "version": "==3.5.0" + }, + "six": { + "hashes": [ + "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb", + "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9" + ], + "version": "==1.11.0" } }, "develop": {} diff --git a/pig/README.md b/pig/README.md index c518591..86d487a 100644 --- a/pig/README.md +++ b/pig/README.md @@ -2,27 +2,32 @@ As of March 2018, the archive runs Pig version 0.12.0, via CDH5.0.1 (Cloudera Distribution). +"Local mode" unit tests in this folder run with Pig version 0.17.0 (controlled +by `fetch_deps.sh`) due to [dependency/jar issues][pig-bug] in local mode of +0.12.0. + +[pig-bug]: https://issues.apache.org/jira/browse/PIG-3530 + ## Development and Testing -To run pig in development on your laptop, you can either use docker or +Fetch dependencies (pig): + + ./fetch_deps.sh + +Write .pig scripts here, and add a python wrapper test to `./tests/` when done. +Test vector files (input/output) can go in `./tests/files/`. -https://hub.docker.com/r/chalimartines/local-pig +Install pipenv system-wide if you don't have it: - wget https://archive.cloudera.com/cdh5/cdh/5/pig-0.12.0-cdh5.0.1.tar.gz - tar xvf pig-*.tar.gz - ln -s pig-0.12.0-cdh5.0.1/pig-0.12.0-cdh5.0.1.jar pig-0.12.0-cdh5.0.1/pig.jar - ./pig-*/bin/pig -x local -version + # or use apt, homebrew, etc + sudo pip3 install pipenv - #XXX: don't need Hadoop? - #wget https://archive.cloudera.com/cdh5/cdh/5/hadoop-2.3.0-cdh5.0.1.tar.gz - #tar xvf hadoop-*.tar.gz - #export HADOOP_HOME=hadoop-2.3* +Run the tests with: -Tests require python3, nosetests3, and pigpy. 
You can install these with: + pipenv run pytest - pip install pipenv - pipenv install --three +Could also, in theory, use a docker image ([local-pig][]), but it's pretty easy +to just download. -Then: +[local-pig]: https://hub.docker.com/r/chalimartines/local-pig - pipenv run nosetests3 diff --git a/pig/pytest.ini b/pig/pytest.ini new file mode 100644 index 0000000..e79cf59 --- /dev/null +++ b/pig/pytest.ini @@ -0,0 +1,3 @@ + +[pytest] +norecursedirs = deps diff --git a/pig/tests/pighelper.py b/pig/tests/pighelper.py index 3786a07..d0d89d2 100644 --- a/pig/tests/pighelper.py +++ b/pig/tests/pighelper.py @@ -5,17 +5,16 @@ Include `PigTestHelper` and extend in your test classes, call `self.run_pig()` with your script and example input file, then look at the output (at returned path) to check for validity. -TODO: squelch pig stdout going to console, presumably because of subprocess.run -behavior +TODO: switch to pytest-style fixture generation author: Bryan Newbold <bnewbold@archive.org> """ + import os import shutil import tempfile import unittest import subprocess -from nose.tools import * class PigTestHelper(unittest.TestCase): diff --git a/pig/tests/test_filter_cdx.py b/pig/tests/test_filter_cdx.py index f46e5e1..897f1f9 100644 --- a/pig/tests/test_filter_cdx.py +++ b/pig/tests/test_filter_cdx.py @@ -1,42 +1,9 @@ -""" -Abstract into a base test class/template: - -1. Needs deps downloaded and installed and env configured (bash? .env? makefile?) -2. In test, create tempdir for output. Print helpful info on every run -3. 
Run pig locally, inspect output files -""" - import os import unittest -from nose.tools import * from pighelper import PigTestHelper -""" -class TestFilterCDX(unittest.TestCase): - - def setUp(self): - - classpaths = [ - os.path.join("pig-0.12.0-cdh5.0.1", "pig.jar"), - os.path.join("pig-0.12.0-cdh5.0.1", "lib", "*"), - ] - - local_home = os.path.join("hadoop-2.3.0-cdh5.0.1") - - name_node = "file:///test/files" - - self.hadoop = Hadoop(local_home, name_node, classpaths) - - def test_thing(self): - - self.hadoop.run_pig_job("filter-cdx-ps.pig") - self.hadoop.copyToLocal("/reports/output.csv", "output.csv") - -""" - class TestFilterCDX(PigTestHelper): def test_thing(self): - self.run_pig("filter-cdx-ps.pig", "tests/files/example.cdx") |