aboutsummaryrefslogtreecommitdiffstats
path: root/pig
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2018-03-30 12:49:49 -0700
committer Bryan Newbold <bnewbold@archive.org> 2018-03-30 12:49:49 -0700
commit 719538b753423ac301e686886282695150192e47 (patch)
tree 9a3e7447bcc34ee92cecde1430ad2648db73483c /pig
parent 1ee37422c649aeb11a3f0bbe873b9c479992a5aa (diff)
download sandcrawler-719538b753423ac301e686886282695150192e47.tar.gz
sandcrawler-719538b753423ac301e686886282695150192e47.zip
clean up pig test stuff
Diffstat (limited to 'pig')
-rw-r--r-- pig/Pipfile 3
-rw-r--r-- pig/Pipfile.lock 54
-rw-r--r-- pig/README.md 35
-rw-r--r-- pig/pytest.ini 3
-rw-r--r-- pig/tests/pighelper.py 5
-rw-r--r-- pig/tests/test_filter_cdx.py 33
6 files changed, 71 insertions, 62 deletions
diff --git a/pig/Pipfile b/pig/Pipfile
index dbdef21..a1f2c0c 100644
--- a/pig/Pipfile
+++ b/pig/Pipfile
@@ -11,8 +11,7 @@ name = "pypi"
[packages]
-pigpy = "*"
-nose = "*"
+pytest = "*"
[requires]
diff --git a/pig/Pipfile.lock b/pig/Pipfile.lock
index 3ac834f..c1416ae 100644
--- a/pig/Pipfile.lock
+++ b/pig/Pipfile.lock
@@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
- "sha256": "6591f4ebe3f7ad124ef9bae389cd9c11b2fe1d6936bf65c4cec64cb016ef0661"
+ "sha256": "159fc52f40ac4dd9b9cf92cb4f2cd7c0e5b9cc5fb667aabf4c69cb43cf147671"
},
"host-environment-markers": {
"implementation_name": "cpython",
@@ -29,19 +29,55 @@
]
},
"default": {
- "nose": {
+ "attrs": {
"hashes": [
- "sha256:dadcddc0aefbf99eea214e0f1232b94f2fa9bd98fa8353711dacb112bfcbbb2a",
- "sha256:9ff7c6cc443f8c51994b34a667bbcf45afd6d945be7477b52e97516fd17c53ac",
- "sha256:f1bffef9cbc82628f6e7d7b40d7e255aefaa1adb6a1b1d26c69a8b79e6208a98"
+ "sha256:a17a9573a6f475c99b551c0e0a812707ddda1ec9653bed04c13841404ed6f450",
+ "sha256:1c7960ccfd6a005cd9f7ba884e6316b5e430a3f1a6c37c5f87d8b43f83b54ec9"
],
- "version": "==1.3.7"
+ "version": "==17.4.0"
},
- "pigpy": {
+ "funcsigs": {
"hashes": [
- "sha256:89f91f07b95a2f84dda28159f8479209d50498d3aef7ff96f653345cbec09c96"
+ "sha256:330cc27ccbf7f1e992e69fef78261dc7c6569012cf397db8d3de0234e6c937ca",
+ "sha256:a7bb0f2cf3a3fd1ab2732cb49eba4252c2af4240442415b4abce3b87022a8f50"
],
- "version": "==0.7"
+ "markers": "python_version < '3.0'",
+ "version": "==1.0.2"
+ },
+ "more-itertools": {
+ "hashes": [
+ "sha256:11a625025954c20145b37ff6309cd54e39ca94f72f6bb9576d1195db6fa2442e",
+ "sha256:0dd8f72eeab0d2c3bd489025bb2f6a1b8342f9b198f6fc37b52d15cfa4531fea",
+ "sha256:c9ce7eccdcb901a2c75d326ea134e0886abfbea5f93e91cc95de9507c0816c44"
+ ],
+ "version": "==4.1.0"
+ },
+ "pluggy": {
+ "hashes": [
+ "sha256:7f8ae7f5bdf75671a718d2daf0a64b7885f74510bcd98b1a0bb420eb9a9d0cff"
+ ],
+ "version": "==0.6.0"
+ },
+ "py": {
+ "hashes": [
+ "sha256:983f77f3331356039fdd792e9220b7b8ee1aa6bd2b25f567a963ff1de5a64f6a",
+ "sha256:29c9fab495d7528e80ba1e343b958684f4ace687327e6f789a94bf3d1915f881"
+ ],
+ "version": "==1.5.3"
+ },
+ "pytest": {
+ "hashes": [
+ "sha256:6266f87ab64692112e5477eba395cfedda53b1933ccd29478e671e73b420c19c",
+ "sha256:fae491d1874f199537fd5872b5e1f0e74a009b979df9d53d1553fd03da1703e1"
+ ],
+ "version": "==3.5.0"
+ },
+ "six": {
+ "hashes": [
+ "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb",
+ "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9"
+ ],
+ "version": "==1.11.0"
}
},
"develop": {}
diff --git a/pig/README.md b/pig/README.md
index c518591..86d487a 100644
--- a/pig/README.md
+++ b/pig/README.md
@@ -2,27 +2,32 @@
As of March 2018, the archive runs Pig version 0.12.0, via CDH5.0.1 (Cloudera
Distribution).
+"Local mode" unit tests in this folder run with Pig version 0.17.0 (controlled
+by `fetch_deps.sh`) due to [dependency/jar issues][pig-bug] in local mode of
+0.12.0.
+
+[pig-bug]: https://issues.apache.org/jira/browse/PIG-3530
+
## Development and Testing
-To run pig in development on your laptop, you can either use docker or
+Fetch dependencies (pig):
+
+ ./fetch_deps.sh
+
+Write .pig scripts here, and add a Python wrapper test to `./tests/` when done.
+Test vector files (input/output) can go in `./tests/files/`.
-https://hub.docker.com/r/chalimartines/local-pig
+Install pipenv system-wide if you don't have it:
- wget https://archive.cloudera.com/cdh5/cdh/5/pig-0.12.0-cdh5.0.1.tar.gz
- tar xvf pig-*.tar.gz
- ln -s pig-0.12.0-cdh5.0.1/pig-0.12.0-cdh5.0.1.jar pig-0.12.0-cdh5.0.1/pig.jar
- ./pig-*/bin/pig -x local -version
+ # or use apt, homebrew, etc
+ sudo pip3 install pipenv
- #XXX: don't need Hadoop?
- #wget https://archive.cloudera.com/cdh5/cdh/5/hadoop-2.3.0-cdh5.0.1.tar.gz
- #tar xvf hadoop-*.tar.gz
- #export HADOOP_HOME=hadoop-2.3*
+Run the tests with:
-Tests require python3, nosetests3, and pigpy. You can install these with:
+ pipenv run pytest
- pip install pipenv
- pipenv install --three
+Could also, in theory, use a docker image ([local-pig][]), but it's pretty easy
+to just download.
-Then:
+[local-pig]: https://hub.docker.com/r/chalimartines/local-pig
- pipenv run nosetests3
diff --git a/pig/pytest.ini b/pig/pytest.ini
new file mode 100644
index 0000000..e79cf59
--- /dev/null
+++ b/pig/pytest.ini
@@ -0,0 +1,3 @@
+
+[pytest]
+norecursedirs = deps
diff --git a/pig/tests/pighelper.py b/pig/tests/pighelper.py
index 3786a07..d0d89d2 100644
--- a/pig/tests/pighelper.py
+++ b/pig/tests/pighelper.py
@@ -5,17 +5,16 @@ Include `PigTestHelper` and extend in your test classes, call `self.run_pig()`
with your script and example input file, then look at the output (at returned
path) to check for validity.
-TODO: squelch pig stdout going to console, presumably because of subprocess.run
-behavior
+TODO: switch to pytest-style fixture generation
author: Bryan Newbold <bnewbold@archive.org>
"""
+
import os
import shutil
import tempfile
import unittest
import subprocess
-from nose.tools import *
class PigTestHelper(unittest.TestCase):
diff --git a/pig/tests/test_filter_cdx.py b/pig/tests/test_filter_cdx.py
index f46e5e1..897f1f9 100644
--- a/pig/tests/test_filter_cdx.py
+++ b/pig/tests/test_filter_cdx.py
@@ -1,42 +1,9 @@
-"""
-Abstract into a base test class/template:
-
-1. Needs deps downloaded and installed and env configured (bash? .env? makefile?)
-2. In test, create tempdir for output. Print helpful info on every run
-3. Run pig locally, inspect output files
-"""
-
import os
import unittest
-from nose.tools import *
from pighelper import PigTestHelper
-"""
-class TestFilterCDX(unittest.TestCase):
-
- def setUp(self):
-
- classpaths = [
- os.path.join("pig-0.12.0-cdh5.0.1", "pig.jar"),
- os.path.join("pig-0.12.0-cdh5.0.1", "lib", "*"),
- ]
-
- local_home = os.path.join("hadoop-2.3.0-cdh5.0.1")
-
- name_node = "file:///test/files"
-
- self.hadoop = Hadoop(local_home, name_node, classpaths)
-
- def test_thing(self):
-
- self.hadoop.run_pig_job("filter-cdx-ps.pig")
- self.hadoop.copyToLocal("/reports/output.csv", "output.csv")
-
-"""
-
class TestFilterCDX(PigTestHelper):
def test_thing(self):
-
self.run_pig("filter-cdx-ps.pig", "tests/files/example.cdx")