about summary refs log tree commit diff stats
path: root/fetch_hadoop.sh
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2018-08-24 12:19:09 -0700
committer Bryan Newbold <bnewbold@archive.org> 2018-08-24 12:19:09 -0700
commit f50d4e081f7994a167c4974ee9d3f6e1f8eae478 (patch)
tree 00cf69ffe345f7766e7477cb9b1f5f7448b4e4fb /fetch_hadoop.sh
parent 344531eb6a5cdd4ea15e4d82050368c5af0eafee (diff)
parent 5340caad7b39ad29bba77d2a3e486db7a6b1977b (diff)
download sandcrawler-f50d4e081f7994a167c4974ee9d3f6e1f8eae478.tar.gz
sandcrawler-f50d4e081f7994a167c4974ee9d3f6e1f8eae478.zip
Merge branch 'bnewbold-match-quality'
Manually resolved merge conflict in: please
Diffstat (limited to 'fetch_hadoop.sh')
-rwxr-xr-x fetch_hadoop.sh 38
1 files changed, 38 insertions, 0 deletions
diff --git a/fetch_hadoop.sh b/fetch_hadoop.sh
new file mode 100755
index 0000000..633f8fa
--- /dev/null
+++ b/fetch_hadoop.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+
+# Downloads and unpacks Hadoop and Pig under pig/deps/. Originally only for pig
+# scripts; it can also be used to run scalding code locally (via please).
+
+set -euo pipefail
+
+#PIG_VERSION="0.12.0-cdh5.2.0"
+# Using more recent version to work around snappy classpath problem
+PIG_VERSION="0.17.0"
+HADOOP_VERSION="2.3.0-cdh5.0.1"
+
+mkdir -p pig/deps/
+cd pig/deps/
+
+# Fetch Hadoop (echo shows the upstream URL; the download uses the mirror)
+echo "https://archive.cloudera.com/cdh5/cdh/5/hadoop-${HADOOP_VERSION}.tar.gz"
+#wget -c https://archive.cloudera.com/cdh5/cdh/5/hadoop-${HADOOP_VERSION}.tar.gz
+wget -c "https://archive.org/serve/hadoop_pig_mirror/hadoop-${HADOOP_VERSION}.tar.gz"
+echo "Extracting Hadoop (takes a minute)..."
+tar xf "hadoop-${HADOOP_VERSION}.tar.gz"
+ln -sfn "hadoop-${HADOOP_VERSION}" hadoop  # -n: replace an existing link on re-run
+
+# Fetch Pig
+#wget -c https://archive.cloudera.com/cdh5/cdh/5/pig-${PIG_VERSION}.tar.gz
+#wget -c http://mirror.metrocast.net/apache/pig/pig-${PIG_VERSION}/pig-${PIG_VERSION}.tar.gz
+wget -c "https://archive.org/serve/hadoop_pig_mirror/pig-${PIG_VERSION}.tar.gz"
+echo "Extracting Pig (takes a minute)..."
+tar xf "pig-${PIG_VERSION}.tar.gz"
+ln -sfn "pig-${PIG_VERSION}" pig  # -n: replace an existing link on re-run
+
+# No 'readlink -f' on macOS
+# https://stackoverflow.com/a/24572274/4682349
+JAVA_HOME=$(perl -MCwd -e 'print Cwd::abs_path shift' /usr/bin/java | sed "s:bin/java::")
+export JAVA_HOME  # separate export: child processes see it, pipeline errors still abort
+./pig/bin/pig -x local -version
+./hadoop/bin/hadoop version
+