about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author bnewbold <bnewbold@archive.org> 2018-06-04 22:04:44 +0000
committer bnewbold <bnewbold@archive.org> 2018-06-04 22:04:44 +0000
commit 70069a78c8798352b5bef815a3fd4aa9e9b52394 (patch)
tree 6e67fe63f6e2051ce7a50184b90637065da8e1a8
parent d2dd016aa8da93ad14654237dbb7cfac214f9da8 (diff)
parent d434f5676a5d37692fe00fb14d72120d0c9933f8 (diff)
download sandcrawler-70069a78c8798352b5bef815a3fd4aa9e9b52394.tar.gz
download sandcrawler-70069a78c8798352b5bef815a3fd4aa9e9b52394.zip
Merge branch 'bnewbold-scala-build-fixes' into 'master'
scala build fixes

See merge request webgroup/sandcrawler!2
-rw-r--r-- .gitlab-ci.yml 14
-rw-r--r-- scalding/README.md 25
-rw-r--r-- scalding/build.sbt 1
3 files changed, 19 insertions, 21 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 12146b4..3970bbb 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,17 +1,27 @@
image: python:3.6-stretch
before_script:
- apt update -qy
- - apt install -y python3-dev python3-pip python3-wheel libjpeg-dev openjdk-8-jre-headless
+ - apt install -y apt-transport-https
+ - echo "deb https://dl.bintray.com/sbt/debian /" | tee -a /etc/apt/sources.list.d/sbt.list
+ - apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 2EE0EA64E40A89B84B2DF73499E82A75642AC823
+ - apt update -qy
+ - apt install -y python3-dev python3-pip python3-wheel libjpeg-dev openjdk-8-jdk-headless sbt
- pip3 install pipenv
- pipenv --version
-test_mapreduce:
+test_python_mapreduce:
script:
- cd mapreduce
- pipenv install --dev --deploy
- pipenv run pytest --cov
- pipenv run pylint --disable bad-continuation,arguments-differ,unidiomatic-typecheck *.py
+test_scalding:
+ script:
+ - cd scalding
+ - sbt -mem 1024 test
+ - sbt -mem 1024 assembly
+
# Needs fixing
#test_pig:
# script:
diff --git a/scalding/README.md b/scalding/README.md
index 13c2e1a..c40da5c 100644
--- a/scalding/README.md
+++ b/scalding/README.md
@@ -30,26 +30,13 @@ Run on cluster:
If your `sbt` task fails with this error:
java.util.concurrent.ExecutionException: java.lang.OutOfMemoryError: Metaspace
+
try restarting `sbt` with more memory (e.g., `sbt -mem 2048`).
-## Building SpyGlass Jar
+## SpyGlass Jar
SpyGlass is a "scalding-to-HBase" connector. It isn't maintained, so we needed
-to rebuild to support our versions of HBase/scalding/etc. From SpyGlass fork
-(<https://github.com/bnewbold/SpyGlass>,
-`bnewbold-scala2.11` branch):
-
- cd ~/src/SpyGlass
- git checkout bnewbold-scala2.11
-
- # This builds the new .jar and installs it in the (laptop local) ~/.m2
- # repository
- mvn clean install -U
-
- # Copy that .jar (and associated pom.xml) over to where sbt can find it
- mkdir -p ~/.sbt/preloaded/parallelai/
- cp -r ~/.m2/repository/parallelai/parallelai.spyglass ~/.sbt/preloaded/parallelai/
-
-The medium-term plan here is to push the custom SpyGlass jar as a static maven
-repo to an archive.org item, and point build.sbt to that folder.
-
+to rebuild to support our versions of HBase/scalding/etc. Our fork (including
+build instructions) is at <https://github.com/bnewbold/SpyGlass>
+(`bnewbold-scala2.11` branch); compiled .jar files are available from
+<https://archive.org/download/ia_sandcrawler_maven2>.
diff --git a/scalding/build.sbt b/scalding/build.sbt
index 1a90168..f333111 100644
--- a/scalding/build.sbt
+++ b/scalding/build.sbt
@@ -19,6 +19,7 @@ lazy val root = (project in file(".")).
resolvers += "Apache HBase" at "https://repository.apache.org/content/repositories/releases",
resolvers += "Cloudera Maven Repository" at "https://repository.cloudera.com/artifactory/cloudera-repos",
resolvers += "Twitter Maven Repository" at "https://maven.twttr.com",
+ resolvers += "IA Sandcrawler Rebuilt Jars" at "https://archive.org/download/ia_sandcrawler_maven2/repository",
libraryDependencies += scalaTest % Test,
libraryDependencies += "org.scala-lang" % "scala-library" % "2.11.8",