Montagem do SBT, Spark e você!

Testado com Scala 2.11.11, sbt 0.13.15 e Spark 2.1.0

Este build.sbt realiza as seguintes coisas:
* permite executar `sbt run` a partir do shell ou do terminal sbt
* usa o plugin sbt-assembly para empacotar adequadamente o jar para uso com `spark-submit` em uma instância Spark em execução dentro do contêiner docker

// Basic project coordinates.
name := "spark-mllib-test"
version := "1.0"
scalaVersion := "2.11.11"

// Single source of truth for the Spark version shared by all Spark deps below.
val sparkVersion = "2.1.0"

libraryDependencies ++= Seq(
  // "provided": the Spark runtime already supplies these jars, so keep them
  // out of the assembly; they are re-added for local `sbt run` further below.
  "org.apache.spark" %% "spark-core" % sparkVersion % "provided",
  "org.apache.spark" %% "spark-streaming" % sparkVersion % "provided",
  "org.apache.spark" %% "spark-mllib" % sparkVersion % "provided",

  // Not Spark-related: just allows passing command line options to the spark job.
  "args4j" % "args4j" % "2.33",
  // NOTE(review): artifact suffix is _2.10 while scalaVersion is 2.11.11 — this
  // only works if the library happens to be binary compatible; confirm, or
  // switch to a _2.11 build (or `%%`) if one exists.
  "com.bizo" % "args4j-helpers_2.10" % "1.0.0"
)

/* Without this explicit merge strategy you get a lot of noise from sbt-assembly
   complaining about not being able to deduplicate files that appear in more
   than one dependency jar. `MergeStrategy.last` keeps the last copy seen. */
assemblyMergeStrategy in assembly := {
  case PathList("org", "aopalliance", xs @ _*)       => MergeStrategy.last
  case PathList("javax", "inject", xs @ _*)          => MergeStrategy.last
  case PathList("javax", "servlet", xs @ _*)         => MergeStrategy.last
  case PathList("javax", "activation", xs @ _*)      => MergeStrategy.last
  case PathList("org", "apache", xs @ _*)            => MergeStrategy.last
  case PathList("com", "google", xs @ _*)            => MergeStrategy.last
  case PathList("com", "esotericsoftware", xs @ _*)  => MergeStrategy.last
  case PathList("com", "codahale", xs @ _*)          => MergeStrategy.last
  case PathList("com", "yammer", xs @ _*)            => MergeStrategy.last
  case "about.html"                                  => MergeStrategy.rename
  case "META-INF/ECLIPSEF.RSA"                       => MergeStrategy.last
  case "META-INF/mailcap"                            => MergeStrategy.last
  case "META-INF/mimetypes.default"                  => MergeStrategy.last
  case "plugin.properties"                           => MergeStrategy.last
  case "log4j.properties"                            => MergeStrategy.last
  case "overview.html"                               => MergeStrategy.last // added for Spark 2.1.0
  case x =>
    // Everything else falls through to sbt-assembly's default strategy.
    val oldStrategy = (assemblyMergeStrategy in assembly).value
    oldStrategy(x)
}

/* Including the Scala library bloats the assembly jar unnecessarily, and may
   interfere with the Spark runtime, which ships its own Scala. */
assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)

// Fixed jar name so spark-submit invocations don't need to track the version.
assemblyJarName in assembly := "spark-mllib-test.jar"

/* The "provided" scope above removes the Spark jars from the runtime classpath,
   which would break running Spark programs locally (i.e. from sbt). This
   re-includes the full compile classpath in the `run` task's runtime classpath. */
fullClasspath in Runtime := (fullClasspath in (Compile, run)).value