Configure Spark with the Cassandra connector

Use this script to get Spark 2.0 working with Cassandra.
It took me a while to figure out this setup.

# install java
sudo apt-get update -y
sudo apt-get install software-properties-common -y
sudo add-apt-repository -y ppa:openjdk-r/ppa
sudo apt-get install wget -y
sudo apt-get install openjdk-8-jdk -y
sudo apt-get update -y
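
# optional check: confirm the JDK installed above is the default (should report 1.8.x)
java -version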

# make /srv directory
sudo mkdir -p /srv
cd /srv

# scala 2.11 required for cassandra spark connector
sudo wget http://downloads.lightbend.com/scala/2.11.7/scala-2.11.7.deb
sudo dpkg -i scala-2.11.7.deb
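
# optional check: the connector build below expects Scala 2.11, so this should report 2.11.7
scala -version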

# get spark
sudo wget http://d3kbcqa49mib13.cloudfront.net/spark-2.0.0-bin-hadoop2.7.tgz
sudo tar -zxf spark-2.0.0-bin-hadoop2.7.tgz
sudo mv spark-2.0.0-bin-hadoop2.7 spark

# build spark cassandra connector
echo "deb https://dl.bintray.com/sbt/debian /" | sudo tee -a /etc/apt/sources.list.d/sbt.list
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 642AC823
sudo apt-get install apt-transport-https -y
sudo apt-get update -y
sudo apt-get install sbt -y
git clone https://github.com/datastax/spark-cassandra-connector.git
cd spark-cassandra-connector
git checkout v2.0.0-M2
sudo sbt assembly -Dscala-2.11=true

# move spark cassandra connector jars to spark jar directory
find . -iname "*.jar" -type f -exec /bin/cp {} /srv/spark/jars/ \;
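
# optional check: the connector jars should now sit alongside Spark's own jars
ls /srv/spark/jars/ | grep -i cassandra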

# start master
/srv/spark/sbin/start-master.sh --host 0.0.0.0

# start slave
/srv/spark/sbin/start-slave.sh --host 0.0.0.0 spark://localhost:7077
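
# optional check: the master web UI (default port 8080) should now list one worker
wget -qO- http://localhost:8080 | grep -i worker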

# start shell (spark-shell lives in bin/, not sbin/)
/srv/spark/bin/spark-shell --driver-class-path $(echo /srv/spark/jars/*.jar | sed 's/ /:/g')
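
# alternative (a sketch, assuming the machine has internet access): skip the manual
# jar copy and let spark-shell resolve the connector from Maven Central instead:
# /srv/spark/bin/spark-shell --packages com.datastax.spark:spark-cassandra-connector_2.11:2.0.0-M2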

# test (paste into the spark-shell started above)

sc.stop

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.cassandra._
import com.datastax.spark.connector._
import com.datastax.spark.connector.cql.CassandraConnector

// point the connector at the Cassandra host ("cassandraserver" is a placeholder)
val conf = new SparkConf(true).set("spark.cassandra.connection.host", "cassandraserver")
val sc = new SparkContext("spark://localhost:7077", "test", conf)

// read a table as an RDD and count its rows ("keyspace" and "users" are placeholders)
val table = sc.cassandraTable("keyspace", "users")
println(table.count)
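
The sql.cassandra import above also enables the DataFrame API. A minimal sketch of the
same read plus a write-back, reusing the placeholder keyspace/table names and assuming
a users table with id and name columns (both column names are hypothetical):

// DataFrame read through the connector's data source
val sqlContext = new SQLContext(sc)
val df = sqlContext.read
  .format("org.apache.spark.sql.cassandra")
  .options(Map("keyspace" -> "keyspace", "table" -> "users"))
  .load()
println(df.count)

// hypothetical write-back; assumes the users table has id and name columns
sc.parallelize(Seq((1, "alice"))).saveToCassandra("keyspace", "users", SomeColumns("id", "name"))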