Apache Spark SQL

case class Doc(alias: String, classId: Int, typeId: Int)

// Split each pipe-delimited line; the pipe must be escaped as "\\|" in the regex.
val data = sc.textFile(bigFile).map(_.split("\\|"))
val docs = data.map(p => Doc(p(0).trim, p(1).trim.toInt, p(2).trim.toInt))

// toDF() requires the SQLContext implicits to be in scope.
import sqlContext.implicits._
val df = docs.toDF()
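
For context, each input line is assumed to be pipe-delimited; a minimal sketch of how one such line maps onto Doc (the sample values are assumptions, not taken from the original data set):

val sampleLine = "intro-guide | 3 | 12"   // assumed format of a line in bigFile
val fields     = sampleLine.split("\\|")  // Array("intro-guide ", " 3 ", " 12")
val sampleDoc  = Doc(fields(0).trim, fields(1).trim.toInt, fields(2).trim.toInt)
// sampleDoc == Doc("intro-guide", 3, 12)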

df.show()
df.printSchema()
df.groupBy("classId").count().show()
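
As a sketch of taking the same grouped count one step further (not part of the original walkthrough), the classes can be ordered by frequency:

import org.apache.spark.sql.functions.desc
df.groupBy("classId").count().orderBy(desc("count")).show()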

// Register the DataFrame as a temporary table so it can be queried with SQL.
df.registerTempTable("docs")
sqlContext.sql("SELECT COUNT(alias) FROM docs WHERE typeId = 99999").show()