/** One parsed record of the input file.
  *
  * @param alias   textual first field of the record
  * @param classId integer second field of the record
  * @param typeId  integer third field of the record
  */
case class Doc(
  alias: String,
  classId: Int,
  typeId: Int
)
// Load the raw text file and split every line on the literal '|' character.
// String.split takes a regular expression, where '|' means alternation, so the
// pipe has to be escaped for the regex engine -- and the backslash itself has
// to be escaped for the Scala string literal ("\|" alone is rejected by the
// compiler as an invalid escape sequence).
val data = sc.textFile(bigFile).map(_.split("\\|"))

// Turn each split line into a Doc: field 0 is the alias, fields 1 and 2 are
// parsed as integers. Surrounding whitespace is stripped from every field.
// NOTE(review): a line with fewer than three fields or a non-numeric id will
// throw here once an action runs -- confirm the input is always well-formed.
val docs = data.map { fields =>
  Doc(fields(0).trim, fields(1).trim.toInt, fields(2).trim.toInt)
}

// Convert the RDD of Doc into a DataFrame whose columns mirror Doc's fields.
val df = docs.toDF()

// Quick inspection: first rows and the inferred schema.
df.show()
df.printSchema()

// Number of records per classId.
df.groupBy("classId").count().show()

// Expose the DataFrame to SQL under the name "docs" and count the aliases of
// all records whose typeId is 99999.
df.registerTempTable("docs")
sqlContext.sql("SELECT COUNT(alias) FROM docs WHERE typeId = 99999").show()