spark dataFrame api操作
2021-03-04 21:25
标签:com substring == import creat 操作 sort park port
api代码示例如下 操作文件内容如下:
spark dataFrame api操作
标签:com substring == import creat 操作 sort park port
原文地址:https://www.cnblogs.com/erlou96/p/12919747.html
object DataFrameCase {
/**
 * Demonstrates common DataFrame operations (show, take, filter/where, sort,
 * select with alias, join) on a pipe-delimited student file.
 *
 * Every input line is parsed as `id|name|phone|email`, where `id` must be an integer.
 *
 * @param args optional; when present, `args(0)` is used as the input path instead of
 *             the built-in default path
 */
def main(args: Array[String]): Unit = {
  // Local import: nothing else in this file brings SparkSession into scope.
  import org.apache.spark.sql.SparkSession

  val sparksession = SparkSession.builder().appName("DataFrameCase").master("local[2]").getOrCreate()
  try {
    val path = args.headOption.getOrElse("G:\\student.data")
    val sparkrdd = sparksession.sparkContext.textFile(path)
    // Implicit conversions (provide .toDF() on an RDD of case-class instances)
    import sparksession.implicits._
    // Parse the file once. The -1 limit keeps trailing empty fields, so a row whose
    // last column is empty still yields four elements.
    val students = sparkrdd
      .map(_.split("\\|", -1))
      .map(fields => Student(fields(0).toInt, fields(1), fields(2), fields(3)))
    val studentDF = students.toDF()
    // A second DataFrame over the same rows, used as the right-hand side of the join.
    val studentDF2 = students.toDF()

    studentDF.show()
    // take() only returns the rows to the driver; print them so the call has a visible effect.
    studentDF.take(10).foreach(println)
    // SQL string literals need plain ASCII single quotes.
    studentDF.filter("name='' OR name = 'NULL'").show()
    // Show the first 30 rows without truncating column values
    studentDF.show(30, false)
    // where() with the same condition string as the filter() call above
    studentDF.where("name='' OR name = 'NULL'").show()
    // Names starting with 't' (SQL substring positions are 1-based)
    studentDF.filter("SUBSTRING(name,1,1)='t'").show()
    studentDF.sort("name").show()
    // sort accepts Column expressions as well as plain column names
    studentDF.sort(studentDF("name").desc, studentDF("id")).show()
    studentDF.select(studentDF.col("name").as("studentname")).show()
    // join
    studentDF.join(studentDF2, studentDF.col("id") === studentDF2.col("id")).show()
  } finally {
    // Release the local Spark resources even if one of the steps above throws.
    sparksession.stop()
  }
}
/**
 * One parsed record of the student data file.
 *
 * @param id    integer identifier (first field of a line)
 * @param name  name (second field)
 * @param phone phone number, kept as text (third field)
 * @param email e-mail address, kept as text (fourth field)
 */
case class Student(
  id: Int,
  name: String,
  phone: String,
  email: String
)
}
1|burke|18722323|sdfsdf.com
2|turke|18722323|sdfsdf.com
3|turke|18722323|sdfsdf.com
4|curke|18722323|sdfsdf.com
5|burke|18722323|sdfsdf.com
6|burke|18722323|sdfsdf.com
7|burke|18722323|sdfsdf.com
8|murke|18722323|sdfsdf.com
9|burke|18722323|sdfsdf.com
10||1872563|sdfsdf.com
11||2232323|dsfsdf.com
12|NULL|937489|dsfsdew.com
文章标题:spark dataFrame api操作
文章链接:http://soscw.com/index.php/essay/60157.html