DataFrame不同风格比较
一、DSL风格语法
// DSL-style DataFrame walkthrough.
// NOTE(review): assumes a spark-shell session where `sc`, `spark`, the `$"..."`
// column syntax (spark.implicits._) and `col` (sql.functions) are already in
// scope — confirm before running this as a standalone program.

// Load the data; each line is split on a single space into its fields
val rdd1 = sc.textFile("/person.txt").map(x => x.split(" "))

// Define a case class describing one record
case class Person(id: String, name: String, age: Int)

// Bind each field array to the case class.
// NOTE(review): x(2).toInt throws on a short or non-numeric line — presumably
// the input file is clean; verify.
val personRDD = rdd1.map(x => Person(x(0), x(1), x(2).toInt))

// Convert the RDD into a DataFrame
val personDF = personRDD.toDF

// Print the schema
personDF.printSchema()

// Show the data
personDF.show()

// Select a single column, referenced three different ways
personDF.select("name").show()
personDF.select($"name").show()
personDF.select(col("name")).show()

// Compute age + 1 alongside the original columns
personDF.select($"name", $"age", $"age" + 1).show()

// Keep only the rows whose age is greater than 30
personDF.filter($"age" > 30).show()

// Count rows per age
personDF.groupBy("age").count().show()

// Count rows per age, sorted by the count in descending order
personDF.groupBy("age").count().sort($"count".desc).show()
二、SQL风格语法
// SQL-style DataFrame walkthrough.
// NOTE(review): assumes `spark` (SparkSession) and `personDF` are already
// defined in the current session — confirm.

// Register the DataFrame as a temporary view named "person".
// createOrReplaceTempView is used so that re-running this snippet in the same
// session does not fail because the view already exists.
personDF.createOrReplaceTempView("person")

// Run queries against the view through SparkSession.sql
spark.sql("select * from person").show()
spark.sql("select name from person").show()
spark.sql("select name,age from person").show()
spark.sql("select * from person where age >30").show()
spark.sql("select count(*) from person where age >30").show()
spark.sql("select age,count(*) from person group by age").show()
spark.sql("select age,count(*) as count from person group by age").show()
spark.sql("select * from person order by age desc").show()