Spark SQL 加载 MySQL 表中的数据（需要 mysql-connector-java 驱动依赖）
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.38</version>
</dependency>
import java.util.Properties

import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * Loads data from a MySQL table with Spark SQL over JDBC, prints its schema,
 * shows the rows, and runs a SQL filter against a temp view of the table.
 *
 * Requires the mysql-connector-java driver on the classpath and a reachable
 * MySQL instance at node1:3306 with database `spark` and table `iplocation`.
 */
object DataFromMysql {

  def main(args: Array[String]): Unit = {
    // 1. SparkConf: local[2] runs the job locally with two worker threads,
    //    suitable for testing without a cluster.
    val sparkConf: SparkConf = new SparkConf()
      .setAppName("DataFromMysql")
      .setMaster("local[2]")

    // 2. SparkSession is the entry point to the DataFrame / SQL API.
    val spark: SparkSession = SparkSession.builder().config(sparkConf).getOrCreate()

    try {
      // 3.1 JDBC connection URL of the source database.
      val url = "jdbc:mysql://node1:3306/spark"
      // 3.2 Table to load.
      val tableName = "iplocation"
      // 3.3 Connection credentials passed to the JDBC driver.
      val properties = new Properties()
      properties.setProperty("user", "root")
      properties.setProperty("password", "123456")

      // Load the table as a DataFrame.
      val mysqlDF: DataFrame = spark.read.jdbc(url, tableName, properties)

      // Print the inferred schema, then show the data.
      mysqlDF.printSchema()
      mysqlDF.show()

      // Register the DataFrame as a temp view so it can be queried with SQL.
      mysqlDF.createTempView("iplocation")
      spark.sql("select * from iplocation where total_count >1500").show()
    } finally {
      // Always release Spark resources, even if the job fails.
      spark.stop()
    }
  }
}
Spark SQL 保存结果数据到 MySQL 表中
import java.util.Properties

import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * Reads the `iplocation` MySQL table, filters rows with total_count > 1000 via
 * Spark SQL, and writes the result back to MySQL.
 *
 * Command-line arguments:
 *   args(0) - JDBC save mode: overwrite | append | ignore | error
 *   args(1) - name of the target MySQL table to write to
 *
 * Note: no master is set here on purpose — it is supplied externally by
 * spark-submit (--master) when the job is submitted to a cluster.
 */
object Data2Mysql {

  def main(args: Array[String]): Unit = {
    // Fail fast with a usage hint instead of an opaque
    // ArrayIndexOutOfBoundsException when arguments are missing.
    if (args.length < 2) {
      System.err.println("Usage: Data2Mysql <saveMode: overwrite|append|ignore|error> <targetTable>")
      sys.exit(1)
    }

    // 1. Create the SparkSession (master comes from spark-submit).
    val spark: SparkSession = SparkSession.builder().appName("Data2Mysql").getOrCreate()

    try {
      // 2.1 JDBC connection URL of the source/target database.
      val url = "jdbc:mysql://node1:3306/spark"
      // 2.2 Source table name.
      val table = "iplocation"
      // 2.3 Connection credentials passed to the JDBC driver.
      val properties = new Properties()
      properties.setProperty("user", "root")
      properties.setProperty("password", "123456")

      // Load the source table as a DataFrame.
      val mysqlDF: DataFrame = spark.read.jdbc(url, table, properties)

      // Register as a temp view so it can be queried with SQL.
      mysqlDF.createTempView("iplocation")

      // Keep only locations whose population total exceeds 1000.
      val result: DataFrame = spark.sql("select * from iplocation where total_count >1000")

      // Write the result back to MySQL. Save modes:
      //   overwrite - replace the table (created if absent)
      //   append    - append rows (table created if absent)
      //   ignore    - do nothing if the table already exists
      //   error     - throw if the table already exists (the default)
      result.write.mode(args(0)).jdbc(url, args(1), properties)
    } finally {
      // Always release Spark resources, even if the job fails.
      spark.stop()
    }
  }
}
提交任务脚本（spark-submit，参数依次为保存模式和目标表名）
# Submit Data2Mysql to the standalone cluster.
# Fixes vs. the original notes:
#   - "--" option prefixes were typographic en-dashes (–), which spark-submit
#     would not recognize; each line also lacked a trailing "\" continuation,
#     so every line ran as a separate command.
#   - --class pointed at com.sql.DataMysql, which matches neither object; the
#     trailing "append user" arguments belong to Data2Mysql.
#   - the --jars connector version (5.1.35) disagreed with the declared
#     dependency (5.1.38), and driver-class-path used a different hive path.
# "append" = JDBC save mode (args(0)); "user" = target table (args(1)).
spark-submit \
  --master spark://node1:7077 \
  --class com.sql.Data2Mysql \
  --executor-memory 1g \
  --total-executor-cores 4 \
  --driver-class-path /export/servers/hive/lib/mysql-connector-java-5.1.38.jar \
  --jars /export/servers/hive/lib/mysql-connector-java-5.1.38.jar \
  spark_class01-1.0-SNAPSHOT.jar \
  append user