使用 Flink 时需要提前准备好 Scala 环境。
一、创建maven项目
二、添加pom依赖
<properties>
  <scala.version>2.11.12</scala.version>
</properties>

<!-- 注意：所有 <dependency> 必须包裹在 <dependencies> 元素内，否则 pom.xml 无法通过校验 -->
<dependencies>
  <dependency>
    <groupId>org.scala-lang</groupId>
    <artifactId>scala-library</artifactId>
    <version>${scala.version}</version>
  </dependency>
  <dependency>
    <groupId>org.scala-lang</groupId>
    <artifactId>scala-compiler</artifactId>
    <version>${scala.version}</version>
  </dependency>
  <dependency>
    <groupId>org.scala-lang</groupId>
    <artifactId>scala-reflect</artifactId>
    <version>${scala.version}</version>
  </dependency>
  <dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-compress</artifactId>
    <version>1.21</version>
  </dependency>
  <dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-scala_2.11</artifactId>
    <version>1.14.0</version>
  </dependency>
  <dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.11</artifactId>
    <version>1.14.0</version>
  </dependency>
</dependencies>
三、编码实现
准备数据源test_flink.txt
河南 郑州
河南 信阳
郑州 金水区
河南 开封
郑州 管城区
信阳 浉河区
信阳 平桥区
开封 龙亭区
编码实现
import org.apache.flink.api.scala._
import org.apache.flink.api.scala.ExecutionEnvironment
/**
 * Batch word-count example using Flink's DataSet API.
 *
 * Reads a whitespace-separated text file, splits each line into words,
 * and prints the count of each word.
 *
 * NOTE: the original one-line paste was broken — the line comment
 * "//创建执行环境" commented out all code following it on the same line,
 * so the program could not compile. Reformatted onto separate lines.
 */
object FlinkWordCount {

  def main(args: Array[String]): Unit = {
    // Create the batch execution environment.
    val environment = ExecutionEnvironment.getExecutionEnvironment

    // Read the source file; each element of the DataSet is one line of text.
    val dataSet = environment.readTextFile("D:/workplace/java-item/res/file/test_flink.txt")

    // Split each line on spaces, wrap every word in a (word, 1) tuple,
    // group by the word (tuple field 0) and sum the counts (tuple field 1).
    val aggregateDataSet = dataSet
      .flatMap(_.split(" "))
      .map((_, 1))
      .groupBy(0)
      .sum(1)

    // Print the aggregated (word, count) pairs to stdout.
    aggregateDataSet.print()
  }
}