解析 Kafka 中 JSON 格式的数据，并写入 HBase
import java.util.Properties

import com.alibaba.fastjson.{JSON, JSONObject}
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.contrib.streaming.state.RocksDBStateBackend
import org.apache.flink.streaming.api.CheckpointingMode
import org.apache.flink.streaming.api.environment.CheckpointConfig
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011

/**
 * Flink streaming job: reads JSON messages from the Kafka topic `flink_house`,
 * turns each one into an [[OrderObj]] and passes it to [[HBaseSinkFunction]].
 *
 * Every message is expected to be a JSON object with `database`, `table`,
 * `type` and `data` fields.
 */
object IncrementOrder {

  /**
   * Converts one raw Kafka message into an [[OrderObj]].
   *
   * Exceptions thrown by the JSON parser itself are not caught here and
   * propagate to the caller.
   *
   * @param raw message payload as read from Kafka
   * @return `Some(order)` when the parsed object carries all four envelope fields;
   *         `None` when the parser yields no object or any field is missing/null
   */
  private def parseOrder(raw: String): Option[OrderObj] =
    for {
      json     <- Option(JSON.parseObject(raw))
      database <- Option(json.get("database"))
      table    <- Option(json.get("table"))
      kind     <- Option(json.get("type"))
      data     <- Option(json.get("data"))
    } yield OrderObj(database.toString, table.toString, kind.toString, data.toString)

  /**
   * Builds and runs the streaming pipeline.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val environment: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Brings the implicit TypeInformation instances needed by the Scala DataStream API into scope.
    import org.apache.flink.api.scala._

    // Checkpoint configuration.
    environment.enableCheckpointing(100)
    val checkpointConfig = environment.getCheckpointConfig
    checkpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)
    checkpointConfig.setMinPauseBetweenCheckpoints(500)
    checkpointConfig.setCheckpointTimeout(60000)
    checkpointConfig.setMaxConcurrentCheckpoints(1)
    // Keep externalized checkpoints when the job is cancelled so it can be restored from them.
    checkpointConfig.enableExternalizedCheckpoints(
      CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION)

    // State backend: RocksDB with checkpoints on HDFS; `true` turns on incremental checkpoints.
    environment.setStateBackend(
      new RocksDBStateBackend("hdfs://node01:8020/flink_kafka/checkpoints", true))

    // Kafka consumer configuration.
    val props = new Properties
    props.setProperty("bootstrap.servers", "node01:9092")
    props.setProperty("zookeeper.connect", "node01:2181")
    props.setProperty("group.id", "flinkHouseGroup")
    props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    props.setProperty("auto.offset.reset", "latest")
    props.setProperty("flink.partition-discovery.interval-millis", "30000")

    val kafkaSource = new FlinkKafkaConsumer011[String]("flink_house", new SimpleStringSchema(), props)
    kafkaSource.setCommitOffsetsOnCheckpoints(true)

    val result: DataStream[String] = environment.addSource(kafkaSource)

    // Messages without a complete envelope are skipped instead of failing the job with a
    // NullPointerException; the sink could not route them to a table anyway.
    val orderResult: DataStream[OrderObj] = result.flatMap(raw => parseOrder(raw).toList)

    orderResult.addSink(new HBaseSinkFunction)
    environment.execute()
  }
}
/**
 * One message read from Kafka, split into its four envelope fields.
 *
 * @param database value of the message's `database` field
 * @param table    value of the message's `table` field
 * @param `type`   value of the message's `type` field
 * @param data     the message's `data` field rendered as a string
 */
case class OrderObj(
  database: String,
  table: String,
  `type`: String,
  data: String
) extends Serializable
定义将数据写入 HBase 的程序
import com.alibaba.fastjson.{JSON, JSONObject}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction}
import org.apache.hadoop.conf
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client._
import java.nio.charset.StandardCharsets

/**
 * Flink sink that applies order change events to the HBase table `flink:data_orders`.
 *
 * Only events whose database is `product` and whose table is `kaikeba_orders`
 * (both compared case-insensitively) are processed. `insert` and `update` events
 * are written as a `Put`, `delete` events as a `Delete`; everything else is ignored.
 * The row key is the `orderId` found in the event's `data` JSON.
 */
class HBaseSinkFunction extends RichSinkFunction[OrderObj] {
  // Both handles are created in open() and released in close().
  var connection: Connection = _
  var hbTable: Table = _

  /** Column family that receives every order column. */
  private val ColumnFamily: Array[Byte] = utf8("f1")

  /** Fields of the order JSON that are stored, each under a qualifier of the same name. */
  private val OrderColumns: Seq[String] = Seq(
    "orderNo", "userId", "goodId", "goodsMoney", "realTotalMoney",
    "payFrom", "province", "createTime")

  /** Encodes text as UTF-8 so the stored bytes do not depend on the JVM default charset. */
  private def utf8(text: String): Array[Byte] = text.getBytes(StandardCharsets.UTF_8)

  /**
   * Derives the HBase row key from the `orderId` field of the order JSON.
   *
   * @throws IllegalArgumentException if the JSON is null or has no `orderId`
   */
  private def rowKey(orderJson: JSONObject): Array[Byte] =
    Option(orderJson)
      .flatMap(json => Option(json.get("orderId")))
      .map(id => utf8(id.toString))
      .getOrElse(throw new IllegalArgumentException(
        "order data has no orderId; cannot derive the HBase row key"))

  /** Opens the HBase connection and the target table. */
  override def open(parameters: Configuration): Unit = {
    val configuration: conf.Configuration = HBaseConfiguration.create()
    configuration.set("hbase.zookeeper.quorum", "node01,node02,node03")
    configuration.set("hbase.zookeeper.property.clientPort", "2181")
    connection = ConnectionFactory.createConnection(configuration)
    hbTable = connection.getTable(TableName.valueOf("flink:data_orders"))
  }

  /** Closes the table and then the connection, tolerating either being unset. */
  override def close(): Unit = {
    try {
      if (null != hbTable) hbTable.close()
    } finally {
      // Runs even when closing the table throws, so the connection is never leaked.
      if (null != connection) connection.close()
    }
  }

  /**
   * Writes the order carried by `orderObj` to `hbTable`; used for inserts and updates alike.
   *
   * @param hbTable  table to write to
   * @param orderObj event whose `data` holds the order JSON
   * @throws IllegalArgumentException if the order JSON has no `orderId`
   */
  def insertHBase(hbTable: Table, orderObj: OrderObj): Unit = {
    val orderJson: JSONObject = JSON.parseObject(orderObj.data)
    val put = new Put(rowKey(orderJson))

    // Only fields that are present and non-null are written, so a null column does not
    // abort the record with a NullPointerException.
    // NOTE(review): on an update that nulls a column, the previously stored cell is left
    // untouched — confirm whether such cells should be deleted instead.
    for {
      qualifier <- OrderColumns
      value     <- Option(orderJson.get(qualifier))
    } put.addColumn(ColumnFamily, utf8(qualifier), utf8(value.toString))

    // A Put without any column is skipped rather than sent to HBase.
    if (!put.isEmpty) hbTable.put(put)
  }

  /**
   * Deletes the row of the order carried by `orderObj` from `hbTable`.
   *
   * @param hbTable  table to delete from
   * @param orderObj event whose `data` holds the order JSON
   * @throws IllegalArgumentException if the order JSON has no `orderId`
   */
  def deleteHBaseData(hbTable: Table, orderObj: OrderObj): Unit = {
    val orderJson: JSONObject = JSON.parseObject(orderObj.data)
    hbTable.delete(new Delete(rowKey(orderJson)))
  }

  /** Routes one event to the matching HBase operation; unrelated events are ignored. */
  override def invoke(orderObj: OrderObj, context: SinkFunction.Context[_]): Unit = {
    // Literal-first comparisons stay false (instead of throwing) when a field is null.
    val isOrdersTable =
      "product".equalsIgnoreCase(orderObj.database) &&
        "kaikeba_orders".equalsIgnoreCase(orderObj.table)

    if (isOrdersTable) {
      orderObj.`type` match {
        // An update is stored exactly like an insert: the Put overwrites the row's columns.
        case t if "insert".equalsIgnoreCase(t) || "update".equalsIgnoreCase(t) =>
          insertHBase(hbTable, orderObj)
        case t if "delete".equalsIgnoreCase(t) =>
          deleteHBaseData(hbTable, orderObj)
        case _ => ()
      }
    }
  }
}