kafka中根据时间戳开始消费数据
package com.cindasc.rtasset.source;import com.cindasc.rtasset.util.Constants;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.flink.kafka.shaded.org.apache.kafka.clients.consumer.OffsetResetStrategy;
import org.apache.flink.kafka.shaded.org.apache.kafka.common.TopicPartition;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.yaml.snakeyaml.nodes.CollectionNode;import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;/*** 支持按topic指定开始消费时间戳** @author 86158*/
public class KafkaOffsetsInitializer implements OffsetsInitializer {private Logger logger = LoggerFactory.getLogger(KafkaOffsetsInitializer.class);private static final long serialVersionUID = 1L;/*** key:topic,value:开始消费时间戳*/private Map<String, Long> topicStartingTimestamps;private ParameterTool parameters;/*** @param topicStartingTimestamps* @param parameters*/public KafkaOffsetsInitializer(Map<String, Long> topicStartingTimestamps, ParameterTool parameters) {this.topicStartingTimestamps = topicStartingTimestamps;this.parameters = parameters;}@Overridepublic Map<TopicPartition, Long> getPartitionOffsets(Collection<TopicPartition> partitions,PartitionOffsetsRetriever partitionOffsetsRetriever) {//定义起始时间,初始offsetMap<TopicPartition, Long> startingTimestamps = new HashMap<>();Map<TopicPartition, Long> initialOffsets = new HashMap<>();//commited offsetMap<TopicPartition, Long> committedOffsets = partitionOffsetsRetriever.committedOffsets(partitions);//beginningOffsets the first offset for the given partitions.Map<TopicPartition, Long> beginningOffsets = partitionOffsetsRetriever.beginningOffsets(partitions);//endOffsets the for the given partitions.Map<TopicPartition, Long> endOffsets = partitionOffsetsRetriever.endOffsets(partitions);final long now = System.currentTimeMillis();partitions.forEach(tp -> {//起始时间赋值为从redis中获取到相对应topic的时间Long startingTimestamp = topicStartingTimestamps.get(tp.topic());if (startingTimestamp == null) {//redis里没有取到消费开始时间从启动时间消费startingTimestamp = now;logger.info("从redis没有取到时间戳,topic:{},partition:{},使用当前时间:{},{}", tp.topic(), tp.partition(), now, new Date(now));}logger.info("读取时间戳,topic:{},partition:{},时间戳:{},{}", tp.topic(), tp.partition(), now, new Date(now));startingTimestamps.put(tp, startingTimestamp);});partitionOffsetsRetriever.offsetsForTimes(startingTimestamps).forEach((tp, offsetMetadata) -> {long offsetForTime = beginningOffsets.get(tp);long offsetForCommit = beginningOffsets.get(tp);if (offsetMetadata != null) {offsetForTime = offsetMetadata.offset();logger.info("根据时间戳取到offset,topic:{},partition:{},offset:{}", tp.topic(), tp.partition(), offsetForTime);}Long commitedOffset = committedOffsets.get(tp);if (commitedOffset != null) {offsetForCommit = commitedOffset.longValue();logger.info("根据已提交offset取到offset,topic:{},partition:{},offset:{}", tp.topic(), tp.partition(), offsetForCommit);}logger.info("设置读取offset,topic:{},partition:{},offset:{},endOffset:{}", tp.topic(), tp.partition(), Math.max(offsetForTime, offsetForCommit), endOffsets.get(tp));//对比时间戳对应的offset和checkpoint保存的offset,取较大值//initialOffsets.put(tp, Math.max(offsetForTime, offsetForCommit));initialOffsets.put(tp, offsetForCommit);if (parameters.getBoolean(Constants.LOADERFAILED,false)){initialOffsets.put(tp, Math.min(offsetForTime, offsetForCommit));}});return initialOffsets;}@Overridepublic OffsetResetStrategy getAutoOffsetResetStrategy() {return OffsetResetStrategy.NONE;}
}