一、介绍
ClickHouse表自增ID主要有两种方式:
- insert数据时,手动维护一个全局ID
- 给表设置uuid字段,使用 generateUUIDv4()函数赋予默认值。
这里的话推荐手动维护一个全局的自增ID,不推荐使用UUID的方式,主要原因有以下几个:
- uuid字段占用的存储空间比手动维护自增ID的空间大很多,可参见"四、关于表自增方式的性能对比"章节。
- 使用uuid字段时,如果该表涉及到按批次分别读取,UUID无法实现,因为随机生成的UUID没有先后顺序含义,不能用来做范围比较。
- 如果需要做分页,使用UUID时如何设置偏移量也是一个问题。
二、手动维护表的全局自增ID(推荐)
代码思路
- 在插入数据之前,可以为每条记录生成一个唯一的自增ID,并将其作为排序的依据。
- 多线程入库时,使用Java的同步机制(即AtomicLong类)来确保ID的唯一性。
- 程序启动时或者插入前查询该表当前最大的ID:在插入新数据之前,从表中查询当前的最大ID。这个最大ID将作为新数据插入时的起始ID。
- 然后每一次insert给DATAID赋予 AtomicLong类的值+1 即可
表结构
-- SQL-log table whose DATAID is a numeric ID supplied by the inserting application.
CREATE TABLE default.SQLLOG_FULL20240609_1809
(
    `DATAID`   UInt64,                                    -- no default: every INSERT must provide it
    `LOGTIME`  DateTime64(3, 'Asia/Shanghai'),
    `CURRTIME` DateTime('Asia/Shanghai') DEFAULT now(),   -- filled by now() when the INSERT omits it
    `SESS`     String,
    `THRD`     Int64,
    `USERNAME` String,
    `TRXID`    Int64,
    `STMT`     String,
    `APPNAME`  String,
    `IP`       String,
    `INFOSTR`  String
)
ENGINE = MergeTree
PARTITION BY toHour(LOGTIME)
ORDER BY (DATAID, LOGTIME)
SETTINGS index_granularity = 8192;
批量插入代码
@Data
public class LineBean
{public static int SQL_TYPE_PARAMS = 1;public static int SQL_TYPE_SQLSTR = 2;private String time;private String sess;private String thrd;private String user;private String trxid;private String stmt;private String appname;private String ip;private String infoStr;private Integer lineType; //判断是 参数 /Sqlprivate String lineExecSqlType;
}public class ClickHouseDbConnection {public static AtomicLong idGenerator = new AtomicLong(0);public static void main(String[] args) {//1. 设置当前表的最大ID 并放置到 Long tableMaxId = querySqlLogTableMaxId(connection, dbSqllogTableName);idGenerator.set(tableMaxId+1);//2. 封装对象List//3. 执行batchDataListCommit方法}//获取表中的DATAID最大值private static Long querySqlLogTableMaxId() throws SQLException {String url = "jdbc:clickhouse://192.168.112.146:8123/default";Properties properties = new Properties();properties.setProperty("user", "default");properties.setProperty("password", "root");Connection connection = DriverManager.getConnection(url, properties);String querySql = "SELECT MAX(DATAID) AS DATAID FROM "+dbSqllogTableName;ResultSet resultSet = connection.prepareStatement(querySql).executeQuery();resultSet.next();return resultSet.getLong(1);}public static void batchDataListCommit(List<LineBean> batchData) {String url = "jdbc:clickhouse://192.168.112.146:8123/default";Properties properties = new Properties();properties.setProperty("user", "default");properties.setProperty("password", "root");String sqlLogTableName = PropertiesUtils.getCustomPropertyValue(CommonConstant.DBMS_SYNC_TABLE_NAME);Connection connection = null;PreparedStatement preparedStatement = null;try {connection = DriverManager.getConnection(url, properties);String insert_sql = "insert into "+sqlLogTableName+"(LOGTIME, SESS, THRD, USERNAME, TRXID, STMT, APPNAME, IP, INFOSTR) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)";connection.setAutoCommit(true);//将自动提交关闭preparedStatement = connection.prepareStatement(insert_sql);for (int i = 0; i < batchData.size(); i++) {int idx = 1;LineBean lineBean = batchData.get(i);preparedStatement.setLong(idx++, idGenerator.incrementAndGet());preparedStatement.setTimestamp(idx++, new Timestamp(DateUtil.parse(lineBean.getTime(),DateTimeUtils.FORMATTER_STR).getTime()));preparedStatement.setString(idx++, lineBean.getSess());preparedStatement.setLong(idx++, 
Long.parseLong(lineBean.getThrd()));preparedStatement.setString(idx++, lineBean.getUser());preparedStatement.setLong(idx++, Long.parseLong(lineBean.getTrxid()));preparedStatement.setString(idx++, lineBean.getStmt());preparedStatement.setString(idx++, lineBean.getAppname());preparedStatement.setString(idx++, lineBean.getIp());preparedStatement.setString(idx++, lineBean.getInfoStr());preparedStatement.addBatch();}preparedStatement.executeBatch();} catch (SQLException e) {StaticLog.error(e,"batch dataList error info {}",e.getMessage());}finally {if (preparedStatement != null) {try {preparedStatement.close();} catch (SQLException e) {StaticLog.error(e,"close db preparedStatement error info {}",e.getMessage());}}if(connection!=null){try {connection.close();} catch (SQLException e) {StaticLog.error(e,"close db connection error info {}",e.getMessage());}}}}
}
三、给表设置自增UUID字段
create table时新增字段 `DATAID UUID DEFAULT generateUUIDv4()`
会默认生成唯一键
表结构
-- SQL-log table whose DATAID is a UUID generated by the server when the INSERT omits it.
CREATE TABLE default.SQLLOG_FULL20240609_1220
(
    `DATAID`   UUID DEFAULT generateUUIDv4(),             -- filled by generateUUIDv4() when omitted
    `LOGTIME`  DateTime64(3, 'Asia/Shanghai'),
    `CURRTIME` DateTime('Asia/Shanghai') DEFAULT now(),   -- filled by now() when the INSERT omits it
    `SESS`     String,
    `THRD`     Int64,
    `USERNAME` String,
    `TRXID`    Int64,
    `STMT`     String,
    `APPNAME`  String,
    `IP`       String,
    `INFOSTR`  String
)
ENGINE = MergeTree
PARTITION BY toHour(LOGTIME)
ORDER BY (LOGTIME, CURRTIME)
SETTINGS index_granularity = 8192;
批量插入代码
@Data
public class LineBean
{public static int SQL_TYPE_PARAMS = 1;public static int SQL_TYPE_SQLSTR = 2;private String time;private String sess;private String thrd;private String user;private String trxid;private String stmt;private String appname;private String ip;private String infoStr;private Integer lineType; //判断是 参数 /Sqlprivate String lineExecSqlType;
}public static void batchDataListCommit(List<LineBean> batchData) {String url = "jdbc:clickhouse://192.168.112.146:8123/default";Properties properties = new Properties();properties.setProperty("user", "default");properties.setProperty("password", "root");String sqlLogTableName = PropertiesUtils.getCustomPropertyValue(CommonConstant.DBMS_SYNC_TABLE_NAME);Connection connection = null;PreparedStatement preparedStatement = null;try {connection = DriverManager.getConnection(url, properties);String insert_sql = "insert into "+sqlLogTableName+"(LOGTIME, SESS, THRD, USERNAME, TRXID, STMT, APPNAME, IP, INFOSTR) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)";connection.setAutoCommit(true);//将自动提交关闭preparedStatement = connection.prepareStatement(insert_sql);for (int i = 0; i < batchData.size(); i++) {int idx = 1;LineBean lineBean = batchData.get(i);preparedStatement.setTimestamp(idx++, new Timestamp(DateUtil.parse(lineBean.getTime(),DateTimeUtils.FORMATTER_STR).getTime()));preparedStatement.setString(idx++, lineBean.getSess());preparedStatement.setLong(idx++, Long.parseLong(lineBean.getThrd()));preparedStatement.setString(idx++, lineBean.getUser());preparedStatement.setLong(idx++, Long.parseLong(lineBean.getTrxid()));preparedStatement.setString(idx++, lineBean.getStmt());preparedStatement.setString(idx++, lineBean.getAppname());preparedStatement.setString(idx++, lineBean.getIp());preparedStatement.setString(idx++, lineBean.getInfoStr());preparedStatement.addBatch();}preparedStatement.executeBatch();} catch (SQLException e) {StaticLog.error(e,"batch dataList error info {}",e.getMessage());}finally {if (preparedStatement != null) {try {preparedStatement.close();} catch (SQLException e) {StaticLog.error(e,"close db preparedStatement error info {}",e.getMessage());}}if(connection!=null){try {connection.close();} catch (SQLException e) {StaticLog.error(e,"close db connection error info {}",e.getMessage());}}}
}
四、关于表自增方式的性能对比
插入性能上差距不大,使用相同的1G日志进行批量入库:
1)不添加自增ID,入库效率稳定在17S左右,表大小118M
2)使用UUID方式的自增ID,入库效率在17-21S左右,表大小为153M,较不添加自增ID时增长35M
3)使用外部维护表自增ID方式,入库效率在18S左右,表大小127M,较不添加自增ID时增长9M