1 pom.xml configuration: the dependency versions must match the versions installed on the server, and the hive-site.xml file must be placed in the resources folder.
<dependencies>
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>3.3.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>3.3.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-metastore</artifactId>
        <version>3.1.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-common</artifactId>
        <version>3.1.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>3.1.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-exec</artifactId>
        <version>3.1.3</version>
        <exclusions>
            <exclusion>
                <artifactId>hadoop-annotations</artifactId>
                <groupId>org.apache.hadoop</groupId>
            </exclusion>
            <exclusion>
                <artifactId>hadoop-yarn-server-common</artifactId>
                <groupId>org.apache.hadoop</groupId>
            </exclusion>
            <exclusion>
                <artifactId>hadoop-yarn-server-resourcemanager</artifactId>
                <groupId>org.apache.hadoop</groupId>
            </exclusion>
            <exclusion>
                <artifactId>hadoop-yarn-server-applicationhistoryservice</artifactId>
                <groupId>org.apache.hadoop</groupId>
            </exclusion>
            <exclusion>
                <artifactId>hadoop-yarn-server-web-proxy</artifactId>
                <groupId>org.apache.hadoop</groupId>
            </exclusion>
            <exclusion>
                <artifactId>hadoop-yarn-common</artifactId>
                <groupId>org.apache.hadoop</groupId>
            </exclusion>
            <exclusion>
                <artifactId>jackson-core-asl</artifactId>
                <groupId>org.codehaus.jackson</groupId>
            </exclusion>
            <exclusion>
                <artifactId>jackson-mapper-asl</artifactId>
                <groupId>org.codehaus.jackson</groupId>
            </exclusion>
            <exclusion>
                <artifactId>jersey-core</artifactId>
                <groupId>com.sun.jersey</groupId>
            </exclusion>
            <exclusion>
                <artifactId>jersey-server</artifactId>
                <groupId>com.sun.jersey</groupId>
            </exclusion>
            <exclusion>
                <artifactId>zookeeper</artifactId>
                <groupId>org.apache.zookeeper</groupId>
            </exclusion>
        </exclusions>
    </dependency>
</dependencies>
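The hive-site.xml placed in resources only needs to tell the client where the metastore service runs. A minimal sketch, assuming a metastore listening at thrift://hadoop102:9083 (the host and port are assumptions; substitute your server's values):

<?xml version="1.0"?>
<configuration>
    <property>
        <!-- Thrift URI of the Hive metastore service (host/port assumed; adjust to your cluster) -->
        <name>hive.metastore.uris</name>
        <value>thrift://hadoop102:9083</value>
    </property>
</configuration>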
2 POJO class
import lombok.Data;
import java.io.Serializable;
import java.util.Date;

@Data
public class TableMetaInfo implements Serializable {
    private static final long serialVersionUID = 1L;
    /** Table id */
    private Long id;
    /** Table name */
    private String tableName;
    /** Database (schema) name */
    private String schemaName;
    /** Column names as JSON (source: hive) */
    private String colNameJson;
    /** Partition column names as JSON (source: hive) */
    private String partitionColNameJson;
    /** HDFS owner (source: hive) */
    private String tableFsOwner;
    /** Table parameters as JSON (source: hive) */
    private String tableParametersJson;
    /** Table comment (source: hive) */
    private String tableComment;
    /** HDFS path (source: hive) */
    private String tableFsPath;
    /** Input format (source: hive) */
    private String tableInputFormat;
    /** Output format (source: hive) */
    private String tableOutputFormat;
    /** Row format SerDe (source: hive) */
    private String tableRowFormatSerde;
    /** Table creation time (source: hive) */
    private String tableCreateTime;
    /** Table type (source: hive) */
    private String tableType;
    /** Bucket columns as JSON (source: hive) */
    private String tableBucketColsJson;
    /** Number of buckets (source: hive) */
    private Long tableBucketNum;
    /** Sort columns as JSON (source: hive) */
    private String tableSortColsJson;
    /** Data size (source: hdfs) */
    private Long tableSize = 0L;
    /** Total size across all replicas (source: hdfs) */
    private Long tableTotalSize = 0L;
    /** Last modification time (source: hdfs) */
    private Date tableLastModifyTime;
    /** Last access time (source: hdfs) */
    private Date tableLastAccessTime;
    /** Current file system capacity (source: hdfs) */
    private Long fsCapcitySize;
    /** Current file system used space (source: hdfs) */
    private Long fsUsedSize;
    /** Current file system remaining space (source: hdfs) */
    private Long fsRemainSize;
}
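Lombok's @Data generates the getters, setters, equals/hashCode and toString for all of the fields above at compile time, so the class is used like an ordinary bean. A minimal sketch (the field values are made up for illustration):

public class TableMetaInfoDemo {
    public static void main(String[] args) {
        TableMetaInfo info = new TableMetaInfo();
        info.setSchemaName("test_db_1"); // hypothetical values
        info.setTableName("orders");
        // tableSize defaults to 0L, so it can be accumulated directly
        info.setTableSize(info.getTableSize() + 1024L);
        System.out.println(info);        // toString() generated by @Data
    }
}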
3 Core class
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.support.spring.PropertyPreFilters;
import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.RetryingMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.thrift.TException;
import java.io.IOException;
import java.net.URI;
import java.util.Date;

/**
 * Fetches Hive table metadata.
 */
public class HiveMetaTest {

    IMetaStoreClient hiveClient = getHiveClient();

    // Initialize the Hive metastore client
    private IMetaStoreClient getHiveClient() {
        // Load the local hive-site.xml into a HiveConf object
        HiveConf hiveConf = new HiveConf();
        hiveConf.addResource(Thread.currentThread().getContextClassLoader()
                .getResourceAsStream("hive-site.xml"));
        // hiveConf.addResource("hive-site.xml");
        // hiveConf.addResource(new URL("file:///home/atguigu/dga/hive-site.xml"));
        IMetaStoreClient client = null;
        try {
            // Create the client
            client = RetryingMetaStoreClient.getProxy(hiveConf, true);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return client;
    }

    // Fetch one table's metadata by database name and table name
    private TableMetaInfo getTableMeta(String schemaName, String tableName) {
        TableMetaInfo tableMetaInfo = new TableMetaInfo();
        try {
            Table table = hiveClient.getTable(schemaName, tableName);
            // Extract the metadata from table into tableMetaInfo
            // Print the raw Thrift Table object for inspection
            System.out.println(table);
            System.out.println();
            // Table name
            tableMetaInfo.setTableName(tableName);
            // Database name
            tableMetaInfo.setSchemaName(schemaName);
            // Filter out the unneeded properties
            PropertyPreFilters.MySimplePropertyPreFilter mySimplePropertyPreFilter =
                    new PropertyPreFilters().addFilter("comment", "name", "type");
            // Column names as JSON (source: hive): name, type, comment
            tableMetaInfo.setColNameJson(JSON.toJSONString(table.getSd().getCols(), mySimplePropertyPreFilter));
            // Partition column names as JSON (source: hive)
            tableMetaInfo.setPartitionColNameJson(JSON.toJSONString(table.getPartitionKeys(), mySimplePropertyPreFilter));
            // HDFS owner (source: hive)
            tableMetaInfo.setTableFsOwner(table.getOwner());
            // Table parameters (source: hive)
            tableMetaInfo.setTableParametersJson(JSON.toJSONString(table.getParameters()));
            // Table comment
            tableMetaInfo.setTableComment(table.getParameters().get("comment"));
            // HDFS path
            tableMetaInfo.setTableFsPath(table.getSd().getLocation());
            // Input format
            tableMetaInfo.setTableInputFormat(table.getSd().getInputFormat());
            // Output format
            tableMetaInfo.setTableOutputFormat(table.getSd().getOutputFormat());
            // Row format SerDe
            tableMetaInfo.setTableRowFormatSerde(table.getSd().getSerdeInfo().getSerializationLib());
            // getCreateTime() is in seconds, so multiply by 1000 for milliseconds
            String tableCreateDate = DateFormatUtils.format(new Date(table.getCreateTime() * 1000L), "yyyy-MM-dd HH:mm:ss");
            // Date date = DateUtils.parseDate(tableCreateDate, "yyyy-MM-dd HH:mm:ss");
            // Table creation time
            tableMetaInfo.setTableCreateTime(tableCreateDate);
            // Table type
            tableMetaInfo.setTableType(table.getTableType());
            if (table.getSd().getBucketCols().size() > 0) {
                // Bucket columns
                tableMetaInfo.setTableBucketColsJson(JSON.toJSONString(table.getSd().getBucketCols()));
                // Number of buckets
                tableMetaInfo.setTableBucketNum(table.getSd().getNumBuckets() + 0L);
                // Sort columns
                tableMetaInfo.setTableSortColsJson(JSON.toJSONString(table.getSd().getSortCols()));
            }
        } catch (TException e) {
            throw new RuntimeException(e);
        }
        return tableMetaInfo;
    }

    public void addHdfsInfo(TableMetaInfo tableMetaInfo) {
        try {
            FileSystem fileSystem = FileSystem.get(new URI(tableMetaInfo.getTableFsPath()),
                    new Configuration(), tableMetaInfo.getTableFsOwner());
            // listStatus returns the files and directories directly under a path
            FileStatus[] fileStatuses = fileSystem.listStatus(new Path(tableMetaInfo.getTableFsPath()));
            // Walk the tree recursively to accumulate total size, total replica size, etc.
            addFileInfo(fileStatuses, tableMetaInfo, fileSystem);
            // Of limited use:
            // Current file system capacity
            // tableMetaInfo.setFsCapcitySize(fileSystem.getStatus().getCapacity());
            // Current file system remaining space
            // tableMetaInfo.setFsRemainSize(fileSystem.getStatus().getRemaining());
            // Current file system used space
            // tableMetaInfo.setFsUsedSize(fileSystem.getStatus().getUsed());
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    // Exercise: fill tableMetaInfo's last access time, last modification time and total replica size.
    // Recursive traversal: every recursive method needs a converging case for leaf nodes and a
    // descending case for branch nodes. (In Java each descent adds a stack frame, roughly 10,000
    // by default; going too deep causes a stack overflow.)
    public void addFileInfo(FileStatus[] fileStatuses, TableMetaInfo tableMetaInfo, FileSystem fileSystem) throws IOException {
        // Iterate over all FileStatus entries
        for (FileStatus fileStatus : fileStatuses) {
            if (!fileStatus.isDirectory()) {
                // A file: take its size and accumulate it into tableSize
                long accessTime = fileStatus.getAccessTime();
                long modificationTime = fileStatus.getModificationTime();
                short replication = fileStatus.getReplication();
                long filesize = fileStatus.getLen(); // file size in bytes
                // Accumulate into the table's total size
                tableMetaInfo.setTableSize(tableMetaInfo.getTableSize() + filesize);
                // Total replica size
                tableMetaInfo.setTableTotalSize(tableMetaInfo.getTableTotalSize() + filesize * replication);
                // Keep the most recent modification time
                if (tableMetaInfo.getTableLastModifyTime() == null
                        || tableMetaInfo.getTableLastModifyTime().getTime() < modificationTime) {
                    tableMetaInfo.setTableLastModifyTime(new Date(modificationTime));
                }
                // Keep the most recent access time
                if (tableMetaInfo.getTableLastAccessTime() == null
                        || tableMetaInfo.getTableLastAccessTime().getTime() < accessTime) {
                    tableMetaInfo.setTableLastAccessTime(new Date(accessTime));
                }
            } else {
                // A directory: list its FileStatus entries and recurse
                FileStatus[] subFileStatus = fileSystem.listStatus(fileStatus.getPath());
                addFileInfo(subFileStatus, tableMetaInfo, fileSystem);
            }
        }
    }

    public static void main(String[] args) {
        HiveMetaTest hiveMetaTest = new HiveMetaTest();
        TableMetaInfo tableMeta = hiveMetaTest.getTableMeta("test_db_1", "orders");
        hiveMetaTest.addHdfsInfo(tableMeta);
        System.out.println("tableMeta = " + tableMeta);
    }
}
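The main method above collects metadata for a single table. To sweep an entire database, the metastore client's getAllTables can drive the same two methods. A sketch that could replace the main method (kept inside HiveMetaTest so the private getTableMeta stays accessible; it additionally needs import java.util.List):

public static void main(String[] args) throws TException {
    HiveMetaTest hiveMetaTest = new HiveMetaTest();
    // getAllTables returns the names of every table in the given database
    List<String> tableNames = hiveMetaTest.hiveClient.getAllTables("test_db_1");
    for (String tableName : tableNames) {
        TableMetaInfo tableMeta = hiveMetaTest.getTableMeta("test_db_1", tableName);
        hiveMetaTest.addHdfsInfo(tableMeta);
        System.out.println("tableMeta = " + tableMeta);
    }
}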