目录
- 前置
- HBase数据压缩
- 效果
- 获取数据(反序列化)
前置
安装说明
使用说明
HBaseDDL和DML操作
HBase数据压缩
问题
在上文的datain中原文
每次写入数据会写入4个单元格的内容,现在希望能对其进行筛减,合并成1格,减少存储空间(序列化)
datain2
此处仅修改了插入方法之前的内容,将数据合并,对于插入方法没有做修改,其他内容仿照之前的即可。
package org.wunaiieq;import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import org.wunaiieq.util.MyPhone;import java.text.SimpleDateFormat;
import java.util.*;public class datain2 {public static Connection connection = HBaseConnection.connection;public static Random random=new Random();public static SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");public static String getPhone(String prefix) {return prefix + String.format("%08d", random.nextInt(99999999));}private static String getData(int year) {Calendar calendar = Calendar.getInstance();calendar.set(year, 0, 1);calendar.add(Calendar.MONTH, random.nextInt(12));calendar.add(Calendar.DAY_OF_MONTH, random.nextInt(31));calendar.add(Calendar.HOUR_OF_DAY, random.nextInt(12));Date time = calendar.getTime();return simpleDateFormat.format(time);}public static void main(String[] args) throws Exception {Table table = connection.getTable(TableName.valueOf("wunaiieq", "phone_log"));List<Put> putList =new ArrayList<Put>();//10个用户for (int i = 0; i < 10 ; i++) {String phonenumber = getPhone("158");for (int j = 0; j < 1000; j++) {putList.clear();String dnum = getPhone("199");int length = random.nextInt(200) + 1;int type = random.nextInt(2);String date = getData(2050);String rowkey = phonenumber + "_" + (Long.MAX_VALUE - simpleDateFormat.parse(date).getTime()) + i + j;Put put =new Put(Bytes.toBytes(rowkey));//构造器MyPhone.Phone.Builder builder = MyPhone.Phone.newBuilder();//数据写入builder.setDnum(dnum);builder.setLength(length);builder.setDate(date);builder.setType(type);//构造器创建phone对象MyPhone.Phone phone = builder.build();put.addColumn(Bytes.toBytes("basic"),Bytes.toBytes("info"),phone.toByteArray());putList.add(put);}table.put(putList);}}}
其他内容
修改pom.xml——增加对protobuf的依赖
导入myPhone.java——protobuf生成的java文件
编写HbaseDML.java(参考专栏博客)
效果
原有大小
更新后
获取数据(反序列化)
数据存储格式已经修改,因此获取数据的方式需要改变
Using2.java
package org.wunaiieq;import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.wunaiieq.util.MyPhone;import java.io.IOException;
import java.text.SimpleDateFormat;public class Using2 {public static SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");public static Connection connection0 = HBaseConnection.connection;static Table table;static {try {table = connection0.getTable(TableName.valueOf("wunaiieq", "phone_log"));System.out.println(table);} catch (IOException e) {throw new RuntimeException(e);}}public static void scanProtocBuf(String phoneNumber) throws Exception {String startRow = phoneNumber+"_"+(Long.MAX_VALUE-simpleDateFormat.parse("2099-04-01 00:00:00").getTime());String stopRow = phoneNumber+"_"+(Long.MAX_VALUE-simpleDateFormat.parse("2000-03-01 00:00:00").getTime());Scan scan = new Scan();scan.withStartRow(Bytes.toBytes(startRow));scan.withStopRow(Bytes.toBytes(stopRow),true);//执行查询ResultScanner resultScanner = table.getScanner(scan);//解析resultScannerfor(Result result:resultScanner){Cell[] cells = result.rawCells();//获取值byte[] phoneInfoBytes = CellUtil.cloneValue(cells[0]);//将字节数据中的数据反序列化为MyPhone.Phone对象MyPhone.Phone phone = MyPhone.Phone.parseFrom(phoneInfoBytes);System.out.print(phone.getDnum()+"--");System.out.print(phone.getType()+"--");System.out.print(phone.getLength()+"--");System.out.println(phone.getDate()+"--");}}public static void main(String[] args) throws Exception {String phoneNumber0 = "15894163362";scanProtocBuf(phoneNumber0);String phoneNumber1 = "15898559729";scanProtocBuf(phoneNumber1);String phoneNumber2 = "15807236902";scanProtocBuf(phoneNumber2);}
}