HBase MapReduce

1. HBase to HBase

The Mapper extends TableMapper; the input key is the row key (ImmutableBytesWritable) and the input value is the whole row (Result).

public abstract class TableMapper<KEYOUT, VALUEOUT>
        extends Mapper<ImmutableBytesWritable, Result, KEYOUT, VALUEOUT> {
    public TableMapper() {
    }
}
package com.scb.jason.mapper;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

/**
 * Created by Administrator on 2017/8/16.
 */
public class User2BasicMapper extends TableMapper<ImmutableBytesWritable, Put> {

    private ImmutableBytesWritable mapOutputkey = new ImmutableBytesWritable();

    @Override
    public void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        // Get the row key and reuse it for the output Put
        mapOutputkey.set(key.get());
        Put put = new Put(key.get());
        // Copy only the info:name and info:age cells into the Put
        for (Cell cell : value.rawCells()) {
            if ("info".equals(Bytes.toString(CellUtil.cloneFamily(cell)))) {
                if ("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
                    put.add(cell);
                }
                if ("age".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
                    put.add(cell);
                }
            }
        }
        context.write(mapOutputkey, put);
    }
}

The Reducer extends TableReducer, which fixes the output value type to Mutation (a Put or Delete).

public abstract class TableReducer<KEYIN, VALUEIN, KEYOUT>
        extends Reducer<KEYIN, VALUEIN, KEYOUT, Mutation> {
    public TableReducer() {
    }
}
package com.scb.jason.reducer;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;

import java.io.IOException;

/**
 * Created by Administrator on 2017/8/16.
 */
public class User2BasicReducer extends TableReducer<ImmutableBytesWritable, Put, ImmutableBytesWritable> {

    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context)
            throws IOException, InterruptedException {
        // Pass each Put straight through; the row key is already inside the Put
        for (Put put : values) {
            context.write(null, put);
        }
    }
}

Driver

package com.scb.jason.driver;

import com.scb.jason.mapper.User2BasicMapper;
import com.scb.jason.reducer.User2BasicReducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Created by Administrator on 2017/8/16.
 */
public class User2BasicDriver extends Configured implements Tool {

    public int run(String[] strings) throws Exception {
        Job job = Job.getInstance(this.getConf(), this.getClass().getSimpleName());
        job.setJarByClass(this.getClass());

        Scan scan = new Scan();
        scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
        scan.setCacheBlocks(false);  // don't set to true for MR jobs
        // set other scan attrs

        TableMapReduceUtil.initTableMapperJob(
                "user",                        // input table
                scan,                          // Scan instance to control CF and attribute selection
                User2BasicMapper.class,        // mapper class
                ImmutableBytesWritable.class,  // mapper output key
                Put.class,                     // mapper output value
                job);
        TableMapReduceUtil.initTableReducerJob(
                "basic",                       // output table
                User2BasicReducer.class,       // reducer class
                job);
        job.setNumReduceTasks(1);

        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;  // exit code 0 on success
    }

    public static void main(String[] args) throws Exception {
        Configuration configuration = HBaseConfiguration.create();
        int status = ToolRunner.run(configuration, new User2BasicDriver(), args);
        System.exit(status);
    }
}
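
One optional refinement, as a sketch: the Scan passed to initTableMapperJob can restrict which cells the region servers ship to the mapper, so the family/qualifier checks inside the map method become cheap or unnecessary. Something like this before the initTableMapperJob call (needs org.apache.hadoop.hbase.util.Bytes imported):

        scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"));  // only return info:name
        scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"));   // and info:age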

2. HBase to File

Mapper

package com.scb.jason.mapper;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;

import java.io.IOException;

/**
 * Created by Administrator on 2017/8/16.
 */
public class User2FileMapper extends TableMapper<Text, Text> {

    private Text rowKeyText = new Text();
    private Text valueText = new Text();

    @Override
    public void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        // Get the row key
        rowKeyText.set(key.get());
        byte[] infoName = null;
        byte[] infoAge = null;
        for (Cell cell : value.rawCells()) {
            if ("info".equals(Bytes.toString(CellUtil.cloneFamily(cell)))) {
                if ("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
                    infoName = CellUtil.cloneValue(cell);
                }
                if ("age".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
                    infoAge = CellUtil.cloneValue(cell);
                }
            }
        }
        // Assumes every row has both info:name and info:age
        valueText.set(new String(infoName) + "\t" + new String(infoAge));
        context.write(rowKeyText, valueText);
    }
}

No reducer is needed: the driver leaves the default identity reducer in place, which passes the mapper's Text pairs straight through to the output file.

Driver

package com.scb.jason.driver;

import com.scb.jason.mapper.User2FileMapper;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Created by Administrator on 2017/8/16.
 */
public class User2FileDriver extends Configured implements Tool {

    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(this.getConf(), this.getClass().getSimpleName());
        job.setJarByClass(this.getClass());

        Scan scan = new Scan();
        scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
        scan.setCacheBlocks(false);  // don't set to true for MR jobs
        // set other scan attrs

        TableMapReduceUtil.initTableMapperJob(
                "user",                  // input table
                scan,                    // Scan instance to control CF and attribute selection
                User2FileMapper.class,   // mapper class
                Text.class,              // mapper output key
                Text.class,              // mapper output value
                job);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path(args[0]));
        job.setNumReduceTasks(1);

        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        Configuration configuration = HBaseConfiguration.create();
        int status = ToolRunner.run(configuration, new User2FileDriver(), args);
        System.exit(status);
    }
}
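
Since no reducer class is set, the single reduce task here only funnels everything into one output file. If one output file per map task is acceptable, the shuffle can be skipped entirely; a sketch of the alternative:

        job.setNumReduceTasks(0);  // map-only: Text pairs are written straight to HDFS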

3. File to HBase

Driver

package com.scb.jason.driver;

import com.scb.jason.mapper.File2HbaseMapper;
import com.scb.jason.reducer.File2HBaseReducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Created by Administrator on 2017/8/16.
 */
public class File2BasicDriver extends Configured implements Tool {

    public int run(String[] strings) throws Exception {
        Job job = Job.getInstance(this.getConf(), this.getClass().getSimpleName());
        job.setJarByClass(this.getClass());

        job.setMapperClass(File2HbaseMapper.class);
        FileInputFormat.addInputPath(job, new Path("F:\\Workspace\\File"));
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);

        TableMapReduceUtil.initTableReducerJob(
                "basic",                   // output table
                File2HBaseReducer.class,   // reducer class
                job);
        job.setNumReduceTasks(1);

        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        Configuration configuration = HBaseConfiguration.create();
        int status = ToolRunner.run(configuration, new File2BasicDriver(), args);
        System.exit(status);
    }
}
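
A variant worth knowing, as a sketch following the pattern in the HBase reference guide: the pass-through reducer buys nothing here, and initTableReducerJob accepts null as the reducer class. Combined with zero reduce tasks, the mapper's Puts go to the output table directly and the shuffle is skipped:

        TableMapReduceUtil.initTableReducerJob(
                "basic",   // output table
                null,      // no reducer: write map output directly
                job);
        job.setNumReduceTasks(0);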

Mapper

package com.scb.jason.mapper;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.StringTokenizer;

/**
 * Created by Administrator on 2017/8/17.
 */
public class File2HbaseMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

    private ImmutableBytesWritable mapOutputkey = new ImmutableBytesWritable();

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Each input line is expected to be whitespace-separated: rowkey name age
        String lineValue = value.toString();
        StringTokenizer stringTokenizer = new StringTokenizer(lineValue);
        String rowkey = stringTokenizer.nextToken();
        String name = stringTokenizer.nextToken();
        String age = stringTokenizer.nextToken();

        Put put = new Put(Bytes.toBytes(rowkey));
        // add(...) was renamed addColumn(...) in later HBase client versions
        put.add(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(name));
        put.add(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(age));

        // Key the shuffle by the row key so all Puts for a row group together
        mapOutputkey.set(Bytes.toBytes(rowkey));
        context.write(mapOutputkey, put);
    }
}

Reducer

package com.scb.jason.reducer;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;

import java.io.IOException;

/**
 * Created by Administrator on 2017/8/25.
 */
public class File2HBaseReducer extends TableReducer<ImmutableBytesWritable, Put, ImmutableBytesWritable> {

    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context)
            throws IOException, InterruptedException {
        // Same pass-through as User2BasicReducer: each Put carries its own row key
        for (Put put : values) {
            context.write(null, put);
        }
    }
}

4. HBase to RDBMS

To write from an HBase MapReduce job into a relational database, use a plain Reducer that opens a JDBC connection in setup() and closes it in cleanup():

public static class MyRdbmsReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    private Connection c = null;

    public void setup(Context context) {
        // create DB connection...
    }

    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // do summarization
        // in this example the keys are Text, but this is just an example
    }

    public void cleanup(Context context) {
        // close db connection
    }
}
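
The skeleton leaves the JDBC plumbing out. A minimal sketch of how it might be filled in, assuming a hypothetical MySQL table word_count(word, total) and hardcoded connection details (both are assumptions; adjust for real use):

import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// (nested inside the job's driver class, as in the skeleton above)
public static class MyRdbmsReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    private Connection c = null;
    private PreparedStatement ps = null;

    @Override
    public void setup(Context context) throws IOException {
        try {
            // Hypothetical JDBC URL, table and credentials
            c = DriverManager.getConnection(
                    "jdbc:mysql://dbhost:3306/stats", "user", "pass");
            ps = c.prepareStatement(
                    "INSERT INTO word_count (word, total) VALUES (?, ?)");
        } catch (SQLException e) {
            throw new IOException("cannot open DB connection", e);
        }
    }

    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Summarize: sum the counts for this key
        int sum = 0;
        for (IntWritable v : values) {
            sum += v.get();
        }
        try {
            ps.setString(1, key.toString());
            ps.setInt(2, sum);
            ps.executeUpdate();  // one INSERT per key; batching would cut round trips
        } catch (SQLException e) {
            throw new IOException("insert failed for key " + key, e);
        }
    }

    @Override
    public void cleanup(Context context) throws IOException {
        try {
            if (ps != null) ps.close();
            if (c != null) c.close();
        } catch (SQLException e) {
            throw new IOException("cannot close DB connection", e);
        }
    }
}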

5. File -> HFile -> HBase Bulk Load

Generating HFiles with MapReduce and bulk-loading them bypasses the normal write path, which is far cheaper for large imports. A detailed walkthrough:

http://www.cnblogs.com/shitouer/archive/2013/02/20/hbase-hfile-bulk-load.html
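
The linked post covers the details; the overall shape, as a rough sketch against the HBase 1.x client API (class and method names moved around in other versions), is: a job whose mapper emits ImmutableBytesWritable/Put pairs, HFileOutputFormat2 to sort the output into HFiles that line up with the table's regions, then LoadIncrementalHFiles to move the finished files into place. Reusing File2HbaseMapper from section 3 and the two path arguments are assumptions.

import com.scb.jason.mapper.File2HbaseMapper;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class File2HFileDriver {

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        Job job = Job.getInstance(conf, "bulkload");
        job.setJarByClass(File2HFileDriver.class);
        job.setMapperClass(File2HbaseMapper.class);   // reuses the section 3 mapper
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));   // input text files
        Path hfileDir = new Path(args[1]);                      // staging dir for HFiles
        FileOutputFormat.setOutputPath(job, hfileDir);

        try (Connection conn = ConnectionFactory.createConnection(conf);
             Admin admin = conn.getAdmin();
             Table table = conn.getTable(TableName.valueOf("basic"));
             RegionLocator locator = conn.getRegionLocator(TableName.valueOf("basic"))) {
            // Wires in HFileOutputFormat2, a total-order partitioner, and one
            // reducer per region so HFiles match the table's region boundaries
            HFileOutputFormat2.configureIncrementalLoad(job, table, locator);
            if (!job.waitForCompletion(true)) {
                System.exit(1);
            }
            // Move the finished HFiles into the regions (no write path involved)
            new LoadIncrementalHFiles(conf).doBulkLoad(hfileDir, admin, table, locator);
        }
    }
}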

 

Reposted from: https://www.cnblogs.com/xdlaoliu/p/7406789.html
