数据集展示
7369 | SMITH | CLERK | 7902 | 1980/12/17 | 800 | 20 | |
7499 | ALLEN | SALESMAN | 7698 | 1981/2/20 | 1600 | 300 | 30 |
7521 | WARD | SALESMAN | 7698 | 1981/2/22 | 1250 | 500 | 30 |
7566 | JONES | MANAGER | 7839 | 1981/4/2 | 2975 | 20 | |
7654 | MARTIN | SALESMAN | 7698 | 1981/9/28 | 1250 | 1400 | 30 |
7698 | BLAKE | MANAGER | 7839 | 1981/5/1 | 2850 | 30 | |
7782 | CLARK | MANAGER | 7839 | 1981/6/9 | 2450 | 10 | |
7788 | SCOTT | ANALYST | 7566 | 1987/4/19 | 3000 | 20 | |
7839 | KING | PRESIDENT | 1981/11/17 | 5000 | 10 | ||
7844 | TURNER | SALESMAN | 7698 | 1981/9/8 | 1500 | 0 | 30 |
7876 | ADAMS | CLERK | 7788 | 1987/5/23 | 1100 | 20 | |
7900 | JAMES | CLERK | 7698 | 1981/12/3 | 950 | 30 | |
7902 | FORD | ANALYST | 7566 | 1981/12/3 | 3000 | 20 | |
7934 | MILLER | CLERK | 7782 | 1982/1/23 | 1300 | 10 |
建立不同的java类
SalaryTotalMain、SalaryTotalMapper、SalaryTotalReducer这三个类
对应代码如下:
SalaryTotalMain
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class SalaryTotalMain {public static void main(String[] args) throws Exception {Configuration conf = new Configuration();Job job = Job.getInstance(conf);job.setJarByClass(SalaryTotalMain.class);job.setMapperClass(SalaryTotalMapper.class);job.setReducerClass(SalaryTotalReducer.class);job.setMapOutputKeyClass(LongWritable.class);job.setMapOutputValueClass(LongWritable.class);job.setOutputKeyClass(LongWritable.class);job.setOutputValueClass(LongWritable.class);FileInputFormat.setInputPaths(job, new Path(args[0]));FileOutputFormat.setOutputPath(job, new Path(args[1]));job.waitForCompletion(true);}
}
SalaryTotalMapper
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
public class SalaryTotalMapper extends Mapper<LongWritable,Text, LongWritable, LongWritable> {@Overrideprotected void map(LongWritable key1,Text value1, Context context)throws IOException, InterruptedException {String data = value1.toString();String[] words = data.split(",");context.write(new LongWritable(Integer.parseInt(words[7])),new LongWritable(Integer.parseInt(words[5])));}}
SalaryTotalReducer
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
public class SalaryTotalReducer extends Reducer<LongWritable, LongWritable,LongWritable, LongWritable> {@Overrideprotected void reduce(LongWritable k3, Iterable<LongWritable> v3,Context context) throws IOException,InterruptedException {long total = 0;long max = 0;for(LongWritable v:v3){total=v.get();if(max<total){max=total;}}context.write(k3, new LongWritable(max));}}
命令
hadoop jar 2.jar ch02.SalaryTotalMain /user/data/input/emp.csv /user/data/output/ch2
hadoop jar 包名 主类 输入路径 输出路径
对应结果
学习链接
在Ubuntu上用mapreduce进行词频统计(伪分布式)_mapreduce怎么统计txt文件词频终端-CSDN博客
利用mapreduce统计部门的最高工资_使用mapreduce查询某个部门中薪资最高的员工姓名,如果输出结果的格式为“薪资 员-CSDN博客
hadoop编程之工资序列化排序-CSDN博客
hadoop编程之部门工资求和-CSDN博客
hadoop编程之词频统计-CSDN博客