1. Project Structure
The project uses the following files: WordCount.java, core-site.xml, mapred-site.xml, yarn-site.xml, log4j.properties, and pom.xml.
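In a standard Maven layout (the original post does not show the directory tree, so this arrangement is an assumption), the XML and properties files go under src/main/resources so that they land on the runtime classpath:

hadoop-test/
├── pom.xml
└── src/main/
    ├── java/com/mk/mapreduce/WordCount.java
    └── resources/
        ├── core-site.xml
        ├── mapred-site.xml
        ├── yarn-site.xml
        └── log4j.properties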
2. Project Source Code
(1) WordCount.java
package com.mk.mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.StringTokenizer;

public class WordCount {

    // Mapper: split each input line into whitespace-separated tokens and emit (word, 1).
    public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                context.write(new Text(tokenizer.nextToken()), new IntWritable(1));
            }
        }
    }

    // Reducer (also used as the combiner, which is safe because summing is
    // associative and commutative): add up the counts for each word.
    public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        final String uri = "hdfs://192.168.150.128:9000";
        Configuration conf = new Configuration();
        // Required when submitting from a Windows client to a Linux cluster.
        if (System.getProperty("os.name").toLowerCase().contains("win")) {
            conf.set("mapreduce.app-submission.cross-platform", "true");
        }
        Job job = Job.getInstance(conf, "word count");
        // Ship the built jar to the cluster (IntelliJ IDEA artifact output path).
        String jar = ".\\out\\artifacts\\hadoop_test_jar\\hadoop-test.jar";
        job.setJar(jar);
        job.setJarByClass(WordCount.class);
        job.setMapperClass(MyMapper.class);
        job.setCombinerClass(MyReducer.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(uri + "/input"));
        // Note: the job fails if the output directory already exists.
        FileOutputFormat.setOutputPath(job, new Path(uri + "/output"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
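After a successful run, the reducer writes its counts to /output/part-r-00000 in HDFS. The following is a minimal verification sketch, not part of the original project, assuming the same HDFS URI and output path as in WordCount.main (PrintOutput is a hypothetical helper class):

package com.mk.mapreduce;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical helper: print the reducer output produced by WordCount.
public class PrintOutput {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://192.168.150.128:9000"), conf);
        Path part = new Path("/output/part-r-00000"); // default single-reducer output file
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(part)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line); // e.g. "hello	3"
            }
        }
    }
}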
(2) core-site.xml
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hadoop01:9000</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/home/mk/data/tmp</value>
    </property>
</configuration>
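new Configuration() automatically loads core-site.xml (along with hdfs-site.xml, mapred-site.xml, and yarn-site.xml) from the classpath, which is why these files sit in the project's resources. A quick sanity-check sketch, assuming the XML files are on the classpath (ConfCheck is a hypothetical helper, not part of the original project):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

// Should print hdfs://hadoop01:9000 twice if core-site.xml was picked up.
public class ConfCheck {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        System.out.println(conf.get("fs.defaultFS"));
        System.out.println(FileSystem.get(conf).getUri());
    }
}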
(3) mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>hadoop01:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>hadoop01:19888</value>
    </property>
</configuration>
(4) yarn-site.xml
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>hadoop01:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>hadoop01:8025</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address</name>
        <value>hadoop01:8040</value>
    </property>
</configuration>
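The yarn.resourcemanager.* entries point clients and NodeManagers at the ResourceManager's RPC endpoints. Note that this cluster uses custom ports (8040 for job submission, 8030 for the scheduler, 8025 for the resource tracker) rather than the Hadoop 2.x defaults of 8032/8030/8031. A small hypothetical check of what the client resolves, using YarnConfiguration:

import org.apache.hadoop.yarn.conf.YarnConfiguration;

// Prints the ResourceManager addresses the client will use.
public class YarnCheck {
    public static void main(String[] args) {
        YarnConfiguration conf = new YarnConfiguration();
        System.out.println(conf.get(YarnConfiguration.RM_ADDRESS));           // expect hadoop01:8040
        System.out.println(conf.get(YarnConfiguration.RM_SCHEDULER_ADDRESS)); // expect hadoop01:8030
    }
}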
(5) log4j.properties
log4j.rootLogger=INFO, stdout
#log4j.logger.org.springframework=INFO
#log4j.logger.org.apache.activemq=INFO
#log4j.logger.org.apache.activemq.spring=WARN
#log4j.logger.org.apache.activemq.store.journal=INFO
#log4j.logger.org.activeio.journal=INFO

log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} | %-5.5p | %-16.16t | %-32.32c{1} | %-32.32C %4L | %m%n
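This sends all client-side logging to the console through the stdout appender. A trivial illustration (LogDemo is a hypothetical class, not part of the project) of how a line rendered by the ConversionPattern above looks:

import org.apache.log4j.Logger;

public class LogDemo {
    public static void main(String[] args) {
        Logger log = Logger.getLogger(LogDemo.class);
        // Rendered by the pattern roughly as:
        // 12:34:56,789 | INFO  | main             | LogDemo                          | LogDemo      7 | job submitted
        log.info("job submitted");
    }
}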
(6) pom.xml
<?xml version="1.0" encoding="UTF-8"?><project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"><modelVersion>4.0.0</modelVersion><groupId>com.mk</groupId><artifactId>hadoop-test</artifactId><version>1.0-SNAPSHOT</version><name>hadoop-test</name><url>http://www.mk.com</url><properties><project.build.sourceEncoding>UTF-8</project.build.sourceEncoding><maven.compiler.source>1.7</maven.compiler.source><maven.compiler.target>1.7</maven.compiler.target><project.build.sourceEncoding>UTF-8</project.build.sourceEncoding><hadoop.version>2.6.0</hadoop.version></properties><dependencies><dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-client</artifactId><version>${hadoop.version}</version></dependency><dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-common</artifactId><version>${hadoop.version}</version></dependency><dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-hdfs</artifactId><version>${hadoop.version}</version></dependency><dependency><groupId>junit</groupId><artifactId>junit</artifactId><version>4.11</version><scope>test</scope></dependency></dependencies><build><pluginManagement><plugins><plugin><artifactId>maven-clean-plugin</artifactId><version>3.1.0</version></plugin><plugin><artifactId>maven-resources-plugin</artifactId><version>3.0.2</version></plugin><plugin><artifactId>maven-compiler-plugin</artifactId><version>3.8.0</version></plugin><plugin><artifactId>maven-surefire-plugin</artifactId><version>2.22.1</version></plugin><plugin><artifactId>maven-jar-plugin</artifactId><version>3.0.2</version></plugin><plugin><artifactId>maven-install-plugin</artifactId><version>2.5.2</version></plugin><plugin><artifactId>maven-deploy-plugin</artifactId><version>2.8.2</version></plugin><!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle --><plugin><artifactId>maven-site-plugin</artifactId><version>3.7.1</version></plugin><plugin><artifactId>maven-project-info-reports-plugin</artifactId><version>3.0.0</version></plugin></plugins></pluginManagement></build>
</project>
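One practical note: mvn package builds the jar as target/hadoop-test-1.0-SNAPSHOT.jar (inferred from the artifactId and version above), whereas the driver's job.setJar(...) call points at an IntelliJ IDEA artifact path (.\out\artifacts\hadoop_test_jar\hadoop-test.jar). If you build with Maven instead of an IDEA artifact, change the path passed to setJar accordingly, e.g. job.setJar("target/hadoop-test-1.0-SNAPSHOT.jar").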