I. Environment description
A Spark job fails when submitted to YARN. The business logic is simple: data is fetched through an interface call and then written into Hive via Spark SQL. After trying all sorts of Hadoop version swaps in the dependencies, the problem was finally solved; the environment, the error, and the fix are documented below.
1. Hadoop environment: CDH 6.3.2 (Hadoop 3.0.0-cdh6.3.2)
2. Submit command and project pom.xml
spark-submit \
--name GridCorrelationMain \
--master yarn \
--deploy-mode cluster \
--executor-cores 2 \
--executor-memory 4G \
--num-executors 5 \
--driver-memory 2G \
--class cn.zd.maincode.wangge.GridCorrelationMain \
/home/boeadm/zwj/iot/cp-etl-spark-data/target/cp_zhengda_spark_utils-1.0-SNAPSHOT.jar

The pom.xml dependencies:

<dependencies>
    <dependency>
        <groupId>org.apache.commons</groupId>
        <artifactId>commons-configuration2</artifactId>
        <version>2.9.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>2.3.3</version>
        <exclusions>
            <exclusion><artifactId>hadoop-client</artifactId><groupId>org.apache.hadoop</groupId></exclusion>
            <exclusion><artifactId>slf4j-log4j12</artifactId><groupId>org.slf4j</groupId></exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.11</artifactId>
        <version>2.3.3</version>
        <!--<scope>provided</scope>-->
        <!-- <exclusions><exclusion><groupId>com.google.guava</groupId><artifactId>guava</artifactId></exclusion></exclusions>-->
    </dependency>
    <!--<dependency><groupId>com.google.guava</groupId><artifactId>guava</artifactId><version>15.0</version></dependency>-->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>${hadoop.version}</version>
        <exclusions>
            <exclusion><groupId>commons-codec</groupId><artifactId>commons-codec</artifactId></exclusion>
            <exclusion><groupId>commons-httpclient</groupId><artifactId>commons-httpclient</artifactId></exclusion>
            <!-- <exclusion><groupId>com.google.guava</groupId><artifactId>guava</artifactId></exclusion>-->
        </exclusions>
        <!--<scope>provided</scope>-->
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
        <exclusions>
            <exclusion><artifactId>hadoop-common</artifactId><groupId>org.apache.hadoop</groupId></exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-hive_2.11</artifactId>
        <version>2.3.2</version>
        <exclusions>
            <exclusion><artifactId>hive-exec</artifactId><groupId>org.spark-project.hive</groupId></exclusion>
            <exclusion><artifactId>hive-metastore</artifactId><groupId>org.spark-project.hive</groupId></exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-jdbc</artifactId>
        <version>2.1.1</version>
        <exclusions>
            <exclusion><groupId>org.eclipse.jetty.aggregate</groupId><artifactId>jetty-all</artifactId></exclusion>
            <exclusion><groupId>org.apache.hive</groupId><artifactId>hive-shims</artifactId></exclusion>
            <exclusion><artifactId>hbase-mapreduce</artifactId><groupId>org.apache.hbase</groupId></exclusion>
            <exclusion><artifactId>hbase-server</artifactId><groupId>org.apache.hbase</groupId></exclusion>
            <exclusion><artifactId>log4j-slf4j-impl</artifactId><groupId>org.apache.logging.log4j</groupId></exclusion>
            <exclusion><artifactId>slf4j-log4j12</artifactId><groupId>org.slf4j</groupId></exclusion>
        </exclusions>
    </dependency>
    <!-- Dependencies for the service authentication calls -->
    <dependency>
        <groupId>org.apache.httpcomponents</groupId>
        <artifactId>httpclient</artifactId>
        <version>4.5.13</version>
        <exclusions>
            <exclusion><groupId>commons-codec</groupId><artifactId>commons-codec</artifactId></exclusion>
        </exclusions>
        <!--<scope>provided</scope>-->
    </dependency>
    <!-- This jar is needed when running locally -->
    <dependency>
        <groupId>commons-codec</groupId>
        <artifactId>commons-codec</artifactId>
        <version>1.15</version>
        <!--<scope>provided</scope>-->
    </dependency>
    <dependency>
        <groupId>com.typesafe</groupId>
        <artifactId>config</artifactId>
        <version>1.3.1</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.62</version>
    </dependency>
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>${fastjson.version}</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.json/json -->
    <dependency>
        <groupId>org.json</groupId>
        <artifactId>json</artifactId>
        <version>20160810</version>
    </dependency>
    <dependency>
        <groupId>com.github.qlone</groupId>
        <artifactId>retrofit-crawler</artifactId>
        <version>1.0.0</version>
    </dependency>
    <dependency>
        <groupId>com.oracle.database.jdbc</groupId>
        <artifactId>ojdbc8</artifactId>
        <version>12.2.0.1</version>
    </dependency>
    <!-- MySQL connection -->
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.40</version>
    </dependency>
    <dependency>
        <groupId>javax.mail</groupId>
        <artifactId>javax.mail-api</artifactId>
        <version>1.5.6</version>
    </dependency>
    <dependency>
        <groupId>org.apache.commons</groupId>
        <artifactId>commons-email</artifactId>
        <version>1.4</version>
    </dependency>
</dependencies>
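The snippet above references ${hadoop.version} and ${fastjson.version} but does not show the <properties> block. A minimal sketch of what it presumably looked like for this build is given below; the concrete values and the repository entry are assumptions for illustration (hadoop.version pointed at the CDH 6.3.2 artifacts, which are resolved from the Cloudera repository), not taken from the original post.

<!-- Hypothetical properties block; the exact values are assumptions. -->
<properties>
    <hadoop.version>3.0.0-cdh6.3.2</hadoop.version>
    <fastjson.version>1.2.62</fastjson.version>
</properties>

<!-- CDH-flavoured artifacts such as 3.0.0-cdh6.3.2 come from the Cloudera repository. -->
<repositories>
    <repository>
        <id>cloudera</id>
        <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
</repositories>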
3. Error when the job is submitted to the cluster
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupRelation(SessionCatalog.scala:696)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveRelations$$lookupTableFromCatalog(Analyzer.scala:730)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.resolveRelation(Analyzer.scala:685)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$8.applyOrElse(Analyzer.scala:715)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$8.applyOrElse(Analyzer.scala:708)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$apply$1.apply(AnalysisHelper.scala:90)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$apply$1.apply(AnalysisHelper.scala:90)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:89)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:194)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.resolveOperatorsUp(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUp(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:194)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.resolveOperatorsUp(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUp(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1$$anonfun$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:87)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$$anonfun$resolveOperatorsUp$1.apply(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:194)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.resolveOperatorsUp(AnalysisHelper.scala:86)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsUp(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:708)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:654)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:87)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:84)
at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:124)
at scala.collection.immutable.List.foldLeft(List.scala:84)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:84)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76)
at scala.collection.immutable.List.foreach(List.scala:392)
at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76)
at org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:127)
at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:121)
at org.apache.spark.sql.catalyst.analysis.Analyzer$$anonfun$executeAndCheck$1.apply(Analyzer.scala:106)
at org.apache.spark.sql.catalyst.analysis.Analyzer$$anonfun$executeAndCheck$1.apply(Analyzer.scala:105)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:201)
at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:105)
at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:57)
at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:55)
at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:47)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:78)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:651)
at cn.zd.maincode.wangge.GridCorrelationMain$.createDataFrameAndTempView(GridCorrelationMain.scala:264)
at cn.zd.maincode.wangge.GridCorrelationMain$.horecaGridInfo(GridCorrelationMain.scala:148)
at cn.zd.maincode.wangge.GridCorrelationMain$.main(GridCorrelationMain.scala:110)
at cn.zd.maincode.wangge.GridCorrelationMain.main(GridCorrelationMain.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:673)
Caused by: java.lang.ExceptionInInitializerError
at org.apache.hadoop.hive.conf.HiveConf.<clinit>(HiveConf.java:105)
at org.apache.spark.sql.hive.client.HiveClientImpl.newState(HiveClientImpl.scala:153)
at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:118)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:292)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:395)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:284)
at org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:68)
at org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:67)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply$mcZ$sp(HiveExternalCatalog.scala:217)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:217)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:217)
at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:99)
... 72 more
Caused by: java.lang.IllegalArgumentException: Unrecognized Hadoop major version number: 3.0.0-cdh6.3.2
at org.apache.hadoop.hive.shims.ShimLoader.getMajorVersion(ShimLoader.java:169)
at org.apache.hadoop.hive.shims.ShimLoader.loadShims(ShimLoader.java:134)
at org.apache.hadoop.hive.shims.ShimLoader.getHadoopShims(ShimLoader.java:95)
at org.apache.hadoop.hive.conf.HiveConf$ConfVars.<clinit>(HiveConf.java:354)
... 88 more
End of LogType:stderr
4. Final solution
As the stack trace shows, HiveConf's static initializer calls ShimLoader.getMajorVersion(), and the Hive classes packaged into the fat jar are too old to recognize Hadoop 3.x, so initialization fails with "Unrecognized Hadoop major version number: 3.0.0-cdh6.3.2". The fix is to stop packaging the Spark/Hadoop-related dependencies into the application jar (the spark-core/spark-sql/spark-hive and hadoop-* dependencies were dropped) and rely on the versions the CDH cluster already provides at runtime. The dependency section after the cleanup:
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-jdbc</artifactId>
    <version>2.1.1</version>
    <exclusions>
        <exclusion><groupId>org.eclipse.jetty.aggregate</groupId><artifactId>jetty-all</artifactId></exclusion>
        <exclusion><groupId>org.apache.hive</groupId><artifactId>hive-shims</artifactId></exclusion>
        <exclusion><artifactId>hbase-mapreduce</artifactId><groupId>org.apache.hbase</groupId></exclusion>
        <exclusion><artifactId>hbase-server</artifactId><groupId>org.apache.hbase</groupId></exclusion>
        <exclusion><artifactId>log4j-slf4j-impl</artifactId><groupId>org.apache.logging.log4j</groupId></exclusion>
        <exclusion><artifactId>slf4j-log4j12</artifactId><groupId>org.slf4j</groupId></exclusion>
    </exclusions>
</dependency>
<!-- Dependencies for the service authentication calls -->
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.13</version>
    <exclusions>
        <exclusion><groupId>commons-codec</groupId><artifactId>commons-codec</artifactId></exclusion>
    </exclusions>
    <!--<scope>provided</scope>-->
</dependency>
<!-- This jar is needed when running locally -->
<dependency>
    <groupId>commons-codec</groupId>
    <artifactId>commons-codec</artifactId>
    <version>1.15</version>
    <!--<scope>provided</scope>-->
</dependency>
<dependency>
    <groupId>com.typesafe</groupId>
    <artifactId>config</artifactId>
    <version>1.3.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.62</version>
</dependency>
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>${fastjson.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.json/json -->
<dependency>
    <groupId>org.json</groupId>
    <artifactId>json</artifactId>
    <version>20160810</version>
</dependency>
<dependency>
    <groupId>com.github.qlone</groupId>
    <artifactId>retrofit-crawler</artifactId>
    <version>1.0.0</version>
</dependency>
<dependency>
    <groupId>com.oracle.database.jdbc</groupId>
    <artifactId>ojdbc8</artifactId>
    <version>12.2.0.1</version>
</dependency>
<!-- MySQL connection -->
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.40</version>
</dependency>
<!-- Newly removed on Oct 31 -->
<!-- <dependency><groupId>com.google.guava</groupId><artifactId>guava</artifactId><version>28.0-jre</version></dependency>-->
<!-- https://mvnrepository.com/artifact/org.apache.directory.studio/org.apache.commons.codec -->
<!-- https://mvnrepository.com/artifact/org.apache.commons/org.apache.commons.codec -->
<!-- Email sending dependencies -->
<dependency>
    <groupId>javax.mail</groupId>
    <artifactId>javax.mail-api</artifactId>
    <version>1.5.6</version>
</dependency>
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-email</artifactId>
    <version>1.4</version>
</dependency>
<!--<dependency><groupId>org.scala-lang</groupId><artifactId>scala-library</artifactId><version>2.11.2</version></dependency>
    <dependency><groupId>org.scala-lang</groupId><artifactId>scala-reflect</artifactId><version>2.11.2</version></dependency>
    <dependency><groupId>org.scala-lang</groupId><artifactId>scala-compiler</artifactId><version>2.11.2</version></dependency>-->
<!-- <dependency>
     <groupId>com.starrocks</groupId>
     <artifactId>starrocks-spark2_2.11</artifactId>
     <version>1.0.1</version>
</dependency>-->
</dependencies>
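An equivalent approach, hinted at by the commented-out <scope>provided</scope> tags in the pom above, is to keep the Spark/Hadoop dependencies for local development and compilation but mark them as provided, so Maven does not bundle them into the jar that is submitted to YARN. The following is only a sketch of that idea, reusing the same ${hadoop.version} property; it is not the exact configuration used in the original fix.

<!-- Sketch: compile against Spark/Hadoop, but let the cluster supply them at runtime. -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.11</artifactId>
    <version>2.3.3</version>
    <scope>provided</scope>  <!-- not packaged into the fat jar -->
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-hive_2.11</artifactId>
    <version>2.3.2</version>
    <scope>provided</scope>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>${hadoop.version}</version>
    <scope>provided</scope>
</dependency>

With provided scope the classes remain on the compile classpath, while spark-submit in yarn/cluster mode resolves Spark, Hadoop, and Hive from the cluster's own CDH jars, which avoids shipping an old Hive ShimLoader that cannot handle Hadoop 3.x.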