Environment:
hadoop 2.7.2
spark-without-hadoop 2.4.6
hive 2.3.4
hive-site.xml
<property>
  <name>hive.execution.engine</name>
  <value>spark</value>
</property>
<property>
  <name>spark.yarn.jars</name>
  <value>hdfs://hadoop03:9000/spark-jars/*</value>
</property>
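Since spark.yarn.jars points to hdfs://hadoop03:9000/spark-jars/*, the Spark jars have to be uploaded to that HDFS directory first. A minimal sketch, assuming SPARK_HOME points at the spark-without-hadoop install (directory name is an assumption):

# upload the Spark jars referenced by spark.yarn.jars
hdfs dfs -mkdir -p /spark-jars
hdfs dfs -put ${SPARK_HOME}/jars/*.jar /spark-jars/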
./bin/hive (append the Spark jars to Hive's CLASSPATH so the Spark client classes can be loaded):
for f in ${SPARK_HOME}/jars/*.jar; do
  CLASSPATH=${CLASSPATH}:$f
done
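The loop above relies on SPARK_HOME being set before bin/hive runs; one place to export it is conf/hive-env.sh. A sketch, using the install path that appears in the spark-submit command below (adjust to your own layout):

# conf/hive-env.sh
export SPARK_HOME=/opt/module/spark-2.4.4-bin-without-hadoop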
Hive then launches the Spark RemoteDriver via spark-submit, e.g.:
/opt/module/spark-2.4.4-bin-without-hadoop/bin/spark-submit \
--properties-file /tmp/spark-submit.8770948269278745415.properties \
--class org.apache.hive.spark.client.RemoteDriver /opt/module/hive/lib/hive-exec-2.3.4.jar \
--remote-host hadoop03 \
--remote-port 43182 \
--conf hive.spark.client.connect.timeout=1000 \
--conf hive.spark.client.server.connect.timeout=90000 \
--conf hive.spark.client.channel.log.level=null \
--conf hive.spark.client.rpc.max.size=52428800 \
--conf hive.spark.client.rpc.threads=8 \
--conf hive.spark.client.secret.bits=256 \
--conf hive.spark.client.rpc.server.address=null
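Once the driver comes up, a quick way to confirm that queries actually run on Spark is to force a job from the Hive CLI. A sketch; the table name is just a placeholder:

# print the active engine and run a query that needs a job (table name is hypothetical)
hive -e "set hive.execution.engine; select count(*) from some_table;"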
Issues encountered
1)java.lang.NoClassDefFoundError: scala/collection/Iterable
https://stackoverflow.com/questions/38345447/apache-hive-exception-noclassdeffounderror-scala-collection-iterable
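The usual cause is that the Scala runtime shipped with Spark is not on Hive's classpath. Besides the CLASSPATH loop in bin/hive above, a commonly suggested fix (see the link) is to copy or symlink the relevant Spark jars into Hive's lib directory. A sketch, assuming Spark 2.x jar names:

# jar names/versions depend on the Spark build
cp ${SPARK_HOME}/jars/scala-library-*.jar ${HIVE_HOME}/lib/
cp ${SPARK_HOME}/jars/spark-core_*.jar ${HIVE_HOME}/lib/
cp ${SPARK_HOME}/jars/spark-network-common_*.jar ${HIVE_HOME}/lib/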
2) Various ClassNotFoundException errors
…