Spark 学习笔记:Spark SQL
- 2019 年 10 月 5 日
- 筆記
版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://blog.csdn.net/u014365862/article/details/100982613
spark-env.sh 环境配置(添加 Hive 支持):
# spark-env.sh fragment: points Spark at the JDK, Hadoop/YARN and Hive.
# NOTE: "/***/" is a redacted path prefix in this write-up; replace it with the
# real installation root before using this file.

# JDK used to launch Spark.
export JAVA_HOME=/usr/lib/jdk1.8.0_171

# Options passed to the Spark history server: Kerberos is switched off and the
# principal/keytab values are left empty.
export SPARK_HISTORY_OPTS="-Dspark.history.kerberos.enabled=false -Dspark.history.kerberos.principal= -Dspark.history.kerberos.keytab="

# Hadoop installation and its configuration directory (the same directory is
# used for the YARN configuration).
export HADOOP_HOME=/***/emr-hadoop-2.7.2
export HADOOP_CONF_DIR=/***/emr-hadoop-2.7.2/etc/hadoop
export YARN_CONF_DIR=/***/emr-hadoop-2.7.2/etc/hadoop

# Hive support: Hive libraries and configuration directory.
export CLASSPATH=$CLASSPATH:/***/emr-apache-hive-2.3.2-bin/lib
export HIVE_CONF_DIR=/***/emr-apache-hive-2.3.2-bin/conf

# MySQL JDBC driver on Spark's classpath (presumably for a MySQL-backed Hive
# metastore - confirm against hive-site.xml).
# NOTE(review): SPARK_CLASSPATH is deprecated in Spark 1.0+; consider
# --driver-class-path / spark.executor.extraClassPath (or --jars) instead.
export SPARK_CLASSPATH=$SPARK_CLASSPATH:/***/emr-apache-hive-2.3.2-bin/lib/mysql-connector-java-5.1.38.jar
Scala 代码:
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.hive.HiveContext

import scala.io.Source

/** Shared Spark entry points used by [[SparkSQL]] and [[HiveQL]].
  *
  * All members are created when this object is first referenced, in
  * declaration order: the `SparkContext` first, then the contexts and the
  * session built on top of it.
  */
object SparkConfTrait {
  /** Base configuration; only the application name is set here. */
  val conf: SparkConf = new SparkConf().setAppName("TestSpark Pipeline")

  /** The SparkContext created from [[conf]]. */
  val sparkContext: SparkContext = new SparkContext(conf)

  /** Hive-aware SQL context bound to [[sparkContext]].
    * NOTE(review): `HiveContext` is deprecated since Spark 2.0 in favour of
    * `SparkSession` with Hive support; kept because [[HiveQL]] uses it.
    */
  val hiveContext: HiveContext = new HiveContext(sparkContext)

  /** Plain SQL context bound to [[sparkContext]].
    * NOTE(review): `SQLContext` is likewise superseded by `SparkSession`.
    */
  val sqlContext: SQLContext = new SQLContext(sparkContext)

  /** Hive-enabled session obtained via `getOrCreate()`.
    * NOTE(review): since [[sparkContext]] already exists at this point,
    * `getOrCreate()` is expected to reuse it rather than start a second
    * context - confirm for the Spark version in use.
    */
  val spark: SparkSession =
    SparkSession.builder().enableHiveSupport().appName("TestSpark").getOrCreate()
}

/** Reads SQL text from files; shared by [[SparkSQL]] and [[HiveQL]]. */
private object SqlFileReader {
  /** Returns the entire content of `path` as one string.
    *
    * The file is decoded with the default codec (same as the bare
    * `Source.fromFile(path)` call) and the underlying handle is always
    * closed, even if reading fails.
    *
    * @param path path of the file to read
    * @return the file content
    */
  def read(path: String): String = {
    val source = Source.fromFile(path)
    try source.mkString
    finally source.close()
  }
}

/** Runs SQL stored in a file through the Hive-enabled `SparkSession`. */
object SparkSQL {
  /** Reads `dataSqlFile` and executes its content with `SparkSession.sql`.
    *
    * The whole file is passed to `sql` as a single string (presumably it must
    * therefore contain exactly one statement - confirm).
    *
    * @param dataSqlFile path of the file containing the SQL text
    * @return the DataFrame produced by the query
    */
  def sqlFromFile(dataSqlFile: String): DataFrame =
    SparkConfTrait.spark.sql(SqlFileReader.read(dataSqlFile))

  /** Test entry point: runs the query and displays the result.
    *
    * @param args optional first argument is the SQL file path; defaults to
    *             "path/to/data.sql" when omitted
    */
  def main(args: Array[String]): Unit = {
    val sqlFile = args.headOption.getOrElse("path/to/data.sql")
    val trainDataSqlFrame = sqlFromFile(sqlFile)
    trainDataSqlFrame.show()
  }
}

/** Runs SQL stored in a file through the legacy `HiveContext`. */
object HiveQL {
  /** Reads `dataSqlFile` and executes its content with `HiveContext.sql`.
    *
    * The whole file is passed to `sql` as a single string (presumably it must
    * therefore contain exactly one statement - confirm).
    *
    * @param dataSqlFile path of the file containing the SQL text
    * @return the DataFrame produced by the query
    */
  def sqlFromFile(dataSqlFile: String): DataFrame =
    SparkConfTrait.hiveContext.sql(SqlFileReader.read(dataSqlFile))

  /** Test entry point: runs the query and displays the result.
    *
    * @param args optional first argument is the SQL file path; defaults to
    *             "path/to/data.sql" when omitted
    */
  def main(args: Array[String]): Unit = {
    val sqlFile = args.headOption.getOrElse("path/to/data.sql")
    val trainDataSqlFrame = sqlFromFile(sqlFile)
    trainDataSqlFrame.show()
  }
}