
Spark 学习笔记:Spark SQL

  • 2019 年 10 月 5 日
  • 筆記

版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。

本文链接:https://blog.csdn.net/u014365862/article/details/100982613

spark-env.sh 环境配置(添加 Hive 支持):

# spark-env.sh — Hadoop/Hive integration for Spark (one export per line).
export JAVA_HOME=/usr/lib/jdk1.8.0_171
# Disable Kerberos for the Spark history server.
export SPARK_HISTORY_OPTS="-Dspark.history.kerberos.enabled=false -Dspark.history.kerberos.principal= -Dspark.history.kerberos.keytab="
export HADOOP_HOME=/***/emr-hadoop-2.7.2
# NOTE: the original set HADOOP_CONF_DIR twice (first time without `export`);
# deduplicated here — the value was identical.
export HADOOP_CONF_DIR=/***/emr-hadoop-2.7.2/etc/hadoop
export YARN_CONF_DIR=/***/emr-hadoop-2.7.2/etc/hadoop
# Put Hive's jars and config on the classpath so Spark SQL can reach the metastore.
export CLASSPATH=$CLASSPATH:/***/emr-apache-hive-2.3.2-bin/lib
export HIVE_CONF_DIR=/***/emr-apache-hive-2.3.2-bin/conf
# MySQL JDBC driver for the Hive metastore connection.
export SPARK_CLASSPATH=$SPARK_CLASSPATH:/***/emr-apache-hive-2.3.2-bin/lib/mysql-connector-java-5.1.38.jar

Scala 代码:

import org.apache.spark.sql.{DataFrame, SQLContext, SparkSession}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext
import scala.io.Source
// FIX: the original never imported DataFrame or scala.io.Source, so it
// would not compile; both are now in scope.

/** Shared Spark entry points used by [[SparkSQL]] and [[HiveQL]]. */
object SparkConfTrait {
  val conf = new SparkConf().setAppName("TestSpark Pipeline")
  val sparkContext = new SparkContext(conf)
  // NOTE(review): HiveContext/SQLContext are deprecated since Spark 2.0 —
  // `spark` below covers both. Kept here because HiveQL still uses hiveContext.
  val hiveContext = new HiveContext(sparkContext)
  val sqlContext = new SQLContext(sparkContext)
  // Hive-enabled session; () kept on enableHiveSupport per convention.
  val spark = SparkSession.builder().enableHiveSupport().appName("TestSpark").getOrCreate()
}

object SparkSQL {

  /** Reads the SQL text in `dataSqlFile` and runs it through the
    * Hive-enabled [[SparkConfTrait.spark]] session.
    *
    * @param dataSqlFile path to a file containing a single SQL query
    * @return the resulting DataFrame
    */
  def sqlFromFile(dataSqlFile: String): DataFrame = {
    val src = Source.fromFile(dataSqlFile)
    try {
      SparkConfTrait.spark.sql(src.mkString)
    } finally {
      src.close() // FIX: the original leaked the file handle
    }
  }

  /** Smoke test: run the query in path/to/data.sql and print the result. */
  def main(args: Array[String]): Unit = {
    val trainDataSqlFrame = sqlFromFile("path/to/data.sql")
    trainDataSqlFrame.show()
  }
}

object HiveQL {

  /** Same as [[SparkSQL.sqlFromFile]] but executed via the (deprecated)
    * [[SparkConfTrait.hiveContext]].
    *
    * @param dataSqlFile path to a file containing a single SQL query
    * @return the resulting DataFrame
    */
  def sqlFromFile(dataSqlFile: String): DataFrame = {
    val src = Source.fromFile(dataSqlFile)
    try {
      SparkConfTrait.hiveContext.sql(src.mkString)
    } finally {
      src.close() // FIX: the original leaked the file handle
    }
  }

  /** Smoke test: run the query in path/to/data.sql and print the result. */
  def main(args: Array[String]): Unit = {
    val trainDataSqlFrame = sqlFromFile("path/to/data.sql")
    trainDataSqlFrame.show()
  }
}