我正在使用 Spark 2.0.0。这是我的代码:
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
/** Small Spark SQL example: loads a CSV file, exposes it as a temporary view,
  * and inspects the logical plan of a query that selects from a sub-query.
  */
object WikiDataframe {

  /** Reads the CSV at `FILE_LOCATION` and registers it as the temporary view
    * `pageviews_by_second`.
    *
    * NOTE(review): `FILE_LOCATION` is not defined in the visible code; it is
    * presumably declared elsewhere in the project.
    *
    * @param sparkSession session used to read the file and to hold the view
    * @return the loaded DataFrame (first line treated as header, schema inferred)
    */
  def getDataframe(sparkSession: SparkSession): DataFrame = {
    val df = sparkSession.read
      .option("header", "true")
      .option("inferSchema", "true")
      .csv(FILE_LOCATION)
    // registerTempTable is deprecated since Spark 2.0.0; this is its replacement.
    df.createOrReplaceTempView("pageviews_by_second")
    df
  }

  /** Entry point: builds a local session, registers the view, runs the query,
    * prints its logical plan, and applies `EliminateSubqueryAliases` to it.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val sparkSession = SparkSession
      .builder()
      .appName("Spark SQL Example")
      .master("local")
      .getOrCreate()

    try {
      // Called for its side effect: the SQL below needs the temp view to exist.
      WikiDataframe.getDataframe(sparkSession)

      val query: DataFrame =
        sparkSession.sql("select Date from (select * from pageviews_by_second ) a")

      // Logical plan as exposed by the query's QueryExecution.
      val logicalQuery: LogicalPlan = query.queryExecution.logical
      println(s"logicalQuery : $logicalQuery")

      println("Eliminating sub queries")
      // The rewritten plan is computed but not consumed yet; turning it back
      // into a DataFrame/Dataset is the open question of this example.
      val withoutSubqueryAliases: LogicalPlan = EliminateSubqueryAliases(logicalQuery)
    } finally {
      // Release the session's resources even if loading or the query fails.
      sparkSession.stop()
    }
  }
}
我卡在了如何执行 logicalQuery 这一步。
如果可能的话,我想由它得到 DataFrame 或 Dataset。任何帮助都将不胜感激。