author     RJ Nowling <rnowling@gmail.com>  2015-09-08 13:48:15 -0500
committer  RJ Nowling <rnowling@gmail.com>  2015-09-08 13:49:19 -0500
commit     96a2a4aca547d71eeab36094edff2159cc0bab4f (patch)
tree       23bb75823a42f0cd40ec657ba0e22ba58a7893f8 /bigtop-bigpetstore
parent     5efdbfbfae25b09f6a706f511abdf04943bb8057 (diff)
BIGTOP-2019. BigPetStore Spark isn't compiling due to changes in SQL API
Diffstat (limited to 'bigtop-bigpetstore')
-rw-r--r--  bigtop-bigpetstore/bigpetstore-spark/src/main/scala/org/apache/bigpetstore/spark/analytics/PetStoreStatistics.scala  17
1 file changed, 9 insertions(+), 8 deletions(-)
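For context on the change being patched here: Spark 1.3 replaced SchemaRDD with DataFrame, so sql(...) now returns a DataFrame, and an RDD of case classes has to be converted with toDF() (enabled by import sqlContext.implicits._) before it can be registered as a temp table. Below is a minimal sketch of that pattern against the Spark 1.3-1.6 API; the Transaction case class and the ToDFSketch object are illustrative names, not part of the patch.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SQLContext}

// Illustrative record type; the real schema lives in the BigPetStore data model.
case class Transaction(storeId: Int, productId: Int, month: Int)

object ToDFSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("toDF-sketch").setMaster("local[*]"))
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._  // provides the RDD-to-DataFrame conversion behind toDF()

    val transactions: RDD[Transaction] =
      sc.parallelize(Seq(Transaction(1, 10, 1), Transaction(2, 11, 2)))

    // Spark <= 1.2: registerTempTable could be called on the implicitly created SchemaRDD.
    // Spark 1.3+: convert the RDD to a DataFrame explicitly first.
    transactions.toDF().registerTempTable("Transactions")

    // sql(...) now returns a DataFrame rather than a SchemaRDD.
    val byMonth: DataFrame = sqlContext.sql(
      "SELECT count(*) c, month FROM Transactions GROUP BY month")
    byMonth.collect().foreach(println)

    sc.stop()
  }
}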
diff --git a/bigtop-bigpetstore/bigpetstore-spark/src/main/scala/org/apache/bigpetstore/spark/analytics/PetStoreStatistics.scala b/bigtop-bigpetstore/bigpetstore-spark/src/main/scala/org/apache/bigpetstore/spark/analytics/PetStoreStatistics.scala
index 2d376a40..e7e5b087 100644
--- a/bigtop-bigpetstore/bigpetstore-spark/src/main/scala/org/apache/bigpetstore/spark/analytics/PetStoreStatistics.scala
+++ b/bigtop-bigpetstore/bigpetstore-spark/src/main/scala/org/apache/bigpetstore/spark/analytics/PetStoreStatistics.scala
@@ -68,7 +68,7 @@ object PetStoreStatistics {
   def queryTxByMonth(sqlContext: SQLContext): Array[StatisticsTxByMonth] = {
     import sqlContext._
-    val results: SchemaRDD = sql("SELECT count(*), month FROM Transactions GROUP BY month")
+    val results: DataFrame = sql("SELECT count(*), month FROM Transactions GROUP BY month")
     val transactionsByMonth = results.collect()
     for(x<-transactionsByMonth){
       println(x)
@@ -82,7 +82,7 @@ object PetStoreStatistics {
   def queryTxByProductZip(sqlContext: SQLContext): Array[StatisticsTxByProductZip] = {
     import sqlContext._
-    val results: SchemaRDD = sql(
+    val results: DataFrame = sql(
       """SELECT count(*) c, productId, zipcode
 FROM Transactions t
 JOIN Stores s ON t.storeId = s.storeId
@@ -104,7 +104,7 @@ GROUP BY productId, zipcode""")
   def queryTxByProduct(sqlContext: SQLContext): Array[StatisticsTxByProduct] = {
     import sqlContext._
-    val results: SchemaRDD = sql(
+    val results: DataFrame = sql(
       """SELECT count(*) c, productId FROM Transactions GROUP BY productId""")
     val groupedProducts = results.collect()
@@ -121,16 +121,17 @@ GROUP BY productId, zipcode""")
     val sqlContext = new org.apache.spark.sql.SQLContext(sc)
     import sqlContext._
+    import sqlContext.implicits._
     // Transform the Non-SparkSQL Calendar into a SparkSQL-friendly field.
     val mappableTransactions:RDD[TransactionSQL] =
       r._5.map { trans => trans.toSQL() }
-    r._1.registerTempTable("Locations")
-    r._2.registerTempTable("Stores")
-    r._3.registerTempTable("Customers")
-    r._4.registerTempTable("Product")
-    mappableTransactions.registerTempTable("Transactions")
+    r._1.toDF().registerTempTable("Locations")
+    r._2.toDF().registerTempTable("Stores")
+    r._3.toDF().registerTempTable("Customers")
+    r._4.toDF().registerTempTable("Product")
+    mappableTransactions.toDF().registerTempTable("Transactions")
     val txByMonth = queryTxByMonth(sqlContext)
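After this patch, the analytics path registers each case-class RDD through toDF() and then runs plain SQL over the temp tables, as queryTxByProductZip does above. Here is a hedged sketch of that query path under the same 1.3-era API, assuming an existing SQLContext and ETL output RDDs; the Store and TransactionSQL field lists and the QuerySketch object are illustrative, not taken from the BigPetStore datamodel.

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SQLContext}

// Illustrative record shapes; the real ones come from the BigPetStore datamodel package.
case class Store(storeId: Int, zipcode: String)
case class TransactionSQL(storeId: Int, productId: Int, month: Int)

object QuerySketch {
  def txByProductZip(sqlContext: SQLContext,
                     stores: RDD[Store],
                     transactions: RDD[TransactionSQL]): Array[(Long, Int, String)] = {
    import sqlContext.implicits._  // needed for toDF() on case-class RDDs

    stores.toDF().registerTempTable("Stores")
    transactions.toDF().registerTempTable("Transactions")

    // Same SQL as the patched queryTxByProductZip; sql() returns a DataFrame in Spark 1.3+.
    val results: DataFrame = sqlContext.sql(
      """SELECT count(*) c, productId, zipcode
         FROM Transactions t
         JOIN Stores s ON t.storeId = s.storeId
         GROUP BY productId, zipcode""")

    // Pull typed columns back out of the generic Row objects.
    results.collect().map(row => (row.getLong(0), row.getInt(1), row.getString(2)))
  }
}

The explicit toDF() call is the substance of the fix: with the 1.3 API an RDD no longer converts implicitly to a type exposing registerTempTable, which is the compile failure BIGTOP-2019 describes.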