org.apache.spark.mllib.recommendation.MatrixFactorizationModel Scala Examples
The following examples show how to use org.apache.spark.mllib.recommendation.MatrixFactorizationModel.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: RecommendationExample.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}
// $example on$
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
import org.apache.spark.mllib.recommendation.Rating
// $example off$

/**
 * Collaborative-filtering example.
 *
 * Reads "user,item,rating" lines from `data/mllib/als/test.data`, trains an ALS model,
 * prints the mean squared error of the model on the same ratings it was trained on,
 * then saves the model and loads it back.
 */
object RecommendationExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("CollaborativeFilteringExample")
    val sc = new SparkContext(conf)
    // $example on$
    // Load and parse the data; a line that does not have exactly three comma-separated
    // fields fails the pattern match.
    val data = sc.textFile("data/mllib/als/test.data")
    val ratings = data.map(_.split(',') match {
      case Array(user, item, rate) =>
        Rating(user.toInt, item.toInt, rate.toDouble)
    })

    // Build the recommendation model using ALS
    val rank = 10
    val numIterations = 10
    val model = ALS.train(ratings, rank, numIterations, 0.01)

    // Evaluate the model on rating data
    val usersProducts = ratings.map { case Rating(user, product, rate) =>
      (user, product)
    }
    val predictions =
      model.predict(usersProducts).map { case Rating(user, product, rate) =>
        ((user, product), rate)
      }
    // Pair every observed rating with its prediction, keyed by (user, product).
    val ratesAndPreds = ratings.map { case Rating(user, product, rate) =>
      ((user, product), rate)
    }.join(predictions)
    val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) =>
      val err = (r1 - r2)
      err * err
    }.mean()
    println(s"Mean Squared Error = $MSE")

    // Save and load model
    model.save(sc, "target/tmp/myCollaborativeFilter")
    val sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")
    // $example off$

    // Release the SparkContext once the example has finished.
    sc.stop()
  }
}
// scalastyle:on println
Example 2
Source File: EvaluateResult.scala From learning-spark with Apache License 2.0 | 5 votes |
package com.javachen.grab

import org.apache.spark.mllib.recommendation.{ALS, MatrixFactorizationModel, Rating}
import org.apache.spark.rdd.RDD

/** Offline evaluation metrics for per-user recommendation lists. */
object EvaluateResult {

  /**
   * F1 score (harmonic mean) of recall and precision.
   *
   * Returns 0.0 when both inputs are 0 instead of the NaN produced by 0/0.
   * NaN inputs still propagate to the result.
   */
  private def f1Score(recall: Double, precision: Double): Double =
    if (recall + precision == 0.0) 0.0
    else 2 * recall * precision / (recall + precision)

  /**
   * Fraction of the distinct products in `training` that appear in at least one
   * recommendation list.
   *
   * @param training       ratings whose distinct products form the denominator
   * @param userRecommends (user, recommended product ids) pairs
   * @return distinct recommended products / distinct products in `training`
   */
  def coverage(training: RDD[Rating], userRecommends: RDD[(Int, List[Int])]): Double = {
    val recommendedItems = userRecommends.flatMap(_._2).distinct().count
    val knownItems = training.map(_.product).distinct().count
    recommendedItems.toDouble / knownItems
  }

  /**
   * Mean of `log(1 + number of ratings of the item in training)` over every recommended item.
   *
   * A recommended item with no rating in `training` counts as popularity 0; it
   * previously raised a `NoSuchElementException`.
   *
   * @param training       ratings used to count how often each product was rated
   * @param userRecommends (user, recommended product ids) pairs
   * @return the mean log-popularity; NaN when nothing was recommended
   */
  def popularity(training: RDD[Rating], userRecommends: RDD[(Int, List[Int])]): Double = {
    // Count ratings per product without materialising every (user, rate) group.
    val itemPopularity = training
      .map(rating => (rating.product, 1))
      .reduceByKey(_ + _, 4)
      .collectAsMap()

    val logPopularities = userRecommends
      .flatMap(_._2)
      .collect()
      .map(item => math.log(1 + itemPopularity.getOrElse(item, 0)))

    logPopularities.sum / logPopularities.length
  }

  /**
   * Recall, precision and F1 of the recommendation lists against the (user, product)
   * pairs found in `training`. Only users present in both inputs are evaluated.
   *
   * Also prints "hit,testNum,recNum" to standard output.
   *
   * @param training       ratings whose (user, product) pairs are treated as the relevant items
   * @param userRecommends (user, recommended product ids) pairs
   * @return (recall, precision, f1)
   */
  def recallAndPrecisionAndF1(
      training: RDD[Rating],
      userRecommends: RDD[(Int, List[Int])]): (Double, Double, Double) = {
    val usersProducts: RDD[(Int, Int)] = training.map { case Rating(user, product, rate) =>
      (user, product)
    }
    val groupData = userRecommends.join(
      usersProducts.groupByKey().map { case (k, v) => (k, v.toList) })

    val (hit, testNum, recNum) = groupData.map { case (user, (mItems, tItems)) =>
      // Precision numerator: recommended items that were hit; the denominator is the
      // number of items actually recommended (the list length is the top-N cap).
      // A set lookup replaces the indexed List access + linear contains.
      val relevant = tItems.toSet
      val hits = mItems.count(relevant.contains)
      (hits, tItems.length, mItems.length)
    }.reduce((t1, t2) => (t1._1 + t2._1, t1._2 + t2._2, t1._3 + t2._3))

    val recall: Double = hit * 1.0 / testNum
    val precision: Double = hit * 1.0 / recNum
    val f1: Double = f1Score(recall, precision)

    println(s"$hit,$testNum,$recNum")

    (recall, precision, f1)
  }

  /**
   * Computes recall, precision and F1 from the exact intersection of two rating sets,
   * prints all three, and returns the F1 score (despite the method name).
   *
   * @param test   reference ratings
   * @param result produced ratings
   * @return the F1 score
   */
  def recallAndPrecision(test: RDD[Rating], result: RDD[Rating]): Double = {
    val numHit: Long = result.intersection(test).count
    val recall: Double = numHit * 1.0 / test.count
    val precision: Double = numHit * 1.0 / result.count
    val f1: Double = f1Score(recall, precision)
    System.out.println(s"recall : $recall\nprecision : $precision\nf1 : $f1")
    f1
  }
}
Example 3
Source File: MatrixFactorizationModelWrapper.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.api.python

import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.recommendation.{MatrixFactorizationModel, Rating}
import org.apache.spark.rdd.RDD

/**
 * A [[MatrixFactorizationModel]] rebuilt from an existing model's rank and factor RDDs,
 * with extra methods whose inputs and outputs pass through `SerDe`.
 */
private[python] class MatrixFactorizationModelWrapper(model: MatrixFactorizationModel)
  extends MatrixFactorizationModel(model.rank, model.userFeatures, model.productFeatures) {

  /** Predicts ratings for the pairs obtained by running `SerDe.asTupleRDD` on the input. */
  def predict(userAndProducts: JavaRDD[Array[Any]]): RDD[Rating] = {
    val pairs = SerDe.asTupleRDD(userAndProducts.rdd)
    predict(pairs)
  }

  /** User factors as (user, dense vector) pairs, converted with `SerDe.fromTuple2RDD`. */
  def getUserFeatures: RDD[Array[Any]] = {
    val userVectors = userFeatures.map(entry => (entry._1, Vectors.dense(entry._2)))
    SerDe.fromTuple2RDD(userVectors.asInstanceOf[RDD[(Any, Any)]])
  }

  /** Product factors as (product, dense vector) pairs, converted with `SerDe.fromTuple2RDD`. */
  def getProductFeatures: RDD[Array[Any]] = {
    val productVectors = productFeatures.map(entry => (entry._1, Vectors.dense(entry._2)))
    SerDe.fromTuple2RDD(productVectors.asInstanceOf[RDD[(Any, Any)]])
  }

  /** Result of `recommendProductsForUsers(num)`, converted with `SerDe.fromTuple2RDD`. */
  def wrappedRecommendProductsForUsers(num: Int): RDD[Array[Any]] = {
    val perUser = recommendProductsForUsers(num)
    SerDe.fromTuple2RDD(perUser.asInstanceOf[RDD[(Any, Any)]])
  }

  /** Result of `recommendUsersForProducts(num)`, converted with `SerDe.fromTuple2RDD`. */
  def wrappedRecommendUsersForProducts(num: Int): RDD[Array[Any]] = {
    val perProduct = recommendUsersForProducts(num)
    SerDe.fromTuple2RDD(perProduct.asInstanceOf[RDD[(Any, Any)]])
  }
}
Example 4
Source File: RecommendationExample.scala From BigDatalog with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkContext, SparkConf}
// $example on$
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
import org.apache.spark.mllib.recommendation.Rating
// $example off$

/**
 * Collaborative-filtering example.
 *
 * Reads "user,item,rating" lines from `data/mllib/als/test.data`, trains an ALS model,
 * prints the mean squared error of the model on the same ratings it was trained on,
 * then saves the model and loads it back.
 */
object RecommendationExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("CollaborativeFilteringExample")
    val sc = new SparkContext(conf)
    // $example on$
    // Load and parse the data; a line that does not have exactly three comma-separated
    // fields fails the pattern match.
    val data = sc.textFile("data/mllib/als/test.data")
    val ratings = data.map(_.split(',') match {
      case Array(user, item, rate) =>
        Rating(user.toInt, item.toInt, rate.toDouble)
    })

    // Build the recommendation model using ALS
    val rank = 10
    val numIterations = 10
    val model = ALS.train(ratings, rank, numIterations, 0.01)

    // Evaluate the model on rating data
    val usersProducts = ratings.map { case Rating(user, product, rate) =>
      (user, product)
    }
    val predictions =
      model.predict(usersProducts).map { case Rating(user, product, rate) =>
        ((user, product), rate)
      }
    // Pair every observed rating with its prediction, keyed by (user, product).
    val ratesAndPreds = ratings.map { case Rating(user, product, rate) =>
      ((user, product), rate)
    }.join(predictions)
    val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) =>
      val err = (r1 - r2)
      err * err
    }.mean()
    println(s"Mean Squared Error = $MSE")

    // Save and load model
    model.save(sc, "target/tmp/myCollaborativeFilter")
    val sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")
    // $example off$

    // Release the SparkContext once the example has finished.
    sc.stop()
  }
}
// scalastyle:on println
Example 5
Source File: MatrixFactorizationModelWrapper.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.api.python

import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.recommendation.{MatrixFactorizationModel, Rating}
import org.apache.spark.rdd.RDD

/**
 * A [[MatrixFactorizationModel]] rebuilt from an existing model's rank and factor RDDs,
 * with extra methods whose inputs and outputs pass through `SerDe`.
 */
private[python] class MatrixFactorizationModelWrapper(model: MatrixFactorizationModel)
  extends MatrixFactorizationModel(model.rank, model.userFeatures, model.productFeatures) {

  /** Predicts ratings for the pairs obtained by running `SerDe.asTupleRDD` on the input. */
  def predict(userAndProducts: JavaRDD[Array[Any]]): RDD[Rating] = {
    val pairs = SerDe.asTupleRDD(userAndProducts.rdd)
    predict(pairs)
  }

  /** User factors as (user, dense vector) pairs, converted with `SerDe.fromTuple2RDD`. */
  def getUserFeatures: RDD[Array[Any]] = {
    val userVectors = userFeatures.map(entry => (entry._1, Vectors.dense(entry._2)))
    SerDe.fromTuple2RDD(userVectors.asInstanceOf[RDD[(Any, Any)]])
  }

  /** Product factors as (product, dense vector) pairs, converted with `SerDe.fromTuple2RDD`. */
  def getProductFeatures: RDD[Array[Any]] = {
    val productVectors = productFeatures.map(entry => (entry._1, Vectors.dense(entry._2)))
    SerDe.fromTuple2RDD(productVectors.asInstanceOf[RDD[(Any, Any)]])
  }

  /** Result of `recommendProductsForUsers(num)`, converted with `SerDe.fromTuple2RDD`. */
  def wrappedRecommendProductsForUsers(num: Int): RDD[Array[Any]] = {
    val perUser = recommendProductsForUsers(num)
    SerDe.fromTuple2RDD(perUser.asInstanceOf[RDD[(Any, Any)]])
  }

  /** Result of `recommendUsersForProducts(num)`, converted with `SerDe.fromTuple2RDD`. */
  def wrappedRecommendUsersForProducts(num: Int): RDD[Array[Any]] = {
    val perProduct = recommendUsersForProducts(num)
    SerDe.fromTuple2RDD(perProduct.asInstanceOf[RDD[(Any, Any)]])
  }
}
Example 6
Source File: RecommendationExample.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}
// $example on$
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
import org.apache.spark.mllib.recommendation.Rating
// $example off$

/**
 * Trains an ALS model on `data/mllib/als/test.data`, prints its mean squared error on
 * the training ratings, then saves the model and loads it back.
 */
object RecommendationExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("CollaborativeFilteringExample")
    val sc = new SparkContext(conf)
    // $example on$
    // Load and parse the data: each line is "user,item,rating"
    val lines = sc.textFile("data/mllib/als/test.data")
    val ratings = lines.map { line =>
      val Array(user, item, rate) = line.split(',')
      Rating(user.toInt, item.toInt, rate.toDouble)
    }

    // Build the recommendation model using ALS
    val rank = 10
    val numIterations = 10
    val lambda = 0.01
    val model = ALS.train(ratings, rank, numIterations, lambda)

    // Evaluate the model on rating data
    val userProductPairs = ratings.map(r => (r.user, r.product))
    val predictedByPair = model
      .predict(userProductPairs)
      .map(p => ((p.user, p.product), p.rating))
    val observedByPair = ratings.map(r => ((r.user, r.product), r.rating))
    val squaredErrors = observedByPair.join(predictedByPair).map { case (_, (actual, predicted)) =>
      val diff = actual - predicted
      diff * diff
    }
    val mse = squaredErrors.mean()
    println(s"Mean Squared Error = $mse")

    // Save and load model
    val modelPath = "target/tmp/myCollaborativeFilter"
    model.save(sc, modelPath)
    val sameModel = MatrixFactorizationModel.load(sc, modelPath)
    // $example off$

    sc.stop()
  }
}
// scalastyle:on println
Example 7
Source File: MatrixFactorizationModelWrapper.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.api.python

import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.recommendation.{MatrixFactorizationModel, Rating}
import org.apache.spark.rdd.RDD

/**
 * A [[MatrixFactorizationModel]] rebuilt from an existing model's rank and factor RDDs,
 * with extra methods whose inputs and outputs pass through `SerDe`.
 */
private[python] class MatrixFactorizationModelWrapper(model: MatrixFactorizationModel)
  extends MatrixFactorizationModel(model.rank, model.userFeatures, model.productFeatures) {

  /** Predicts ratings for the pairs obtained by running `SerDe.asTupleRDD` on the input. */
  def predict(userAndProducts: JavaRDD[Array[Any]]): RDD[Rating] = {
    val pairs = SerDe.asTupleRDD(userAndProducts.rdd)
    predict(pairs)
  }

  /** User factors as (user, dense vector) pairs, converted with `SerDe.fromTuple2RDD`. */
  def getUserFeatures: RDD[Array[Any]] = {
    val userVectors = userFeatures.map(entry => (entry._1, Vectors.dense(entry._2)))
    SerDe.fromTuple2RDD(userVectors.asInstanceOf[RDD[(Any, Any)]])
  }

  /** Product factors as (product, dense vector) pairs, converted with `SerDe.fromTuple2RDD`. */
  def getProductFeatures: RDD[Array[Any]] = {
    val productVectors = productFeatures.map(entry => (entry._1, Vectors.dense(entry._2)))
    SerDe.fromTuple2RDD(productVectors.asInstanceOf[RDD[(Any, Any)]])
  }
}
Example 8
Source File: MatrixFactorizationModelWrapper.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.api.python

import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.recommendation.{MatrixFactorizationModel, Rating}
import org.apache.spark.rdd.RDD

/**
 * A [[MatrixFactorizationModel]] rebuilt from an existing model's rank and factor RDDs,
 * with extra methods whose inputs and outputs pass through `SerDe`.
 */
private[python] class MatrixFactorizationModelWrapper(model: MatrixFactorizationModel)
  extends MatrixFactorizationModel(model.rank, model.userFeatures, model.productFeatures) {

  /** Predicts ratings for the pairs obtained by running `SerDe.asTupleRDD` on the input. */
  def predict(userAndProducts: JavaRDD[Array[Any]]): RDD[Rating] = {
    val pairs = SerDe.asTupleRDD(userAndProducts.rdd)
    predict(pairs)
  }

  /** User factors as (user, dense vector) pairs, converted with `SerDe.fromTuple2RDD`. */
  def getUserFeatures: RDD[Array[Any]] = {
    val userVectors = userFeatures.map(entry => (entry._1, Vectors.dense(entry._2)))
    SerDe.fromTuple2RDD(userVectors.asInstanceOf[RDD[(Any, Any)]])
  }

  /** Product factors as (product, dense vector) pairs, converted with `SerDe.fromTuple2RDD`. */
  def getProductFeatures: RDD[Array[Any]] = {
    val productVectors = productFeatures.map(entry => (entry._1, Vectors.dense(entry._2)))
    SerDe.fromTuple2RDD(productVectors.asInstanceOf[RDD[(Any, Any)]])
  }
}
Example 9
Source File: MatrixFactorizationModelWrapper.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.api.python

import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.recommendation.{MatrixFactorizationModel, Rating}
import org.apache.spark.rdd.RDD

/**
 * A [[MatrixFactorizationModel]] rebuilt from an existing model's rank and factor RDDs,
 * with extra methods whose inputs and outputs pass through `SerDe`.
 */
private[python] class MatrixFactorizationModelWrapper(model: MatrixFactorizationModel)
  extends MatrixFactorizationModel(model.rank, model.userFeatures, model.productFeatures) {

  /** Predicts ratings for the pairs obtained by running `SerDe.asTupleRDD` on the input. */
  def predict(userAndProducts: JavaRDD[Array[Any]]): RDD[Rating] = {
    val pairs = SerDe.asTupleRDD(userAndProducts.rdd)
    predict(pairs)
  }

  /** User factors as (user, dense vector) pairs, converted with `SerDe.fromTuple2RDD`. */
  def getUserFeatures: RDD[Array[Any]] = {
    val userVectors = userFeatures.map(entry => (entry._1, Vectors.dense(entry._2)))
    SerDe.fromTuple2RDD(userVectors.asInstanceOf[RDD[(Any, Any)]])
  }

  /** Product factors as (product, dense vector) pairs, converted with `SerDe.fromTuple2RDD`. */
  def getProductFeatures: RDD[Array[Any]] = {
    val productVectors = productFeatures.map(entry => (entry._1, Vectors.dense(entry._2)))
    SerDe.fromTuple2RDD(productVectors.asInstanceOf[RDD[(Any, Any)]])
  }

  /** Result of `recommendProductsForUsers(num)`, converted with `SerDe.fromTuple2RDD`. */
  def wrappedRecommendProductsForUsers(num: Int): RDD[Array[Any]] = {
    val perUser = recommendProductsForUsers(num)
    SerDe.fromTuple2RDD(perUser.asInstanceOf[RDD[(Any, Any)]])
  }

  /** Result of `recommendUsersForProducts(num)`, converted with `SerDe.fromTuple2RDD`. */
  def wrappedRecommendUsersForProducts(num: Int): RDD[Array[Any]] = {
    val perProduct = recommendUsersForProducts(num)
    SerDe.fromTuple2RDD(perProduct.asInstanceOf[RDD[(Any, Any)]])
  }
}
Example 10
Source File: RecommendationExample.scala From drizzle-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}
// $example on$
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
import org.apache.spark.mllib.recommendation.Rating
// $example off$

/**
 * Collaborative-filtering example.
 *
 * Reads "user,item,rating" lines from `data/mllib/als/test.data`, trains an ALS model,
 * prints the mean squared error of the model on the same ratings it was trained on,
 * then saves the model and loads it back.
 */
object RecommendationExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("CollaborativeFilteringExample")
    val sc = new SparkContext(conf)
    // $example on$
    // Load and parse the data; a line that does not have exactly three comma-separated
    // fields fails the pattern match.
    val data = sc.textFile("data/mllib/als/test.data")
    val ratings = data.map(_.split(',') match {
      case Array(user, item, rate) =>
        Rating(user.toInt, item.toInt, rate.toDouble)
    })

    // Build the recommendation model using ALS
    val rank = 10
    val numIterations = 10
    val model = ALS.train(ratings, rank, numIterations, 0.01)

    // Evaluate the model on rating data
    val usersProducts = ratings.map { case Rating(user, product, rate) =>
      (user, product)
    }
    val predictions =
      model.predict(usersProducts).map { case Rating(user, product, rate) =>
        ((user, product), rate)
      }
    // Pair every observed rating with its prediction, keyed by (user, product).
    val ratesAndPreds = ratings.map { case Rating(user, product, rate) =>
      ((user, product), rate)
    }.join(predictions)
    val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) =>
      val err = (r1 - r2)
      err * err
    }.mean()
    println(s"Mean Squared Error = $MSE")

    // Save and load model
    model.save(sc, "target/tmp/myCollaborativeFilter")
    val sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")
    // $example off$

    // Release the SparkContext once the example has finished.
    sc.stop()
  }
}
// scalastyle:on println
Example 11
Source File: RankingDataProvider.scala From spark-ranking-metrics with The Unlicense | 5 votes |
package com.github.jongwook

import org.apache.spark.SparkConf
import org.apache.spark.mllib.recommendation.{ALS, MatrixFactorizationModel, Rating}
import org.apache.spark.sql.SparkSession
import org.scalatest._

object RankingDataProvider {

  /**
   * Splits `ratings` 90/10 with a fixed seed, trains an implicit-feedback ALS model on
   * the larger part, and recommends `k` products for every user that appears in the
   * smaller part.
   *
   * @param ratings the full rating set
   * @param k       number of products to recommend per user
   * @return (predicted ratings, held-out ratings)
   */
  def apply(ratings: Seq[Rating], k: Int = 100): (Seq[Rating], Seq[Rating]) = {
    // Use the configured master if there is one, otherwise run locally.
    val master = new SparkConf().get("spark.master", "local[8]")
    val spark = SparkSession.builder().master(master).getOrCreate()
    val sc = spark.sparkContext

    val allRatings = sc.parallelize(ratings).cache()
    val Array(trainRatings, testRatings) = allRatings.randomSplit(Array(0.9, 0.1), 0)

    val model = ALS.trainImplicit(
      trainRatings, rank = 10, iterations = 2, lambda = 2, blocks = 100, alpha = 10)

    // Keep only the user factors of users that occur in the held-out split.
    val heldOutUsers = sc.broadcast(testRatings.map(_.user).collect().toSet)
    val heldOutUserFeatures = model.userFeatures
      .filter(entry => heldOutUsers.value.contains(entry._1))
      .repartition(100)
      .cache()
    val productFeatures = model.productFeatures.repartition(100).cache()

    val testModel = new MatrixFactorizationModel(model.rank, heldOutUserFeatures, productFeatures)

    val prediction = testModel.recommendProductsForUsers(k).values.flatMap(recs => recs).collect()
    val groundTruth = testRatings.collect()

    (prediction, groundTruth)
  }
}

class RankingDataProvider extends FlatSpec with Matchers {
  "Ranking Data Provider" should "calculate the rankings" in {
    val ratings = MovieLensLoader.load()
    val (prediction, groundTruth) = RankingDataProvider(ratings)
    // The users that received predictions must be exactly the held-out users.
    val predictedUsers = prediction.map(_.user).distinct.sorted
    val heldOutUsers = groundTruth.map(_.user).distinct.sorted
    predictedUsers should equal (heldOutUsers)
  }
}
Example 12
Source File: MatrixFactorizationModelWrapper.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.api.python

import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.recommendation.{MatrixFactorizationModel, Rating}
import org.apache.spark.rdd.RDD

/**
 * A [[MatrixFactorizationModel]] rebuilt from an existing model's rank and factor RDDs,
 * with extra methods whose inputs and outputs pass through `SerDe`.
 */
private[python] class MatrixFactorizationModelWrapper(model: MatrixFactorizationModel)
  extends MatrixFactorizationModel(model.rank, model.userFeatures, model.productFeatures) {

  /** Predicts ratings for the pairs obtained by running `SerDe.asTupleRDD` on the input. */
  def predict(userAndProducts: JavaRDD[Array[Any]]): RDD[Rating] = {
    val pairs = SerDe.asTupleRDD(userAndProducts.rdd)
    predict(pairs)
  }

  /** User factors as (user, dense vector) pairs, converted with `SerDe.fromTuple2RDD`. */
  def getUserFeatures: RDD[Array[Any]] = {
    val userVectors = userFeatures.map(entry => (entry._1, Vectors.dense(entry._2)))
    SerDe.fromTuple2RDD(userVectors.asInstanceOf[RDD[(Any, Any)]])
  }

  /** Product factors as (product, dense vector) pairs, converted with `SerDe.fromTuple2RDD`. */
  def getProductFeatures: RDD[Array[Any]] = {
    val productVectors = productFeatures.map(entry => (entry._1, Vectors.dense(entry._2)))
    SerDe.fromTuple2RDD(productVectors.asInstanceOf[RDD[(Any, Any)]])
  }

  /** Result of `recommendProductsForUsers(num)`, converted with `SerDe.fromTuple2RDD`. */
  def wrappedRecommendProductsForUsers(num: Int): RDD[Array[Any]] = {
    val perUser = recommendProductsForUsers(num)
    SerDe.fromTuple2RDD(perUser.asInstanceOf[RDD[(Any, Any)]])
  }

  /** Result of `recommendUsersForProducts(num)`, converted with `SerDe.fromTuple2RDD`. */
  def wrappedRecommendUsersForProducts(num: Int): RDD[Array[Any]] = {
    val perProduct = recommendUsersForProducts(num)
    SerDe.fromTuple2RDD(perProduct.asInstanceOf[RDD[(Any, Any)]])
  }
}
Example 13
Source File: RecommendationExample.scala From sparkoscope with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}
// $example on$
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
import org.apache.spark.mllib.recommendation.Rating
// $example off$

/**
 * Collaborative-filtering example.
 *
 * Reads "user,item,rating" lines from `data/mllib/als/test.data`, trains an ALS model,
 * prints the mean squared error of the model on the same ratings it was trained on,
 * then saves the model and loads it back.
 */
object RecommendationExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("CollaborativeFilteringExample")
    val sc = new SparkContext(conf)
    // $example on$
    // Load and parse the data; a line that does not have exactly three comma-separated
    // fields fails the pattern match.
    val data = sc.textFile("data/mllib/als/test.data")
    val ratings = data.map(_.split(',') match {
      case Array(user, item, rate) =>
        Rating(user.toInt, item.toInt, rate.toDouble)
    })

    // Build the recommendation model using ALS
    val rank = 10
    val numIterations = 10
    val model = ALS.train(ratings, rank, numIterations, 0.01)

    // Evaluate the model on rating data
    val usersProducts = ratings.map { case Rating(user, product, rate) =>
      (user, product)
    }
    val predictions =
      model.predict(usersProducts).map { case Rating(user, product, rate) =>
        ((user, product), rate)
      }
    // Pair every observed rating with its prediction, keyed by (user, product).
    val ratesAndPreds = ratings.map { case Rating(user, product, rate) =>
      ((user, product), rate)
    }.join(predictions)
    val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) =>
      val err = (r1 - r2)
      err * err
    }.mean()
    println(s"Mean Squared Error = $MSE")

    // Save and load model
    model.save(sc, "target/tmp/myCollaborativeFilter")
    val sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")
    // $example off$

    // Release the SparkContext once the example has finished.
    sc.stop()
  }
}
// scalastyle:on println
Example 14
Source File: MovieRecommendation.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package com.packt.ScalaML.MovieRecommendation

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.SQLImplicits
import org.apache.spark.sql._
import org.apache.spark.sql.Dataset
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
import org.apache.spark.mllib.recommendation.Rating
import scala.Tuple2
import org.apache.spark.rdd.RDD

object MovieRecommendation {

  /**
   * Computes the root-mean-square error of `model` on `data`; lower is better.
   *
   * @param model         the trained factorization model
   * @param data          ratings to predict and compare against
   * @param implicitPrefs when true, also prints the first five (prediction, rating) pairs
   * @return the RMSE over every (user, product) pair the model returned a prediction for
   */
  def computeRmse(model: MatrixFactorizationModel, data: RDD[Rating], implicitPrefs: Boolean): Double = {
    val predictions: RDD[Rating] = model.predict(data.map(x => (x.user, x.product)))
    val predictionsAndRatings = predictions
      .map(x => ((x.user, x.product), x.rating))
      .join(data.map(x => ((x.user, x.product), x.rating)))
      .values
    if (implicitPrefs) {
      println("(Prediction, Rating)")
      println(predictionsAndRatings.take(5).mkString("\n"))
    }
    math.sqrt(predictionsAndRatings.map { case (predicted, actual) =>
      val diff = predicted - actual
      diff * diff
    }.mean())
  }

  /**
   * Converts rows whose first three columns are userId, movieId and rating into `Rating`s.
   * The columns are read as strings because the CSV is loaded without schema inference.
   */
  private def toRatings(ratings: DataFrame): RDD[Rating] =
    ratings.rdd.map { row =>
      Rating(row.getString(0).toInt, row.getString(1).toInt, row.getString(2).toDouble)
    }

  def main(args: Array[String]): Unit = {
    // NOTE(review): the application name looks copied from another example — confirm.
    val spark: SparkSession = SparkSession
      .builder()
      .appName("JavaLDAExample")
      .master("local[*]")
      .config("spark.sql.warehouse.dir", "E:/Exp/")
      .getOrCreate()

    val ratingsFile = "data/ratings.csv"
    val df1 = spark.read.format("com.databricks.spark.csv").option("header", true).load(ratingsFile)
    val ratingsDF = df1.select(df1.col("userId"), df1.col("movieId"), df1.col("rating"), df1.col("timestamp"))
    ratingsDF.show(false)

    val moviesFile = "data/movies.csv"
    val df2 = spark.read.format("com.databricks.spark.csv").option("header", "true").load(moviesFile)
    val moviesDF = df2.select(df2.col("movieId"), df2.col("title"), df2.col("genres"))
    moviesDF.show(false)

    ratingsDF.createOrReplaceTempView("ratings")
    moviesDF.createOrReplaceTempView("movies")

    // The block previously used `model` and `testRDD` without defining them.
    // Randomly split the ratings into training (75%) and test (25%) data.
    val splits = ratingsDF.randomSplit(Array(0.75, 0.25), seed = 12345L)
    val trainingRDD = toRatings(splits(0))
    val testRDD = toRatings(splits(1))

    // NOTE(review): placeholder hyperparameters — tune rank, iterations and lambda for the data set.
    val rank = 20
    val numIterations = 15
    val lambda = 0.10
    val model = ALS.train(trainingRDD, rank, numIterations, lambda)

    val rmseTest = computeRmse(model, testRDD, true)
    println("Test RMSE: = " + rmseTest) // Less is better

    // Movie recommendation for a specific user. Get the top 6 movie predictions for user 668
    println("Recommendations: (MovieId => Rating)")
    println("----------------------------------")
    val recommendationsUser = model.recommendProducts(668, 6)
    recommendationsUser.map(rating => (rating.product, rating.rating)).foreach(println)
    println("----------------------------------")

    spark.stop()
  }
}
Example 15
Source File: RecommendationModelReuse.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package com.packt.ScalaML.MovieRecommendation

import org.apache.spark.sql.SparkSession
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
import org.apache.spark.mllib.recommendation.Rating
import scala.Tuple2
import org.apache.spark.rdd.RDD

/**
 * Loads a previously saved factorization model from `model/MovieRecomModel/`, prints
 * recommendations for one user, and reports the model's RMSE on a 25% sample of
 * `data/ratings.csv`.
 */
object RecommendationModelReuse {
  def main(args: Array[String]): Unit = {
    // NOTE(review): the application name looks copied from another example — confirm.
    val spark: SparkSession = SparkSession
      .builder()
      .appName("JavaLDAExample")
      .master("local[*]")
      .config("spark.sql.warehouse.dir", "E:/Exp/")
      .getOrCreate()

    val ratingsFile = "data/ratings.csv"
    val ratingDF = spark.read.format("com.databricks.spark.csv").option("header", true).load(ratingsFile)
    val selectedRatingsDF = ratingDF.select(
      ratingDF.col("userId"), ratingDF.col("movieId"), ratingDF.col("rating"), ratingDF.col("timestamp"))

    // Randomly split ratings into training data (75%) and test data (25%); only the
    // test part is used here.
    val splits = selectedRatingsDF.randomSplit(Array(0.75, 0.25), seed = 12345L)
    val testData = splits(1)

    // Columns are read as strings because the CSV is loaded without schema inference.
    val testRDD = testData.rdd.map { row =>
      val userId = row.getString(0)
      val movieId = row.getString(1)
      val ratings = row.getString(2)
      Rating(userId.toInt, movieId.toInt, ratings.toDouble)
    }

    // Load the saved model back
    val sameModel = MatrixFactorizationModel.load(spark.sparkContext, "model/MovieRecomModel/")

    // The user and list size that both reports below refer to.
    val userId = 458
    val numRecommendations = 10

    // Making predictions: top `numRecommendations` movie predictions for `userId`
    println("Rating:(UserID, MovieID, Rating)")
    println("----------------------------------")
    val topRecsForUser = sameModel.recommendProducts(userId, numRecommendations)
    topRecsForUser.foreach(println)
    println("----------------------------------")

    val rmseTest = MovieRecommendation.computeRmse(sameModel, testRDD, true)
    println("Test RMSE: = " + rmseTest) // Less is better

    // Same recommendations again, shown as (MovieId => Rating); the list computed above
    // is reused instead of querying the model a second time.
    println("Recommendations: (MovieId => Rating)")
    println("----------------------------------")
    topRecsForUser.map(rating => (rating.product, rating.rating)).foreach(println)
    println("----------------------------------")

    spark.stop()
  }
}
Example 16
Source File: RecommenderSystem.scala From recommendersystem with Apache License 2.0 | 5 votes |
package com.infosupport.recommendedcontent.core

import java.io.Serializable

import akka.actor.{Props, Actor, ActorLogging}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel

  // NOTE(review): the enclosing class header is not part of this excerpt; the closing
  // brace at the end belongs to it.

  /**
   * Replies to the sender with up to `count` recommendations for `userId`.
   * When no model is available the reply carries an empty list.
   */
  private def generateRecommendations(userId: Int, count: Int) = {
    log.info(s"Generating ${count} recommendations for user with ID ${userId}")

    // Ask the model, if there is one, and convert its ratings to Recommendation values.
    val results = model
      .map { trained =>
        trained
          .recommendProducts(userId, count)
          .map(rating => Recommendation(rating.product, rating.rating))
          .toList
      }
      .getOrElse(Nil)

    sender ! Recommendations(results)
  }
}
Example 17
Source File: ModelTrainer.scala From recommendersystem with Apache License 2.0 | 5 votes |
package com.infosupport.recommendedcontent.core

import akka.actor.{Props, ActorLogging, Actor}
import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.{Rating, ALS, MatrixFactorizationModel}

import com.datastax.spark.connector._

  // NOTE(review): the enclosing class header is not part of this excerpt; the closing
  // brace at the end belongs to it.

  /**
   * Trains an ALS model on the ratings stored in the configured Cassandra table,
   * sends the result to the sender as a `TrainingResult`, then stops this actor.
   */
  private def trainModel() = {
    val settings = context.system.settings.config
    val table = settings.getString("cassandra.table")
    val keyspace = settings.getString("cassandra.keyspace")

    // Read the user ratings from the database and map each record to the Rating
    // structure that ALS expects.
    val ratings = sc.cassandraTable(keyspace, table).map { record =>
      val user = record.get[Int]("user_id")
      val item = record.get[Int]("item_id")
      val score = record.get[Double]("rating")
      Rating(user, item, score)
    }

    // These settings control how closely the predictions fit the observations loaded
    // from Cassandra. Modify them to optimize the model.
    val rank = 10
    val numIterations = 10
    val regularization = 0.01

    val trained = ALS.train(ratings, rank, numIterations, regularization)

    sender ! TrainingResult(trained)

    context.stop(self)
  }
}
Example 18
Source File: MatrixFactorizationModelWrapper.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.api.python

import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.recommendation.{MatrixFactorizationModel, Rating}
import org.apache.spark.rdd.RDD

/**
 * A [[MatrixFactorizationModel]] rebuilt from an existing model's rank and factor RDDs,
 * with extra methods whose inputs and outputs pass through `SerDe`.
 */
private[python] class MatrixFactorizationModelWrapper(model: MatrixFactorizationModel)
  extends MatrixFactorizationModel(model.rank, model.userFeatures, model.productFeatures) {

  /** Predicts ratings for the pairs obtained by running `SerDe.asTupleRDD` on the input. */
  def predict(userAndProducts: JavaRDD[Array[Any]]): RDD[Rating] = {
    val pairs = SerDe.asTupleRDD(userAndProducts.rdd)
    predict(pairs)
  }

  /** User factors as (user, dense vector) pairs, converted with `SerDe.fromTuple2RDD`. */
  def getUserFeatures: RDD[Array[Any]] = {
    val userVectors = userFeatures.map(entry => (entry._1, Vectors.dense(entry._2)))
    SerDe.fromTuple2RDD(userVectors.asInstanceOf[RDD[(Any, Any)]])
  }

  /** Product factors as (product, dense vector) pairs, converted with `SerDe.fromTuple2RDD`. */
  def getProductFeatures: RDD[Array[Any]] = {
    val productVectors = productFeatures.map(entry => (entry._1, Vectors.dense(entry._2)))
    SerDe.fromTuple2RDD(productVectors.asInstanceOf[RDD[(Any, Any)]])
  }

  /** Result of `recommendProductsForUsers(num)`, converted with `SerDe.fromTuple2RDD`. */
  def wrappedRecommendProductsForUsers(num: Int): RDD[Array[Any]] = {
    val perUser = recommendProductsForUsers(num)
    SerDe.fromTuple2RDD(perUser.asInstanceOf[RDD[(Any, Any)]])
  }

  /** Result of `recommendUsersForProducts(num)`, converted with `SerDe.fromTuple2RDD`. */
  def wrappedRecommendUsersForProducts(num: Int): RDD[Array[Any]] = {
    val perProduct = recommendUsersForProducts(num)
    SerDe.fromTuple2RDD(perProduct.asInstanceOf[RDD[(Any, Any)]])
  }
}