org.apache.spark.mllib.recommendation.ALS Scala Examples
The following examples show how to use org.apache.spark.mllib.recommendation.ALS.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: RankingDataProvider.scala From spark-ranking-metrics with The Unlicense | 5 votes |
package com.github.jongwook

import org.apache.spark.SparkConf
import org.apache.spark.mllib.recommendation.{ALS, MatrixFactorizationModel, Rating}
import org.apache.spark.sql.SparkSession
import org.scalatest._

/** Produces a (prediction, ground truth) pair of rating sequences from a set of ratings. */
object RankingDataProvider {

  /**
   * Splits `ratings` 90/10 with seed 0, trains an implicit-feedback ALS model on the larger
   * part and recommends `k` products to every user that occurs in the smaller part.
   *
   * @param ratings ratings to split into training and test sets
   * @param k       number of products to recommend per test user
   * @return the recommendations for the test users, and the held-out test ratings
   */
  def apply(ratings: Seq[Rating], k: Int = 100): (Seq[Rating], Seq[Rating]) = {
    // Honour an externally configured master; otherwise run locally with 8 threads.
    val master = new SparkConf().get("spark.master", "local[8]")
    val spark = SparkSession.builder().master(master).getOrCreate()
    val sc = spark.sparkContext

    val splits = sc.parallelize(ratings).cache().randomSplit(Array(0.9, 0.1), 0)
    val trainRatings = splits(0)
    val testRatings = splits(1)

    val model = ALS.trainImplicit(trainRatings, rank = 10, iterations = 2, lambda = 2, blocks = 100, alpha = 10)

    // Keep only the feature vectors of users that occur in the test split.
    val testUserIds = spark.sparkContext.broadcast(testRatings.map(_.user).collect().toSet)
    val testUserFeatures = model.userFeatures
      .filter(entry => testUserIds.value.contains(entry._1))
      .repartition(100)
      .cache()
    val productFeatures = model.productFeatures.repartition(100).cache()
    val testModel = new MatrixFactorizationModel(model.rank, testUserFeatures, productFeatures)

    val prediction = testModel.recommendProductsForUsers(k).values.flatMap(recs => recs).collect()
    val groundTruth = testRatings.collect()
    (prediction, groundTruth)
  }
}

/** Checks that recommendations are produced for exactly the users of the test split. */
class RankingDataProvider extends FlatSpec with Matchers {
  "Ranking Data Provider" should "calculate the rankings" in {
    val (prediction, groundTruth) = RankingDataProvider(MovieLensLoader.load())

    def userIds(rs: Seq[Rating]): Seq[Int] = rs.map(_.user).distinct.sorted

    userIds(prediction) should equal (userIds(groundTruth))
  }
}
Example 2
Source File: Recommender.scala From awesome-recommendation-engine with Apache License 2.0 | 5 votes |
package example.utils

import example.model.AmazonRating
import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.{ALS, Rating}
import org.joda.time.{Seconds, DateTime}

import scala.io.Source
import scala.util.Random

/**
 * Loads "userId,productId,score" lines from `ratingFile`, keeps the users with a bounded number
 * of ratings, and trains an ALS model on demand to recommend products for the pseudo-user
 * `MyUsername`.
 *
 * @param sc         Spark context used to read the file and train models
 * @param ratingFile path of the comma-separated rating file
 */
class Recommender(@transient sc: SparkContext, ratingFile: String) extends Serializable {
  val NumRecommendations = 10
  val MinRecommendationsPerUser = 10
  val MaxRecommendationsPerUser = 20
  val MyUsername = "myself"
  val NumPartitions = 20

  @transient val random = new Random() with Serializable

  println("Using this ratingFile: " + ratingFile)

  // Parse every line of the rating file into an AmazonRating.
  val rawTrainingRatings = sc.textFile(ratingFile).map { line =>
    line.split(",") match {
      case Array(userId, productId, scoreStr) => AmazonRating(userId, productId, scoreStr.toDouble)
    }
  }

  // Keep users with at least MinRecommendationsPerUser and fewer than
  // MaxRecommendationsPerUser rated products.
  val trainingRatings = rawTrainingRatings
    .groupBy(_.userId)
    .filter { case (_, userRatings) =>
      val n = userRatings.size
      n >= MinRecommendationsPerUser && n < MaxRecommendationsPerUser
    }
    .flatMap { case (_, userRatings) => userRatings }
    .repartition(NumPartitions)
    .cache()

  println(s"Parsed $ratingFile. Kept ${trainingRatings.count()} ratings out of ${rawTrainingRatings.count()}")

  // Dictionaries translating user / product identifiers to the integer indices ALS needs.
  val userDict = new Dictionary(MyUsername +: trainingRatings.map(_.userId).distinct.collect)
  println(s"User Dictionary have ${userDict.size} elements.")

  val productDict = new Dictionary(trainingRatings.map(_.productId).distinct.collect)
  println(s"Product Dictionary have ${productDict.size} elements.")

  /** Translates an AmazonRating into a Spark Rating through the two dictionaries. */
  private def toSparkRating(amazonRating: AmazonRating) = {
    val userIndex = userDict.getIndex(amazonRating.userId)
    val productIndex = productDict.getIndex(amazonRating.productId)
    Rating(userIndex, productIndex, amazonRating.rating)
  }

  /** Translates a Spark Rating back into an AmazonRating through the two dictionaries. */
  private def toAmazonRating(rating: Rating) = {
    val userId = userDict.getWord(rating.user)
    val productId = productDict.getWord(rating.product)
    AmazonRating(userId, productId, rating.rating)
  }

  // Training ratings expressed with dictionary indices.
  val sparkRatings = trainingRatings.map(toSparkRating)

  /** Picks a product identifier uniformly at random from the product dictionary. */
  def getRandomProductId = productDict.getWord(random.nextInt(productDict.size))

  /**
   * Trains an ALS model on the stored ratings plus `ratings`, then returns the
   * `NumRecommendations` best-scored products that do not occur in `ratings`.
   *
   * @param ratings the ratings given by `MyUsername`
   * @return predicted ratings, highest first
   */
  def predict(ratings: Seq[AmazonRating]) = {
    val myRatings = ratings.map(toSparkRating)
    val myRatingRDD = sc.parallelize(myRatings)

    val startAls = DateTime.now
    val trainingSet = (sparkRatings ++ myRatingRDD).repartition(NumPartitions)
    val model = ALS.train(trainingSet, 10, 20, 0.01)

    // Candidate products are all dictionary indices that were not rated in `ratings`.
    val myProducts = myRatings.map(_.product).toSet
    val unseenProducts = (0 until productDict.size).filterNot(myProducts.contains)
    val candidates = sc.parallelize(unseenProducts)

    val myUserId = userDict.getIndex(MyUsername)
    val recommendations = model.predict(candidates.map(productId => (myUserId, productId))).collect
    val endAls = DateTime.now

    val result = recommendations
      .sortBy(r => -r.rating)
      .take(NumRecommendations)
      .map(toAmazonRating)

    val alsTime = Seconds.secondsBetween(startAls, endAls).getSeconds
    println(s"ALS Time: $alsTime seconds")
    result
  }
}
Example 3
Source File: EvaluateResult.scala From learning-spark with Apache License 2.0 | 5 votes |
package com.javachen.grab

import org.apache.spark.mllib.recommendation.{ALS, MatrixFactorizationModel, Rating}
import org.apache.spark.rdd.RDD

/** Offline evaluation metrics for per-user recommendation lists. */
object EvaluateResult {

  /** Returns `numerator / denominator`, or 0.0 when the denominator is zero (avoids NaN). */
  private def safeRatio(numerator: Double, denominator: Double): Double =
    if (denominator == 0.0) 0.0 else numerator / denominator

  /**
   * Fraction of the distinct products in `training` that occur in at least one
   * recommendation list.
   *
   * @param training       ratings whose distinct products form the denominator
   * @param userRecommends recommended product ids per user
   * @return distinct recommended products divided by distinct training products
   */
  def coverage(training: RDD[Rating], userRecommends: RDD[(Int, List[Int])]): Double = {
    val recommendedCount = userRecommends.flatMap(_._2).distinct().count
    val catalogueCount = training.map(_.product).distinct().count
    recommendedCount.toDouble / catalogueCount
  }

  /**
   * Mean of `log(1 + number of training ratings)` over every recommended product.
   *
   * A recommended product without any training rating contributes `log(1) = 0` instead of
   * throwing, and an empty set of recommendations yields 0.0 instead of NaN.
   *
   * @param training       ratings used to count how often each product was rated
   * @param userRecommends recommended product ids per user
   * @return the average log-popularity of the recommended products
   */
  def popularity(training: RDD[Rating], userRecommends: RDD[(Int, List[Int])]): Double = {
    // Count ratings per product without materialising the grouped values.
    val itemPopularity = training.map(r => (r.product, 1)).reduceByKey(_ + _, 4).collectAsMap()
    val recommended = userRecommends.flatMap(_._2).collect()
    if (recommended.isEmpty) 0.0
    else recommended.map(p => math.log(1 + itemPopularity.getOrElse(p, 0))).sum / recommended.length
  }

  /**
   * Recall, precision and F1 of the recommendation lists against the products each user has
   * in `training`. Only users present in both inputs are scored.
   *
   * @param training       ratings providing the relevant products of each user
   * @param userRecommends recommended product ids per user
   * @return (recall, precision, f1); a ratio with a zero denominator is reported as 0.0
   */
  def recallAndPrecisionAndF1(training: RDD[Rating], userRecommends: RDD[(Int, List[Int])]): (Double, Double, Double) = {
    val usersProducts: RDD[(Int, Int)] = training.map { case Rating(user, product, _) => (user, product) }
    val relevantByUser = usersProducts.groupByKey().map { case (user, products) => (user, products.toList) }

    val (hit, testNum, recNum) = userRecommends.join(relevantByUser).map {
      case (_, (mItems, tItems)) =>
        // Hits are recommended items the user really has; a set lookup replaces the
        // quadratic indexed scan over two Lists.
        val relevant = tItems.toSet
        (mItems.count(relevant.contains), tItems.length, mItems.length)
    }.reduce((t1, t2) => (t1._1 + t2._1, t1._2 + t2._2, t1._3 + t2._3))

    val recall = safeRatio(hit, testNum)
    val precision = safeRatio(hit, recNum)
    val f1 = safeRatio(2 * recall * precision, recall + precision)

    println(s"$hit,$testNum,$recNum")
    (recall, precision, f1)
  }

  /**
   * Prints recall, precision and F1 of `result` against `test`, counting a hit for every
   * rating present in both RDDs.
   *
   * @param test   held-out ratings
   * @param result predicted ratings
   * @return the F1 score (0.0 when it is undefined)
   */
  def recallAndPrecision(test: RDD[Rating], result: RDD[Rating]): Double = {
    val numHit: Long = result.intersection(test).count
    val recall = safeRatio(numHit, test.count)
    val precision = safeRatio(numHit, result.count)
    val f1 = safeRatio(2 * recall * precision, recall + precision)

    println("recall : " + recall + "\nprecision : " + precision + "\nf1 : " + f1)
    f1
  }
}
Example 4
Source File: RecommendationExample.scala From BigDatalog with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkContext, SparkConf}
// $example on$
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
import org.apache.spark.mllib.recommendation.Rating
// $example off$

/**
 * Trains an ALS model on "user,item,rating" lines, prints the mean squared error of the model
 * on its own training data, then saves the model and loads it back.
 */
object RecommendationExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("CollaborativeFilteringExample")
    val sc = new SparkContext(conf)
    // $example on$
    // Load and parse the data
    val data = sc.textFile("data/mllib/als/test.data")
    val ratings = data.map(_.split(',') match {
      case Array(user, item, rate) => Rating(user.toInt, item.toInt, rate.toDouble)
    })

    // Build the recommendation model using ALS
    val rank = 10
    val numIterations = 10
    val model = ALS.train(ratings, rank, numIterations, 0.01)

    // Evaluate the model on rating data
    val usersProducts = ratings.map { case Rating(user, product, rate) => (user, product) }
    val predictions = model.predict(usersProducts).map {
      case Rating(user, product, rate) => ((user, product), rate)
    }
    val ratesAndPreds = ratings.map {
      case Rating(user, product, rate) => ((user, product), rate)
    }.join(predictions)
    val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) =>
      val err = (r1 - r2)
      err * err
    }.mean()
    println("Mean Squared Error = " + MSE)

    // Save and load model
    model.save(sc, "target/tmp/myCollaborativeFilter")
    val sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")
    // $example off$

    // Shut the context down explicitly once the example is done.
    sc.stop()
  }
}
// scalastyle:on println
Example 5
Source File: RecommendationExample.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}
// $example on$
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
import org.apache.spark.mllib.recommendation.Rating
// $example off$

/**
 * Collaborative filtering walkthrough: fit ALS on "user,item,rating" lines, report the mean
 * squared error on the training ratings, then round-trip the model through save and load.
 */
object RecommendationExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("CollaborativeFilteringExample"))
    // $example on$
    // Read the comma-separated (user, item, rating) triples
    val ratings = sc.textFile("data/mllib/als/test.data").map { line =>
      line.split(',') match {
        case Array(user, item, rate) => Rating(user.toInt, item.toInt, rate.toDouble)
      }
    }

    // Fit the matrix factorization model
    val rank = 10
    val numIterations = 10
    val model = ALS.train(ratings, rank, numIterations, 0.01)

    // Compare every observed rating with the model's prediction for the same (user, product)
    val observed = ratings.map(r => ((r.user, r.product), r.rating))
    val predicted = model.predict(observed.keys).map(r => ((r.user, r.product), r.rating))
    val MSE = observed.join(predicted).values.map { case (actual, estimate) =>
      val err = actual - estimate
      err * err
    }.mean()
    println(s"Mean Squared Error = $MSE")

    // Persist the model and read it back
    val modelPath = "target/tmp/myCollaborativeFilter"
    model.save(sc, modelPath)
    val sameModel = MatrixFactorizationModel.load(sc, modelPath)
    // $example off$

    sc.stop()
  }
}
// scalastyle:on println
Example 6
Source File: ALSDome.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.examples.mllib

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.Rating

// NOTE(review): the enclosing object / main header and the code that builds `sc`, `ratings`
// and `pratings` are missing from this excerpt; the first statement below is the tail of
// that elided rating-parsing closure.
      Rating(userId.toInt, itemId.toInt, rating.toDouble)
    }
    // Combine the general rating data with the personal ratings
    val movieratings = ratings.union(pratings)
    // Build the ALS model with rank 10, 10 iterations and lambda 0.01
    val model = ALS.train(movieratings, 10, 10, 0.01)
    // Predict the rating of user 944 for movie 195 (The Terminator)
    model.predict(sc.parallelize(Array((944, 195)))).collect.foreach(println)
    // Predict the rating of user 944 for movie 402 (Ghost)
    model.predict(sc.parallelize(Array((944, 402)))).collect.foreach(println)
    // Predict the rating of user 944 for movie 148 (The Ghost and the Darkness);
    // the original repeated movie 402 here although it announced movie 148.
    model.predict(sc.parallelize(Array((944, 148)))).collect.foreach(println)
  }
}
Example 7
Source File: ALSExample.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.examples.mllib

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.Rating

/**
 * Prints the recommendations of every user and post-processes the predictions.
 *
 * NOTE(review): `data`, `model`, `numIterations`, `predictions` and `ratesAndPreds` are used
 * below but their definitions are not part of this excerpt.
 */
object ALSExample {
  def main(args: Array[String]): Unit = {
    // Recommend for every user; the results could be stored in Redis or HBase with the
    // user id as key and the recommendations as value.
    val users = data.map(_.split(",") match {
      case Array(user, _, _) => user
    }).distinct().collect()
    // users: Array[String] = Array(4, 2, 3, 1)
    users.foreach { user =>
      // NOTE(review): `numIterations` is passed as the number of products to recommend —
      // confirm that this is intended.
      val rs = model.recommendProducts(user.toInt, numIterations)
      // The key is the user of the last recommendation, or 0 when there is none.
      val key = rs.lastOption.map(_.user).getOrElse(0)
      // Concatenate "product:rating," for every recommendation.
      val value = rs.map(r => s"${r.product}:${r.rating},").mkString
      println(key.toString + " " + value)
    }

    // Sort the predictions by predicted rating (the sorted array is not used further)
    predictions.collect.sortBy(_._2)
    // Group the predictions by user and merge them; the original marks this part as
    // still to be corrected
    predictions.map { case ((user, product), rate) => (user, (product, rate)) }.groupByKey.collect

    // Format the actual and the predicted rating of every (user, product) pair
    val formatedRatesAndPreds = ratesAndPreds.map {
      case ((user, product), (rate, pred)) => user + "," + product + "," + rate + "," + pred
    }
    formatedRatesAndPreds.collect()
  }
}
Example 8
Source File: ALStrainImplicit.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.examples.mllib

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.Rating

// NOTE(review): the enclosing object / main header and the definitions of `sc`, `userIndex`,
// `songMap` and `triplets` are missing from this excerpt.
    val userMap = userIndex.collectAsMap
    // Broadcast userMap
    val broadcastUserMap = sc.broadcast(userMap)
    // Broadcast songMap
    val broadcastSongMap = sc.broadcast(songMap)
    // Split every triplet line into an array of fields
    val tripArray = triplets.map(_.split("\\W+"))
    // Import Rating
    import org.apache.spark.mllib.recommendation.Rating
    // Turn the field arrays into an RDD of Rating objects
    val ratings = tripArray.map { case Array(user, song, plays) =>
      val userId = broadcastUserMap.value.getOrElse(user, 0)
      // Songs are resolved through the song map; the original looked them up in the user
      // map and left broadcastSongMap unused.
      // NOTE(review): presumably songMap maps song names to Int ids like userMap — confirm.
      val songId = broadcastSongMap.value.getOrElse(song, 0)
      Rating(userId, songId, plays.toDouble)
    }
    // Import ALS
    import org.apache.spark.mllib.recommendation.ALS
    // Train with rank 10 (number of latent features in the model) and 10 iterations
    val model = ALS.trainImplicit(ratings, 10, 10)
    // Extract the (user, song) pairs from the ratings
    val usersSongs = ratings.map { r =>
      println(r.user+"|||"+r.product)
      (r.user, r.product)
    }
    // Predict for the user / song pairs
    val predictions = model.predict(usersSongs)
    predictions.foreach { x =>
      println(x.user.toString()+"|||||"+x.rating.toString()+"======="+x.product.toString())
    }
  }
}
Example 9
Source File: RecommendationExample.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}
// $example on$
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
import org.apache.spark.mllib.recommendation.Rating
// $example off$

/**
 * Trains an ALS model on "user,item,rating" lines, prints the mean squared error of the model
 * on its own training data, then saves the model and loads it back.
 */
object RecommendationExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("CollaborativeFilteringExample")
    val sc = new SparkContext(conf)
    // $example on$
    // Load and parse the data
    val data = sc.textFile("data/mllib/als/test.data")
    val ratings = data.map(_.split(',') match {
      case Array(user, item, rate) => Rating(user.toInt, item.toInt, rate.toDouble)
    })

    // Build the recommendation model using ALS
    val rank = 10
    val numIterations = 10
    val model = ALS.train(ratings, rank, numIterations, 0.01)

    // Evaluate the model on rating data
    val usersProducts = ratings.map { case Rating(user, product, rate) => (user, product) }
    val predictions = model.predict(usersProducts).map {
      case Rating(user, product, rate) => ((user, product), rate)
    }
    val ratesAndPreds = ratings.map {
      case Rating(user, product, rate) => ((user, product), rate)
    }.join(predictions)
    val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) =>
      val err = (r1 - r2)
      err * err
    }.mean()
    println("Mean Squared Error = " + MSE)

    // Save and load model
    model.save(sc, "target/tmp/myCollaborativeFilter")
    val sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")
    // $example off$

    // Shut the context down explicitly once the example is done.
    sc.stop()
  }
}
// scalastyle:on println
Example 10
Source File: RecommendationExample.scala From drizzle-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}
// $example on$
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
import org.apache.spark.mllib.recommendation.Rating
// $example off$

/**
 * Trains an ALS model on "user,item,rating" lines, prints the mean squared error of the model
 * on its own training data, then saves the model and loads it back.
 */
object RecommendationExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("CollaborativeFilteringExample")
    val sc = new SparkContext(conf)
    // $example on$
    // Load and parse the data
    val data = sc.textFile("data/mllib/als/test.data")
    val ratings = data.map(_.split(',') match {
      case Array(user, item, rate) => Rating(user.toInt, item.toInt, rate.toDouble)
    })

    // Build the recommendation model using ALS
    val rank = 10
    val numIterations = 10
    val model = ALS.train(ratings, rank, numIterations, 0.01)

    // Evaluate the model on rating data
    val usersProducts = ratings.map { case Rating(user, product, rate) => (user, product) }
    val predictions = model.predict(usersProducts).map {
      case Rating(user, product, rate) => ((user, product), rate)
    }
    val ratesAndPreds = ratings.map {
      case Rating(user, product, rate) => ((user, product), rate)
    }.join(predictions)
    val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) =>
      val err = (r1 - r2)
      err * err
    }.mean()
    println("Mean Squared Error = " + MSE)

    // Save and load model
    model.save(sc, "target/tmp/myCollaborativeFilter")
    val sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")
    // $example off$

    // Shut the context down explicitly once the example is done.
    sc.stop()
  }
}
// scalastyle:on println
Example 11
Source File: RecommendationExample.scala From sparkoscope with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}
// $example on$
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
import org.apache.spark.mllib.recommendation.Rating
// $example off$

/**
 * Trains an ALS model on "user,item,rating" lines, prints the mean squared error of the model
 * on its own training data, then saves the model and loads it back.
 */
object RecommendationExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("CollaborativeFilteringExample")
    val sc = new SparkContext(conf)
    // $example on$
    // Load and parse the data
    val data = sc.textFile("data/mllib/als/test.data")
    val ratings = data.map(_.split(',') match {
      case Array(user, item, rate) => Rating(user.toInt, item.toInt, rate.toDouble)
    })

    // Build the recommendation model using ALS
    val rank = 10
    val numIterations = 10
    val model = ALS.train(ratings, rank, numIterations, 0.01)

    // Evaluate the model on rating data
    val usersProducts = ratings.map { case Rating(user, product, rate) => (user, product) }
    val predictions = model.predict(usersProducts).map {
      case Rating(user, product, rate) => ((user, product), rate)
    }
    val ratesAndPreds = ratings.map {
      case Rating(user, product, rate) => ((user, product), rate)
    }.join(predictions)
    val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) =>
      val err = (r1 - r2)
      err * err
    }.mean()
    println("Mean Squared Error = " + MSE)

    // Save and load model
    model.save(sc, "target/tmp/myCollaborativeFilter")
    val sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")
    // $example off$

    // Shut the context down explicitly once the example is done.
    sc.stop()
  }
}
// scalastyle:on println
Example 12
Source File: ScalaApp.scala From Machine-Learning-with-Spark-Second-Edition with MIT License | 5 votes |
import java.text.SimpleDateFormat
import java.util.Calendar

import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.{ALS, Rating}
//import org.apache.spark.

// NOTE(review): the enclosing object / main header and the definitions of `sc`, `PATH`,
// `ratings` and `model` are missing from this excerpt.
    // Predicted rating of user 789 for product 123
    val predictedRating = model.predict(789, 123)
    println(predictedRating)

    // Top-K recommendations for one user
    val userId = 789
    val K = 10
    val topKRecs = model.recommendProducts(userId, K)
    println(topKRecs.mkString("\n"))

    // Map from movie id to title, built from the first two '|'-separated fields of u.item
    val movies = sc.textFile(PATH + "/ml-100k/u.item")
    val titles = movies.map(line => line.split("\\|").take(2)).map(array => (array(0).toInt, array(1))).collectAsMap()
    titles(123)
    // res68: String = Frighteners, The (1996)

    val moviesForUser = ratings.keyBy(_.user).lookup(789)
    // moviesForUser: Seq[org.apache.spark.mllib.recommendation.Rating] = WrappedArray(Rating(789,1012,4.0), Rating(789,127,5.0), Rating(789,475,5.0), Rating(789,93,4.0), ...
    // ...
    println(moviesForUser.size)

    // The ten best-rated movies of the user, followed by the recommended ones
    moviesForUser.sortBy(-_.rating).take(10).map(rating => (titles(rating.product), rating.rating)).foreach(println)
    topKRecs.map(rating => (titles(rating.product), rating.rating)).foreach(println)

    sc.stop()
    //bw.close()
  }

  /** Small date helper. */
  class Util {

    /**
     * Formats the current time as a name component.
     *
     * @return the current time as "yyyy-MM-dd-HH.mm.ss"
     */
    def getDate(): String = {
      val today = Calendar.getInstance().getTime()
      // 24-hour "HH" is used: the original 12-hour "hh" without an AM/PM marker produced the
      // same text for a morning and an afternoon time.
      val formatter = new SimpleDateFormat("yyyy-MM-dd-HH.mm.ss")
      formatter.format(today)
    }
  }
}
Example 13
Source File: MovieLensDataPowerIterationClustering.scala From Machine-Learning-with-Spark-Second-Edition with MIT License | 5 votes |
package org.sparksamples

import org.apache.spark.mllib.recommendation.{ALS, Rating}
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Loads the MovieLens 100k files, trains an ALS model on the ratings, derives factor vectors
 * for movies and users and configures a PowerIterationClustering instance.
 */
object MovieLensDataPowerIterationClustering {
  val PATH= "../data/ml-100k"

  def main(args: Array[String]): Unit = {
    val spConfig = (new SparkConf).setMaster("local[1]").setAppName("SparkApp").
      set("spark.driver.allowMultipleContexts", "true")
    val sc = new SparkContext(spConfig)
    //val path = PATH + "../data/"
    //val rdd = sc.wholeTextFiles(path)
    val movies = sc.textFile(PATH + "/u.item")
    println(movies.first)

    val genres = sc.textFile(PATH + "/u.genre")
    genres.take(5).foreach(println)

    // Maps the genre index (second field of u.genre) to the genre name (first field)
    val genreMap = genres.filter(!_.isEmpty).map(line => line.split("\\|")).
      map(array => (array(1), array(0))).collectAsMap

    // Fields from position 5 onwards are per-genre flags; a flag "1" at position idx selects
    // the genre whose index is idx
    val titlesAndGenres = movies.map(_.split("\\|")).map { array =>
      val genreFlags = array.toSeq.slice(5, array.size)
      val genresAssigned = genreFlags.zipWithIndex.filter { case (g, idx) =>
        g == "1"
      }.map { case (g, idx) =>
        genreMap(idx.toString)
      }
      (array(0).toInt, (array(1), genresAssigned))
    }

    // Only the first three tab-separated fields (user, movie, rating) are used
    val rawData = sc.textFile(PATH + "/u.data")
    val rawRatings = rawData.map(_.split("\t").take(3))
    val ratings = rawRatings.map { case Array(user, movie, rating) =>
      Rating(user.toInt, movie.toInt, rating.toDouble)
    }
    ratings.cache()

    val alsModel = ALS.train(ratings, 50, 10, 0.1)

    import org.apache.spark.mllib.linalg.Vectors
    val movieFactors = alsModel.productFeatures.map { case (id, factor) => (id, Vectors.dense(factor)) }
    val movieVectors = movieFactors.map(_._2)
    val userFactors = alsModel.userFeatures.map { case (id, factor) => (id, Vectors.dense(factor)) }
    val userVectors = userFactors.map(_._2)

    val numClusters = 5
    val numIterations = 10

    import org.apache.spark.mllib.clustering.PowerIterationClustering
    //val bKMeans = new PowerIterationClustering()()
    val piClustering = new PowerIterationClustering()
    // Use the declared constant instead of repeating the literal 10
    piClustering.setMaxIterations(numIterations)
    piClustering.setK(numClusters)
    println("done")

    // Shut the context down explicitly once the work is done.
    sc.stop()
  }
}
Example 14
Source File: L9-12CollabFiltering.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.Rating
import org.apache.spark.rdd.RDD.doubleRDDToDoubleRDDFunctions
import org.apache.spark.rdd.RDD.rddToPairRDDFunctions
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext

/**
 * Streaming collaborative filtering: for every micro-batch of "subject activity freq" lines
 * found under the input path, trains ALS on roughly 70% of the batch and prints up to five
 * predictions for the remaining pairs.
 *
 * Usage: CollabFilteringApp <appname> <batchInterval> <iPath>
 */
object CollabFilteringApp {

  def main(args: Array[String]): Unit = {
    if (args.length != 3) {
      System.err.println(
        "Usage: CollabFilteringApp <appname> <batchInterval> <iPath>")
      System.exit(1)
    }
    val Seq(appName, batchInterval, iPath) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt))

    val ratingStream = ssc.textFileStream(iPath).map(_.split(" ") match {
      case Array(subject, activity, freq) =>
        Rating(subject.toInt, activity.toInt, freq.toDouble)
    })

    val rank = 10
    val numIterations = 10
    val lambda = 0.01

    ratingStream.foreachRDD { ratingRDD =>
      val Array(testSplit, trainSplit) = ratingRDD.randomSplit(Array(0.3, 0.7))
      // Skip batches without training data instead of handing ALS an empty RDD.
      if (!trainSplit.isEmpty()) {
        val model = ALS.train(trainSplit, rank, numIterations, lambda)
        val test = testSplit.map { case Rating(subject, activity, _) => (subject, activity) }
        // foreach, not map: printing is a side effect and the mapped array was discarded
        model.predict(test).take(5).foreach(println)
      }
    }

    ssc.start()
    ssc.awaitTermination()
  }
}
Example 15
Source File: MovieRecommendation.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package com.packt.ScalaML.MovieRecommendation

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.SQLImplicits
import org.apache.spark.sql._
import org.apache.spark.sql.Dataset
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
import org.apache.spark.mllib.recommendation.Rating
import scala.Tuple2
import org.apache.spark.rdd.RDD

/** Loads the rating and movie CSV files, evaluates a model and prints recommendations. */
object MovieRecommendation {

  /**
   * Root mean squared error of `model` on `data`; a lower value means a better fit.
   *
   * @param model         the factorization model to evaluate
   * @param data          ratings providing both the (user, product) pairs and the true values
   * @param implicitPrefs when true, the first five (prediction, rating) pairs are printed
   * @return the RMSE over all (user, product) pairs of `data` the model predicts
   */
  def computeRmse(model: MatrixFactorizationModel, data: RDD[Rating], implicitPrefs: Boolean): Double = {
    val predictions: RDD[Rating] = model.predict(data.map(r => (r.user, r.product)))
    val predictedByPair = predictions.map(p => ((p.user, p.product), p.rating))
    val actualByPair = data.map(r => ((r.user, r.product), r.rating))
    val predictionsAndRatings = predictedByPair.join(actualByPair).values

    if (implicitPrefs) {
      println("(Prediction, Rating)")
      println(predictionsAndRatings.take(5).mkString("\n"))
    }

    val meanSquaredError = predictionsAndRatings.map { case (predicted, actual) =>
      (predicted - actual) * (predicted - actual)
    }.mean()
    math.sqrt(meanSquaredError)
  }

  // NOTE(review): `model` and `testRDD` are used in main but their definitions are not part
  // of this excerpt.
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession
      .builder()
      .appName("JavaLDAExample")
      .master("local[*]")
      .config("spark.sql.warehouse.dir", "E:/Exp/")
      .getOrCreate()

    // Ratings: userId, movieId, rating, timestamp
    val ratigsFile = "data/ratings.csv"
    val df1 = spark.read.format("com.databricks.spark.csv").option("header", true).load(ratigsFile)
    val ratingsDF = df1.select(df1.col("userId"), df1.col("movieId"), df1.col("rating"), df1.col("timestamp"))
    ratingsDF.show(false)

    // Movies: movieId, title, genres
    val moviesFile = "data/movies.csv"
    val df2 = spark.read.format("com.databricks.spark.csv").option("header", "true").load(moviesFile)
    val moviesDF = df2.select(df2.col("movieId"), df2.col("title"), df2.col("genres"))
    moviesDF.show(false)

    ratingsDF.createOrReplaceTempView("ratings")
    moviesDF.createOrReplaceTempView("movies")

    // A lower RMSE is better
    val rmseTest = computeRmse(model, testRDD, true)
    println("Test RMSE: = " + rmseTest)

    // Top 6 movie predictions for user 668
    println("Recommendations: (MovieId => Rating)")
    println("----------------------------------")
    val recommendationsUser = model.recommendProducts(668, 6)
    recommendationsUser.map(rating => (rating.product, rating.rating)).foreach(println)
    println("----------------------------------")

    spark.stop()
  }
}
Example 16
Source File: RecommendationModelReuse.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package com.packt.ScalaML.MovieRecommendation

import org.apache.spark.sql.SparkSession
import org.apache.spark.mllib.recommendation.ALS
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
import org.apache.spark.mllib.recommendation.Rating
import scala.Tuple2
import org.apache.spark.rdd.RDD

/**
 * Loads a previously saved MatrixFactorizationModel, prints its top recommendations for one
 * user and reports its RMSE on a 25% test split of the rating file.
 */
object RecommendationModelReuse {
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession
      .builder()
      .appName("JavaLDAExample")
      .master("local[*]")
      .config("spark.sql.warehouse.dir", "E:/Exp/").
      getOrCreate()

    val ratigsFile = "data/ratings.csv"
    val ratingDF = spark.read.format("com.databricks.spark.csv").option("header", true).load(ratigsFile)
    val selectedRatingsDF = ratingDF.select(ratingDF.col("userId"), ratingDF.col("movieId"), ratingDF.col("rating"), ratingDF.col("timestamp"))

    // Randomly split the ratings into training data (75%) and test data (25%);
    // only the test part is needed here.
    val splits = selectedRatingsDF.randomSplit(Array(0.75, 0.25), seed = 12345L)
    val testData = splits(1)

    // The columns are read as strings and converted while building the Rating objects.
    val testRDD = testData.rdd.map(row => {
      val userId = row.getString(0)
      val movieId = row.getString(1)
      val ratings = row.getString(2)
      Rating(userId.toInt, movieId.toInt, ratings.toDouble)
    })

    // Load the saved model back
    val same_model = MatrixFactorizationModel.load(spark.sparkContext, "model/MovieRecomModel/")

    // Top 10 movie predictions for user 458, computed once and printed in both formats
    // below (the original repeated the identical recommendProducts call).
    val userId = 458
    val numRecommendations = 10
    val topRecsForUser = same_model.recommendProducts(userId, numRecommendations)

    println("Rating:(UserID, MovieID, Rating)")
    println("----------------------------------")
    for (rating <- topRecsForUser) {
      println(rating.toString())
    }
    println("----------------------------------")

    // A lower RMSE is better
    val rmseTest = MovieRecommendation.computeRmse(same_model, testRDD, true)
    println("Test RMSE: = " + rmseTest)

    // The same recommendations, shown as (movie id, predicted rating) pairs
    println("Recommendations: (MovieId => Rating)")
    println("----------------------------------")
    topRecsForUser.map(rating => (rating.product, rating.rating)).foreach(println)
    println("----------------------------------")

    spark.stop()
  }
}
Example 17
Source File: ModelTrainer.scala From recommendersystem with Apache License 2.0 | 5 votes |
package com.infosupport.recommendedcontent.core

import akka.actor.{Props, ActorLogging, Actor}
import org.apache.spark.SparkContext
import org.apache.spark.mllib.recommendation.{Rating, ALS, MatrixFactorizationModel}

import com.datastax.spark.connector._

// NOTE(review): the enclosing actor class header (which provides `sc`, `context`, `sender`,
// `self` and `TrainingResult`) is missing from this excerpt.

  /**
   * Trains an ALS model on the ratings stored in the configured Cassandra table, sends it to
   * the sender wrapped in a TrainingResult, then stops this actor.
   */
  private def trainModel() = {
    val config = context.system.settings.config
    val table = config.getString("cassandra.table")
    val keyspace = config.getString("cassandra.keyspace")

    // Read the user ratings from the database and convert every record into the Rating
    // structure that the ALS algorithm works on.
    val ratings = sc.cassandraTable(keyspace, table).map { record =>
      val userId = record.get[Int]("user_id")
      val itemId = record.get[Int]("item_id")
      Rating(userId, itemId, record.get[Double]("rating"))
    }

    // Hyper-parameters that control how closely the predictions fit the observations loaded
    // from Cassandra; tune these to optimise the model.
    val rank = 10
    val iterations = 10
    val lambda = 0.01
    val model = ALS.train(ratings, rank, iterations, lambda)

    sender ! TrainingResult(model)
    context.stop(self)
  }
}