org.apache.spark.ml.recommendation.ALS Scala Examples
The following examples show how to use org.apache.spark.ml.recommendation.ALS.
Follow the links above each example to visit the original project or source file.
Example 1
Source File: ALSExample.scala From drizzle-spark with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.ml

// $example on$
import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.ml.recommendation.ALS
// $example off$
import org.apache.spark.sql.SparkSession

object ALSExample {

  // $example on$
  case class Rating(userId: Int, movieId: Int, rating: Float, timestamp: Long)

  def parseRating(str: String): Rating = {
    val fields = str.split("::")
    assert(fields.size == 4)
    Rating(fields(0).toInt, fields(1).toInt, fields(2).toFloat, fields(3).toLong)
  }
  // $example off$

  def main(args: Array[String]) {
    val spark = SparkSession
      .builder
      .appName("ALSExample")
      .getOrCreate()
    import spark.implicits._

    // $example on$
    val ratings = spark.read.textFile("data/mllib/als/sample_movielens_ratings.txt")
      .map(parseRating)
      .toDF()
    val Array(training, test) = ratings.randomSplit(Array(0.8, 0.2))

    // Build the recommendation model using ALS on the training data
    val als = new ALS()
      .setMaxIter(5)
      .setRegParam(0.01)
      .setUserCol("userId")
      .setItemCol("movieId")
      .setRatingCol("rating")
    val model = als.fit(training)

    // Evaluate the model by computing the RMSE on the test data
    val predictions = model.transform(test)
    val evaluator = new RegressionEvaluator()
      .setMetricName("rmse")
      .setLabelCol("rating")
      .setPredictionCol("prediction")
    val rmse = evaluator.evaluate(predictions)
    println(s"Root-mean-square error = $rmse")
    // $example off$

    spark.stop()
  }
}
// scalastyle:on println
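With a random 80/20 split, the test set can contain users or items that never appear in the training set; ALS then predicts NaN for those rows, which makes the RMSE itself NaN. Since Spark 2.2 the cold-start strategy can drop such rows before evaluation. A minimal sketch, assuming the same ALS setup as the example above (the only addition is setColdStartStrategy):

// Variation on the example above: drop NaN predictions for users/items
// seen only in the test split (requires Spark 2.2+).
val als = new ALS()
  .setMaxIter(5)
  .setRegParam(0.01)
  .setUserCol("userId")
  .setItemCol("movieId")
  .setRatingCol("rating")
  .setColdStartStrategy("drop") // "nan" is the default; "drop" filters NaN rows
val model = als.fit(training)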
Example 2
Source File: ALSParitySpec.scala From mleap with Apache License 2.0
package org.apache.spark.ml.parity.recommendation

import org.apache.spark.ml.parity.SparkParityBase
import org.apache.spark.ml.recommendation.ALS
import org.apache.spark.ml.{Pipeline, Transformer}
import org.apache.spark.sql.DataFrame

class ALSParitySpec extends SparkParityBase {
  override val dataset: DataFrame = recommendationDataset
  override val sparkTransformer: Transformer = new Pipeline().setStages(Array(
    new ALS()
      .setMaxIter(5)
      .setRegParam(0.01)
      .setUserCol("userId")
      .setItemCol("movieId")
      .setRatingCol("rating")
  )).fit(dataset)

  override def equalityTest(sparkDataset: DataFrame, mleapDataset: DataFrame): Unit =
    super.equalityTest(
      sparkDataset.orderBy("userId", "movieId"),
      mleapDataset.orderBy("userId", "movieId"))

  // TODO: maybe coldStartStrategy should be serialized
  override val unserializedParams = Set("coldStartStrategy")
}
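Both randomSplit and ALS's factor initialization are randomized, so parity failures in a test like this can be hard to reproduce. A small sketch of pinning the seed on the ALS stage, assuming the same pipeline as above (setSeed is a standard ALS parameter; the value 42 is arbitrary):

new ALS()
  .setMaxIter(5)
  .setRegParam(0.01)
  .setUserCol("userId")
  .setItemCol("movieId")
  .setRatingCol("rating")
  .setSeed(42L) // arbitrary fixed seed for reproducible factor initialization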
Example 3
Source File: ALSGenericExample.scala From Machine-Learning-with-Spark-Second-Edition with MIT License
// scalastyle:off println
package org.sparksamples.als

import org.apache.spark.SparkConf
import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.ml.recommendation.ALS

object ALSGenericExample {

  val SPARK_PATH = "/home/ubuntu/work/spark-2.0.0-bin-hadoop2.7/"

  case class Rating(userId: Int, movieId: Int, rating: Float, timestamp: Long)

  def parseRating(str: String): Rating = {
    val fields = str.split("::")
    assert(fields.size == 4)
    Rating(fields(0).toInt, fields(1).toInt, fields(2).toFloat, fields(3).toLong)
  }

  def main(args: Array[String]) {
    import org.apache.spark.sql.SparkSession
    val spConfig = (new SparkConf).setMaster("local[1]").setAppName("SparkApp")
      .set("spark.driver.allowMultipleContexts", "true")
    val spark = SparkSession
      .builder()
      .appName("Spark SQL Example")
      .config(spConfig)
      .getOrCreate()
    import spark.implicits._

    // Create an RDD of Rating objects from a text file, then convert it to a DataFrame
    val ratings = spark.sparkContext
      .textFile(SPARK_PATH + "data/mllib/als/sample_movielens_ratings.txt")
      .map(_.split("::"))
      .map(lineSplit => Rating(lineSplit(0).toInt, lineSplit(1).toInt,
        lineSplit(2).toFloat, lineSplit(3).toLong))
      .toDF()
    val Array(training, test) = ratings.randomSplit(Array(0.8, 0.2))

    // Build the recommendation model using ALS on the training data
    val als = new ALS()
      .setMaxIter(5)
      .setRegParam(0.01)
      .setUserCol("userId")
      .setItemCol("movieId")
      .setRatingCol("rating")
    val model = als.fit(training)

    // Evaluate the model by computing the RMSE on the test data
    val predictions = model.transform(test)
    val evaluator = new RegressionEvaluator()
      .setMetricName("rmse")
      .setLabelCol("rating")
      .setPredictionCol("prediction")
    val rmse = evaluator.evaluate(predictions)
    println(s"Root-mean-square error = $rmse")

    spark.stop()
  }
}
// scalastyle:on println
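Note that this example defines parseRating but then re-implements the same split-and-convert logic over an RDD. Because spark.read.textFile returns a Dataset[String], the helper can be mapped directly, as Example 1 does; a sketch of the equivalent loading step, assuming the same file and imports:

// Equivalent Dataset-based loading that reuses the parseRating helper
// (same schema as the RDD version above).
val ratings = spark.read
  .textFile(SPARK_PATH + "data/mllib/als/sample_movielens_ratings.txt")
  .map(parseRating)
  .toDF()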
Example 4
Source File: ALSModeling.scala From Machine-Learning-with-Spark-Second-Edition with MIT License
package com.spark.recommendation

import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.ml.recommendation.ALS

object ALSModeling {

  def createALSModel() {
    val ratings = FeatureExtraction.getFeatures()

    val Array(training, test) = ratings.randomSplit(Array(0.8, 0.2))
    println(training.first())

    // Build the recommendation model using ALS on the training data
    val als = new ALS()
      .setMaxIter(5)
      .setRegParam(0.01)
      .setUserCol("userId")
      .setItemCol("movieId")
      .setRatingCol("rating")
    val model = als.fit(training)

    println(model.userFactors.count())
    println(model.itemFactors.count())

    // Evaluate the model by computing the RMSE on the test data
    val predictions = model.transform(test)
    predictions.printSchema() // printSchema() returns Unit, so don't wrap it in println

    val evaluator = new RegressionEvaluator()
      .setMetricName("rmse")
      .setLabelCol("rating")
      .setPredictionCol("prediction")
    val rmse = evaluator.evaluate(predictions)
    println(s"Root-mean-square error = $rmse")
  }

  def main(args: Array[String]) {
    createALSModel()
  }
}
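Beyond the RMSE check, the fitted ALSModel can generate top-N recommendations directly. The recommendForAllUsers and recommendForAllItems methods were added in Spark 2.2 (this book's examples target 2.0), so treat this as a version-dependent sketch against the model fitted above:

// Top-10 movie recommendations per user, and top-10 user recommendations per movie
// (requires Spark 2.2+).
val userRecs = model.recommendForAllUsers(10)
val movieRecs = model.recommendForAllItems(10)
userRecs.show(5, truncate = false)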
Example 5
Source File: FeatureExtraction.scala From Machine-Learning-with-Spark-Second-Edition with MIT License
package com.spark.recommendation

import org.apache.spark.{sql, SparkConf}
import org.apache.spark.ml.recommendation.ALS
import org.apache.spark.sql.{Dataset, SparkSession}

object FeatureExtraction {

  // Rating and parseRating are imported by ALSModeling (Example 4), so they
  // must live in this object; the enclosing object, these two definitions, and
  // the SparkSession were lost when the snippet was flattened and are restored
  // here. The local[1] master is an assumption for a runnable sketch.
  case class Rating(userId: Int, movieId: Int, rating: Float, timestamp: Long)

  def parseRating(str: String): Rating = {
    val fields = str.split("::")
    assert(fields.size == 4)
    Rating(fields(0).toInt, fields(1).toInt, fields(2).toFloat, fields(3).toLong)
  }

  val spark = SparkSession
    .builder()
    .master("local[1]")
    .appName("FeatureExtraction")
    .getOrCreate()

  def getFeatures(): sql.DataFrame = {
    import spark.implicits._

    //val ratings = spark.read.textFile("/Users/manpreet.singh/Sandbox/codehub/github/machinelearning/spark-ml/Chapter_05/data/ml-100k 2/u.data").map(parseRating).toDF()
    val ratings = spark.read.textFile("/Users/manpreet.singh/Sandbox/codehub/github/machinelearning/spark-ml/Chapter_05/2.0.0/scala-spark-app/src/main/scala/com/spark/recommendation/sample_movielens_ratings.txt").map(parseRating).toDF()
    println(ratings.first())

    // val Array(training, test) = ratings.randomSplit(Array(0.8, 0.2))
    // println(training.first())

    return ratings
  }

  def getSpark(): SparkSession = {
    return spark
  }

  def main(args: Array[String]) {
    getFeatures()
  }
}
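For reference, parseRating turns one "::"-delimited MovieLens-style line into a Rating. A quick check with an illustrative line in the userId::movieId::rating::timestamp format:

// Illustrative input; the sample file uses the same four-field format.
val r = parseRating("0::2::3::1424380312")
println(r) // Rating(0,2,3.0,1424380312)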
Example 6
Source File: ALSExample.scala From sparkoscope with Apache License 2.0
This file is identical, line for line, to ALSExample.scala in Example 1 above; see that listing for the formatted source.
Example 7
Source File: ALSBasedUserItemIndexing.scala From ann4s with Apache License 2.0
package ann4s.spark.example

import ann4s.spark.LocalSparkApp
import org.apache.spark.ml.nn.Annoy
import org.apache.spark.sql.SparkSession
import org.apache.spark.ml.recommendation.ALS

case class Rating(userId: Int, movieId: Int, rating: Float, timestamp: Long)

object ALSBasedUserItemIndexing extends LocalSparkApp {

  override def run(spark: SparkSession): Unit = {
    import spark.implicits._

    val ratings = spark.read.textFile("data/mllib/als/sample_movielens_ratings.txt")
      .map { str =>
        val fields = str.split("::")
        assert(fields.size == 4)
        Rating(fields(0).toInt, fields(1).toInt, fields(2).toFloat, fields(3).toLong)
      }
      .toDF()
    val Array(training, test) = ratings.randomSplit(Array(0.8, 0.2))

    val als = new ALS()
      .setMaxIter(5)
      .setRegParam(0.01)
      .setUserCol("userId")
      .setItemCol("movieId")
      .setRatingCol("rating")
    val model = als.fit(training)

    val ann = new Annoy()
      .setNumTrees(2)
      .setFraction(0.1)
      .setIdCol("id")
      .setFeaturesCol("features")

    val userAnnModel = ann.fit(model.userFactors)
    userAnnModel.saveAsAnnoyBinary("exp/als/user_factors.ann")

    val itemAnnModel = ann.fit(model.itemFactors)
    itemAnnModel.saveAsAnnoyBinary("exp/als/item_factors.ann")
  }
}
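The setIdCol("id") and setFeaturesCol("features") calls match the schema of ALSModel's factor DataFrames, which expose an integer id column and an array-of-float features column. A quick way to confirm, assuming the model fitted above:

// ALSModel factor DataFrames carry "id" (int) and "features" (array<float>),
// which is why the Annoy indexer is pointed at those two columns.
model.userFactors.printSchema()
model.itemFactors.printSchema()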
Example 8
Source File: ALSExample.scala From multi-tenancy-spark with Apache License 2.0
This file is also identical, line for line, to ALSExample.scala in Example 1 above; see that listing for the formatted source.