org.apache.spark.ml.recommendation.ALSModel Java Examples

The following examples show how to use org.apache.spark.ml.recommendation.ALSModel. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: JavaALSExample.java (from the SparkDemo project, MIT License) — 5 votes
/**
 * Trains an ALS recommendation model on the sample MovieLens ratings file and
 * prints the root-mean-square error measured on a held-out split.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
  SparkSession spark = SparkSession
    .builder()
    .appName("JavaALSExample")
    .getOrCreate();

  try {
    // $example on$
    // Read the ratings file line by line and turn each line into a Rating.
    JavaRDD<Rating> ratingsRDD = spark
      .read().textFile(Constant.LOCAL_FILE_PREX +"data/mllib/als/sample_movielens_ratings.txt").javaRDD()
      .map(new Function<String, Rating>() {
        @Override
        public Rating call(String str) {
          return Rating.parseRating(str);
        }
      });
    Dataset<Row> ratings = spark.createDataFrame(ratingsRDD, Rating.class);

    // Random split with weights 0.8 / 0.2: first part trains, second part evaluates.
    Dataset<Row>[] splits = ratings.randomSplit(new double[]{0.8, 0.2});
    Dataset<Row> training = splits[0];
    Dataset<Row> test = splits[1];

    // Build the recommendation model using ALS on the training data
    ALS als = new ALS()
      .setMaxIter(5)
      .setRegParam(0.01)
      .setUserCol("userId")
      .setItemCol("movieId")
      .setRatingCol("rating");
    ALSModel model = als.fit(training);

    // Evaluate the model by computing the RMSE on the test data.
    // Users or items that ended up only in the test split have no learned factors,
    // and ALSModel.transform emits NaN predictions for them by default; a single NaN
    // makes the RMSE NaN, so those rows are dropped before evaluation.
    Dataset<Row> predictions = model.transform(test)
      .na().drop(new String[]{"prediction"});

    RegressionEvaluator evaluator = new RegressionEvaluator()
      .setMetricName("rmse")
      .setLabelCol("rating")
      .setPredictionCol("prediction");
    double rmse = evaluator.evaluate(predictions);
    System.out.println("Root-mean-square error = " + rmse);
    // $example off$
  } finally {
    // Always release the session, even when reading, training or evaluation fails.
    spark.stop();
  }
}
 
Example #2
Source File: JavaALSExampleByMl.java (from the Spark_ALS project, MIT License) — 4 votes
/**
 * Trains an ALS recommendation model on the sample MovieLens ratings using a local
 * Spark context, prints the learned item and user factors, and logs the
 * root-mean-square error measured on a held-out split.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("JavaALSExample").setMaster("local");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    try {
        SQLContext sqlContext = new SQLContext(jsc);

        // Turn each line of the ratings file into a Rating.
        JavaRDD<Rating> ratingsRDD = jsc.textFile("data/sample_movielens_ratings.txt")
                .map(new Function<String, Rating>() {
                    @Override
                    public Rating call(String str) {
                        return Rating.parseRating(str);
                    }
                });
        Dataset<Row> ratings = sqlContext.createDataFrame(ratingsRDD, Rating.class);

        // Split the data: 80% for training, the remainder for testing.
        Dataset<Row>[] splits = ratings.randomSplit(new double[]{0.8, 0.2});
        Dataset<Row> training = splits[0];
        Dataset<Row> test = splits[1];

        // Build the recommendation model using ALS on the training data
        ALS als = new ALS().setMaxIter(5) // number of iterations
                // Regularization parameter. Original author's note: a value of 0.1
                // appeared to give a lower error on this dataset.
                .setRegParam(0.01)
                .setUserCol("userId").setItemCol("movieId")
                .setRatingCol("rating");
        ALSModel model = als.fit(training); // run the training

        // Print the learned latent factors (up to 1500 item rows; default row count for users).
        Dataset<Row> itemFactors = model.itemFactors();
        itemFactors.show(1500);
        Dataset<Row> userFactors = model.userFactors();
        userFactors.show();

        // Evaluate the model by computing the RMSE on the test data.
        // Users or items that ended up only in the test split have no learned factors,
        // and ALSModel.transform emits NaN predictions for them by default; a single NaN
        // makes the RMSE NaN, so those rows are dropped before evaluation.
        Dataset<Row> rawPredictions = model.transform(test)
                .na().drop(new String[]{"prediction"});
        // Cast both columns to double before handing them to the evaluator.
        Dataset<Row> predictions = rawPredictions
                .withColumn("rating", rawPredictions.col("rating").cast(DataTypes.DoubleType))
                .withColumn("prediction", rawPredictions.col("prediction").cast(DataTypes.DoubleType));

        RegressionEvaluator evaluator = new RegressionEvaluator().setMetricName("rmse").setLabelCol("rating")
                .setPredictionCol("prediction");
        double rmse = evaluator.evaluate(predictions);
        log.info("Root-mean-square error = {} ", rmse);
    } finally {
        // Always shut the context down, even when loading, training or evaluation fails.
        jsc.stop();
    }
}