org.apache.spark.mllib.recommendation.Rating Java Exaples

Source File: Evaluation.java From oryx with Apache License 2.0

6 votes

/**
 * Computes root mean squared error of {@link Rating#rating()} versus predicted value.
 */
static double rmse(MatrixFactorizationModel mfModel, JavaRDD<Rating> testData) {
  JavaPairRDD<Tuple2<Integer,Integer>,Double> testUserProductValues =
      testData.mapToPair(rating -> new Tuple2<>(new Tuple2<>(rating.user(), rating.product()), rating.rating()));
  @SuppressWarnings("unchecked")
  RDD<Tuple2<Object,Object>> testUserProducts =
      (RDD<Tuple2<Object,Object>>) (RDD<?>) testUserProductValues.keys().rdd();
  JavaRDD<Rating> predictions = testData.wrapRDD(mfModel.predict(testUserProducts));
  double mse = predictions.mapToPair(
      rating -> new Tuple2<>(new Tuple2<>(rating.user(), rating.product()), rating.rating())
  ).join(testUserProductValues).values().mapToDouble(valuePrediction -> {
    double diff = valuePrediction._1() - valuePrediction._2();
    return diff * diff;
  }).mean();
  return Math.sqrt(mse);
}

Source File: JavaALS.java From SparkDemo with MIT License

5 votes

public static void main(String[] args) {

    if (args.length < 4) {
      System.err.println(
        "Usage: JavaALS <ratings_file> <rank> <iterations> <output_dir> [<blocks>]");
      System.exit(1);
    }
    SparkConf sparkConf = new SparkConf().setAppName("JavaALS");
    int rank = Integer.parseInt(args[1]);
    int iterations = Integer.parseInt(args[2]);
    String outputDir = args[3];
    int blocks = -1;
    if (args.length == 5) {
      blocks = Integer.parseInt(args[4]);
    }

    JavaSparkContext sc = new JavaSparkContext(sparkConf);
    JavaRDD<String> lines = sc.textFile(args[0]);

    JavaRDD<Rating> ratings = lines.map(new ParseRating());

    MatrixFactorizationModel model = ALS.train(ratings.rdd(), rank, iterations, 0.01, blocks);

    model.userFeatures().toJavaRDD().map(new FeaturesToString()).saveAsTextFile(
        outputDir + "/userFeatures");
    model.productFeatures().toJavaRDD().map(new FeaturesToString()).saveAsTextFile(
        outputDir + "/productFeatures");
    System.out.println("Final user/product features written to " + outputDir);

    sc.stop();
  }

Source File: CollabFilterCassandraDriver.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

5 votes

double trainAndValidate(int version) throws InstantiationException, IllegalAccessException, ClassNotFoundException {
	final ICollabFilterCassandra cfc;
	String className = "collabfilter.CollabFilterCassandra" + version;
	cfc = (ICollabFilterCassandra) Class.forName(className).newInstance();
	try (Session session = this.cassandraConnector.openSession()) {
		MatrixFactorizationModel model = cfc.train(this.sparkCtx, this.cassandraConnector);
		CassandraJavaRDD<CassandraRow> validationsCassRdd = javaFunctions(this.sparkCtx).cassandraTable(RatingDO.EMPLOYERRATINGS_KEYSPACE, RatingDO.VALIDATION_TABLE);
		JavaRDD<Rating> predictionJavaRdd = cfc.predict(model, validationsCassRdd);
		double rmse = cfc.validate(predictionJavaRdd, validationsCassRdd);
		System.out.println(cfc.resultsReport(predictionJavaRdd, validationsCassRdd, rmse));
		return rmse;
	}

}

Source File: CollabFilterCassandra7.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

5 votes

public double validate(JavaRDD<Rating> predictionJavaRdd, CassandraJavaRDD<CassandraRow> validationsCassRdd) {
	JavaPairRDD<Tuple2<Integer, Integer>, Double> predictionsJavaPairs = JavaPairRDD.fromJavaRDD(predictionJavaRdd.map(new org.apache.spark.api.java.function.Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() {
		@Override
		public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating pred) throws Exception {
			return new Tuple2<Tuple2<Integer, Integer>, Double>(new Tuple2<Integer, Integer>(pred.user(), pred.product()), pred.rating());
		}
		//
	}));
	JavaRDD<Rating> validationRatings = validationsCassRdd.map(new org.apache.spark.api.java.function.Function<CassandraRow, Rating>() {
		@Override
		public Rating call(CassandraRow validation) throws Exception {
			return new Rating(validation.getInt(RatingDO.USER_COL), validation.getInt(RatingDO.PRODUCT_COL), validation.getInt(RatingDO.RATING_COL));
		}
	
	});
	JavaRDD<Tuple2<Double, Double>> validationAndPredictions = JavaPairRDD.fromJavaRDD(validationRatings.map(new org.apache.spark.api.java.function.Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() {
	
		@Override
		public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating validationRating) throws Exception {
			return new Tuple2<Tuple2<Integer, Integer>, Double>(new Tuple2<Integer, Integer>(validationRating.user(), validationRating.product()), validationRating.rating());
		}
	
	})).join(predictionsJavaPairs).values();
	
	double meanSquaredError = JavaDoubleRDD.fromRDD(validationAndPredictions.map(new org.apache.spark.api.java.function.Function<Tuple2<Double, Double>, Object>() {
		@Override
		public Object call(Tuple2<Double, Double> pair) throws Exception {
			Double err = pair._1() - pair._2();
			return (Object) (err * err);// No covariance! Need to cast
		}
	}).rdd()).mean();
	double rmse = Math.sqrt(meanSquaredError);
	return rmse;
	 
}

Source File: CollabFilterCassandra7.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

5 votes

public JavaRDD<Rating> predict(MatrixFactorizationModel model, CassandraJavaRDD<CassandraRow> validationsCassRdd) {
	RDD<Tuple2<Object, Object>> validationsRdd = JavaRDD.toRDD(validationsCassRdd.map(new org.apache.spark.api.java.function.Function<CassandraRow, Tuple2<Object, Object>>() {
		@Override
		public Tuple2<Object, Object> call(CassandraRow validationRow) throws Exception {
			return new Tuple2<Object, Object>(validationRow.getInt(RatingDO.USER_COL), validationRow.getInt(RatingDO.PRODUCT_COL));
		}
	}));
	JavaRDD<Rating> predictionJavaRdd = model.predict(validationsRdd).toJavaRDD();
	return predictionJavaRdd;
}

Source File: CollabFilterCassandra7.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

5 votes

public MatrixFactorizationModel train(JavaSparkContext sparkCtx, CassandraConnector cassandraConnector) {
	CassandraJavaRDD<CassandraRow> trainingRdd = javaFunctions(sparkCtx).cassandraTable(RatingDO.EMPLOYERRATINGS_KEYSPACE, RatingDO.RATINGS_TABLE);
	JavaRDD<Rating> trainingJavaRdd = trainingRdd.map(new org.apache.spark.api.java.function.Function<CassandraRow, Rating>() {
		@Override
		public Rating call(CassandraRow trainingRow) throws Exception {
			return new Rating(trainingRow.getInt(RatingDO.USER_COL), trainingRow.getInt(RatingDO.PRODUCT_COL), trainingRow.getDouble(RatingDO.RATING_COL));
		}
	});
	MatrixFactorizationModel model = ALS.train(JavaRDD.toRDD(trainingJavaRdd), RANK, ITER, LAMBDA);
	return model;
}

Source File: CollabFilterCassandra8.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

5 votes

private String predictionString(JavaRDD<Rating> predJavaRdd, CassandraJavaRDD<CassandraRow> validationsCassRdd) {
	java.util.function.Function<CassandraRow, Tuple2<Integer, Integer>> keyMapper = validationRow -> new Tuple2<Integer, Integer>(validationRow.getInt(RatingDO.USER_COL), validationRow.getInt(RatingDO.PRODUCT_COL));
	java.util.function.Function<CassandraRow, Double> valueMapper = validationRow -> validationRow.getDouble(RatingDO.RATING_COL);
	java.util.Map<Tuple2<Integer, Integer>, Double> validationMap = validationsCassRdd.collect().stream().collect(Collectors.toMap(keyMapper, valueMapper));

	java.util.function.Function<Rating, String> stringMapper = prediction -> {
		double validationRating = validationMap.get(new Tuple2<Integer, Integer>(prediction.user(), prediction.product()));
		String errWarningString = Math.abs(validationRating - prediction.rating()) >= 1 ? "ERR" : "OK";
		return prediction.user() + "\t" + prediction.product() + "\t" + Util.round(prediction.rating()) + "\t\t" + Util.round(validationRating) + "\t" + errWarningString;
	};
	Stream<Rating> sortedPredictions = predJavaRdd.collect().stream().sorted((o1, o2) -> o1.user() == o2.user() ? o1.product() - o2.product() : o1.user() - o2.user());
	String ret = sortedPredictions.map(stringMapper).collect(Collectors.joining("\n"));

	return ret;
}

Source File: CollabFilterCassandra8.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

5 votes

public double validate(JavaRDD<Rating> predictionJavaRdd, CassandraJavaRDD<CassandraRow> validationsCassRdd) {
	JavaPairRDD<Tuple2<Integer, Integer>, Double> predictionsJavaPairs = JavaPairRDD.fromJavaRDD(predictionJavaRdd.map(pred -> new Tuple2<Tuple2<Integer, Integer>, Double>(new Tuple2<Integer, Integer>(pred.user(), pred.product()), pred.rating())));
	JavaRDD<Rating> validationRatings = validationsCassRdd.map(validation -> new Rating(validation.getInt(RatingDO.USER_COL), validation.getInt(RatingDO.PRODUCT_COL), validation.getInt(RatingDO.RATING_COL)));
	JavaRDD<Tuple2<Double, Double>> validationAndPredictions = JavaPairRDD.fromJavaRDD(validationRatings.map(validationRating -> new Tuple2<Tuple2<Integer, Integer>, Double>(new Tuple2<Integer, Integer>(validationRating.user(), validationRating.product()), validationRating.rating()))).join(predictionsJavaPairs).values();

	double meanSquaredError = JavaDoubleRDD.fromRDD(validationAndPredictions.map(pair -> {
		Double err = pair._1() - pair._2();
		return (Object) (err * err);// No covariance! Need to cast to Object
		}).rdd()).mean();
	double rmse = Math.sqrt(meanSquaredError);
	return rmse;

}

Source File: Evaluation.java From oryx with Apache License 2.0

5 votes

private static JavaPairRDD<Integer,Iterable<Rating>> predictAll(
    MatrixFactorizationModel mfModel,
    JavaRDD<Rating> data,
    JavaPairRDD<Integer,Integer> userProducts) {
  @SuppressWarnings("unchecked")
  RDD<Tuple2<Object,Object>> userProductsRDD =
      (RDD<Tuple2<Object,Object>>) (RDD<?>) userProducts.rdd();
  return data.wrapRDD(mfModel.predict(userProductsRDD)).groupBy(Rating::user);
}

Source File: ALSUpdate.java From oryx with Apache License 2.0

5 votes

/**
 * Combines {@link Rating}s with the same user/item into one, with score as the sum of
 * all of the scores.
 */
private JavaRDD<Rating> aggregateScores(JavaRDD<? extends Rating> original, double epsilon) {
  JavaPairRDD<Tuple2<Integer,Integer>,Double> tuples =
      original.mapToPair(rating -> new Tuple2<>(new Tuple2<>(rating.user(), rating.product()), rating.rating()));

  JavaPairRDD<Tuple2<Integer,Integer>,Double> aggregated;
  if (implicit) {
    // TODO can we avoid groupByKey? reduce, combine, fold don't seem viable since
    // they don't guarantee the delete elements are properly handled
    aggregated = tuples.groupByKey().mapValues(MLFunctions.SUM_WITH_NAN);
  } else {
    // For non-implicit, last wins.
    aggregated = tuples.foldByKey(Double.NaN, (current, next) -> next);
  }

  JavaPairRDD<Tuple2<Integer,Integer>,Double> noNaN =
      aggregated.filter(kv -> !Double.isNaN(kv._2()));

  if (logStrength) {
    return noNaN.map(userProductScore -> new Rating(
        userProductScore._1()._1(),
        userProductScore._1()._2(),
        Math.log1p(userProductScore._2() / epsilon)));
  } else {
    return noNaN.map(userProductScore -> new Rating(
        userProductScore._1()._1(),
        userProductScore._1()._2(),
        userProductScore._2()));
  }
}

Source File: ALSUpdate.java From oryx with Apache License 2.0

5 votes

static Rating decayRating(Rating rating, long timestamp, long now, double factor) {
  if (timestamp >= now) {
    return rating;
  }
  double days = (now - timestamp) / 86400000.0;
  return new Rating(rating.user(), rating.product(), rating.rating() * Math.pow(factor, days));
}

Source File: ALSUpdate.java From oryx with Apache License 2.0

5 votes

/**
 * @param parsedRDD parsed input as {@code String[]}
 * @return {@link Rating}s ordered by timestamp
 */
private JavaRDD<Rating> parsedToRatingRDD(JavaRDD<String[]> parsedRDD,
                                          Broadcast<? extends Map<String,Integer>> bUserIDToIndex,
                                          Broadcast<? extends Map<String,Integer>> bItemIDToIndex) {
  JavaPairRDD<Long,Rating> timestampRatingRDD = parsedRDD.mapToPair(tokens -> {
    try {
      return new Tuple2<>(
          Long.valueOf(tokens[3]),
          new Rating(bUserIDToIndex.value().get(tokens[0]),
                     bItemIDToIndex.value().get(tokens[1]),
                     // Empty value means 'delete'; propagate as NaN
                     tokens[2].isEmpty() ? Double.NaN : Double.parseDouble(tokens[2])));
    } catch (NumberFormatException | ArrayIndexOutOfBoundsException e) {
      log.warn("Bad input: {}", Arrays.toString(tokens));
      throw e;
    }
  });

  if (decayFactor < 1.0) {
    double factor = decayFactor;
    long now = System.currentTimeMillis();
    timestampRatingRDD = timestampRatingRDD.mapToPair(timestampRating -> {
        long timestamp = timestampRating._1();
        return new Tuple2<>(timestamp, decayRating(timestampRating._2(), timestamp, now, factor));
      });
  }

  if (decayZeroThreshold > 0.0) {
    double theThreshold = decayZeroThreshold;
    timestampRatingRDD = timestampRatingRDD.filter(timestampRating -> timestampRating._2().rating() > theThreshold);
  }

  return timestampRatingRDD.sortByKey().values();
}

Source File: MLSupporter.java From DDF with Apache License 2.0

5 votes

/**
 * Override this to return the approriate DDF representation matching that specified in {@link ParamInfo}. The base
 * implementation simply returns the DDF.
 *
 * @param paramInfo
 * @return
 */
@SuppressWarnings("unchecked")
@Override
protected Object convertDDF(ParamInfo paramInfo) throws DDFException {
  mLog.info(">>>> Running ConvertDDF of io.ddf.spark.ml.MLSupporter");
  if (paramInfo.argMatches(RDD.class)) {
    // Yay, our target data format is an RDD!
    RDD<?> rdd = null;

    if (paramInfo.paramMatches(LabeledPoint.class)) {
      rdd = (RDD<LabeledPoint>) this.getDDF().getRepresentationHandler().get(RDD.class, LabeledPoint.class);

    } else if (paramInfo.paramMatches(Vector.class)) {
      rdd = (RDD<Vector>) this.getDDF().getRepresentationHandler().get(RDD.class, Vector.class);
    } else if (paramInfo.paramMatches(double[].class)) {
      rdd = (RDD<double[]>) this.getDDF().getRepresentationHandler().get(RDD.class, double[].class);
    } else if (paramInfo.paramMatches(io.ddf.types.Vector.class)) {
      rdd = (RDD<io.ddf.types.Vector>) this.getDDF().getRepresentationHandler()
          .get(RDD.class, io.ddf.types.Vector.class);
    } else if (paramInfo.paramMatches(TupleMatrixVector.class)) {
      rdd = (RDD<TupleMatrixVector>) this.getDDF().getRepresentationHandler().get(RDD.class, TupleMatrixVector.class);
    } else if (paramInfo.paramMatches(Rating.class)) {
      rdd = (RDD<Rating>) this.getDDF().getRepresentationHandler().get(RDD.class, Rating.class);
    }
    //      else if (paramInfo.paramMatches(TablePartition.class)) {
    //        rdd = (RDD<TablePartition>) this.getDDF().getRepresentationHandler().get(RDD.class, TablePartition.class);
    //      }
    else if (paramInfo.paramMatches(Object.class)) {
      rdd = (RDD<Object[]>) this.getDDF().getRepresentationHandler().get(RDD.class, Object[].class);
    }

    return rdd;
  } else {
    return super.convertDDF(paramInfo);
  }
}

Source File: JavaALS.java From SparkDemo with MIT License

5 votes

@Override
public Rating call(String line) {
  String[] tok = COMMA.split(line);
  int x = Integer.parseInt(tok[0]);
  int y = Integer.parseInt(tok[1]);
  double rating = Double.parseDouble(tok[2]);
  return new Rating(x, y, rating);
}

Source File: CollabFilterCassandra8.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

4 votes

public MatrixFactorizationModel train(JavaSparkContext sparkCtx, CassandraConnector cassandraConnector) {
	CassandraJavaRDD<CassandraRow> trainingRdd = javaFunctions(sparkCtx).cassandraTable(RatingDO.EMPLOYERRATINGS_KEYSPACE, RatingDO.RATINGS_TABLE);
	JavaRDD<Rating> trainingJavaRdd = trainingRdd.map(trainingRow -> new Rating(trainingRow.getInt(RatingDO.USER_COL), trainingRow.getInt(RatingDO.PRODUCT_COL), trainingRow.getDouble(RatingDO.RATING_COL)));
	MatrixFactorizationModel model = ALS.train(JavaRDD.toRDD(trainingJavaRdd), RANK, ITER, LAMBDA);
	return model;
}

Source File: CollabFilterCassandra8.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

4 votes

public JavaRDD<Rating> predict(MatrixFactorizationModel model, CassandraJavaRDD<CassandraRow> validationsCassRdd) {
	RDD<Tuple2<Object, Object>> validationsRdd = JavaRDD.toRDD(validationsCassRdd.map(validationRow -> new Tuple2<Object, Object>(validationRow.getInt(RatingDO.USER_COL), validationRow.getInt(RatingDO.PRODUCT_COL))));
	JavaRDD<Rating> predictionJavaRdd = model.predict(validationsRdd).toJavaRDD();
	return predictionJavaRdd;
}

Source File: CollabFilterCassandra8.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

4 votes

public String resultsReport(JavaRDD<Rating> predJavaRdd, CassandraJavaRDD<CassandraRow> validationsCassRdd, double rmse) {
	return "User\tProduct\tPredicted\tActual\tError?\n" + predictionString(predJavaRdd, validationsCassRdd) + "\n" + "RMSE = " + Util.round(rmse, 2);
}

Source File: ALSUpdate.java From oryx with Apache License 2.0

4 votes

@Override
public double evaluate(JavaSparkContext sparkContext,
                       PMML model,
                       Path modelParentPath,
                       JavaRDD<String> testData,
                       JavaRDD<String> trainData) {

  JavaRDD<String[]> parsedTestRDD = testData.map(MLFunctions.PARSE_FN);
  parsedTestRDD.cache();

  Map<String,Integer> userIDToIndex = buildIDIndexOneWayMap(model, parsedTestRDD, true);
  Map<String,Integer> itemIDToIndex = buildIDIndexOneWayMap(model, parsedTestRDD, false);

  log.info("Broadcasting ID-index mappings for {} users, {} items",
           userIDToIndex.size(), itemIDToIndex.size());

  Broadcast<Map<String,Integer>> bUserIDToIndex = sparkContext.broadcast(userIDToIndex);
  Broadcast<Map<String,Integer>> bItemIDToIndex = sparkContext.broadcast(itemIDToIndex);

  JavaRDD<Rating> testRatingData = parsedToRatingRDD(parsedTestRDD, bUserIDToIndex, bItemIDToIndex);
  double epsilon = Double.NaN;
  if (logStrength) {
    epsilon = Double.parseDouble(AppPMMLUtils.getExtensionValue(model, "epsilon"));
  }
  testRatingData = aggregateScores(testRatingData, epsilon);

  MatrixFactorizationModel mfModel =
      pmmlToMFModel(sparkContext, model, modelParentPath, bUserIDToIndex, bItemIDToIndex);

  parsedTestRDD.unpersist();

  double eval;
  if (implicit) {
    double auc = Evaluation.areaUnderCurve(sparkContext, mfModel, testRatingData);
    log.info("AUC: {}", auc);
    eval = auc;
  } else {
    double rmse = Evaluation.rmse(mfModel, testRatingData);
    log.info("RMSE: {}", rmse);
    eval = -rmse;
  }
  unpersist(mfModel);

  bUserIDToIndex.unpersist();
  bItemIDToIndex.unpersist();

  return eval;
}

Source File: ALSUpdate.java From oryx with Apache License 2.0

4 votes

@Override
public PMML buildModel(JavaSparkContext sparkContext,
                       JavaRDD<String> trainData,
                       List<?> hyperParameters,
                       Path candidatePath) {
  int features = (Integer) hyperParameters.get(0);
  double lambda = (Double) hyperParameters.get(1);
  double alpha = (Double) hyperParameters.get(2);
  double epsilon = Double.NaN;
  if (logStrength) {
    epsilon = (Double) hyperParameters.get(3);
  }
  Preconditions.checkArgument(features > 0);
  Preconditions.checkArgument(lambda >= 0.0);
  Preconditions.checkArgument(alpha > 0.0);
  if (logStrength) {
    Preconditions.checkArgument(epsilon > 0.0);
  }

  JavaRDD<String[]> parsedRDD = trainData.map(MLFunctions.PARSE_FN);
  parsedRDD.cache();

  Map<String,Integer> userIDIndexMap = buildIDIndexMapping(parsedRDD, true);
  Map<String,Integer> itemIDIndexMap = buildIDIndexMapping(parsedRDD, false);

  log.info("Broadcasting ID-index mappings for {} users, {} items",
           userIDIndexMap.size(), itemIDIndexMap.size());

  Broadcast<Map<String,Integer>> bUserIDToIndex = sparkContext.broadcast(userIDIndexMap);
  Broadcast<Map<String,Integer>> bItemIDToIndex = sparkContext.broadcast(itemIDIndexMap);

  JavaRDD<Rating> trainRatingData = parsedToRatingRDD(parsedRDD, bUserIDToIndex, bItemIDToIndex);
  trainRatingData = aggregateScores(trainRatingData, epsilon);
  ALS als = new ALS()
      .setRank(features)
      .setIterations(iterations)
      .setLambda(lambda)
      .setCheckpointInterval(5);
  if (implicit) {
    als = als.setImplicitPrefs(true).setAlpha(alpha);
  }

  RDD<Rating> trainingRatingDataRDD = trainRatingData.rdd();
  trainingRatingDataRDD.cache();
  MatrixFactorizationModel model = als.run(trainingRatingDataRDD);
  trainingRatingDataRDD.unpersist(false);

  bUserIDToIndex.unpersist();
  bItemIDToIndex.unpersist();

  parsedRDD.unpersist();

  Broadcast<Map<Integer,String>> bUserIndexToID = sparkContext.broadcast(invertMap(userIDIndexMap));
  Broadcast<Map<Integer,String>> bItemIndexToID = sparkContext.broadcast(invertMap(itemIDIndexMap));

  PMML pmml = mfModelToPMML(model,
                            features,
                            lambda,
                            alpha,
                            epsilon,
                            implicit,
                            logStrength,
                            candidatePath,
                            bUserIndexToID,
                            bItemIndexToID);
  unpersist(model);

  bUserIndexToID.unpersist();
  bItemIndexToID.unpersist();

  return pmml;
}

Source File: MLMetricsSupporter.java From DDF with Apache License 2.0

4 votes

public double rmse(DDF predictedDDF, boolean implicitPrefs) throws DDFException {
  RDD<Rating> predictions = (RDD<Rating>) predictedDDF.getRepresentationHandler().get(RDD.class, Rating.class);
  RDD<Rating> ratings = (RDD<Rating>) this.getDDF().getRepresentationHandler().get(RDD.class, Rating.class);
  return new ROCComputer().computeRmse(ratings, predictions, false);
}

Source File: CollabFilterCassandra7.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

4 votes

@Override
public String resultsReport(JavaRDD<Rating> predJavaRdd, CassandraJavaRDD<CassandraRow> validationsCassRdd, double rmse) {
	return "User\tProduct\tPredicted\tActual\tError?\n" + predictionString(predJavaRdd, validationsCassRdd) + "\n" + "RMSE = " + Util.round(rmse, 2);
	
}

Source File: ICollabFilterCassandra.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

votes

JavaRDD<Rating> predict(MatrixFactorizationModel model, CassandraJavaRDD<CassandraRow> validationsCassRdd);

Source File: ICollabFilterCassandra.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

votes

double validate(JavaRDD<Rating> predictionJavaRdd, CassandraJavaRDD<CassandraRow> validationsCassRdd);

Source File: ICollabFilterCassandra.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

votes

public abstract String resultsReport(JavaRDD<Rating> predJavaRdd, CassandraJavaRDD<CassandraRow> validationsCassRdd, double rmse);

org.apache.spark.mllib.recommendation.Rating Java Examples