org.apache.spark.mllib.recommendation.ALS Java Examples
The following examples show how to use org.apache.spark.mllib.recommendation.ALS, MLlib's RDD-based alternating least squares (ALS) implementation for collaborative filtering, from Java.
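Before the community examples, here is a minimal quick-start sketch (not taken from any of the projects below) showing the basic call pattern: parse ratings into Rating objects, train with ALS.train, and score a single user/product pair. The class name, input path, rank, iteration count, and regularization value are all illustrative.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.mllib.recommendation.ALS;
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel;
import org.apache.spark.mllib.recommendation.Rating;

public class ALSQuickStart {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("ALSQuickStart");
    JavaSparkContext sc = new JavaSparkContext(conf);

    // Parse "user,product,rating" lines into Rating objects (path is illustrative).
    JavaRDD<Rating> ratings = sc.textFile("data/ratings.csv").map(line -> {
      String[] parts = line.split(",");
      return new Rating(Integer.parseInt(parts[0]),
                        Integer.parseInt(parts[1]),
                        Double.parseDouble(parts[2]));
    });

    // Train a matrix factorization model; rank, iterations, and lambda are illustrative.
    MatrixFactorizationModel model = ALS.train(ratings.rdd(), 10, 10, 0.01);

    // Score a single (user, product) pair.
    double score = model.predict(1, 42);
    System.out.println("Predicted rating: " + score);

    sc.stop();
  }
}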
Example #1
Source File: JavaALS.java From SparkDemo with MIT License
public static void main(String[] args) {
  if (args.length < 4) {
    System.err.println(
        "Usage: JavaALS <ratings_file> <rank> <iterations> <output_dir> [<blocks>]");
    System.exit(1);
  }
  SparkConf sparkConf = new SparkConf().setAppName("JavaALS");
  int rank = Integer.parseInt(args[1]);
  int iterations = Integer.parseInt(args[2]);
  String outputDir = args[3];
  int blocks = -1;
  if (args.length == 5) {
    blocks = Integer.parseInt(args[4]);
  }
  JavaSparkContext sc = new JavaSparkContext(sparkConf);
  JavaRDD<String> lines = sc.textFile(args[0]);
  JavaRDD<Rating> ratings = lines.map(new ParseRating());
  MatrixFactorizationModel model = ALS.train(ratings.rdd(), rank, iterations, 0.01, blocks);
  model.userFeatures().toJavaRDD().map(new FeaturesToString()).saveAsTextFile(
      outputDir + "/userFeatures");
  model.productFeatures().toJavaRDD().map(new FeaturesToString()).saveAsTextFile(
      outputDir + "/productFeatures");
  System.out.println("Final user/product features written to " + outputDir);
  sc.stop();
}
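Here the blocks argument stays at -1 unless a fifth command-line argument is given; -1 tells MLlib's ALS to auto-configure the level of parallelism. The regularization parameter is fixed at 0.01, and ParseRating and FeaturesToString are helper mappers defined elsewhere in JavaALS.java.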
Example #2
Source File: CollabFilterCassandra7.java From Spark-Cassandra-Collabfiltering with Apache License 2.0
public MatrixFactorizationModel train(JavaSparkContext sparkCtx, CassandraConnector cassandraConnector) {
  CassandraJavaRDD<CassandraRow> trainingRdd =
      javaFunctions(sparkCtx).cassandraTable(RatingDO.EMPLOYERRATINGS_KEYSPACE, RatingDO.RATINGS_TABLE);
  JavaRDD<Rating> trainingJavaRdd =
      trainingRdd.map(new org.apache.spark.api.java.function.Function<CassandraRow, Rating>() {
        @Override
        public Rating call(CassandraRow trainingRow) throws Exception {
          return new Rating(trainingRow.getInt(RatingDO.USER_COL),
                            trainingRow.getInt(RatingDO.PRODUCT_COL),
                            trainingRow.getDouble(RatingDO.RATING_COL));
        }
      });
  MatrixFactorizationModel model = ALS.train(JavaRDD.toRDD(trainingJavaRdd), RANK, ITER, LAMBDA);
  return model;
}
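This example and Example #4 below build the same model from a Cassandra ratings table; the only difference is that this one uses a Java 7 anonymous Function while Example #4 uses a Java 8 lambda. A common follow-up, not shown in the original source, is to check the fit by predicting ratings for the known (user, product) pairs and comparing them with the stored values. A minimal sketch, assuming Java 8; the class and method names are illustrative:

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel;
import org.apache.spark.mllib.recommendation.Rating;
import scala.Tuple2;

public class AlsEvaluation {
  // Mean squared error of the model on a set of known ratings.
  public static double meanSquaredError(MatrixFactorizationModel model, JavaRDD<Rating> ratings) {
    // (user, product) pairs to score.
    JavaPairRDD<Integer, Integer> userProducts =
        ratings.mapToPair(r -> new Tuple2<>(r.user(), r.product()));

    // Predicted ratings keyed by (user, product).
    JavaPairRDD<Tuple2<Integer, Integer>, Double> predictions =
        model.predict(userProducts)
             .mapToPair(r -> new Tuple2<>(new Tuple2<>(r.user(), r.product()), r.rating()));

    // Actual ratings keyed the same way.
    JavaPairRDD<Tuple2<Integer, Integer>, Double> actuals =
        ratings.mapToPair(r -> new Tuple2<>(new Tuple2<>(r.user(), r.product()), r.rating()));

    // Join actual and predicted values and average the squared differences.
    return actuals.join(predictions)
                  .values()
                  .mapToDouble(pair -> {
                    double err = pair._1() - pair._2();
                    return err * err;
                  })
                  .mean();
  }
}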
Example #3
Source File: ALSUpdate.java From oryx with Apache License 2.0
@Override
public PMML buildModel(JavaSparkContext sparkContext,
                       JavaRDD<String> trainData,
                       List<?> hyperParameters,
                       Path candidatePath) {
  int features = (Integer) hyperParameters.get(0);
  double lambda = (Double) hyperParameters.get(1);
  double alpha = (Double) hyperParameters.get(2);
  double epsilon = Double.NaN;
  if (logStrength) {
    epsilon = (Double) hyperParameters.get(3);
  }
  Preconditions.checkArgument(features > 0);
  Preconditions.checkArgument(lambda >= 0.0);
  Preconditions.checkArgument(alpha > 0.0);
  if (logStrength) {
    Preconditions.checkArgument(epsilon > 0.0);
  }

  JavaRDD<String[]> parsedRDD = trainData.map(MLFunctions.PARSE_FN);
  parsedRDD.cache();

  Map<String,Integer> userIDIndexMap = buildIDIndexMapping(parsedRDD, true);
  Map<String,Integer> itemIDIndexMap = buildIDIndexMapping(parsedRDD, false);

  log.info("Broadcasting ID-index mappings for {} users, {} items",
           userIDIndexMap.size(), itemIDIndexMap.size());

  Broadcast<Map<String,Integer>> bUserIDToIndex = sparkContext.broadcast(userIDIndexMap);
  Broadcast<Map<String,Integer>> bItemIDToIndex = sparkContext.broadcast(itemIDIndexMap);

  JavaRDD<Rating> trainRatingData = parsedToRatingRDD(parsedRDD, bUserIDToIndex, bItemIDToIndex);
  trainRatingData = aggregateScores(trainRatingData, epsilon);

  ALS als = new ALS()
      .setRank(features)
      .setIterations(iterations)
      .setLambda(lambda)
      .setCheckpointInterval(5);
  if (implicit) {
    als = als.setImplicitPrefs(true).setAlpha(alpha);
  }

  RDD<Rating> trainingRatingDataRDD = trainRatingData.rdd();
  trainingRatingDataRDD.cache();
  MatrixFactorizationModel model = als.run(trainingRatingDataRDD);
  trainingRatingDataRDD.unpersist(false);

  bUserIDToIndex.unpersist();
  bItemIDToIndex.unpersist();
  parsedRDD.unpersist();

  Broadcast<Map<Integer,String>> bUserIndexToID = sparkContext.broadcast(invertMap(userIDIndexMap));
  Broadcast<Map<Integer,String>> bItemIndexToID = sparkContext.broadcast(invertMap(itemIDIndexMap));

  PMML pmml = mfModelToPMML(model, features, lambda, alpha, epsilon, implicit, logStrength,
                            candidatePath, bUserIndexToID, bItemIndexToID);
  unpersist(model);

  bUserIndexToID.unpersist();
  bItemIndexToID.unpersist();
  return pmml;
}
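This is the batch-layer model build from Oryx: hyperparameters arrive as a generic list, string user and item IDs are mapped to integers (MLlib's RDD-based ALS requires int IDs) and broadcast to the executors, the builder-style ALS is optionally switched to implicit-feedback mode via setImplicitPrefs(true)/setAlpha, a checkpoint interval keeps the iterative lineage short, and the resulting factors are written out as PMML before all cached and broadcast data is released. The iterations, implicit, and logStrength values are fields of the enclosing ALSUpdate class.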
Example #4
Source File: CollabFilterCassandra8.java From Spark-Cassandra-Collabfiltering with Apache License 2.0
public MatrixFactorizationModel train(JavaSparkContext sparkCtx, CassandraConnector cassandraConnector) {
  CassandraJavaRDD<CassandraRow> trainingRdd =
      javaFunctions(sparkCtx).cassandraTable(RatingDO.EMPLOYERRATINGS_KEYSPACE, RatingDO.RATINGS_TABLE);
  JavaRDD<Rating> trainingJavaRdd =
      trainingRdd.map(trainingRow -> new Rating(trainingRow.getInt(RatingDO.USER_COL),
                                                trainingRow.getInt(RatingDO.PRODUCT_COL),
                                                trainingRow.getDouble(RatingDO.RATING_COL)));
  MatrixFactorizationModel model = ALS.train(JavaRDD.toRDD(trainingJavaRdd), RANK, ITER, LAMBDA);
  return model;
}
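Once any of these train methods returns, the MatrixFactorizationModel can be queried directly. A minimal follow-up sketch, not part of the original project; the class name, user ID, product ID, and recommendation count are illustrative.

import org.apache.spark.mllib.recommendation.MatrixFactorizationModel;
import org.apache.spark.mllib.recommendation.Rating;

public class RecommendExample {
  // `model` is the MatrixFactorizationModel returned by one of the train(...) methods above.
  public static void printTopProducts(MatrixFactorizationModel model, int userId) {
    // Predicted preference of this user for a single product.
    double score = model.predict(userId, 42);
    System.out.println("Predicted rating for product 42: " + score);

    // Top-5 products for this user, highest predicted rating first.
    for (Rating r : model.recommendProducts(userId, 5)) {
      System.out.println(r.product() + " -> " + r.rating());
    }
  }
}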