com.datastax.spark.connector.japi.CassandraRow Java Examples
The following examples show how to use com.datastax.spark.connector.japi.CassandraRow. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
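CassandraRow is a read-only view of a row returned by the Spark Cassandra Connector: you scan a table into a CassandraJavaRDD<CassandraRow> with javaFunctions(...).cassandraTable(...) and then read typed columns with getters such as getInt, getString, getLong, getBytes, or getObject. As a minimal, hypothetical quick start (not taken from any of the projects below), the sketch assumes a local Spark master, a Cassandra node at 127.0.0.1, and a keyspace ks with a table users(id int, name text); replace those placeholders with your own setup.

import static com.datastax.spark.connector.japi.CassandraJavaUtil.javaFunctions;

import com.datastax.spark.connector.japi.CassandraRow;
import com.datastax.spark.connector.japi.rdd.CassandraJavaRDD;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class CassandraRowQuickStart {
  public static void main(String[] args) {
    // Hypothetical local setup; requires the spark-cassandra-connector dependency
    // and a reachable Cassandra node. Adjust the host to your environment.
    SparkConf conf = new SparkConf()
        .setAppName("cassandra-row-quickstart")
        .setMaster("local[*]")
        .set("spark.cassandra.connection.host", "127.0.0.1");
    JavaSparkContext sc = new JavaSparkContext(conf);

    // Scan a table into an RDD of CassandraRow (keyspace and table names are assumptions).
    CassandraJavaRDD<CassandraRow> rows = javaFunctions(sc).cassandraTable("ks", "users");

    // Read typed column values by name, as the examples below do with getInt/getString.
    JavaRDD<String> names = rows.map(row -> row.getInt("id") + ": " + row.getString("name"));
    names.collect().forEach(System.out::println);

    sc.stop();
  }
}

The examples that follow use the same pattern in two contexts: the Spark-Cassandra-Collabfiltering code maps CassandraRow values into MLlib Rating objects for ALS training and validation, while the zipkin-dependencies code decodes rows into spans to build dependency links.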
Example #1
Source File: CassandraRowToSpan.java From zipkin-dependencies with Apache License 2.0

private Endpoint readEndpoint(CassandraRow row, String name) {
  if (!inTest) {
    return readEndpoint(row.getUDTValue(name));
  }
  // UDT type doesn't work in tests
  // Caused by: com.datastax.spark.connector.types.TypeConversionException: Cannot convert object
  // zipkin2.storage.cassandra.Schema$EndpointUDT@67a3fdf8 of type class zipkin2.storage.cassandra.Schema$EndpointUDT
  // to com.datastax.spark.connector.japi.UDTValue.
  return readEndpoint(row.getObject(name));
}
Example #2
Source File: CollabFilterCassandraDriver.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

double trainAndValidate(int version) throws InstantiationException, IllegalAccessException, ClassNotFoundException {
  final ICollabFilterCassandra cfc;
  String className = "collabfilter.CollabFilterCassandra" + version;
  cfc = (ICollabFilterCassandra) Class.forName(className).newInstance();
  try (Session session = this.cassandraConnector.openSession()) {
    MatrixFactorizationModel model = cfc.train(this.sparkCtx, this.cassandraConnector);
    CassandraJavaRDD<CassandraRow> validationsCassRdd = javaFunctions(this.sparkCtx).cassandraTable(RatingDO.EMPLOYERRATINGS_KEYSPACE, RatingDO.VALIDATION_TABLE);
    JavaRDD<Rating> predictionJavaRdd = cfc.predict(model, validationsCassRdd);
    double rmse = cfc.validate(predictionJavaRdd, validationsCassRdd);
    System.out.println(cfc.resultsReport(predictionJavaRdd, validationsCassRdd, rmse));
    return rmse;
  }
}
Example #3
Source File: CollabFilterCassandra7.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

public double validate(JavaRDD<Rating> predictionJavaRdd, CassandraJavaRDD<CassandraRow> validationsCassRdd) {
  JavaPairRDD<Tuple2<Integer, Integer>, Double> predictionsJavaPairs = JavaPairRDD.fromJavaRDD(predictionJavaRdd.map(new org.apache.spark.api.java.function.Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() {
    @Override
    public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating pred) throws Exception {
      return new Tuple2<Tuple2<Integer, Integer>, Double>(new Tuple2<Integer, Integer>(pred.user(), pred.product()), pred.rating());
    }//
  }));
  JavaRDD<Rating> validationRatings = validationsCassRdd.map(new org.apache.spark.api.java.function.Function<CassandraRow, Rating>() {
    @Override
    public Rating call(CassandraRow validation) throws Exception {
      return new Rating(validation.getInt(RatingDO.USER_COL), validation.getInt(RatingDO.PRODUCT_COL), validation.getInt(RatingDO.RATING_COL));
    }
  });
  JavaRDD<Tuple2<Double, Double>> validationAndPredictions = JavaPairRDD.fromJavaRDD(validationRatings.map(new org.apache.spark.api.java.function.Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() {
    @Override
    public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating validationRating) throws Exception {
      return new Tuple2<Tuple2<Integer, Integer>, Double>(new Tuple2<Integer, Integer>(validationRating.user(), validationRating.product()), validationRating.rating());
    }
  })).join(predictionsJavaPairs).values();
  double meanSquaredError = JavaDoubleRDD.fromRDD(validationAndPredictions.map(new org.apache.spark.api.java.function.Function<Tuple2<Double, Double>, Object>() {
    @Override
    public Object call(Tuple2<Double, Double> pair) throws Exception {
      Double err = pair._1() - pair._2();
      return (Object) (err * err); // No covariance! Need to cast
    }
  }).rdd()).mean();
  double rmse = Math.sqrt(meanSquaredError);
  return rmse;
}
Example #4
Source File: CollabFilterCassandra7.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

public JavaRDD<Rating> predict(MatrixFactorizationModel model, CassandraJavaRDD<CassandraRow> validationsCassRdd) {
  RDD<Tuple2<Object, Object>> validationsRdd = JavaRDD.toRDD(validationsCassRdd.map(new org.apache.spark.api.java.function.Function<CassandraRow, Tuple2<Object, Object>>() {
    @Override
    public Tuple2<Object, Object> call(CassandraRow validationRow) throws Exception {
      return new Tuple2<Object, Object>(validationRow.getInt(RatingDO.USER_COL), validationRow.getInt(RatingDO.PRODUCT_COL));
    }
  }));
  JavaRDD<Rating> predictionJavaRdd = model.predict(validationsRdd).toJavaRDD();
  return predictionJavaRdd;
}
Example #5
Source File: CollabFilterCassandra7.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

public MatrixFactorizationModel train(JavaSparkContext sparkCtx, CassandraConnector cassandraConnector) {
  CassandraJavaRDD<CassandraRow> trainingRdd = javaFunctions(sparkCtx).cassandraTable(RatingDO.EMPLOYERRATINGS_KEYSPACE, RatingDO.RATINGS_TABLE);
  JavaRDD<Rating> trainingJavaRdd = trainingRdd.map(new org.apache.spark.api.java.function.Function<CassandraRow, Rating>() {
    @Override
    public Rating call(CassandraRow trainingRow) throws Exception {
      return new Rating(trainingRow.getInt(RatingDO.USER_COL), trainingRow.getInt(RatingDO.PRODUCT_COL), trainingRow.getDouble(RatingDO.RATING_COL));
    }
  });
  MatrixFactorizationModel model = ALS.train(JavaRDD.toRDD(trainingJavaRdd), RANK, ITER, LAMBDA);
  return model;
}
Example #6
Source File: CollabFilterCassandra8.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

private String predictionString(JavaRDD<Rating> predJavaRdd, CassandraJavaRDD<CassandraRow> validationsCassRdd) {
  java.util.function.Function<CassandraRow, Tuple2<Integer, Integer>> keyMapper = validationRow -> new Tuple2<Integer, Integer>(validationRow.getInt(RatingDO.USER_COL), validationRow.getInt(RatingDO.PRODUCT_COL));
  java.util.function.Function<CassandraRow, Double> valueMapper = validationRow -> validationRow.getDouble(RatingDO.RATING_COL);
  java.util.Map<Tuple2<Integer, Integer>, Double> validationMap = validationsCassRdd.collect().stream().collect(Collectors.toMap(keyMapper, valueMapper));

  java.util.function.Function<Rating, String> stringMapper = prediction -> {
    double validationRating = validationMap.get(new Tuple2<Integer, Integer>(prediction.user(), prediction.product()));
    String errWarningString = Math.abs(validationRating - prediction.rating()) >= 1 ? "ERR" : "OK";
    return prediction.user() + "\t" + prediction.product() + "\t" + Util.round(prediction.rating()) + "\t\t" + Util.round(validationRating) + "\t" + errWarningString;
  };

  Stream<Rating> sortedPredictions = predJavaRdd.collect().stream().sorted((o1, o2) -> o1.user() == o2.user() ? o1.product() - o2.product() : o1.user() - o2.user());
  String ret = sortedPredictions.map(stringMapper).collect(Collectors.joining("\n"));
  return ret;
}
Example #7
Source File: CollabFilterCassandra8.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

public double validate(JavaRDD<Rating> predictionJavaRdd, CassandraJavaRDD<CassandraRow> validationsCassRdd) {
  JavaPairRDD<Tuple2<Integer, Integer>, Double> predictionsJavaPairs = JavaPairRDD.fromJavaRDD(predictionJavaRdd.map(pred -> new Tuple2<Tuple2<Integer, Integer>, Double>(new Tuple2<Integer, Integer>(pred.user(), pred.product()), pred.rating())));
  JavaRDD<Rating> validationRatings = validationsCassRdd.map(validation -> new Rating(validation.getInt(RatingDO.USER_COL), validation.getInt(RatingDO.PRODUCT_COL), validation.getInt(RatingDO.RATING_COL)));
  JavaRDD<Tuple2<Double, Double>> validationAndPredictions = JavaPairRDD.fromJavaRDD(validationRatings.map(validationRating -> new Tuple2<Tuple2<Integer, Integer>, Double>(new Tuple2<Integer, Integer>(validationRating.user(), validationRating.product()), validationRating.rating()))).join(predictionsJavaPairs).values();
  double meanSquaredError = JavaDoubleRDD.fromRDD(validationAndPredictions.map(pair -> {
    Double err = pair._1() - pair._2();
    return (Object) (err * err); // No covariance! Need to cast to Object
  }).rdd()).mean();
  double rmse = Math.sqrt(meanSquaredError);
  return rmse;
}
Example #8
Source File: CassandraRowsToDependencyLinks.java From zipkin-dependencies with Apache License 2.0

@Override
public Iterable<DependencyLink> call(Iterable<CassandraRow> rows) {
  if (logInitializer != null) logInitializer.run();
  V1ThriftSpanReader reader = V1ThriftSpanReader.create();
  V1SpanConverter converter = V1SpanConverter.create();
  List<Span> sameTraceId = new ArrayList<>();
  for (CassandraRow row : rows) {
    try {
      V1Span v1Span = reader.read(ReadBuffer.wrapUnsafe(row.getBytes("span")));
      for (Span span : converter.convert(v1Span)) {
        // check to see if the trace is within the interval
        if (span.parentId() == null) {
          long timestamp = span.timestampAsLong();
          if (timestamp == 0 || timestamp < startTs || timestamp > endTs) {
            return Collections.emptyList();
          }
        }
        sameTraceId.add(span);
      }
    } catch (RuntimeException e) {
      log.warn(
          String.format(
              "Unable to decode span from traces where trace_id=%s and ts=%s and span_name='%s'",
              row.getLong("trace_id"), row.getDate("ts").getTime(), row.getString("span_name")),
          e);
    }
  }
  return new DependencyLinker().putTrace(sameTraceId).link();
}
Example #9
Source File: CassandraRowsToDependencyLinks.java From zipkin-dependencies with Apache License 2.0

@Override
public Iterable<DependencyLink> call(Iterable<CassandraRow> rows) {
  if (logInitializer != null) logInitializer.run();
  // use a hash set to dedupe any redundantly accepted spans
  Set<Span> sameTraceId = new LinkedHashSet<>();
  for (CassandraRow row : rows) {
    Span span = cassandraRowToSpan.call(row);
    sameTraceId.add(span);
  }
  return spansToDependencyLinks.call(sameTraceId);
}
Example #10
Source File: CassandraDependenciesJob.java From zipkin-dependencies with Apache License 2.0

JavaPairRDD<String, DependencyLink> flatMapToLinksByTraceId(
    CassandraTableScanJavaRDD<CassandraRow> spans, long microsUpper, long microsLower, boolean inTest) {
  if (strictTraceId) {
    return spans.spanBy(r -> r.getString("trace_id"), String.class)
        .flatMapValues(
            new CassandraRowsToDependencyLinks(logInitializer, microsLower, microsUpper, inTest));
  }
  return spans.map(new CassandraRowToSpan(inTest))
      .groupBy(Span::traceId) // groupBy instead of spanBy because trace_id is mixed length
      .flatMapValues(new SpansToDependencyLinks(logInitializer, microsLower, microsUpper));
}
Example #11
Source File: CassandraRowToSpan.java From zipkin-dependencies with Apache License 2.0

@Override
public Span call(CassandraRow row) {
  String traceId = CassandraDependenciesJob.traceId(row), spanId = row.getString("id");
  Span.Builder builder = Span.newBuilder()
      .traceId(traceId)
      .parentId(row.getString("parent_id"))
      .id(spanId)
      .timestamp(row.getLong("ts"))
      .shared(row.getBoolean("shared"));

  Map<String, String> tags = row.getMap(
      "tags", TypeConverter.StringConverter$.MODULE$, TypeConverter.StringConverter$.MODULE$);
  String error = tags.get("error");
  if (error != null) builder.putTag("error", error);
  String kind = row.getString("kind");
  if (kind != null) {
    try {
      builder.kind(Span.Kind.valueOf(kind));
    } catch (IllegalArgumentException ignored) {
      log.debug("couldn't parse kind {} in span {}/{}", kind, traceId, spanId);
    }
  }
  Endpoint localEndpoint = readEndpoint(row, "l_ep");
  if (localEndpoint != null) {
    builder.localEndpoint(localEndpoint);
  }
  Endpoint remoteEndpoint = readEndpoint(row, "r_ep");
  if (remoteEndpoint != null) {
    builder.remoteEndpoint(remoteEndpoint);
  }
  return builder.build();
}
Example #12
Source File: CollabFilterCassandra8.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

public MatrixFactorizationModel train(JavaSparkContext sparkCtx, CassandraConnector cassandraConnector) {
  CassandraJavaRDD<CassandraRow> trainingRdd = javaFunctions(sparkCtx).cassandraTable(RatingDO.EMPLOYERRATINGS_KEYSPACE, RatingDO.RATINGS_TABLE);
  JavaRDD<Rating> trainingJavaRdd = trainingRdd.map(trainingRow -> new Rating(trainingRow.getInt(RatingDO.USER_COL), trainingRow.getInt(RatingDO.PRODUCT_COL), trainingRow.getDouble(RatingDO.RATING_COL)));
  MatrixFactorizationModel model = ALS.train(JavaRDD.toRDD(trainingJavaRdd), RANK, ITER, LAMBDA);
  return model;
}
Example #13
Source File: CollabFilterCassandra8.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

public JavaRDD<Rating> predict(MatrixFactorizationModel model, CassandraJavaRDD<CassandraRow> validationsCassRdd) {
  RDD<Tuple2<Object, Object>> validationsRdd = JavaRDD.toRDD(validationsCassRdd.map(validationRow -> new Tuple2<Object, Object>(validationRow.getInt(RatingDO.USER_COL), validationRow.getInt(RatingDO.PRODUCT_COL))));
  JavaRDD<Rating> predictionJavaRdd = model.predict(validationsRdd).toJavaRDD();
  return predictionJavaRdd;
}
Example #14
Source File: CollabFilterCassandra8.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

public String resultsReport(JavaRDD<Rating> predJavaRdd, CassandraJavaRDD<CassandraRow> validationsCassRdd, double rmse) {
  return "User\tProduct\tPredicted\tActual\tError?\n"
      + predictionString(predJavaRdd, validationsCassRdd) + "\n"
      + "RMSE = " + Util.round(rmse, 2);
}
Example #15
Source File: CollabFilterCassandra7.java From Spark-Cassandra-Collabfiltering with Apache License 2.0

@Override
public String resultsReport(JavaRDD<Rating> predJavaRdd, CassandraJavaRDD<CassandraRow> validationsCassRdd, double rmse) {
  return "User\tProduct\tPredicted\tActual\tError?\n"
      + predictionString(predJavaRdd, validationsCassRdd) + "\n"
      + "RMSE = " + Util.round(rmse, 2);
}
Example #16
Source File: ICollabFilterCassandra.java From Spark-Cassandra-Collabfiltering with Apache License 2.0
public abstract String resultsReport(JavaRDD<Rating> predJavaRdd, CassandraJavaRDD<CassandraRow> validationsCassRdd, double rmse);
Example #17
Source File: ICollabFilterCassandra.java From Spark-Cassandra-Collabfiltering with Apache License 2.0
double validate(JavaRDD<Rating> predictionJavaRdd, CassandraJavaRDD<CassandraRow> validationsCassRdd);
Example #18
Source File: ICollabFilterCassandra.java From Spark-Cassandra-Collabfiltering with Apache License 2.0
JavaRDD<Rating> predict(MatrixFactorizationModel model, CassandraJavaRDD<CassandraRow> validationsCassRdd);