com.datastax.spark.connector.japi.CassandraJavaUtil Java Examples

The following examples show how to use com.datastax.spark.connector.japi.CassandraJavaUtil. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BatchTrafficDataProcessor.java    From lambda-arch with Apache License 2.0 6 votes vote down vote up
private void persistWindowTraffic(JavaRDD<WindowTrafficData> trafficDStream) {
    // Map Cassandra table column
    Map<String, String> columnNameMappings = new HashMap<>();
    columnNameMappings.put("routeId", "routeid");
    columnNameMappings.put("vehicleType", "vehicletype");
    columnNameMappings.put("totalCount", "totalcount");
    columnNameMappings.put("timeStamp", "timestamp");
    columnNameMappings.put("recordDate", "recorddate");

    // call CassandraStreamingJavaUtil function to save in DB
    CassandraJavaUtil.javaFunctions(trafficDStream).writerBuilder(
            "traffickeyspace",
            "window_traffic_batch",
            CassandraJavaUtil.mapToRow(WindowTrafficData.class, columnNameMappings)
    ).saveToCassandra();
}
 
Example #2
Source File: BatchTrafficDataProcessor.java    From lambda-arch with Apache License 2.0 6 votes vote down vote up
private void persistPOI(JavaRDD<POITrafficData> trafficDStream) {
    // Map Cassandra table column
    Map<String, String> columnNameMappings = new HashMap<String, String>();
    columnNameMappings.put("vehicleId", "vehicleid");
    columnNameMappings.put("distance", "distance");
    columnNameMappings.put("vehicleType", "vehicletype");
    columnNameMappings.put("timeStamp", "timestamp");

    // call CassandraStreamingJavaUtil function to save in DB
    CassandraJavaUtil.javaFunctions(trafficDStream)
            .writerBuilder(
                    "traffickeyspace",
                    "poi_traffic_batch",
                    CassandraJavaUtil.mapToRow(POITrafficData.class, columnNameMappings)
            )
            .withConstantTTL(120)//keeping data for 2 minutes
            .saveToCassandra();
}
 
Example #3
Source File: IoTTrafficDataProcessor.java    From iot-traffic-monitor with Apache License 2.0 6 votes vote down vote up
/**
 * Method to get window traffic counts of different type of vehicles for each route.
 * Window duration = 30 seconds and Slide interval = 10 seconds
 * 
 * @param filteredIotDataStream IoT data stream
 */
public void processWindowTrafficData(JavaDStream<IoTData> filteredIotDataStream) {

	// reduce by key and window (30 sec window and 10 sec slide).
	JavaPairDStream<AggregateKey, Long> countDStreamPair = filteredIotDataStream
			.mapToPair(iot -> new Tuple2<>(new AggregateKey(iot.getRouteId(), iot.getVehicleType()), 1L))
			.reduceByKeyAndWindow((a, b) -> a + b, Durations.seconds(30), Durations.seconds(10));

	// Transform to dstream of TrafficData
	JavaDStream<WindowTrafficData> trafficDStream = countDStreamPair.map(windowTrafficDataFunc);

	// Map Cassandra table column
	Map<String, String> columnNameMappings = new HashMap<String, String>();
	columnNameMappings.put("routeId", "routeid");
	columnNameMappings.put("vehicleType", "vehicletype");
	columnNameMappings.put("totalCount", "totalcount");
	columnNameMappings.put("timeStamp", "timestamp");
	columnNameMappings.put("recordDate", "recorddate");

	// call CassandraStreamingJavaUtil function to save in DB
	javaFunctions(trafficDStream).writerBuilder("traffickeyspace", "window_traffic",
			CassandraJavaUtil.mapToRow(WindowTrafficData.class, columnNameMappings)).saveToCassandra();
}
 
Example #4
Source File: BatchTrafficDataProcessor.java    From lambda-arch with Apache License 2.0 5 votes vote down vote up
private void persistTotalTraffic(JavaRDD<TotalTrafficData> trafficDStream) {
    // Map Cassandra table column
    Map<String, String> columnNameMappings = new HashMap<String, String>();
    columnNameMappings.put("routeId", "routeid");
    columnNameMappings.put("vehicleType", "vehicletype");
    columnNameMappings.put("totalCount", "totalcount");
    columnNameMappings.put("timeStamp", "timestamp");
    columnNameMappings.put("recordDate", "recorddate");

    CassandraJavaUtil.javaFunctions(trafficDStream).writerBuilder(
            "traffickeyspace",
            "total_traffic_batch",
            CassandraJavaUtil.mapToRow(TotalTrafficData.class, columnNameMappings)
    ).saveToCassandra();
}
 
Example #5
Source File: RealTimeHeatMapProcessor.java    From lambda-arch with Apache License 2.0 5 votes vote down vote up
private void save(JavaDStream<HeatMapData> heatMapStream) {
    // Map Cassandra table column
    Map<String, String> columnNameMappings = new HashMap<>();
    columnNameMappings.put("latitude", "latitude");
    columnNameMappings.put("longitude", "longitude");
    columnNameMappings.put("totalCount", "totalcount");
    columnNameMappings.put("timeStamp", "timestamp");

    // call CassandraStreamingJavaUtil function to save in DB
    javaFunctions(heatMapStream).writerBuilder(
            "traffickeyspace",
            "heat_map",
            CassandraJavaUtil.mapToRow(HeatMapData.class, columnNameMappings)
    ).saveToCassandra();
}
 
Example #6
Source File: BatchHeatMapProcessor.java    From lambda-arch with Apache License 2.0 5 votes vote down vote up
private void save(JavaRDD<HeatMapData> mapDataJavaRDD) {
    Map<String, String> columnNameMappings = new HashMap<>();
    columnNameMappings.put("latitude", "latitude");
    columnNameMappings.put("longitude", "longitude");
    columnNameMappings.put("totalCount", "totalcount");
    columnNameMappings.put("timeStamp", "timestamp");

    CassandraJavaUtil.javaFunctions(mapDataJavaRDD).writerBuilder(
            "traffickeyspace",
            "heat_map_batch",
            CassandraJavaUtil.mapToRow(HeatMapData.class, columnNameMappings)
    ).saveToCassandra();
}
 
Example #7
Source File: RealtimeTrafficDataProcessor.java    From lambda-arch with Apache License 2.0 5 votes vote down vote up
/**
 * Method to get total traffic counts of different type of vehicles for each route.
 *
 * @param filteredIotDataStream IoT data stream
 */
public void processTotalTrafficData(JavaDStream<IoTData> filteredIotDataStream) {

    // We need to get count of vehicle group by routeId and vehicleType
    JavaPairDStream<AggregateKey, Long> countDStreamPair = filteredIotDataStream
            .mapToPair(iot -> new Tuple2<>(new AggregateKey(iot.getRouteId(), iot.getVehicleType()), 1L))
            .reduceByKey((a, b) -> a + b);

    // Need to keep state for total count
    StateSpec<AggregateKey, Long, Long, Tuple2<AggregateKey, Long>> stateSpec =
            StateSpec.function(totalSumFunc).timeout(Durations.seconds(3600));

    JavaMapWithStateDStream<AggregateKey, Long, Long, Tuple2<AggregateKey, Long>> countDStreamWithStatePair =
            countDStreamPair.mapWithState(stateSpec);//maintain state for one hour

    // Transform to dstream of TrafficData
    JavaDStream<Tuple2<AggregateKey, Long>> countDStream = countDStreamWithStatePair.map(tuple2 -> tuple2);
    JavaDStream<TotalTrafficData> trafficDStream = countDStream.map(totalTrafficDataFunc);

    // Map Cassandra table column
    Map<String, String> columnNameMappings = new HashMap<String, String>();
    columnNameMappings.put("routeId", "routeid");
    columnNameMappings.put("vehicleType", "vehicletype");
    columnNameMappings.put("totalCount", "totalcount");
    columnNameMappings.put("timeStamp", "timestamp");
    columnNameMappings.put("recordDate", "recorddate");

    // call CassandraStreamingJavaUtil function to save in DB
    javaFunctions(trafficDStream).writerBuilder(
            "traffickeyspace",
            "total_traffic",
            CassandraJavaUtil.mapToRow(TotalTrafficData.class, columnNameMappings)
    ).saveToCassandra();
}
 
Example #8
Source File: RealtimeTrafficDataProcessor.java    From lambda-arch with Apache License 2.0 5 votes vote down vote up
/**
 * Method to get window traffic counts of different type of vehicles for each route.
 * Window duration = 30 seconds and Slide interval = 10 seconds
 *
 * @param filteredIotDataStream IoT data stream
 */
public void processWindowTrafficData(JavaDStream<IoTData> filteredIotDataStream) {

    // reduce by key and window (30 sec window and 10 sec slide).
    JavaPairDStream<AggregateKey, Long> countDStreamPair = filteredIotDataStream
            .mapToPair(iot -> new Tuple2<>(
                    new AggregateKey(iot.getRouteId(), iot.getVehicleType()),
                    1L
            ))
            .reduceByKeyAndWindow((a, b) -> a + b,
                    Durations.seconds(30),
                    Durations.seconds(10)
            );

    // Transform to dstream of TrafficData
    JavaDStream<WindowTrafficData> trafficDStream = countDStreamPair.map(windowTrafficDataFunc);

    // Map Cassandra table column
    Map<String, String> columnNameMappings = new HashMap<String, String>();
    columnNameMappings.put("routeId", "routeid");
    columnNameMappings.put("vehicleType", "vehicletype");
    columnNameMappings.put("totalCount", "totalcount");
    columnNameMappings.put("timeStamp", "timestamp");
    columnNameMappings.put("recordDate", "recorddate");

    // call CassandraStreamingJavaUtil function to save in DB
    javaFunctions(trafficDStream).writerBuilder(
            "traffickeyspace",
            "window_traffic",
            CassandraJavaUtil.mapToRow(WindowTrafficData.class, columnNameMappings)
    ).saveToCassandra();
}
 
Example #9
Source File: RealtimeTrafficDataProcessor.java    From lambda-arch with Apache License 2.0 5 votes vote down vote up
/**
 * Method to get the vehicles which are in radius of POI and their distance from POI.
 *
 * @param nonFilteredIotDataStream original IoT data stream
 * @param broadcastPOIValues       variable containing POI coordinates, route and vehicle types to monitor.
 */
public void processPOIData(
        JavaDStream<IoTData> nonFilteredIotDataStream,
        Broadcast<Tuple3<POIData, String, String>> broadcastPOIValues
) {

    // Filter by routeId,vehicleType and in POI range
    JavaDStream<IoTData> iotDataStreamFiltered = nonFilteredIotDataStream
            .filter(iot -> (iot.getRouteId().equals(broadcastPOIValues.value()._2())
                    && iot.getVehicleType().contains(broadcastPOIValues.value()._3())
                    && GeoDistanceCalculator.isInPOIRadius(Double.valueOf(iot.getLatitude()),
                    Double.valueOf(iot.getLongitude()), broadcastPOIValues.value()._1().getLatitude(),
                    broadcastPOIValues.value()._1().getLongitude(),
                    broadcastPOIValues.value()._1().getRadius())));

    // pair with poi
    JavaPairDStream<IoTData, POIData> poiDStreamPair = iotDataStreamFiltered.mapToPair(
            iot -> new Tuple2<>(iot, broadcastPOIValues.value()._1())
    );

    // Transform to dstream of POITrafficData
    JavaDStream<POITrafficData> trafficDStream = poiDStreamPair.map(poiTrafficDataFunc);

    // Map Cassandra table column
    Map<String, String> columnNameMappings = new HashMap<String, String>();
    columnNameMappings.put("vehicleId", "vehicleid");
    columnNameMappings.put("distance", "distance");
    columnNameMappings.put("vehicleType", "vehicletype");
    columnNameMappings.put("timeStamp", "timestamp");

    // call CassandraStreamingJavaUtil function to save in DB
    javaFunctions(trafficDStream)
            .writerBuilder(
                    "traffickeyspace",
                    "poi_traffic",
                    CassandraJavaUtil.mapToRow(POITrafficData.class, columnNameMappings)
            )
            .withConstantTTL(120)//keeping data for 2 minutes
            .saveToCassandra();
}
 
Example #10
Source File: IoTTrafficDataProcessor.java    From iot-traffic-monitor with Apache License 2.0 5 votes vote down vote up
/**
 * Method to get total traffic counts of different type of vehicles for each route.
 * 
 * @param filteredIotDataStream IoT data stream
 */
public void processTotalTrafficData(JavaDStream<IoTData> filteredIotDataStream) {

	// We need to get count of vehicle group by routeId and vehicleType
	JavaPairDStream<AggregateKey, Long> countDStreamPair = filteredIotDataStream
			.mapToPair(iot -> new Tuple2<>(new AggregateKey(iot.getRouteId(), iot.getVehicleType()), 1L))
			.reduceByKey((a, b) -> a + b);
	
	// Need to keep state for total count
	JavaMapWithStateDStream<AggregateKey, Long, Long, Tuple2<AggregateKey, Long>> countDStreamWithStatePair = countDStreamPair
			.mapWithState(StateSpec.function(totalSumFunc).timeout(Durations.seconds(3600)));//maintain state for one hour

	// Transform to dstream of TrafficData
	JavaDStream<Tuple2<AggregateKey, Long>> countDStream = countDStreamWithStatePair.map(tuple2 -> tuple2);
	JavaDStream<TotalTrafficData> trafficDStream = countDStream.map(totalTrafficDataFunc);

	// Map Cassandra table column
	Map<String, String> columnNameMappings = new HashMap<String, String>();
	columnNameMappings.put("routeId", "routeid");
	columnNameMappings.put("vehicleType", "vehicletype");
	columnNameMappings.put("totalCount", "totalcount");
	columnNameMappings.put("timeStamp", "timestamp");
	columnNameMappings.put("recordDate", "recorddate");

	// call CassandraStreamingJavaUtil function to save in DB
	javaFunctions(trafficDStream).writerBuilder("traffickeyspace", "total_traffic",
			CassandraJavaUtil.mapToRow(TotalTrafficData.class, columnNameMappings)).saveToCassandra();
}
 
Example #11
Source File: IoTTrafficDataProcessor.java    From iot-traffic-monitor with Apache License 2.0 5 votes vote down vote up
/**
 * Method to get the vehicles which are in radius of POI and their distance from POI.
 * 
 * @param nonFilteredIotDataStream original IoT data stream
 * @param broadcastPOIValues variable containing POI coordinates, route and vehicle types to monitor.
 */
public void processPOIData(JavaDStream<IoTData> nonFilteredIotDataStream,Broadcast<Tuple3<POIData, String, String>> broadcastPOIValues) {
	 
	// Filter by routeId,vehicleType and in POI range
	JavaDStream<IoTData> iotDataStreamFiltered = nonFilteredIotDataStream
			.filter(iot -> (iot.getRouteId().equals(broadcastPOIValues.value()._2())
					&& iot.getVehicleType().contains(broadcastPOIValues.value()._3())
					&& GeoDistanceCalculator.isInPOIRadius(Double.valueOf(iot.getLatitude()),
							Double.valueOf(iot.getLongitude()), broadcastPOIValues.value()._1().getLatitude(),
							broadcastPOIValues.value()._1().getLongitude(),
							broadcastPOIValues.value()._1().getRadius())));

	// pair with poi
	JavaPairDStream<IoTData, POIData> poiDStreamPair = iotDataStreamFiltered
			.mapToPair(iot -> new Tuple2<>(iot, broadcastPOIValues.value()._1()));

	// Transform to dstream of POITrafficData
	JavaDStream<POITrafficData> trafficDStream = poiDStreamPair.map(poiTrafficDataFunc);

	// Map Cassandra table column
	Map<String, String> columnNameMappings = new HashMap<String, String>();
	columnNameMappings.put("vehicleId", "vehicleid");
	columnNameMappings.put("distance", "distance");
	columnNameMappings.put("vehicleType", "vehicletype");
	columnNameMappings.put("timeStamp", "timestamp");

	// call CassandraStreamingJavaUtil function to save in DB
	javaFunctions(trafficDStream)
			.writerBuilder("traffickeyspace", "poi_traffic",CassandraJavaUtil.mapToRow(POITrafficData.class, columnNameMappings))
			.withConstantTTL(120)//keeping data for 2 minutes
			.saveToCassandra();
}
 
Example #12
Source File: CollabFilterCassandraDriver.java    From Spark-Cassandra-Collabfiltering with Apache License 2.0 4 votes vote down vote up
private void saveToCassandra(JavaRDD<RatingDO> rdd, String table) {
	RDDAndDStreamCommonJavaFunctions<RatingDO>.WriterBuilder writerBuilder = CassandraJavaUtil.javaFunctions(rdd).writerBuilder(RatingDO.EMPLOYERRATINGS_KEYSPACE, table, CassandraJavaUtil.mapToRow(RatingDO.class));
	writerBuilder.saveToCassandra();
}
 
Example #13
Source File: CassandraExample.java    From Apache-Spark-2x-for-Java-Developers with MIT License 3 votes vote down vote up
@SuppressWarnings("deprecation")
public static void main(String[] args) {
	System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");
	SparkConf conf =new SparkConf().setMaster("local").setAppName("Cassandra Example");
	conf.set("spark.cassandra.connection.host", "127.0.0.1");
	//conf.set("spark.sql.warehouse.dir", "file:////C:/Users/sgulati/spark-warehouse");
	
	JavaSparkContext jsc=new JavaSparkContext(conf);
	JavaRDD<Employee> cassandraTable = CassandraJavaUtil.javaFunctions(jsc).cassandraTable("my_keyspace", "emp",CassandraJavaUtil.mapRowTo(Employee.class));
	
    JavaRDD<String> selectEmpDept = CassandraJavaUtil.javaFunctions(jsc).cassandraTable("my_keyspace", "emp",CassandraJavaUtil.mapColumnTo(String.class)).select("emp_dept","emp_name");
	
	 
    cassandraTable.collect().forEach(System.out::println);
    //selectEmpDept.collect().forEach(System.out::println);
	
    
    CassandraJavaUtil.javaFunctions(cassandraTable)
       .writerBuilder("my_keyspace", "emp1", CassandraJavaUtil.mapToRow(Employee.class)).saveToCassandra();
    
	/*SQLContext sqlContext = new SQLContext(jsc);
	
	Map<String,String> map =new HashMap<>();
	map.put("table" , "emp");
	map.put("keyspace", "my_keyspace");
	
	Dataset<Row> df = sqlContext.read().format("org.apache.spark.sql.cassandra")
	  .options(map)
	  .load();
	
	df.show();*/
	
	
	
}