org.apache.spark.streaming.flume.FlumeUtils Java Examples

The following examples show how to use org.apache.spark.streaming.flume.FlumeUtils. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SparkStreamServiceImpl.java    From searchanalytics-bigdata with MIT License 6 votes vote down vote up
/**
 * Opens a Flume receiver stream on localhost:41111 (stored with
 * {@code StorageLevels.MEMORY_AND_DISK}), hands it to the top-query-string
 * counting pipeline and then starts the streaming context {@code jssc}.
 */
@Override
	public void startFlumeStream() {
		final JavaDStream<SparkFlumeEvent> flumeEvents =
				FlumeUtils.createStream(jssc, "localhost", 41111, StorageLevels.MEMORY_AND_DISK);

		final QueryStringJDStreams streams = new QueryStringJDStreams();

		// Top search query strings over the incoming Flume events.
		streams.topQueryStringsCountInLastOneHourUsingSparkFlumeEvent(flumeEvents);

		// TODO: enable the product-view statistics as well:
		//   streams.topProductViewsCountInLastOneHourUsingSparkFlumeEvent(flumeEvents);

		jssc.start();
	}
 
Example #2
Source File: JavaFlumeEventCount.java    From SparkDemo with MIT License 5 votes vote down vote up
/**
 * Prints, for every 2-second batch, how many Flume events were received.
 *
 * <p>Usage: {@code JavaFlumeEventCount <host> <port>}
 *
 * @param args {@code args[0]} is the host and {@code args[1]} the port that are
 *             passed to {@code FlumeUtils.createStream}
 * @throws Exception propagated from the streaming context
 */
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: JavaFlumeEventCount <host> <port>");
    System.exit(1);
  }

  StreamingExamples.setStreamingLogLevels();

  String host = args[0];
  int port = Integer.parseInt(args[1]);

  Duration batchInterval = new Duration(2000);
  SparkConf sparkConf = new SparkConf().setAppName("JavaFlumeEventCount");
  JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, batchInterval);
  JavaReceiverInputDStream<SparkFlumeEvent> flumeStream =
    FlumeUtils.createStream(ssc, host, port);

  // A bare flumeStream.count() used to sit here; its result was discarded and it
  // had no output operation attached, so it was dead code and has been dropped.
  flumeStream.count().map(new Function<Long, String>() {
    @Override
    public String call(Long in) {
      return "Received " + in + " flume events.";
    }
  }).print();

  ssc.start();
  // Block the main thread until the streaming computation is stopped.
  ssc.awaitTermination();
}
 
Example #3
Source File: SparkStreamingFromFlumeToHBaseWindowingExample.java    From SparkOnALog with Apache License 2.0 4 votes vote down vote up
/**
 * Reads Flume events, splits each event body on single spaces, counts the
 * resulting tokens over a sliding window and forwards every (token, count)
 * pair to {@code HBaseCounterIncrementor}.
 *
 * <p>Usage: {@code {master} {host} {port} {table} {columnFamily}
 * {windowInSeconds} {slideInSeconds}}
 *
 * @param args the seven positional arguments listed above
 */
public static void main(String[] args) {
	// Seven positional arguments are indexed below, so require all of them
	// instead of failing later with ArrayIndexOutOfBoundsException.
	if (args.length < 7) {
		System.err
				.println("Usage: SparkStreamingFromFlumeToHBaseWindowingExample {master} {host} {port} {table} {columnFamily} {windowInSeconds} {slideInSeconds}");
		System.exit(1);
	}

	String master = args[0];
	String host = args[1];
	int port = Integer.parseInt(args[2]);
	String tableName = args[3];
	String columnFamily = args[4];
	int windowInSeconds = Integer.parseInt(args[5]);
	// The slide interval is the seventh argument (was mistakenly read from args[5]).
	int slideInSeconds = Integer.parseInt(args[6]);

	Duration batchInterval = new Duration(2000);
	// Multiply as long so large second values cannot overflow int.
	Duration windowInterval = new Duration(windowInSeconds * 1000L);
	Duration slideInterval = new Duration(slideInSeconds * 1000L);

	JavaStreamingContext sc = new JavaStreamingContext(master,
			"FlumeEventCount", batchInterval,
			System.getenv("SPARK_HOME"), "/home/cloudera/SparkOnALog.jar");

	// Broadcast the HBase coordinates so the closures below can read them.
	final Broadcast<String> broadcastTableName = sc.sparkContext().broadcast(tableName);
	final Broadcast<String> broadcastColumnFamily = sc.sparkContext().broadcast(columnFamily);

	JavaDStream<SparkFlumeEvent> flumeStream = FlumeUtils.createStream(sc, host, port);

	JavaPairDStream<String, Integer> lastCounts = flumeStream
			.flatMap(new FlatMapFunction<SparkFlumeEvent, String>() {

				@Override
				public Iterable<String> call(SparkFlumeEvent event)
						throws Exception {
					// Decode the event body as UTF-8 and split it on single spaces.
					String bodyString = new String(event.event().getBody()
							.array(), "UTF-8");
					return Arrays.asList(bodyString.split(" "));
				}
			}).map(new PairFunction<String, String, Integer>() {
				@Override
				public Tuple2<String, Integer> call(String str)
						throws Exception {
					return new Tuple2<String, Integer>(str, 1);
				}
			}).reduceByKeyAndWindow(new Function2<Integer, Integer, Integer>() {

				@Override
				public Integer call(Integer x, Integer y) throws Exception {
					return x.intValue() + y.intValue();
				}
			}, windowInterval, slideInterval);

	lastCounts.foreach(new Function2<JavaPairRDD<String, Integer>, Time, Void>() {

		@Override
		public Void call(JavaPairRDD<String, Integer> values,
				Time time) throws Exception {

			values.foreach(new VoidFunction<Tuple2<String, Integer>>() {

				@Override
				public void call(Tuple2<String, Integer> tuple)
						throws Exception {
					HBaseCounterIncrementor incrementor =
							HBaseCounterIncrementor.getInstance(broadcastTableName.value(), broadcastColumnFamily.value());
					// "incerment" is the method name as declared by HBaseCounterIncrementor.
					incrementor.incerment("Counter", tuple._1(), tuple._2());
					System.out.println("Counter:" + tuple._1() + "," + tuple._2());
				}
			});

			return null;
		}
	});

	sc.start();

}
 
Example #4
Source File: SparkStreamingFromFlumeToHBaseExample.java    From SparkOnALog with Apache License 2.0 4 votes vote down vote up
/**
 * Reads Flume events, splits each event body on single spaces, counts the
 * resulting tokens per batch and forwards every (token, count) pair to
 * {@code HBaseCounterIncrementor}.
 *
 * <p>Usage: {@code {master} {host} {port} {table} {columnFamily}}
 *
 * @param args the five positional arguments listed above
 */
public static void main(String[] args) {
	// Five positional arguments are indexed below, so require all of them
	// instead of failing later with ArrayIndexOutOfBoundsException.
	if (args.length < 5) {
		System.err
				.println("Usage: SparkStreamingFromFlumeToHBaseExample {master} {host} {port} {table} {columnFamily}");
		System.exit(1);
	}

	String master = args[0];
	String host = args[1];
	int port = Integer.parseInt(args[2]);
	String tableName = args[3];
	String columnFamily = args[4];

	Duration batchInterval = new Duration(2000);

	JavaStreamingContext sc = new JavaStreamingContext(master,
			"FlumeEventCount", batchInterval,
			System.getenv("SPARK_HOME"), "/home/cloudera/SparkOnALog.jar");

	// Broadcast the HBase coordinates so the closures below can read them.
	final Broadcast<String> broadcastTableName = sc.sparkContext().broadcast(tableName);
	final Broadcast<String> broadcastColumnFamily = sc.sparkContext().broadcast(columnFamily);

	JavaDStream<SparkFlumeEvent> flumeStream = FlumeUtils.createStream(sc, host, port);

	JavaPairDStream<String, Integer> lastCounts = flumeStream
			.flatMap(new FlatMapFunction<SparkFlumeEvent, String>() {

				@Override
				public Iterable<String> call(SparkFlumeEvent event)
						throws Exception {
					// Decode the event body as UTF-8 and split it on single spaces.
					String bodyString = new String(event.event().getBody()
							.array(), "UTF-8");
					return Arrays.asList(bodyString.split(" "));
				}
			}).map(new PairFunction<String, String, Integer>() {
				@Override
				public Tuple2<String, Integer> call(String str)
						throws Exception {
					return new Tuple2<String, Integer>(str, 1);
				}
			}).reduceByKey(new Function2<Integer, Integer, Integer>() {

				@Override
				public Integer call(Integer x, Integer y) throws Exception {
					return x.intValue() + y.intValue();
				}
			});

	lastCounts.foreach(new Function2<JavaPairRDD<String, Integer>, Time, Void>() {

		@Override
		public Void call(JavaPairRDD<String, Integer> values,
				Time time) throws Exception {

			values.foreach(new VoidFunction<Tuple2<String, Integer>>() {

				@Override
				public void call(Tuple2<String, Integer> tuple)
						throws Exception {
					HBaseCounterIncrementor incrementor =
							HBaseCounterIncrementor.getInstance(broadcastTableName.value(), broadcastColumnFamily.value());
					// "incerment" is the method name as declared by HBaseCounterIncrementor.
					incrementor.incerment("Counter", tuple._1(), tuple._2());
					System.out.println("Counter:" + tuple._1() + "," + tuple._2());
				}
			});

			return null;
		}
	});

	sc.start();

}
 
Example #5
Source File: SparkStreamingFromFlumeExampleOld.java    From SparkOnALog with Apache License 2.0 2 votes vote down vote up
/**
 * Prints, for every 2-second batch, how many Flume events were received.
 *
 * <p>Usage: {@code JavaFlumeEventCount <master> <host> <port> <nameOfJar>}
 *
 * @param args the four positional arguments listed above
 */
public static void main(String[] args) {
	// Four positional arguments are indexed below, so require all of them
	// instead of failing later with ArrayIndexOutOfBoundsException.
	if (args.length < 4) {
		System.err
				.println("Usage: JavaFlumeEventCount <master> <host> <port> <nameOfJar>");
		System.exit(1);
	}

	String master = args[0];
	String host = args[1];
	int port = Integer.parseInt(args[2]);
	String nameOfJar = args[3];

	Duration batchInterval = new Duration(2000);

	System.out.println("-Starting Spark Context");

	// The fourth constructor argument is the Spark home directory; the master
	// URL was previously passed here by mistake. SPARK_HOME is used instead,
	// matching the other examples in this project.
	JavaStreamingContext sc = new JavaStreamingContext(master,
			"FlumeEventCount", batchInterval,
			System.getenv("SPARK_HOME"), nameOfJar);

	System.out.println("-Setting up Flume Stream");

	JavaDStream<SparkFlumeEvent> flumeStream = FlumeUtils.createStream(sc, host, port);

	System.out.println("-count.map");

	flumeStream.count().map(new Function<Long, String>() {
		@Override
		public String call(Long in) {
			return "Received " + in + " flume events.";
		}
	}).print();

	System.out.println("-Starting Spark Context");

	sc.start();

	System.out.println("-Finished");
}
 
Example #6
Source File: SparkStreamingFromFlumeExample.java    From SparkOnALog with Apache License 2.0 2 votes vote down vote up
/**
 * Prints, for every 5-second batch, the number of Flume events received:
 * once as the raw count and once wrapped in a message.
 *
 * <p>Usage: {@code JavaFlumeEventCount <master> <host> <port>}
 *
 * @param args the three positional arguments listed above
 */
public static void main(String[] args) {
	// Three positional arguments are indexed below, so require all of them
	// instead of failing later with ArrayIndexOutOfBoundsException.
	if (args.length < 3) {
		System.err
				.println("Usage: JavaFlumeEventCount <master> <host> <port>");
		System.exit(1);
	}

	String master = args[0];
	String host = args[1];
	int port = Integer.parseInt(args[2]);

	Duration batchInterval = new Duration(5000);

	System.out.println("-Starting Spark Context");
	System.out.println("-Spark_home:" + System.getenv("SPARK_HOME"));

	JavaStreamingContext sc = new JavaStreamingContext(master,
			"FlumeEventCount", batchInterval,
			System.getenv("SPARK_HOME"), "/home/cloudera/SparkOnALog.jar");

	System.out.println("-Setting up Flume Stream: " + host + " " + port);

	JavaDStream<SparkFlumeEvent> flumeStream = FlumeUtils.createStream(sc, host, port);

	System.out.println("-count.map");

	// First output operation: the bare per-batch count.
	flumeStream.count().print();

	// Second output operation: the same count embedded in a message.
	flumeStream.count().map(new Function<Long, String>() {
		@Override
		public String call(Long in) {
			return "????????????? Received " + in + " flume events.";
		}
	}).print();

	System.out.println("-Starting Spark Context");

	sc.start();

	System.out.println("-Finished");
}