Java Code Examples for org.apache.spark.streaming.api.java.JavaStreamingContext#awaitTermination()
The following examples show how to use
org.apache.spark.streaming.api.java.JavaStreamingContext#awaitTermination().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: WordCountRecoverableEx.java From Apache-Spark-2x-for-Java-Developers with MIT License | 6 votes |
public static void main(String[] args) throws Exception { System.setProperty("hadoop.home.dir", "E:\\hadoop"); final String ip = "10.0.75.1"; final int port = Integer.parseInt("9000"); final String checkpointDirectory = "E:\\hadoop\\checkpoint"; // Function to create JavaStreamingContext without any output operations // (used to detect the new context) Function0<JavaStreamingContext> createContextFunc = new Function0<JavaStreamingContext>() { @Override public JavaStreamingContext call() { return createContext(ip, port, checkpointDirectory); } }; JavaStreamingContext ssc = JavaStreamingContext.getOrCreate(checkpointDirectory, createContextFunc); ssc.start(); ssc.awaitTermination(); }
Example 2
Source File: BlurLoadSparkProcessor.java From incubator-retired-blur with Apache License 2.0 | 6 votes |
public void run() throws IOException { SparkConf conf = new SparkConf(); conf.setAppName(getAppName()); conf.set(SPARK_SERIALIZER, ORG_APACHE_SPARK_SERIALIZER_KRYO_SERIALIZER); JavaSparkUtil.packProjectJars(conf); setupSparkConf(conf); JavaStreamingContext ssc = new JavaStreamingContext(conf, getDuration()); List<JavaDStream<T>> streamsList = getStreamsList(ssc); // Union all the streams if there is more than 1 stream JavaDStream<T> streams = unionStreams(ssc, streamsList); JavaPairDStream<String, RowMutation> pairDStream = streams.mapToPair(new PairFunction<T, String, RowMutation>() { public Tuple2<String, RowMutation> call(T t) { RowMutation rowMutation = convert(t); return new Tuple2<String, RowMutation>(rowMutation.getRowId(), rowMutation); } }); pairDStream.foreachRDD(getFunction()); ssc.start(); ssc.awaitTermination(); }
Example 3
Source File: StreamingRsvpsDStreamCountWindow.java From -Data-Stream-Development-with-Apache-Spark-Kafka-and-Spring-Boot with MIT License | 6 votes |
public static void main(String[] args) throws InterruptedException { System.setProperty("hadoop.home.dir", HADOOP_HOME_DIR_VALUE); final SparkConf conf = new SparkConf() .setMaster(RUN_LOCAL_WITH_AVAILABLE_CORES) .setAppName(APPLICATION_NAME) .set("spark.mongodb.output.uri", MONGODB_OUTPUT_URI) .set("spark.streaming.kafka.consumer.cache.enabled", "false"); final JavaStreamingContext streamingContext = new JavaStreamingContext(conf, new Duration(BATCH_DURATION_INTERVAL_MS)); streamingContext.checkpoint(CHECKPOINT_FOLDER); final JavaInputDStream<ConsumerRecord<String, String>> meetupStream = KafkaUtils.createDirectStream( streamingContext, LocationStrategies.PreferConsistent(), ConsumerStrategies.<String, String>Subscribe(TOPICS, KAFKA_CONSUMER_PROPERTIES) ); // transformations, streaming algorithms, etc JavaDStream<Long> countStream = meetupStream.countByWindow( new Duration(WINDOW_LENGTH_MS), new Duration(SLIDING_INTERVAL_MS)); countStream.foreachRDD((JavaRDD<Long> countRDD) -> { MongoSpark.save( countRDD.map( r -> Document.parse("{\"rsvps_count\":\"" + String.valueOf(r) + "\"}") ) ); }); // some time later, after outputs have completed meetupStream.foreachRDD((JavaRDD<ConsumerRecord<String, String>> meetupRDD) -> { OffsetRange[] offsetRanges = ((HasOffsetRanges) meetupRDD.rdd()).offsetRanges(); ((CanCommitOffsets) meetupStream.inputDStream()) .commitAsync(offsetRanges, new MeetupOffsetCommitCallback()); }); streamingContext.start(); streamingContext.awaitTermination(); }
Example 4
Source File: StreamingIngestionFileSystemTextFileToDataframeMultipleClassesApp.java From net.jgp.labs.spark with Apache License 2.0 | 6 votes |
private void start() { // Create a local StreamingContext with two working thread and batch // interval of // 1 second SparkConf conf = new SparkConf().setMaster("local[2]").setAppName( "Streaming Ingestion File System Text File to Dataframe"); JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations .seconds(5)); JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils .getInputDirectory()); msgDataStream.print(); // Create JavaRDD<Row> msgDataStream.foreachRDD(new RowProcessor()); jssc.start(); try { jssc.awaitTermination(); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
Example 5
Source File: SparkStreaming.java From kafka-spark-avro-example with Apache License 2.0 | 5 votes |
/**
 * Builds a local two-thread Spark context with a 10-second streaming batch
 * interval, lets {@code processStream} attach the processing, then starts
 * the stream and waits for it to terminate.
 *
 * @param args command-line arguments (not read)
 */
public static void main(String... args) {
    SparkConf sparkConf = new SparkConf()
            .setMaster("local[2]")
            .setAppName("Spark Streaming Test Java");

    JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
    JavaStreamingContext streamingContext =
            new JavaStreamingContext(sparkContext, Durations.seconds(10));

    processStream(streamingContext, sparkContext);

    streamingContext.start();
    streamingContext.awaitTermination();
}
Example 6
Source File: BatchLayer.java From oryx with Apache License 2.0 | 5 votes |
/**
 * Blocks until the current streaming context terminates.
 *
 * <p>The context reference is read, and checked to be non-null, while
 * holding this object's monitor; the wait itself happens after the monitor
 * has been released.
 *
 * @throws InterruptedException if the waiting thread is interrupted
 */
public void await() throws InterruptedException {
    final JavaStreamingContext context;
    synchronized (this) {
        context = streamingContext;
        Preconditions.checkState(context != null);
    }
    log.info("Spark Streaming is running");
    // Can't do this with lock
    context.awaitTermination();
}
Example 7
Source File: WordCountSocketStateful.java From Apache-Spark-2x-for-Java-Developers with MIT License | 5 votes |
/**
 * Stateful socket word count: reads lines from a TCP socket, counts words
 * per batch and keeps a cumulative count per word with {@code mapWithState},
 * seeded from a small initial RDD.
 *
 * @param args command-line arguments (not read)
 * @throws Exception propagated from starting or awaiting the streaming context
 */
public static void main(String[] args) throws Exception {
    System.setProperty("hadoop.home.dir", "E:\\hadoop");

    SparkConf sparkConf = new SparkConf()
            .setAppName("WordCountSocketEx")
            .setMaster("local[*]");
    JavaStreamingContext streamingContext =
            new JavaStreamingContext(sparkConf, Durations.seconds(1));
    streamingContext.checkpoint("E:\\hadoop\\checkpoint");

    // Initial state RDD input to mapWithState
    @SuppressWarnings("unchecked")
    List<Tuple2<String, Integer>> seedCounts =
            Arrays.asList(new Tuple2<>("hello", 1), new Tuple2<>("world", 1));
    JavaPairRDD<String, Integer> initialRDD =
            streamingContext.sparkContext().parallelizePairs(seedCounts);

    JavaReceiverInputDStream<String> lines = streamingContext.socketTextStream(
            "10.0.75.1", Integer.parseInt("9000"), StorageLevels.MEMORY_AND_DISK_SER);

    JavaDStream<String> words =
            lines.flatMap(line -> Arrays.asList(line.split(" ")).iterator());
    JavaPairDStream<String, Integer> wordCounts = words
            .mapToPair(word -> new Tuple2<>(word, 1))
            .reduceByKey((left, right) -> left + right);

    // Update the cumulative count function: new total = this batch's count
    // (0 when absent) plus the stored total (0 when there is no state yet).
    Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc =
            (word, one, state) -> {
                int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
                state.update(sum);
                return new Tuple2<>(word, sum);
            };

    // DStream made of cumulative counts that get updated in every batch
    JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream =
            wordCounts.mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));
    stateDstream.print();

    streamingContext.start();
    streamingContext.awaitTermination();
}
Example 8
Source File: AppMain.java From SparkToParquet with Apache License 2.0 | 4 votes |
/**
 * Reads Apache access-log lines from Kafka, parses them into
 * {@code ApacheAccessLog} records and appends every batch to a partitioned
 * Parquet data set.
 *
 * @param args command-line arguments handed to {@code Flags.setFromCommandLineArgs}
 * @throws IOException declared by the original signature
 */
public static void main(String[] args) throws IOException {
    Flags.setFromCommandLineArgs(THE_OPTIONS, args);

    // Initialise the Spark configuration and contexts.
    SparkConf conf = new SparkConf().setAppName("A SECTONG Application: Apache Log Analysis with Spark");
    JavaSparkContext sc = new JavaSparkContext(conf);
    JavaStreamingContext jssc = new JavaStreamingContext(sc, Flags.getInstance().getSlideInterval());
    SQLContext sqlContext = new SQLContext(sc);

    // Initialise the Kafka parameters.
    HashSet<String> topicsSet =
            new HashSet<String>(Arrays.asList(Flags.getInstance().getKafka_topic().split(",")));
    HashMap<String, String> kafkaParams = new HashMap<String, String>();
    kafkaParams.put("metadata.broker.list", Flags.getInstance().getKafka_broker());

    // Read the data from the Kafka stream.
    JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(
            jssc, String.class, String.class,
            StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet);

    // Keep only the second element of each (key, value) tuple.
    JavaDStream<String> lines = messages.map(tuple2 -> tuple2._2());

    JavaDStream<ApacheAccessLog> accessLogsDStream = lines.flatMap(line -> {
        List<ApacheAccessLog> parsed = new ArrayList<>();
        try {
            // Map each line to a record.
            parsed.add(ApacheAccessLog.parseFromLogLine(line));
        } catch (RuntimeException ignored) {
            // A line that fails to parse contributes no records.
        }
        return parsed;
    }).cache();

    accessLogsDStream.foreachRDD(rdd -> {
        // rdd to DataFrame
        DataFrame frame = sqlContext.createDataFrame(rdd, ApacheAccessLog.class);
        // Write to the Parquet file.
        frame.write()
                .partitionBy("ipAddress", "method", "responseCode")
                .mode(SaveMode.Append)
                .parquet(Flags.getInstance().getParquetFile());
        return null;
    });

    // Start the streaming service.
    jssc.start(); // start the computation
    jssc.awaitTermination(); // wait for termination
}
Example 9
Source File: JavaSqlNetworkWordCount.java From SparkDemo with MIT License | 4 votes |
/**
 * Counts the words received from a TCP socket by registering each batch as
 * a temporary view and running a SQL aggregation over it.
 *
 * @param args {@code <hostname> <port>} of the text source
 * @throws Exception propagated from starting or awaiting the streaming context
 */
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        // The usage text names this program, matching the app name set below.
        System.err.println("Usage: JavaSqlNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    StreamingExamples.setStreamingLogLevels();

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaSqlNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));

    // Create a JavaReceiverInputDStream on target ip:port and count the
    // words in input stream of \n delimited text (eg. generated by 'nc')
    // Note that no duplication in storage level only for running locally.
    // Replication necessary in distributed scenario for fault tolerance.
    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(
            args[0], Integer.parseInt(args[1]), StorageLevels.MEMORY_AND_DISK_SER);

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Arrays.asList(SPACE.split(x)).iterator();
        }
    });

    // Convert RDDs of the words DStream to DataFrame and run SQL query
    words.foreachRDD(new VoidFunction2<JavaRDD<String>, Time>() {
        @Override
        public void call(JavaRDD<String> rdd, Time time) {
            SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context().getConf());

            // Convert JavaRDD[String] to JavaRDD[bean class] to DataFrame
            JavaRDD<JavaRecord> rowRDD = rdd.map(new Function<String, JavaRecord>() {
                @Override
                public JavaRecord call(String word) {
                    JavaRecord record = new JavaRecord();
                    record.setWord(word);
                    return record;
                }
            });
            Dataset<Row> wordsDataFrame = spark.createDataFrame(rowRDD, JavaRecord.class);

            // Creates a temporary view using the DataFrame
            wordsDataFrame.createOrReplaceTempView("words");

            // Do word count on table using SQL and print it
            Dataset<Row> wordCountsDataFrame =
                    spark.sql("select word, count(*) as total from words group by word");
            System.out.println("========= " + time + "=========");
            wordCountsDataFrame.show();
        }
    });

    ssc.start();
    ssc.awaitTermination();
}
Example 10
Source File: JavaStreamingTestExample.java From SparkDemo with MIT License | 4 votes |
/**
 * Runs a streaming significance test over comma-separated
 * {@code label,value} lines read from a directory, and stops the Spark
 * context once a result is significant or the batch budget is used up.
 *
 * @param args {@code <dataDir> <batchDuration> <numBatchesTimeout>}
 * @throws Exception propagated from starting or awaiting the streaming context
 */
public static void main(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: JavaStreamingTestExample " +
                "<dataDir> <batchDuration> <numBatchesTimeout>");
        System.exit(1);
    }

    String dataDir = args[0];
    Duration batchDuration = Seconds.apply(Long.parseLong(args[1]));
    int numBatchesTimeout = Integer.parseInt(args[2]);

    SparkConf conf = new SparkConf().setMaster("local").setAppName("StreamingTestExample");
    JavaStreamingContext ssc = new JavaStreamingContext(conf, batchDuration);
    String checkpointDir =
            Utils.createTempDir(System.getProperty("java.io.tmpdir"), "spark").toString();
    ssc.checkpoint(checkpointDir);

    // $example on$
    // Parses "label,value" into a BinarySample.
    Function<String, BinarySample> parseSample = new Function<String, BinarySample>() {
        @Override
        public BinarySample call(String line) {
            String[] ts = line.split(",");
            boolean label = Boolean.parseBoolean(ts[0]);
            double value = Double.parseDouble(ts[1]);
            return new BinarySample(label, value);
        }
    };
    JavaDStream<BinarySample> data = ssc.textFileStream(dataDir).map(parseSample);

    StreamingTest streamingTest = new StreamingTest()
            .setPeacePeriod(0)
            .setWindowSize(0)
            .setTestMethod("welch");

    JavaDStream<StreamingTestResult> out = streamingTest.registerStream(data);
    out.print();
    // $example off$

    // Stop processing if test becomes significant or we time out
    timeoutCounter = numBatchesTimeout;

    out.foreachRDD(new VoidFunction<JavaRDD<StreamingTestResult>>() {
        @Override
        public void call(JavaRDD<StreamingTestResult> rdd) {
            timeoutCounter -= 1;

            // Results with a p-value below 0.05 count as significant.
            JavaRDD<StreamingTestResult> significant =
                    rdd.filter(new Function<StreamingTestResult, Boolean>() {
                        @Override
                        public Boolean call(StreamingTestResult v) {
                            return v.pValue() < 0.05;
                        }
                    });
            boolean anySignificant = !significant.isEmpty();

            if (timeoutCounter <= 0 || anySignificant) {
                rdd.context().stop();
            }
        }
    });

    ssc.start();
    ssc.awaitTermination();
}
Example 11
Source File: StateFulProcessingExample.java From Apache-Spark-2x-for-Java-Developers with MIT License | 4 votes |
/**
 * Stateful flight-tracking example: parses JSON flight records from a
 * socket and, per flight id, keeps the list of records seen so far in
 * {@code mapWithState}, emitting the running average of
 * {@code getTemperature()}.
 *
 * @param args command-line arguments (not read)
 * @throws InterruptedException if waiting for termination is interrupted
 */
public static void main(String[] args) throws InterruptedException {
    System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");

    SparkSession sparkSession = SparkSession.builder()
            .master("local[*]")
            .appName("Stateful Streaming Example")
            .config("spark.sql.warehouse.dir", "file:////C:/Users/sgulati/spark-warehouse")
            .getOrCreate();

    JavaSparkContext sparkContext = new JavaSparkContext(sparkSession.sparkContext());
    JavaStreamingContext jssc = new JavaStreamingContext(sparkContext, Durations.milliseconds(1000));

    JavaReceiverInputDStream<String> inStream = jssc.socketTextStream("10.204.136.223", 9999);
    jssc.checkpoint("C:\\Users\\sgulati\\spark-checkpoint");

    // Each incoming line is deserialized into a FlightDetails bean.
    JavaDStream<FlightDetails> flightDetailsStream = inStream.map(json -> {
        ObjectMapper mapper = new ObjectMapper();
        return mapper.readValue(json, FlightDetails.class);
    });

    JavaPairDStream<String, FlightDetails> flightDetailsPairStream = flightDetailsStream
            .mapToPair(flight -> new Tuple2<String, FlightDetails>(flight.getFlightId(), flight));

    Function3<String, Optional<FlightDetails>, State<List<FlightDetails>>, Tuple2<String, Double>> mappingFunc =
            (flightId, curFlightDetail, state) -> {
                List<FlightDetails> history = state.exists() ? state.get() : new ArrayList<>();

                boolean landed = false;
                if (curFlightDetail.isPresent()) {
                    history.add(curFlightDetail.get());
                    if (curFlightDetail.get().isLanded()) {
                        landed = true;
                    }
                }

                // Average over every record collected for this flight (0.0 when none).
                Double averageTemperature = history.stream()
                        .mapToDouble(f -> f.getTemperature())
                        .average()
                        .orElse(0.0);

                // A landed flight drops its state; otherwise the grown list is stored.
                if (landed) {
                    state.remove();
                } else {
                    state.update(history);
                }
                return new Tuple2<String, Double>(flightId, averageTemperature);
            };

    JavaMapWithStateDStream<String, FlightDetails, List<FlightDetails>, Tuple2<String, Double>> streamWithState =
            flightDetailsPairStream.mapWithState(
                    StateSpec.function(mappingFunc).timeout(Durations.minutes(5)));
    streamWithState.print();

    jssc.start();
    jssc.awaitTermination();
}
Example 12
Source File: Runner.java From envelope with Apache License 2.0 | 4 votes |
/**
 * Run the Envelope pipeline as a Spark Streaming job.
 *
 * <p>Runs the independent non-streaming steps once, registers a per-batch
 * callback on the stream of every streaming step, then starts the shared
 * streaming context and blocks until it terminates.
 *
 * @param steps The full configuration of the Envelope pipeline
 * @throws Exception if running a batch, setting up a step or the streaming job itself fails
 */
@SuppressWarnings("unchecked")
private void runStreaming(final Set<Step> steps) throws Exception {
// Run the steps that do not depend on any stream once before streaming starts.
final Set<Step> independentNonStreamingSteps = StepUtils.getIndependentNonStreamingSteps(steps);
runBatch(independentNonStreamingSteps);
Set<StreamingStep> streamingSteps = StepUtils.getStreamingSteps(steps);
for (final StreamingStep streamingStep : streamingSteps) {
LOG.debug("Setting up streaming step: " + streamingStep.getName());
JavaDStream stream = streamingStep.getStream();
// Callback invoked with each RDD of this step's stream.
stream.foreachRDD(new VoidFunction<JavaRDD<?>>() {
@Override
public void call(JavaRDD<?> raw) throws Exception {
// Some independent steps might be repeating steps that have been flagged for reload
StepUtils.resetRepeatingSteps(steps);
// This will run any batch steps (and dependents) that are not submitted
runBatch(independentNonStreamingSteps);
// Translate the raw RDD, hand it to the step, write it out and mark the step finished.
streamingStep.setData(streamingStep.translate(raw));
streamingStep.writeData();
streamingStep.setState(StepState.FINISHED);
// Assemble the set of steps to run as a batch for this micro-batch.
Set<Step> batchSteps = StepUtils.mergeLoadedSteps(steps, streamingStep, baseConfig);
Set<Step> dependentSteps = StepUtils.getAllDependentSteps(streamingStep, batchSteps);
batchSteps.add(streamingStep);
batchSteps.addAll(streamingStep.loadNewBatchSteps());
batchSteps.addAll(independentNonStreamingSteps);
runBatch(batchSteps);
// Reset the dependents, then record progress for this RDD.
StepUtils.resetSteps(dependentSteps);
streamingStep.recordProgress(raw);
}
});
LOG.debug("Finished setting up streaming step: " + streamingStep.getName());
}
// All callbacks are registered; start the context and block until it terminates.
JavaStreamingContext jsc = Contexts.getJavaStreamingContext();
jsc.start();
LOG.debug("Streaming context started");
jsc.awaitTermination();
LOG.debug("Streaming context terminated");
}
Example 13
Source File: JavaKafkaDirectWordCount.java From SparkDemo with MIT License | 4 votes |
/**
 * Word count over a direct (receiver-less) Kafka stream.
 *
 * <p>Notes from the original author on the direct approach:
 * <ol>
 *   <li>one-to-one</li>
 *   <li>efficient</li>
 *   <li>each record is counted exactly once</li>
 * </ol>
 *
 * @param args command-line arguments (not read)
 */
public static void main(String[] args) {
    StreamingExamples.setStreamingLogLevels();

    SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaDirectWordCount").setMaster("local[1]");
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(6));

    // Kafka parameters passed to createDirectStream (not a topic -> thread-count map).
    Map<String, String> kafkaParams = new HashMap<String, String>();
    kafkaParams.put("metadata.broker.list", "master:9092,slave1:9092,slave2:9092"); // where the brokers are

    HashSet<String> topicsSet = new HashSet<String>();
    topicsSet.add("2017-7-26"); // the topic to consume

    // Create direct kafka stream with brokers and topics createDirectStream()
    JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(
            jssc,
            String.class,
            String.class,
            StringDecoder.class,
            StringDecoder.class,
            kafkaParams,
            topicsSet
    );

    // Keep only the second element of each (key, value) tuple.
    JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    });

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x)).iterator();
        }
    });

    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s, 1);
        }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });
    wordCounts.print();

    jssc.start();
    try {
        jssc.awaitTermination();
    } catch (Exception e) {
        // The broad catch would otherwise drop an interrupt: put the flag
        // back so the thread's interrupted state stays visible.
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
        e.printStackTrace();
    }
}
Example 14
Source File: KafkaReceiverWordCountJava.java From Building-Data-Streaming-Applications-with-Apache-Kafka with MIT License | 4 votes |
/**
 * Word count over a Kafka stream created with
 * {@code KafkaUtils.createStream}, printing the per-batch counts.
 *
 * @param args command-line arguments (not read)
 * @throws Exception propagated from starting or awaiting the streaming context
 */
public static void main(String[] args) throws Exception {
    String zkQuorum = "localhost:2181";
    String groupName = "stream";
    int numThreads = 3;
    String topicsName = "test1";

    SparkConf sparkConf = new SparkConf().setAppName("WordCountKafkaStream");
    JavaStreamingContext javaStreamingContext = new JavaStreamingContext(sparkConf, new Duration(5000));

    // Every comma-separated topic name is mapped to the same numThreads value.
    Map<String, Integer> topicToBeUsedBySpark = new HashMap<>();
    for (String topic : topicsName.split(",")) {
        topicToBeUsedBySpark.put(topic, numThreads);
    }

    JavaPairReceiverInputDStream<String, String> streamMessages =
            KafkaUtils.createStream(javaStreamingContext, zkQuorum, groupName, topicToBeUsedBySpark);

    // Keeps only the second element of each (key, value) tuple.
    Function<Tuple2<String, String>, String> secondElement = new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    };
    JavaDStream<String> lines = streamMessages.map(secondElement);

    // Splits a line on WORD_DELIMETER.
    FlatMapFunction<String, String> splitWords = new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Arrays.asList(WORD_DELIMETER.split(x)).iterator();
        }
    };
    JavaDStream<String> words = lines.flatMap(splitWords);

    PairFunction<String, String, Integer> pairWithOne = new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<>(s, 1);
        }
    };
    Function2<Integer, Integer, Integer> add = new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    };
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(pairWithOne).reduceByKey(add);
    wordCounts.print();

    javaStreamingContext.start();
    javaStreamingContext.awaitTermination();
}
Example 15
Source File: StreamingRsvpsDStream.java From -Data-Stream-Development-with-Apache-Spark-Kafka-and-Spring-Boot with MIT License | 4 votes |
/**
 * Streams Kafka records, saves those whose value does not contain
 * {@code "guests":0} through {@code MongoSpark.save}, and commits the
 * consumed Kafka offsets asynchronously for every batch.
 *
 * @param args command-line arguments (not read)
 * @throws InterruptedException if waiting for termination is interrupted
 */
public static void main(String[] args) throws InterruptedException {
    System.setProperty("hadoop.home.dir", HADOOP_HOME_DIR_VALUE);

    final SparkConf conf = new SparkConf()
            .setMaster(RUN_LOCAL_WITH_AVAILABLE_CORES)
            .setAppName(APPLICATION_NAME)
            .set("spark.mongodb.output.uri", MONGODB_OUTPUT_URI);

    final Duration batchInterval = new Duration(BATCH_DURATION_INTERVAL_MS);
    final JavaStreamingContext streamingContext = new JavaStreamingContext(conf, batchInterval);

    final JavaInputDStream<ConsumerRecord<String, String>> meetupStream =
            KafkaUtils.createDirectStream(
                    streamingContext,
                    LocationStrategies.PreferConsistent(),
                    ConsumerStrategies.<String, String>Subscribe(TOPICS, KAFKA_CONSUMER_PROPERTIES));

    // transformations, streaming algorithms, etc
    JavaDStream<ConsumerRecord<String, String>> rsvpsWithGuestsStream =
            meetupStream.filter(record -> !record.value().contains("\"guests\":0"));

    rsvpsWithGuestsStream.foreachRDD((JavaRDD<ConsumerRecord<String, String>> batch) -> {
        // Each record value is parsed into a Document before saving.
        MongoSpark.save(batch.map(record -> Document.parse(record.value())));
    });

    // some time later, after outputs have completed
    meetupStream.foreachRDD((JavaRDD<ConsumerRecord<String, String>> batch) -> {
        OffsetRange[] consumed = ((HasOffsetRanges) batch.rdd()).offsetRanges();
        CanCommitOffsets committer = (CanCommitOffsets) meetupStream.inputDStream();
        committer.commitAsync(consumed, new MeetupOffsetCommitCallback());
    });

    streamingContext.start();
    streamingContext.awaitTermination();
}
Example 16
Source File: SparkRunner.java From jaeger-analytics-java with Apache License 2.0 | 4 votes |
/**
 * Consumes spans from Kafka, groups them into traces by trace id within
 * each batch, builds a graph per trace and runs every configured
 * {@code ModelRunner} on it. Settings are read through
 * {@code getPropOrEnv} with the defaults shown inline.
 *
 * @param args command-line arguments (not read)
 * @throws InterruptedException if waiting for termination is interrupted
 * @throws IOException declared by the original signature
 */
public static void main(String []args) throws InterruptedException, IOException {
    // Created for its side effect only; the reference is not used afterwards.
    HTTPServer server = new HTTPServer(Integer.valueOf(getPropOrEnv("PROMETHEUS_PORT", "9111")));

    SparkConf sparkConf = new SparkConf()
            .setAppName("Trace DSL")
            .setMaster(getPropOrEnv("SPARK_MASTER","local[*]"));
    JavaSparkContext sc = new JavaSparkContext(sparkConf);

    int batchMillis = Integer.parseInt(getPropOrEnv("SPARK_STREAMING_BATCH_DURATION", "5000"));
    JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(batchMillis));

    Set<String> topics = Collections.singleton(getPropOrEnv("KAFKA_JAEGER_TOPIC", "jaeger-spans"));

    Map<String, Object> kafkaParams = new HashMap<>();
    kafkaParams.put("bootstrap.servers", getPropOrEnv("KAFKA_BOOTSTRAP_SERVER", "localhost:9092"));
    kafkaParams.put("key.deserializer", StringDeserializer.class);
    kafkaParams.put("value.deserializer", ProtoSpanDeserializer.class);
    // hack to start always from beginning
    kafkaParams.put("group.id", "jaeger-trace-aggregation-" + System.currentTimeMillis());

    boolean fromBeginning = Boolean.parseBoolean(getPropOrEnv("KAFKA_START_FROM_BEGINNING", "true"));
    if (fromBeginning) {
        kafkaParams.put("auto.offset.reset", "earliest");
        kafkaParams.put("enable.auto.commit", false);
        kafkaParams.put("startingOffsets", "earliest");
    }

    JavaInputDStream<ConsumerRecord<String, Span>> messages =
            KafkaUtils.createDirectStream(
                    ssc,
                    LocationStrategies.PreferConsistent(),
                    ConsumerStrategies.Subscribe(topics, kafkaParams));

    // Key each span by its trace id.
    JavaPairDStream<String, Span> traceIdSpanTuple = messages.mapToPair(record -> {
        return new Tuple2<>(record.value().traceId, record.value());
    });

    // Collect the spans that share a trace id into one Trace.
    JavaDStream<Trace> tracesStream = traceIdSpanTuple.groupByKey().map(grouped -> {
        System.out.printf("traceID: %s\n", grouped._1);
        Iterable<Span> spans = grouped._2();
        Trace assembled = new Trace();
        assembled.traceId = grouped._1();
        assembled.spans = StreamSupport.stream(spans.spliterator(), false)
                .collect(Collectors.toList());
        return assembled;
    });

    MinimumClientVersion minimumClientVersion = MinimumClientVersion.builder()
            .withJavaVersion(getPropOrEnv("TRACE_QUALITY_JAVA_VERSION", "1.0.0"))
            .withGoVersion(getPropOrEnv("TRACE_QUALITY_GO_VERSION", "2.22.0"))
            .withNodeVersion(getPropOrEnv("TRACE_QUALITY_NODE_VERSION", "3.17.1"))
            .withPythonVersion(getPropOrEnv("TRACE_QUALITY_PYTHON_VERSION", "4.0.0"))
            .build();

    List<ModelRunner> modelRunner = Arrays.asList(
            new TraceHeight(),
            new ServiceDepth(),
            new ServiceHeight(),
            new NetworkLatency(),
            new NumberOfErrors(),
            new DirectDependencies(),
            // trace quality
            minimumClientVersion,
            new HasClientServerSpans(),
            new UniqueSpanId());

    tracesStream.foreachRDD((traceRDD, time) -> {
        traceRDD.foreach(trace -> {
            Graph graph = GraphCreator.create(trace);
            for (ModelRunner model : modelRunner) {
                model.runWithMetrics(graph);
            }
        });
    });

    ssc.start();
    ssc.awaitTermination();
}
Example 17
Source File: SampleConsumer.java From kafka-spark-consumer with Apache License 2.0 | 4 votes |
/**
 * Launches the Kafka receivers with a fixed set of properties, counts the
 * messages per topic in every partition, prints the counts, and persists
 * the processed offsets after each RDD. If starting or awaiting the
 * context throws, all jobs are cancelled, the context is stopped and the
 * JVM exits with status -1.
 */
@SuppressWarnings("deprecation")
private void run() {
    final Properties props = new Properties();
    props.put("zookeeper.hosts", "zkhost");
    props.put("zookeeper.port", "2181");
    props.put("kafka.topic", "topicA,topicB,topicC");
    props.put("kafka.consumer.id", "kafka-consumer");
    // Optional Properties
    props.put("zookeeper.broker.path", "/brokers");
    props.put("zookeeper.consumer.path", "/consumers");
    props.put("consumer.forcefromstart", "false");
    props.put("max.poll.records", "10");
    props.put("consumer.fillfreqms", "500");
    props.put("consumer.backpressure.enabled", "true");
    //Kafka properties
    props.put("bootstrap.servers", "kafkahost-1:6667,"
            + "kafkahost-2:6667,"
            + "kafkahost-3:6667,"
            + "kafkahost-4:6667");
    props.put("security.protocol", "SSL");
    props.put("ssl.truststore.location","~/kafka-securitykafka.server.truststore.jks");
    props.put("ssl.truststore.password", "test1234");

    SparkConf sparkConf = new SparkConf();
    JavaStreamingContext jsc = new JavaStreamingContext(sparkConf, Durations.seconds(30));

    // Specify number of Receivers you need.
    int numberOfReceivers = 6;
    JavaDStream<MessageAndMetadata<byte[]>> unionStreams = ReceiverLauncher.launch(
            jsc, props, numberOfReceivers, StorageLevel.MEMORY_ONLY());

    unionStreams.foreachRDD(new VoidFunction<JavaRDD<MessageAndMetadata<byte[]>>>() {
        @Override
        public void call(JavaRDD<MessageAndMetadata<byte[]>> rdd) throws Exception {
            //Start Application Logic
            rdd.foreachPartition(new VoidFunction<Iterator<MessageAndMetadata<byte[]>>>() {
                @Override
                public void call(Iterator<MessageAndMetadata<byte[]>> mmItr) throws Exception {
                    int countTopicA = 0;
                    int countTopicB = 0;
                    int countTopicC = 0;
                    // Tally the partition's messages per known topic; other topics are ignored.
                    while (mmItr.hasNext()) {
                        MessageAndMetadata<byte[]> mm = mmItr.next();
                        if (mm.getTopic().equals("topicA")) {
                            countTopicA++;
                        } else if (mm.getTopic().equals("topicB")) {
                            countTopicB++;
                        } else if (mm.getTopic().equals("topicC")) {
                            countTopicC++;
                        }
                    }
                    System.out.println("topicA count " + countTopicA);
                    System.out.println("topicB count " + countTopicB);
                    System.out.println("topicC count " + countTopicC);
                }
            });
            System.out.println("RDD count " + rdd.count());
            //End Application Logic

            //commit offset
            System.out.println("Commiting Offset");
            ProcessedOffsetManager.persistsPartition(rdd, props);
        }
    });

    try {
        jsc.start();
        jsc.awaitTermination();
    } catch (Exception ex) {
        // Any failure: cancel running jobs, stop the context, exit non-zero.
        jsc.ssc().sc().cancelAllJobs();
        jsc.stop(true, false);
        System.exit(-1);
    }
}
Example 18
Source File: Server.java From cxf with Apache License 2.0 | 4 votes |
protected Server(String[] args) throws Exception {
ServerSocket sparkServerSocket = new ServerSocket(9999);
ServerSocket jaxrsResponseServerSocket = new ServerSocket(10000);
Socket jaxrsResponseClientSocket = new Socket("localhost", 10000);
SparkConf sparkConf = new SparkConf().setMaster("local[*]")
.setAppName("JAX-RS Spark Socket Connect");
JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));
SparkStreamingOutput streamOut = new SparkStreamingOutput(jssc);
SparkStreamingListener sparkListener = new SparkStreamingListener(streamOut);
jssc.addStreamingListener(sparkListener);
JavaDStream<String> receiverStream = jssc.socketTextStream(
"localhost", 9999, StorageLevels.MEMORY_ONLY);
JavaPairDStream<String, Integer> wordCounts = SparkUtils.createOutputDStream(receiverStream, true);
PrintStream sparkResponseOutputStream = new PrintStream(jaxrsResponseClientSocket.getOutputStream(), true);
wordCounts.foreachRDD(new SocketOutputFunction(sparkResponseOutputStream));
jssc.start();
Socket receiverClientSocket = sparkServerSocket.accept();
PrintStream sparkOutputStream = new PrintStream(receiverClientSocket.getOutputStream(), true);
BufferedReader sparkInputStream =
new BufferedReader(new InputStreamReader(jaxrsResponseServerSocket.accept().getInputStream()));
JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean();
sf.setResourceClasses(StreamingService.class);
sf.setResourceProvider(StreamingService.class,
new SingletonResourceProvider(new StreamingService(sparkInputStream,
sparkOutputStream)));
sf.setAddress("http://localhost:9000/spark");
sf.create();
jssc.awaitTermination();
sparkServerSocket.close();
jaxrsResponseServerSocket.close();
jaxrsResponseClientSocket.close();
}
Example 19
Source File: FraudDetectionApp.java From Building-Data-Streaming-Applications-with-Apache-Kafka with MIT License | 4 votes |
/**
 * Reads log lines from Kafka, keeps those whose leading IP address is
 * reported as fraudulent by {@code CacheIPLookup}, and writes the matching
 * lines out as text files.
 *
 * @param args command-line arguments (not read)
 * @throws Exception propagated from starting or awaiting the streaming context
 */
public static void main(String[] args) throws Exception {
    String brokers = "localhost:9092";
    String topics = "iplog";
    CacheIPLookup cacheIPLookup = new CacheIPLookup();

    SparkConf sparkConf = new SparkConf().setAppName("IP_FRAUD");
    JavaStreamingContext javaStreamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(2));

    Set<String> topicsSet = new HashSet<>(Arrays.asList(topics.split(",")));
    Map<String, String> kafkaConfiguration = new HashMap<>();
    kafkaConfiguration.put("metadata.broker.list", brokers);
    kafkaConfiguration.put("group.id", "ipfraud");
    kafkaConfiguration.put("auto.offset.reset", "smallest");

    JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(
            javaStreamingContext,
            String.class,
            String.class,
            StringDecoder.class,
            StringDecoder.class,
            kafkaConfiguration,
            topicsSet
    );

    JavaDStream<String> lines = messages.map(Tuple2::_2);

    JavaDStream<String> fraudIPs = lines.filter(new Function<String, Boolean>() {
        @Override
        public Boolean call(String s) throws Exception {
            String IP = s.split(" ")[0];
            String[] ranges = IP.split("\\.");
            // The lookup key is the first two dot-separated parts. When there
            // are fewer than two, the key stays null, exactly as it did when
            // the out-of-bounds access was caught and ignored.
            String range = ranges.length >= 2 ? ranges[0] + "." + ranges[1] : null;
            return cacheIPLookup.isFraudIP(range);
        }
    });

    DStream<String> fraudDstream = fraudIPs.dstream();
    fraudDstream.saveAsTextFiles("FraudRecord", "");

    javaStreamingContext.start();
    javaStreamingContext.awaitTermination();
}
Example 20
Source File: StateLessProcessingExample.java From Apache-Spark-2x-for-Java-Developers with MIT License | 3 votes |
/**
 * Stateless windowed example: parses JSON flight records from a socket and
 * prints, per flight id, the average of {@code getTemperature()} over a
 * 5-minute window that slides every minute.
 *
 * @param args command-line arguments (not read)
 * @throws InterruptedException if waiting for termination is interrupted
 */
public static void main(String[] args) throws InterruptedException {
    System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");

    SparkSession sparkSession = SparkSession.builder()
            .master("local[*]")
            .appName("stateless Streaming Example")
            .config("spark.sql.warehouse.dir", "file:////C:/Users/sgulati/spark-warehouse")
            .getOrCreate();

    JavaSparkContext sparkContext = new JavaSparkContext(sparkSession.sparkContext());
    JavaStreamingContext jssc = new JavaStreamingContext(sparkContext, Durations.milliseconds(1000));

    JavaReceiverInputDStream<String> inStream = jssc.socketTextStream("10.204.136.223", 9999);

    // Each incoming line is deserialized into a FlightDetails bean.
    JavaDStream<FlightDetails> flightDetailsStream = inStream.map(json -> {
        ObjectMapper mapper = new ObjectMapper();
        return mapper.readValue(json, FlightDetails.class);
    });

    //flightDetailsStream.print();
    //flightDetailsStream.foreachRDD((VoidFunction<JavaRDD<FlightDetails>>) rdd -> rdd.saveAsTextFile("hdfs://namenode:port/path"));

    JavaDStream<FlightDetails> window =
            flightDetailsStream.window(Durations.minutes(5), Durations.minutes(1));

    // (flightId, temperature) -> (flightId, (sum, count)) -> (flightId, sum / count)
    JavaPairDStream<String, Double> temperatureByFlight = window
            .mapToPair(f -> new Tuple2<String, Double>(f.getFlightId(), f.getTemperature()));
    JavaPairDStream<String, Tuple2<Double, Integer>> sumAndCount = temperatureByFlight
            .mapValues(t -> new Tuple2<Double, Integer>(t, 1))
            .reduceByKey((t1, t2) -> new Tuple2<Double, Integer>(t1._1() + t2._1(), t1._2() + t2._2()));
    JavaPairDStream<String, Double> transformedWindow =
            sumAndCount.mapValues(t -> t._1() / t._2());

    transformedWindow.cache();
    transformedWindow.print();

    jssc.start();
    jssc.awaitTermination();
}