Java Code Examples for org.apache.spark.streaming.api.java.JavaStreamingContext#addStreamingListener()
The following examples show how to use
org.apache.spark.streaming.api.java.JavaStreamingContext#addStreamingListener().
Each snippet notes its original project, source file, and license.
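Before the project examples, here is a minimal sketch of the call itself. The listener class (BatchLoggingListener) and its logging are illustrative assumptions, not taken from any project on this page; the key point is that listeners extend org.apache.spark.streaming.api.java.JavaStreamingListener and should be registered before the context is started.

    import org.apache.spark.SparkConf;
    import org.apache.spark.streaming.Durations;
    import org.apache.spark.streaming.api.java.JavaStreamingContext;
    import org.apache.spark.streaming.api.java.JavaStreamingListener;
    import org.apache.spark.streaming.api.java.JavaStreamingListenerBatchCompleted;

    // Hypothetical listener: logs each completed micro-batch.
    class BatchLoggingListener extends JavaStreamingListener {
        @Override
        public void onBatchCompleted(JavaStreamingListenerBatchCompleted batchCompleted) {
            System.out.println("Batch completed: "
                + batchCompleted.batchInfo().numRecords() + " records");
        }
    }

    public class MinimalListenerExample {
        public static void main(String[] args) throws InterruptedException {
            SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("listener-demo");
            JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(1));
            // Register the listener before starting the context.
            jssc.addStreamingListener(new BatchLoggingListener());
            // ... define DStreams and output operations here ...
            jssc.start();
            jssc.awaitTermination();
        }
    }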
Example 1
Source File: StreamingService.java, from the cxf project (Apache License 2.0)
private void processStream(AsyncResponse async, List<String> inputStrings) {
    try {
        SparkConf sparkConf = new SparkConf().setMaster("local[*]")
            .setAppName("JAX-RS Spark Connect " + SparkUtils.getRandomId());
        JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));

        SparkStreamingOutput streamOut = new SparkStreamingOutput(jssc);
        SparkStreamingListener sparkListener = new SparkStreamingListener(streamOut);
        // Register the listener before the context is started.
        jssc.addStreamingListener(sparkListener);

        JavaDStream<String> receiverStream = null;
        if ("queue".equals(receiverType)) {
            Queue<JavaRDD<String>> rddQueue = new LinkedList<>();
            for (int i = 0; i < 30; i++) {
                rddQueue.add(jssc.sparkContext().parallelize(inputStrings));
            }
            receiverStream = jssc.queueStream(rddQueue);
        } else {
            receiverStream = jssc.receiverStream(new StringListReceiver(inputStrings));
        }

        JavaPairDStream<String, Integer> wordCounts =
            SparkUtils.createOutputDStream(receiverStream, false);
        wordCounts.foreachRDD(new OutputFunction(streamOut));
        jssc.start();

        executor.execute(new SparkJob(async, sparkListener));
    } catch (Exception ex) {
        // the compiler does not allow to catch SparkException directly
        if (ex instanceof SparkException) {
            async.cancel(60);
        } else {
            async.resume(new WebApplicationException(ex));
        }
    }
}
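SparkStreamingListener and SparkJob above belong to the CXF demo code and are not shown in this excerpt. As a rough, assumed sketch (not CXF's actual implementation): a coordination listener of this kind typically trips a latch from onBatchCompleted so that the job thread handed to the executor can wait for results before resuming the AsyncResponse.

    import java.util.concurrent.CountDownLatch;
    import java.util.concurrent.TimeUnit;
    import org.apache.spark.streaming.api.java.JavaStreamingListener;
    import org.apache.spark.streaming.api.java.JavaStreamingListenerBatchCompleted;

    // Illustrative only: signals a waiting thread once a batch has completed.
    class CompletionSignallingListener extends JavaStreamingListener {
        private final CountDownLatch done = new CountDownLatch(1);

        @Override
        public void onBatchCompleted(JavaStreamingListenerBatchCompleted batchCompleted) {
            done.countDown();
        }

        // Blocks the caller (e.g. a SparkJob-style worker) until a batch completes.
        public boolean awaitCompletion(long timeout, TimeUnit unit) throws InterruptedException {
            return done.await(timeout, unit);
        }
    }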
Example 2
Source File: SparkRunner.java, from the beam project (Apache License 2.0)
@Override
public SparkPipelineResult run(final Pipeline pipeline) {
    LOG.info("Executing pipeline using the SparkRunner.");
    final SparkPipelineResult result;
    final Future<?> startPipeline;
    final SparkPipelineTranslator translator;
    final ExecutorService executorService = Executors.newSingleThreadExecutor();

    MetricsEnvironment.setMetricsSupported(true);

    // visit the pipeline to determine the translation mode
    detectTranslationMode(pipeline);
    pipeline.replaceAll(SparkTransformOverrides.getDefaultOverrides(mOptions.isStreaming()));
    prepareFilesToStage(mOptions);

    if (mOptions.isStreaming()) {
        CheckpointDir checkpointDir = new CheckpointDir(mOptions.getCheckpointDir());
        SparkRunnerStreamingContextFactory streamingContextFactory =
            new SparkRunnerStreamingContextFactory(pipeline, mOptions, checkpointDir);
        final JavaStreamingContext jssc =
            JavaStreamingContext.getOrCreate(
                checkpointDir.getSparkCheckpointDir().toString(), streamingContextFactory);

        // Checkpoint aggregator/metrics values
        jssc.addStreamingListener(
            new JavaStreamingListenerWrapper(
                new AggregatorsAccumulator.AccumulatorCheckpointingSparkListener()));
        jssc.addStreamingListener(
            new JavaStreamingListenerWrapper(
                new MetricsAccumulator.AccumulatorCheckpointingSparkListener()));

        // register user-defined listeners.
        for (JavaStreamingListener listener : mOptions.as(SparkContextOptions.class).getListeners()) {
            LOG.info("Registered listener {}.", listener.getClass().getSimpleName());
            jssc.addStreamingListener(new JavaStreamingListenerWrapper(listener));
        }

        // register Watermarks listener to broadcast the advanced WMs.
        jssc.addStreamingListener(
            new JavaStreamingListenerWrapper(new WatermarkAdvancingStreamingListener()));

        // The reason we call initAccumulators here even though it is called in
        // SparkRunnerStreamingContextFactory is because the factory is not called when resuming
        // from checkpoint (When not resuming from checkpoint initAccumulators will be called twice
        // but this is fine since it is idempotent).
        initAccumulators(mOptions, jssc.sparkContext());

        startPipeline =
            executorService.submit(
                () -> {
                    LOG.info("Starting streaming pipeline execution.");
                    jssc.start();
                });
        executorService.shutdown();

        result = new SparkPipelineResult.StreamingMode(startPipeline, jssc);
    } else {
        // create the evaluation context
        final JavaSparkContext jsc = SparkContextFactory.getSparkContext(mOptions);
        final EvaluationContext evaluationContext = new EvaluationContext(jsc, pipeline, mOptions);
        translator = new TransformTranslator.Translator();

        // update the cache candidates
        updateCacheCandidates(pipeline, translator, evaluationContext);

        initAccumulators(mOptions, jsc);
        startPipeline =
            executorService.submit(
                () -> {
                    pipeline.traverseTopologically(new Evaluator(translator, evaluationContext));
                    evaluationContext.computeOutputs();
                    LOG.info("Batch pipeline execution complete.");
                });
        executorService.shutdown();

        result = new SparkPipelineResult.BatchMode(startPipeline, jsc);
    }

    if (mOptions.getEnableSparkMetricSinks()) {
        registerMetricsSource(mOptions.getAppName());
    }

    // it would have been better to create MetricsPusher from runner-core but we need
    // runner-specific MetricsContainerStepMap
    MetricsPusher metricsPusher =
        new MetricsPusher(
            MetricsAccumulator.getInstance().value(), mOptions.as(MetricsOptions.class), result);
    metricsPusher.start();

    return result;
}
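The loop over mOptions.as(SparkContextOptions.class).getListeners() in Example 2 picks up listeners that the user attached to the pipeline options. A hedged sketch of that registration side, assuming Beam's SparkContextOptions with its setListeners method; the listener body is a placeholder:

    import java.util.Collections;
    import org.apache.beam.runners.spark.SparkContextOptions;
    import org.apache.beam.sdk.Pipeline;
    import org.apache.beam.sdk.options.PipelineOptionsFactory;
    import org.apache.spark.streaming.api.java.JavaStreamingListener;
    import org.apache.spark.streaming.api.java.JavaStreamingListenerBatchCompleted;

    public class RegisterListenerSketch {
        public static void main(String[] args) {
            // Placeholder listener: a real one would inspect the batch info.
            JavaStreamingListener myListener = new JavaStreamingListener() {
                @Override
                public void onBatchCompleted(JavaStreamingListenerBatchCompleted batchCompleted) {
                    // react to every completed micro-batch
                }
            };
            SparkContextOptions options = PipelineOptionsFactory.as(SparkContextOptions.class);
            options.setListeners(Collections.singletonList(myListener));
            // When this pipeline runs on the SparkRunner in streaming mode,
            // run(...) above wraps and registers the listener via addStreamingListener.
            Pipeline pipeline = Pipeline.create(options);
        }
    }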
Example 3
Source File: Server.java, from the cxf project (Apache License 2.0)
protected Server(String[] args) throws Exception {
    ServerSocket sparkServerSocket = new ServerSocket(9999);
    ServerSocket jaxrsResponseServerSocket = new ServerSocket(10000);
    Socket jaxrsResponseClientSocket = new Socket("localhost", 10000);

    SparkConf sparkConf = new SparkConf().setMaster("local[*]")
        .setAppName("JAX-RS Spark Socket Connect");
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));

    SparkStreamingOutput streamOut = new SparkStreamingOutput(jssc);
    SparkStreamingListener sparkListener = new SparkStreamingListener(streamOut);
    jssc.addStreamingListener(sparkListener);

    // socketTextStream connects to port 9999 as a client; the accept() below
    // yields the socket this server writes input lines into.
    JavaDStream<String> receiverStream =
        jssc.socketTextStream("localhost", 9999, StorageLevels.MEMORY_ONLY);
    JavaPairDStream<String, Integer> wordCounts =
        SparkUtils.createOutputDStream(receiverStream, true);

    PrintStream sparkResponseOutputStream =
        new PrintStream(jaxrsResponseClientSocket.getOutputStream(), true);
    wordCounts.foreachRDD(new SocketOutputFunction(sparkResponseOutputStream));

    jssc.start();

    Socket receiverClientSocket = sparkServerSocket.accept();
    PrintStream sparkOutputStream =
        new PrintStream(receiverClientSocket.getOutputStream(), true);
    BufferedReader sparkInputStream =
        new BufferedReader(
            new InputStreamReader(jaxrsResponseServerSocket.accept().getInputStream()));

    JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean();
    sf.setResourceClasses(StreamingService.class);
    sf.setResourceProvider(StreamingService.class,
        new SingletonResourceProvider(new StreamingService(sparkInputStream, sparkOutputStream)));
    sf.setAddress("http://localhost:9000/spark");
    sf.create();

    jssc.awaitTermination();
    sparkServerSocket.close();
    jaxrsResponseServerSocket.close();
    jaxrsResponseClientSocket.close();
}
Example 4
Source File: ReceiverLauncher.java, from the kafka-spark-consumer project (Apache License 2.0)
private static <E> JavaDStream<MessageAndMetadata<E>> createStream(
        JavaStreamingContext jsc,
        Properties props,
        int numberOfReceivers,
        StorageLevel storageLevel,
        KafkaMessageHandler<E> messageHandler) {
    List<JavaDStream<MessageAndMetadata<E>>> streamsList = new ArrayList<>();
    JavaDStream<MessageAndMetadata<E>> unionStreams;
    KafkaConfig globalConfig = new KafkaConfig(props);
    _zkPath = (String) globalConfig.brokerZkPath;
    String[] topicList = props.getProperty(Config.KAFKA_TOPIC).split(",");
    int totalPartitions = 0;
    Map<String, KafkaConfig> topicConfigMap = new HashMap<>();
    for (String topic : topicList) {
        Properties property = new Properties();
        property.putAll(props);
        property.replace(Config.KAFKA_TOPIC, topic.trim());
        KafkaConfig kafkaConfig = new KafkaConfig(property);
        ZkState zkState = new ZkState(kafkaConfig);
        int numberOfPartition = getNumPartitions(zkState, topic.trim());
        totalPartitions = totalPartitions + numberOfPartition;
        zkState.close();
        topicConfigMap.put(topic + ":" + numberOfPartition, kafkaConfig);
    }

    for (Map.Entry<String, KafkaConfig> entry : topicConfigMap.entrySet()) {
        String[] tp = entry.getKey().split(":");
        int partitions = Integer.parseInt(tp[1]);
        KafkaConfig config = entry.getValue();
        // Assign receivers to each topic in proportion to its share of all partitions.
        int assignedReceivers =
            (int) Math.round((partitions / (double) totalPartitions) * numberOfReceivers);
        if (assignedReceivers == 0) {
            assignedReceivers = 1;
        }
        assignReceiversToPartitions(
            assignedReceivers, partitions, streamsList, config, storageLevel, messageHandler, jsc);
    }

    // Union all the streams if there is more than 1 stream
    if (streamsList.size() > 1) {
        unionStreams = jsc.union(streamsList.get(0), streamsList.subList(1, streamsList.size()));
    } else {
        // Otherwise, just use the 1 stream
        unionStreams = streamsList.get(0);
    }

    final long batchDuration = jsc.ssc().graph().batchDuration().milliseconds();
    ReceiverStreamListener listener = new ReceiverStreamListener(globalConfig, batchDuration);
    jsc.addStreamingListener(listener);

    // Reset the fetch size
    Utils.setFetchRate(globalConfig, globalConfig._pollRecords);
    return unionStreams;
}
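The assignment above distributes numberOfReceivers across topics in proportion to each topic's share of the total partitions, with a floor of one receiver per topic. A standalone illustration of that arithmetic (the topic sizes are made-up numbers):

    // Made-up sizes: topicA has 8 partitions, topicB has 2, and 3 receivers are requested.
    int numberOfReceivers = 3;
    int totalPartitions = 8 + 2;
    int forTopicA = (int) Math.round((8 / (double) totalPartitions) * numberOfReceivers); // 2
    int forTopicB = (int) Math.round((2 / (double) totalPartitions) * numberOfReceivers); // 1
    // Any topic whose share rounds to 0 is still given 1 receiver.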
Example 5
Source File: AbstractJavaEsSparkStreamingTest.java, from the elasticsearch-hadoop project (Apache License 2.0)
public ExpectingToThrow from(JavaStreamingContext ssc) {
    ssc.addStreamingListener(this);
    return this;
}
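Since from(...) passes this to addStreamingListener, ExpectingToThrow must itself extend JavaStreamingListener. A guessed outline of such a test helper (everything beyond the from(...) method shown above is an assumption, not elasticsearch-hadoop's actual code):

    // Illustrative test helper: registers itself as a listener and records batch activity.
    class ExpectingToThrow extends JavaStreamingListener {
        private final Class<? extends Throwable> expected;
        private volatile boolean sawBatch = false;

        ExpectingToThrow(Class<? extends Throwable> expected) {
            this.expected = expected;
        }

        public ExpectingToThrow from(JavaStreamingContext ssc) {
            ssc.addStreamingListener(this);
            return this;
        }

        @Override
        public void onBatchCompleted(JavaStreamingListenerBatchCompleted batchCompleted) {
            sawBatch = true;
        }

        // assertion methods over `expected` and `sawBatch` would follow in a real helper
    }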