org.apache.spark.streaming.StreamingContext Java Examples

The following examples show how to use org.apache.spark.streaming.StreamingContext. You can go to the original project or source file by following the links above each example.
Example #1
Source File: SparkAppMain.java    From sylph with Apache License 2.0
public static void main(String[] args)
        throws Exception
{
    System.out.println("spark on yarn app starting...");

    @SuppressWarnings("unchecked")
    Supplier<?> sparkJobHandle = (Supplier<?>) byteToObject(new FileInputStream("job.graph"));

    Object appContext = requireNonNull(sparkJobHandle, "sparkJobHandle is null").get();
    if (appContext instanceof SparkSession) {
        checkArgument(((SparkSession) appContext).streams().active().length > 0, "no stream pipeline");
        ((SparkSession) appContext).streams().awaitAnyTermination();
    }
    else if (appContext instanceof StreamingContext) {
        ((StreamingContext) appContext).start();
        ((StreamingContext) appContext).awaitTermination();
    }
}
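
The branch taken above depends on which engine produced the app context: a Structured Streaming job blocks on StreamingQueryManager.awaitAnyTermination(), while a DStream job must be started before blocking on awaitTermination(). A minimal sketch of the DStream lifecycle in isolation (the local master and app name are illustrative, not from sylph):

SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("dstream-demo");
StreamingContext ssc = new StreamingContext(conf, Durations.seconds(5));
// ...register input DStreams and transformations here, before start()...
ssc.start();             // begin receiving data and scheduling micro-batches
ssc.awaitTermination();  // block the driver until stop() is called or the job fails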
 
Example #2
Source File: SparkStreamingSqlEngine.java    From sylph with Apache License 2.0
private static Serializable compile(String jobId, SqlFlow sqlFlow, ConnectorStore connectorStore, SparkJobConfig sparkJobConfig, URLClassLoader jobClassLoader)
        throws JVMException
{
    int batchDuration = sparkJobConfig.getSparkStreamingBatchDuration();
    final AtomicBoolean isCompile = new AtomicBoolean(true);
    final Supplier<StreamingContext> appGetter = (Supplier<StreamingContext> & Serializable) () -> {
        logger.info("========create spark StreamingContext mode isCompile = " + isCompile.get() + "============");
        SparkConf sparkConf = isCompile.get() ?
                new SparkConf().setMaster("local[*]").setAppName("sparkCompile")
                : new SparkConf();
        SparkSession sparkSession = SparkSession.builder().config(sparkConf).getOrCreate();
        StreamingContext ssc = new StreamingContext(sparkSession.sparkContext(), Duration.apply(batchDuration));

        //build sql
        SqlAnalyse analyse = new SparkStreamingSqlAnalyse(ssc, connectorStore, isCompile.get());
        try {
            buildSql(analyse, jobId, sqlFlow);
        }
        catch (Exception e) {
            throwsException(e);
        }
        return ssc;
    };

    JVMLauncher<Boolean> launcher = JVMLaunchers.<Boolean>newJvm()
            .setConsole((line) -> System.out.println(new Ansi().fg(YELLOW).a("[" + jobId + "] ").fg(GREEN).a(line).reset()))
            .setCallable(() -> {
                System.out.println("************ job start ***************");
                appGetter.get();
                return true;
            })
            .addUserURLClassLoader(jobClassLoader)
            .setClassLoader(jobClassLoader)
            .notDepThisJvmClassPath()
            .build();

    launcher.startAndGet();
    isCompile.set(false);
    return (Serializable) appGetter;
}
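
The (Supplier<StreamingContext> & Serializable) intersection cast is what allows appGetter to be returned as a Serializable on the last line: the cast forces the compiled lambda class to implement both interfaces. A minimal sketch of the same trick, with illustrative values:

int batchDuration = 5000;  // any state the lambda captures must itself be serializable
Supplier<String> appGetter =
        (Supplier<String> & Serializable) () -> "batchDuration=" + batchDuration;
// Without the intersection cast the lambda would not implement Serializable,
// and ObjectOutputStream.writeObject(appGetter) would throw NotSerializableException.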
 
Example #3
Source File: JobHelper.java    From sylph with Apache License 2.0
static Serializable build1xJob(String jobId, EtlFlow flow, URLClassLoader jobClassLoader, ConnectorStore connectorStore)
        throws Exception
{
    final AtomicBoolean isCompile = new AtomicBoolean(true);
    final Supplier<StreamingContext> appGetter = (Supplier<StreamingContext> & Serializable) () -> {
        logger.info("========create spark StreamingContext mode isCompile = " + isCompile.get() + "============");
        SparkConf sparkConf = isCompile.get() ?
                new SparkConf().setMaster("local[*]").setAppName("sparkCompile")
                : new SparkConf();
        //todo: 5s is default
        SparkSession sparkSession = SparkSession.builder().config(sparkConf).getOrCreate();
        StreamingContext spark = new StreamingContext(sparkSession.sparkContext(), Seconds.apply(5));

        Bean bean = binder -> binder.bind(StreamingContext.class, spark);
        StreamNodeLoader loader = new StreamNodeLoader(connectorStore, IocFactory.create(bean));
        buildGraph(loader, flow);
        return spark;
    };

    JVMLauncher<Integer> launcher = JVMLaunchers.<Integer>newJvm()
            .setCallable(() -> {
                appGetter.get();
                return 1;
            })
            .setConsole((line) -> System.out.println(new Ansi().fg(YELLOW).a("[" + jobId + "] ").fg(GREEN).a(line).reset()))
            .addUserURLClassLoader(jobClassLoader)
            .notDepThisJvmClassPath()
            .setClassLoader(jobClassLoader)
            .build();
    launcher.startAndGet();
    isCompile.set(false);
    return (Serializable) appGetter;
}
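
Seconds.apply(5) is the Scala companion-object spelling of the 5-second batch interval flagged by the //todo comment; from Java the same interval is more commonly written with the Durations helper. A one-line sketch of the equivalence:

Duration fiveSeconds = Durations.seconds(5);  // equals Seconds.apply(5) and new Duration(5000)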
 
Example #4
Source File: SparkStreamingSqlAnalyse.java    From sylph with Apache License 2.0
public SparkStreamingSqlAnalyse(StreamingContext ssc,
        ConnectorStore connectorStore,
        boolean isCompile)
{
    this.ssc = ssc;
    this.connectorStore = connectorStore;
    this.sparkBean = binder -> {
        binder.bind(StreamingContext.class, ssc);
        binder.bind(JavaStreamingContext.class, new JavaStreamingContext(ssc));
    };
    this.isCompile = isCompile;
}
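
Binding both StreamingContext and JavaStreamingContext is cheap because the Java class is a thin wrapper around the Scala one; both views drive the same underlying context. A minimal round-trip sketch (the app name is illustrative):

StreamingContext scalaCtx = new StreamingContext(
        new SparkConf().setMaster("local[*]").setAppName("wrap-demo"), Durations.seconds(5));
JavaStreamingContext javaCtx = new JavaStreamingContext(scalaCtx);
assert javaCtx.ssc() == scalaCtx;  // the wrapper exposes the original Scala context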
 
Example #5
Source File: SparkBatchProcessingTest.java    From OSTMap with Apache License 2.0
private static StreamingContext createSparkStreamingContext(){
    SparkConf conf = new SparkConf()
            .setAppName("Spark Batch Processing Test")
            .set("spark.serializer", KryoSerializer.class.getCanonicalName())
            .set("spark.eventLog.enabled", "true");
    return new StreamingContext(conf, Durations.seconds(15));
}
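
A hedged usage sketch for the factory above; the socket source, host, and port are illustrative and not part of the OSTMap test:

JavaStreamingContext jssc = new JavaStreamingContext(createSparkStreamingContext());
JavaReceiverInputDStream<String> lines = jssc.socketTextStream("localhost", 9999);
lines.print();           // print the first elements of each 15-second batch
jssc.start();
jssc.awaitTermination();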
 
Example #6
Source File: WatermarkSyncedDStream.java    From beam with Apache License 2.0
public WatermarkSyncedDStream(
    final Queue<JavaRDD<WindowedValue<T>>> rdds,
    final Long batchDuration,
    final StreamingContext ssc) {
  super(ssc, JavaSparkContext$.MODULE$.fakeClassTag());
  this.rdds = rdds;
  this.batchDuration = batchDuration;
}
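
JavaSparkContext$.MODULE$.fakeClassTag() satisfies the ClassTag parameter that the Scala DStream superclass constructor requires; because Java erases generics, a tag carrying no real runtime class is enough here. Where a concrete element type is known, an explicit tag can be built instead (String is an illustrative choice):

scala.reflect.ClassTag<String> tag = scala.reflect.ClassTag$.MODULE$.apply(String.class);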
 
Example #7
Source File: SourceDStream.java    From beam with Apache License 2.0
SourceDStream(
    StreamingContext ssc,
    UnboundedSource<T, CheckpointMarkT> unboundedSource,
    SerializablePipelineOptions options,
    Long boundMaxRecords) {
  super(ssc, JavaSparkContext$.MODULE$.fakeClassTag());
  this.unboundedSource = unboundedSource;
  this.options = options;

  SparkPipelineOptions sparkOptions = options.get().as(SparkPipelineOptions.class);

  // Reader cache expiration interval. 50% of batch interval is added to accommodate latency.
  this.readerCacheInterval = 1.5 * sparkOptions.getBatchIntervalMillis();

  this.boundReadDuration =
      boundReadDuration(
          sparkOptions.getReadTimePercentage(), sparkOptions.getMinReadTimeMillis());
  // set initial parallelism once.
  this.initialParallelism = ssc().sparkContext().defaultParallelism();
  checkArgument(this.initialParallelism > 0, "Number of partitions must be greater than zero.");

  this.boundMaxRecords = boundMaxRecords;

  try {
    this.numPartitions = createMicrobatchSource().split(sparkOptions).size();
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
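
For instance, with a hypothetical 1000 ms batch interval, the cache-expiry arithmetic above works out as:

long batchIntervalMillis = 1000L;                        // illustrative value
double readerCacheInterval = 1.5 * batchIntervalMillis;  // 1500.0 ms: the batch interval plus 50% latency headroom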
 
Example #8
Source File: SparkScheduler.java    From oodt with Apache License 2.0
public SparkScheduler(JobQueue queue) {
    SparkConf conf = new SparkConf();
    conf.setMaster(System.getProperty("resource.runner.spark.host","local"));
    conf.setAppName("OODT Spark Job");

    URL location = SparkScheduler.class.getResource('/'+SparkScheduler.class.getName().replace('.', '/')+".class");
    conf.setJars(new String[]{"../lib/cas-resource-0.8-SNAPSHOT.jar"});
    sc = new SparkContext(conf);
    ssc = new StreamingContext(sc,new Duration(10000));
    this.queue = queue;
}
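
A StreamingContext built from an existing SparkContext, as above, reuses that context rather than creating a new one, so the scheduler can keep submitting batch jobs through sc. A minimal sketch of the shared-context relationship (master and app name are illustrative):

SparkConf conf = new SparkConf().setMaster("local").setAppName("scheduler-demo");
SparkContext sc = new SparkContext(conf);
StreamingContext ssc = new StreamingContext(sc, new Duration(10000));  // 10-second batches
assert ssc.sparkContext() == sc;  // same underlying SparkContext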
 
Example #9
Source File: SparkContainerFactory.java    From sylph with Apache License 2.0
@Override
public JobContainer createLocalContainer(Job job, String lastRunid)
{
    AtomicReference<String> url = new AtomicReference<>();
    JVMLauncher<Boolean> launcher = JVMLaunchers.<Boolean>newJvm()
            //.setXms("512m")
            .setXmx("512m")
            .setConsole(line -> {
                String logo = "Bound SparkUI to 0.0.0.0, and started at";
                if (url.get() == null && line.contains(logo)) {
                    url.set(line.split(logo)[1].trim());
                }
                System.out.println(line);
            })
            .notDepThisJvmClassPath()
            .addUserjars(job.getDepends())
            .build();

    return new LocalContainer()
    {
        @Override
        public String getJobUrl()
        {
            return url.get();
        }

        @Override
        public VmFuture startAsyncExecutor()
                throws Exception
        {
            Supplier<?> jobDAG = job.getJobDAG();
            url.set(null);
            return launcher.startAsync(() -> {
                SparkConf sparkConf = new SparkConf().setMaster("local[*]").setAppName("spark_local");
                SparkContext sparkContext = new SparkContext(sparkConf);

                Object appContext = requireNonNull(jobDAG.get(), "sparkJobHandle is null");
                sparkContext.setLogLevel("WARN");
                if (appContext instanceof SparkSession) {
                    SparkSession sparkSession = (SparkSession) appContext;
                    checkArgument(sparkSession.streams().active().length > 0, "no stream pipeline");
                    sparkSession.streams().awaitAnyTermination();
                }
                else if (appContext instanceof StreamingContext) {
                    StreamingContext ssc = (StreamingContext) appContext;
                    ssc.start();
                    ssc.awaitTermination();
                }
                else {
                    throw new IllegalAccessException();
                }
                System.out.println("sleep........");
                TimeUnit.SECONDS.sleep(99999);
                return true;
            });
        }
    };
}
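
The console callback discovers the SparkUI address by scraping the child JVM's log output. A minimal sketch of that string handling, with a hypothetical log line:

String logo = "Bound SparkUI to 0.0.0.0, and started at";
String line = "INFO Utils: Bound SparkUI to 0.0.0.0, and started at http://10.0.0.5:4040";
if (line.contains(logo)) {
    String url = line.split(logo)[1].trim();  // -> "http://10.0.0.5:4040"
}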
 
Example #10
Source File: ReceiverLauncher.java    From kafka-spark-consumer with Apache License 2.0
public static <E> DStream<MessageAndMetadata<E>> launch(
        StreamingContext ssc, Properties pros, int numberOfReceivers,
        StorageLevel storageLevel, KafkaMessageHandler<E> messageHandler) {
    JavaStreamingContext jsc = new JavaStreamingContext(ssc);
    return createStream(jsc, pros, numberOfReceivers, storageLevel, messageHandler).dstream();
}
 
Example #11
Source File: ReceiverLauncher.java    From kafka-spark-consumer with Apache License 2.0
public static DStream<MessageAndMetadata<byte[]>> launch(
    StreamingContext ssc, Properties pros, int numberOfReceivers, StorageLevel storageLevel) {
  JavaStreamingContext jsc = new JavaStreamingContext(ssc);
  return createStream(jsc, pros, numberOfReceivers, storageLevel, new IdentityMessageHandler()).dstream();
}
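
A hedged usage sketch covering both launch overloads above; the property keys follow kafka-spark-consumer's configuration style, but the exact keys and values here are assumptions, not taken from the project:

Properties props = new Properties();
props.put("zookeeper.hosts", "localhost");  // assumed configuration keys
props.put("kafka.topic", "mytopic");
DStream<MessageAndMetadata<byte[]>> stream =
        ReceiverLauncher.launch(ssc, props, 3, StorageLevel.MEMORY_ONLY());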
 
Example #12
Source File: StreamingPalindromeExample.java    From oodt with Apache License 2.0
@Override
public void setStreamingContext(StreamingContext context) {
    this.ssc = new JavaStreamingContext(context);
}
 
Example #13
Source File: StreamingInstance.java    From oodt with Apache License 2.0
/**
 * Set the streaming context this instance will run with.
 * @param context the shared StreamingContext to run by
 */
public void setStreamingContext(StreamingContext context);