org.apache.spark.streaming.Seconds Java Examples
The following examples show how to use org.apache.spark.streaming.Seconds.
You can follow the links above each example to view the original project or source file.
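Before the project examples, here is a minimal, self-contained sketch of the usual pattern: Seconds.apply(n) returns an org.apache.spark.streaming.Duration of n seconds, which is passed to a streaming context as its batch interval (Durations.seconds(n) is an equivalent Java-friendly helper). The class name SecondsDemo and the localhost:9999 socket source are illustrative assumptions, not taken from any project below.

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.Seconds;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class SecondsDemo {
    public static void main(String[] args) throws InterruptedException {
        // Seconds is a Scala factory object: Seconds.apply(10) returns a
        // Duration of ten seconds. Durations.seconds(10) is the equivalent
        // Java-friendly helper.
        Duration batchInterval = Seconds.apply(10);

        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("SecondsDemo");

        // The Duration becomes the micro-batch interval of the context.
        JavaStreamingContext jsc = new JavaStreamingContext(conf, batchInterval);

        // A socket source on localhost:9999 (an assumption, purely for
        // illustration) plus print() gives the context one output operation.
        jsc.socketTextStream("localhost", 9999).print();

        jsc.start();
        jsc.awaitTermination();
    }
}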
Example #1
Source File: JobHelper.java From sylph with Apache License 2.0
static Serializable build1xJob(String jobId, EtlFlow flow, URLClassLoader jobClassLoader, ConnectorStore connectorStore)
        throws Exception {
    final AtomicBoolean isCompile = new AtomicBoolean(true);
    final Supplier<StreamingContext> appGetter = (Supplier<StreamingContext> & Serializable) () -> {
        logger.info("========create spark StreamingContext mode isCompile = " + isCompile.get() + "============");
        SparkConf sparkConf = isCompile.get()
                ? new SparkConf().setMaster("local[*]").setAppName("sparkCompile")
                : new SparkConf();
        //todo: 5s is default
        SparkSession sparkSession = SparkSession.builder().config(sparkConf).getOrCreate();
        StreamingContext spark = new StreamingContext(sparkSession.sparkContext(), Seconds.apply(5));

        Bean bean = binder -> binder.bind(StreamingContext.class, spark);
        StreamNodeLoader loader = new StreamNodeLoader(connectorStore, IocFactory.create(bean));
        buildGraph(loader, flow);
        return spark;
    };

    JVMLauncher<Integer> launcher = JVMLaunchers.<Integer>newJvm()
            .setCallable(() -> {
                appGetter.get();
                return 1;
            })
            .setConsole((line) -> System.out.println(new Ansi().fg(YELLOW).a("[" + jobId + "] ").fg(GREEN).a(line).reset()))
            .addUserURLClassLoader(jobClassLoader)
            .notDepThisJvmClassPath()
            .setClassLoader(jobClassLoader)
            .build();
    launcher.startAndGet();
    isCompile.set(false);
    return (Serializable) appGetter;
}
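One detail worth calling out in Example #1 is the intersection cast (Supplier<StreamingContext> & Serializable): it makes the compiler emit a lambda that implements both interfaces, so the job factory itself is serializable and can be returned from the method and re-invoked later, possibly in another JVM. A stripped-down sketch of the idiom (all names here are ours, for illustration only):

import java.io.Serializable;
import java.util.function.Supplier;

public class SerializableLambdaDemo {
    public static void main(String[] args) {
        // The intersection cast tells the compiler to generate a lambda that
        // implements both Supplier and Serializable, so it can be serialized
        // and re-invoked elsewhere.
        Supplier<String> factory =
            (Supplier<String> & Serializable) () -> "built lazily, possibly in another JVM";
        System.out.println(factory.get());
    }
}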
Example #2
Source File: CloudPubSubStreamingWordCount.java From spark-on-k8s-gcp-examples with Apache License 2.0
public static void main(String[] args) throws InterruptedException {
    if (args.length != 4) {
        System.err.println("Usage: CloudPubSubStreamingWordCount <GCP project ID> "
            + "<Cloud PubSub subscription> <GCS output dir path> <job duration in seconds>");
        System.exit(1);
    }

    Preconditions.checkArgument(
        !Strings.isNullOrEmpty(args[0]), "GCP project ID must not be null or empty");
    Preconditions.checkArgument(
        !Strings.isNullOrEmpty(args[1]), "Cloud PubSub topic name must not be empty");

    JavaStreamingContext jsc = new JavaStreamingContext(
        new SparkConf().setAppName("Cloud PubSub Spark Streaming Word Count"),
        Seconds.apply(30) // Batch duration
    );
    Configuration hadoopConf = jsc.sparkContext().hadoopConfiguration();
    // Use service account for authentication. The service account key file is located at the path
    // specified by the configuration property google.cloud.auth.service.account.json.keyfile.
    hadoopConf.set(
        EntriesCredentialConfiguration.BASE_KEY_PREFIX
            + EntriesCredentialConfiguration.ENABLE_SERVICE_ACCOUNTS_SUFFIX,
        "true");
    // Use the service account Json key file shared with the GCS connector.
    String serviceAccountJsonKeyFilePath = hadoopConf.get(
        EntriesCredentialConfiguration.BASE_KEY_PREFIX
            + EntriesCredentialConfiguration.JSON_KEYFILE_SUFFIX);
    Preconditions.checkArgument(!Strings.isNullOrEmpty(serviceAccountJsonKeyFilePath),
        "Service account Json key file path must be specified");

    // This will create a subscription to the given topic.
    JavaReceiverInputDStream<SparkPubsubMessage> pubSubStream = PubsubUtils.createStream(
        jsc,
        args[0], // GCP project ID
        args[1], // Cloud PubSub subscription
        new SparkGCPCredentials.Builder()
            .jsonServiceAccount(serviceAccountJsonKeyFilePath)
            .build(),
        StorageLevel.MEMORY_AND_DISK_SER());

    JavaPairDStream<String, Long> wordCounts = pubSubStream
        .mapToPair(message -> new Tuple2<>(new String(message.getData()), 1L))
        .reduceByKey((count1, count2) -> count1 + count2);

    final String gcsFilePathTemplate = args[2] + "/batch-%d";
    wordCounts
        .mapToPair(tuple -> new Tuple2<>(new Text(tuple._1), new LongWritable(tuple._2)))
        .foreachRDD(rdd -> rdd.saveAsNewAPIHadoopFile(
            String.format(gcsFilePathTemplate, rdd.id()),
            Text.class, LongWritable.class, TextOutputFormat.class));

    try {
        jsc.start();
        // Let the job run for the given duration and then terminate it.
        jsc.awaitTerminationOrTimeout(TimeUnit.SECONDS.toMillis(Long.parseLong(args[3])));
    } finally {
        jsc.stop(true, true);
    }
}
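Example #2 also demonstrates a clean way to bound a streaming job's lifetime: awaitTerminationOrTimeout blocks for at most the given number of milliseconds, and stop(true, true) then shuts down both the streaming context and the underlying SparkContext gracefully, letting in-flight batches finish. The sketch below isolates that shutdown pattern; the queue-based source and all names are our assumptions, for illustration only.

import java.util.Arrays;
import java.util.LinkedList;
import java.util.Queue;
import java.util.concurrent.TimeUnit;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.streaming.Seconds;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class TimedShutdownDemo {
    public static void main(String[] args) throws InterruptedException {
        JavaStreamingContext jsc = new JavaStreamingContext(
            new SparkConf().setMaster("local[*]").setAppName("TimedShutdownDemo"),
            Seconds.apply(1));

        // A trivial in-memory source (our assumption, for illustration) so the
        // context has at least one output operation to run.
        Queue<JavaRDD<Integer>> queue = new LinkedList<>();
        queue.add(jsc.sparkContext().parallelize(Arrays.asList(1, 2, 3)));
        jsc.queueStream(queue).print();

        try {
            jsc.start();
            // Block for at most 30 seconds, then fall through to the shutdown.
            jsc.awaitTerminationOrTimeout(TimeUnit.SECONDS.toMillis(30));
        } finally {
            // stopSparkContext = true, stopGracefully = true: in-flight batches
            // are allowed to finish before the contexts are torn down.
            jsc.stop(true, true);
        }
    }
}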
Example #3
Source File: JavaStreamingTestExample.java From SparkDemo with MIT License
public static void main(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: JavaStreamingTestExample "
            + "<dataDir> <batchDuration> <numBatchesTimeout>");
        System.exit(1);
    }
    String dataDir = args[0];
    Duration batchDuration = Seconds.apply(Long.parseLong(args[1]));
    int numBatchesTimeout = Integer.parseInt(args[2]);

    SparkConf conf = new SparkConf().setMaster("local").setAppName("StreamingTestExample");
    JavaStreamingContext ssc = new JavaStreamingContext(conf, batchDuration);

    ssc.checkpoint(Utils.createTempDir(System.getProperty("java.io.tmpdir"), "spark").toString());

    // $example on$
    JavaDStream<BinarySample> data = ssc.textFileStream(dataDir).map(
        new Function<String, BinarySample>() {
            @Override
            public BinarySample call(String line) {
                String[] ts = line.split(",");
                boolean label = Boolean.parseBoolean(ts[0]);
                double value = Double.parseDouble(ts[1]);
                return new BinarySample(label, value);
            }
        });

    StreamingTest streamingTest = new StreamingTest()
        .setPeacePeriod(0)
        .setWindowSize(0)
        .setTestMethod("welch");

    JavaDStream<StreamingTestResult> out = streamingTest.registerStream(data);
    out.print();
    // $example off$

    // Stop processing if test becomes significant or we time out
    timeoutCounter = numBatchesTimeout;
    out.foreachRDD(new VoidFunction<JavaRDD<StreamingTestResult>>() {
        @Override
        public void call(JavaRDD<StreamingTestResult> rdd) {
            timeoutCounter -= 1;
            boolean anySignificant = !rdd.filter(new Function<StreamingTestResult, Boolean>() {
                @Override
                public Boolean call(StreamingTestResult v) {
                    return v.pValue() < 0.05;
                }
            }).isEmpty();
            if (timeoutCounter <= 0 || anySignificant) {
                rdd.context().stop();
            }
        }
    });

    ssc.start();
    ssc.awaitTermination();
}
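As an aside, the anonymous classes in Example #3 predate Java 8 style; because Spark's org.apache.spark.api.java.function.Function declares a single call method, the same parser can be written as a lambda. A hedged sketch of that rewrite (class and method names are ours):

import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.stat.test.BinarySample;

public class LambdaParseDemo {
    // Lambda equivalent of Example #3's anonymous Function: parse a
    // "label,value" line into a BinarySample. The result can be passed
    // straight to JavaDStream.map in place of the inner class.
    static Function<String, BinarySample> parseLine() {
        return line -> {
            String[] ts = line.split(",");
            return new BinarySample(Boolean.parseBoolean(ts[0]), Double.parseDouble(ts[1]));
        };
    }
}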
Example #4
Source File: AbstractJavaEsSparkStreamingTest.java From elasticsearch-hadoop with Apache License 2.0
@Before
public void createStreamingContext() throws Exception {
    ssc = new JavaStreamingContext(sc, Seconds.apply(1));
}