org.apache.spark.streaming.Seconds Java Examples
The following examples show how to use org.apache.spark.streaming.Seconds.
You can follow the links above each example to view the original project or source file.
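Before the project examples, here is a minimal, self-contained sketch of the usual pattern: Seconds.apply(n) returns an org.apache.spark.streaming.Duration of n seconds, which is passed to a streaming context as its batch interval (Durations.seconds(n) is an equivalent Java-friendly helper). The class name SecondsDemo and the localhost:9999 socket source are illustrative assumptions, not taken from any project below.

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.Seconds;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class SecondsDemo {
    public static void main(String[] args) throws InterruptedException {
        // Seconds is a Scala factory object: Seconds.apply(10) returns a
        // Duration of ten seconds. Durations.seconds(10) is the equivalent
        // Java-friendly helper.
        Duration batchInterval = Seconds.apply(10);

        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("SecondsDemo");

        // The Duration becomes the micro-batch interval of the context.
        JavaStreamingContext jsc = new JavaStreamingContext(conf, batchInterval);

        // A socket source on localhost:9999 (an assumption, purely for
        // illustration) plus print() gives the context one output operation.
        jsc.socketTextStream("localhost", 9999).print();

        jsc.start();
        jsc.awaitTermination();
    }
}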
Example #1
Source File: JobHelper.java From sylph with Apache License 2.0
static Serializable build1xJob(String jobId, EtlFlow flow, URLClassLoader jobClassLoader, ConnectorStore connectorStore)
        throws Exception {
    final AtomicBoolean isCompile = new AtomicBoolean(true);
    final Supplier<StreamingContext> appGetter = (Supplier<StreamingContext> & Serializable) () -> {
        logger.info("========create spark StreamingContext mode isCompile = " + isCompile.get() + "============");
        SparkConf sparkConf = isCompile.get()
                ? new SparkConf().setMaster("local[*]").setAppName("sparkCompile")
                : new SparkConf();
        //todo: 5s is default
        SparkSession sparkSession = SparkSession.builder().config(sparkConf).getOrCreate();
        StreamingContext spark = new StreamingContext(sparkSession.sparkContext(), Seconds.apply(5));

        Bean bean = binder -> binder.bind(StreamingContext.class, spark);
        StreamNodeLoader loader = new StreamNodeLoader(connectorStore, IocFactory.create(bean));
        buildGraph(loader, flow);
        return spark;
    };

    JVMLauncher<Integer> launcher = JVMLaunchers.<Integer>newJvm()
            .setCallable(() -> {
                appGetter.get();
                return 1;
            })
            .setConsole((line) -> System.out.println(new Ansi().fg(YELLOW).a("[" + jobId + "] ").fg(GREEN).a(line).reset()))
            .addUserURLClassLoader(jobClassLoader)
            .notDepThisJvmClassPath()
            .setClassLoader(jobClassLoader)
            .build();
    launcher.startAndGet();
    isCompile.set(false);
    return (Serializable) appGetter;
}
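One detail worth calling out in Example #1 is the intersection cast (Supplier<StreamingContext> & Serializable): it makes the compiler emit a lambda that implements both interfaces, so the job factory itself is serializable and can be returned from the method and re-invoked later, possibly in another JVM. A stripped-down sketch of the idiom (all names here are ours, for illustration only):

import java.io.Serializable;
import java.util.function.Supplier;

public class SerializableLambdaDemo {
    public static void main(String[] args) {
        // The intersection cast tells the compiler to generate a lambda that
        // implements both Supplier and Serializable, so it can be serialized
        // and re-invoked elsewhere.
        Supplier<String> factory =
            (Supplier<String> & Serializable) () -> "built lazily, possibly in another JVM";
        System.out.println(factory.get());
    }
}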
Example #2
Source File: CloudPubSubStreamingWordCount.java From spark-on-k8s-gcp-examples with Apache License 2.0
public static void main(String[] args) throws InterruptedException {
    if (args.length != 4) {
        System.err.println("Usage: CloudPubSubStreamingWordCount <GCP project ID> "
            + "<Cloud PubSub subscription> <GCS output dir path> <job duration in seconds>");
        System.exit(1);
    }

    Preconditions.checkArgument(
        !Strings.isNullOrEmpty(args[0]), "GCP project ID must not be null or empty");
    Preconditions.checkArgument(
        !Strings.isNullOrEmpty(args[1]), "Cloud PubSub topic name must not be empty");

    JavaStreamingContext jsc = new JavaStreamingContext(
        new SparkConf().setAppName("Cloud PubSub Spark Streaming Word Count"),
        Seconds.apply(30) // Batch duration
    );
    Configuration hadoopConf = jsc.sparkContext().hadoopConfiguration();
    // Use service account for authentication. The service account key file is located at the path
    // specified by the configuration property google.cloud.auth.service.account.json.keyfile.
    hadoopConf.set(
        EntriesCredentialConfiguration.BASE_KEY_PREFIX
            + EntriesCredentialConfiguration.ENABLE_SERVICE_ACCOUNTS_SUFFIX,
        "true");
    // Use the service account Json key file shared with the GCS connector.
    String serviceAccountJsonKeyFilePath = hadoopConf.get(
        EntriesCredentialConfiguration.BASE_KEY_PREFIX
            + EntriesCredentialConfiguration.JSON_KEYFILE_SUFFIX);
    Preconditions.checkArgument(!Strings.isNullOrEmpty(serviceAccountJsonKeyFilePath),
        "Service account Json key file path must be specified");

    // This will create a subscription to the given topic.
    JavaReceiverInputDStream<SparkPubsubMessage> pubSubStream = PubsubUtils.createStream(
        jsc,
        args[0], // GCP project ID
        args[1], // Cloud PubSub subscription
        new SparkGCPCredentials.Builder()
            .jsonServiceAccount(serviceAccountJsonKeyFilePath)
            .build(),
        StorageLevel.MEMORY_AND_DISK_SER());

    JavaPairDStream<String, Long> wordCounts = pubSubStream
        .mapToPair(message -> new Tuple2<>(new String(message.getData()), 1L))
        .reduceByKey((count1, count2) -> count1 + count2);

    final String gcsFilePathTemplate = args[2] + "/batch-%d";
    wordCounts
        .mapToPair(tuple -> new Tuple2<>(new Text(tuple._1), new LongWritable(tuple._2)))
        .foreachRDD(rdd -> rdd.saveAsNewAPIHadoopFile(
            String.format(gcsFilePathTemplate, rdd.id()),
            Text.class, LongWritable.class, TextOutputFormat.class));

    try {
        jsc.start();
        // Let the job run for the given duration and then terminate it.
        jsc.awaitTerminationOrTimeout(TimeUnit.SECONDS.toMillis(Long.parseLong(args[3])));
    } finally {
        jsc.stop(true, true);
    }
}
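Example #2 also demonstrates a clean way to bound a streaming job's lifetime: awaitTerminationOrTimeout blocks for at most the given number of milliseconds, and stop(true, true) then shuts down both the streaming context and the underlying SparkContext gracefully, letting in-flight batches finish. The sketch below isolates that shutdown pattern; the queue-based source and all names are our assumptions, for illustration only.

import java.util.Arrays;
import java.util.LinkedList;
import java.util.Queue;
import java.util.concurrent.TimeUnit;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.streaming.Seconds;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class TimedShutdownDemo {
    public static void main(String[] args) throws InterruptedException {
        JavaStreamingContext jsc = new JavaStreamingContext(
            new SparkConf().setMaster("local[*]").setAppName("TimedShutdownDemo"),
            Seconds.apply(1));

        // A trivial in-memory source (our assumption, for illustration) so the
        // context has at least one output operation to run.
        Queue<JavaRDD<Integer>> queue = new LinkedList<>();
        queue.add(jsc.sparkContext().parallelize(Arrays.asList(1, 2, 3)));
        jsc.queueStream(queue).print();

        try {
            jsc.start();
            // Block for at most 30 seconds, then fall through to the shutdown.
            jsc.awaitTerminationOrTimeout(TimeUnit.SECONDS.toMillis(30));
        } finally {
            // stopSparkContext = true, stopGracefully = true: in-flight batches
            // are allowed to finish before the contexts are torn down.
            jsc.stop(true, true);
        }
    }
}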
Example #3
Source File: JavaStreamingTestExample.java From SparkDemo with MIT License
public static void main(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: JavaStreamingTestExample "
            + "<dataDir> <batchDuration> <numBatchesTimeout>");
        System.exit(1);
    }
    String dataDir = args[0];
    Duration batchDuration = Seconds.apply(Long.parseLong(args[1]));
    int numBatchesTimeout = Integer.parseInt(args[2]);

    SparkConf conf = new SparkConf().setMaster("local").setAppName("StreamingTestExample");
    JavaStreamingContext ssc = new JavaStreamingContext(conf, batchDuration);

    ssc.checkpoint(Utils.createTempDir(System.getProperty("java.io.tmpdir"), "spark").toString());

    // $example on$
    JavaDStream<BinarySample> data = ssc.textFileStream(dataDir).map(
        new Function<String, BinarySample>() {
            @Override
            public BinarySample call(String line) {
                String[] ts = line.split(",");
                boolean label = Boolean.parseBoolean(ts[0]);
                double value = Double.parseDouble(ts[1]);
                return new BinarySample(label, value);
            }
        });

    StreamingTest streamingTest = new StreamingTest()
        .setPeacePeriod(0)
        .setWindowSize(0)
        .setTestMethod("welch");

    JavaDStream<StreamingTestResult> out = streamingTest.registerStream(data);
    out.print();
    // $example off$

    // Stop processing if test becomes significant or we time out
    timeoutCounter = numBatchesTimeout;
    out.foreachRDD(new VoidFunction<JavaRDD<StreamingTestResult>>() {
        @Override
        public void call(JavaRDD<StreamingTestResult> rdd) {
            timeoutCounter -= 1;
            boolean anySignificant = !rdd.filter(new Function<StreamingTestResult, Boolean>() {
                @Override
                public Boolean call(StreamingTestResult v) {
                    return v.pValue() < 0.05;
                }
            }).isEmpty();
            if (timeoutCounter <= 0 || anySignificant) {
                rdd.context().stop();
            }
        }
    });

    ssc.start();
    ssc.awaitTermination();
}
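As an aside, the anonymous classes in Example #3 predate Java 8 style; because Spark's org.apache.spark.api.java.function.Function declares a single call method, the same parser can be written as a lambda. A hedged sketch of that rewrite (class and method names are ours):

import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.stat.test.BinarySample;

public class LambdaParseDemo {
    // Lambda equivalent of Example #3's anonymous Function: parse a
    // "label,value" line into a BinarySample. The result can be passed
    // straight to JavaDStream.map in place of the inner class.
    static Function<String, BinarySample> parseLine() {
        return line -> {
            String[] ts = line.split(",");
            return new BinarySample(Boolean.parseBoolean(ts[0]), Double.parseDouble(ts[1]));
        };
    }
}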
Example #4
Source File: AbstractJavaEsSparkStreamingTest.java From elasticsearch-hadoop with Apache License 2.0
@Before
public void createStreamingContext() throws Exception {
    ssc = new JavaStreamingContext(sc, Seconds.apply(1));
}