org.apache.spark.streaming.Duration Java Examples
The following examples show how to use
org.apache.spark.streaming.Duration.
Each example is taken from an open-source project; the project and source file are noted above the code.
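Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) showing the two common ways a Duration is constructed — directly in milliseconds, or via the Durations helper — and how it is passed to a JavaStreamingContext as the batch interval. The application name, host, and port are placeholders.

    import org.apache.spark.SparkConf;
    import org.apache.spark.streaming.Duration;
    import org.apache.spark.streaming.Durations;
    import org.apache.spark.streaming.api.java.JavaDStream;
    import org.apache.spark.streaming.api.java.JavaStreamingContext;

    public class DurationSketch {
        public static void main(String[] args) throws InterruptedException {
            // A Duration is a length of time expressed in milliseconds.
            Duration batchInterval = new Duration(2000);     // 2000 ms
            Duration windowLength  = Durations.seconds(30);  // helper: 30 s
            Duration slideInterval = Durations.seconds(10);  // helper: 10 s

            SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("DurationSketch");
            // The Duration passed here is the batch interval: how often a micro-batch is produced.
            JavaStreamingContext ssc = new JavaStreamingContext(conf, batchInterval);

            // Window and slide durations must be multiples of the batch interval.
            JavaDStream<String> lines = ssc.socketTextStream("localhost", 9999);
            JavaDStream<String> windowed = lines.window(windowLength, slideInterval);
            windowed.count().print();

            ssc.start();
            ssc.awaitTermination();
        }
    }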
Example #1
Source File: StreamingRsvpsDStreamCountWindow.java From -Data-Stream-Development-with-Apache-Spark-Kafka-and-Spring-Boot with MIT License

public static void main(String[] args) throws InterruptedException {
    System.setProperty("hadoop.home.dir", HADOOP_HOME_DIR_VALUE);

    final SparkConf conf = new SparkConf()
            .setMaster(RUN_LOCAL_WITH_AVAILABLE_CORES)
            .setAppName(APPLICATION_NAME)
            .set("spark.mongodb.output.uri", MONGODB_OUTPUT_URI)
            .set("spark.streaming.kafka.consumer.cache.enabled", "false");

    final JavaStreamingContext streamingContext =
            new JavaStreamingContext(conf, new Duration(BATCH_DURATION_INTERVAL_MS));

    streamingContext.checkpoint(CHECKPOINT_FOLDER);

    final JavaInputDStream<ConsumerRecord<String, String>> meetupStream =
            KafkaUtils.createDirectStream(
                    streamingContext,
                    LocationStrategies.PreferConsistent(),
                    ConsumerStrategies.<String, String>Subscribe(TOPICS, KAFKA_CONSUMER_PROPERTIES)
            );

    // transformations, streaming algorithms, etc
    JavaDStream<Long> countStream = meetupStream.countByWindow(
            new Duration(WINDOW_LENGTH_MS), new Duration(SLIDING_INTERVAL_MS));

    countStream.foreachRDD((JavaRDD<Long> countRDD) -> {
        MongoSpark.save(
                countRDD.map(
                        r -> Document.parse("{\"rsvps_count\":\"" + String.valueOf(r) + "\"}")
                )
        );
    });

    // some time later, after outputs have completed
    meetupStream.foreachRDD((JavaRDD<ConsumerRecord<String, String>> meetupRDD) -> {
        OffsetRange[] offsetRanges = ((HasOffsetRanges) meetupRDD.rdd()).offsetRanges();

        ((CanCommitOffsets) meetupStream.inputDStream())
                .commitAsync(offsetRanges, new MeetupOffsetCommitCallback());
    });

    streamingContext.start();
    streamingContext.awaitTermination();
}
Example #2
Source File: SparkStreamingBinding.java From datacollector with Apache License 2.0

@Override
@SuppressWarnings("unchecked")
public JavaStreamingContext create() {
    sparkConf.set("spark.streaming.kafka.maxRatePerPartition", String.valueOf(maxRatePerPartition));
    // Use our classpath first, since we ship a newer version of Jackson and possibly other deps in the future.
    sparkConf.set("spark.driver.userClassPathFirst", "true");
    sparkConf.set("spark.executor.userClassPathFirst", "true");

    session = SparkSession.builder().config(sparkConf).getOrCreate();
    JavaStreamingContext result =
            new JavaStreamingContext(new JavaSparkContext(session.sparkContext()), new Duration(duration));

    Map<String, Object> props = new HashMap<>();
    props.put("group.id", groupId);
    props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");

    for (Map.Entry<String, Object> map : props.entrySet()) {
        logMessage(Utils.format("Adding extra kafka config, {}:{}", map.getKey(), map.getValue()), isRunningInMesos);
    }
    logMessage("Meta data broker list " + metaDataBrokerList, isRunningInMesos);
    logMessage("Topic is " + topic, isRunningInMesos);
    logMessage("Auto offset reset is set to " + autoOffsetValue, isRunningInMesos);

    return createDStream(result, props);
}
Example #3
Source File: Flags.java From SparkToParquet with Apache License 2.0

public static void setFromCommandLineArgs(Options options, String[] args) {
    CommandLineParser parser = new PosixParser();
    try {
        CommandLine cl = parser.parse(options, args);
        // Default parameter values
        THE_INSTANCE.windowLength = new Duration(
                Integer.parseInt(cl.getOptionValue(AppMain.WINDOW_LENGTH, "30")) * 1000);
        THE_INSTANCE.slideInterval = new Duration(
                Integer.parseInt(cl.getOptionValue(AppMain.SLIDE_INTERVAL, "5")) * 1000);
        THE_INSTANCE.kafka_broker = cl.getOptionValue(AppMain.KAFKA_BROKER, "kafka:9092");
        THE_INSTANCE.kafka_topic = cl.getOptionValue(AppMain.KAFKA_TOPIC, "apache");
        THE_INSTANCE.parquet_file = cl.getOptionValue(AppMain.PARQUET_FILE, "/user/spark/");
        THE_INSTANCE.initialized = true;
    } catch (ParseException e) {
        THE_INSTANCE.initialized = false;
        System.err.println("Parsing failed. Reason: " + e.getMessage());
    }
}
Example #4
Source File: StreamingContextConfiguration.java From Decision with Apache License 2.0

private JavaStreamingContext create(String streamingContextName, int port, long streamingBatchTime, String sparkHost) {
    SparkConf conf = new SparkConf();
    conf.set("spark.ui.port", String.valueOf(port));
    conf.setAppName(streamingContextName);
    conf.setJars(JavaStreamingContext.jarOfClass(StreamingEngine.class));
    conf.setMaster(sparkHost);
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    conf.registerKryoClasses(new Class[] { StratioStreamingMessage.class, InsertMessage.class,
            ColumnType.class, Action.class });

    HashMap<String, String> tuningProperties = configurationContext.getSparkTunningProperties();
    if (tuningProperties != null && tuningProperties.size() > 0) {
        tuningProperties.forEach((key, value) -> conf.set(key, value));
    }

    JavaStreamingContext streamingContext = new JavaStreamingContext(conf, new Duration(streamingBatchTime));

    return streamingContext;
}
Example #5
Source File: SparkStreamServiceImpl.java From searchanalytics-bigdata with MIT License

@Override
public void setup() {
    // Create a StreamingContext with a SparkConf configuration
    SparkConf sparkConf = new SparkConf(false)
            .setAppName("JaiSpark")
            .setSparkHome("target/sparkhome")
            .setMaster("local")
            .set("spark.executor.memory", "128m")
            .set("spark.local.dir", new File("target/sparkhome/tmp").getAbsolutePath())
            .set("spark.cores.max", "2").set("spark.akka.threads", "2")
            .set("spark.akka.timeout", "60").set("spark.logConf", "true")
            .set("spark.cleaner.delay", "3700")
            .set("spark.cleaner.ttl", "86400")
            .set("spark.shuffle.spill", "false")
            .set("spark.driver.host", "localhost")
            .set("spark.driver.port", "43214");
    jssc = new JavaStreamingContext(sparkConf, new Duration(5000));

    String checkpointDir = hadoopClusterService.getHDFSUri() + "/sparkcheckpoint";
    jssc.checkpoint(checkpointDir);

    startFlumeStream();
}
Example #6
Source File: AbstractSparkLayer.java From oryx with Apache License 2.0

protected final JavaStreamingContext buildStreamingContext() {
    log.info("Starting SparkContext with interval {} seconds", generationIntervalSec);

    SparkConf sparkConf = new SparkConf();

    // Only for tests, really
    if (sparkConf.getOption("spark.master").isEmpty()) {
        log.info("Overriding master to {} for tests", streamingMaster);
        sparkConf.setMaster(streamingMaster);
    }
    // Only for tests, really
    if (sparkConf.getOption("spark.app.name").isEmpty()) {
        String appName = "Oryx" + getLayerName();
        if (id != null) {
            appName = appName + '-' + id;
        }
        log.info("Overriding app name to {} for tests", appName);
        sparkConf.setAppName(appName);
    }
    extraSparkConfig.forEach((key, value) -> sparkConf.setIfMissing(key, value.toString()));

    // Turn this down to prevent long blocking at shutdown
    sparkConf.setIfMissing(
            "spark.streaming.gracefulStopTimeout",
            Long.toString(TimeUnit.MILLISECONDS.convert(generationIntervalSec, TimeUnit.SECONDS)));
    sparkConf.setIfMissing("spark.cleaner.ttl", Integer.toString(20 * generationIntervalSec));

    long generationIntervalMS = TimeUnit.MILLISECONDS.convert(generationIntervalSec, TimeUnit.SECONDS);

    JavaSparkContext jsc = JavaSparkContext.fromSparkContext(SparkContext.getOrCreate(sparkConf));
    return new JavaStreamingContext(jsc, new Duration(generationIntervalMS));
}
Example #7
Source File: JavaHBaseStreamingBulkPutExample.java From learning-hadoop with Apache License 2.0

public static void main(String args[]) {
    if (args.length == 0) {
        System.out.println(
                "JavaHBaseStreamingBulkPutExample {master} {host} {port} {tableName} {columnFamily}");
        return;
    }

    String master = args[0];
    String host = args[1];
    String port = args[2];
    String tableName = args[3];
    String columnFamily = args[4];

    System.out.println("master:" + master);
    System.out.println("host:" + host);
    System.out.println("port:" + Integer.parseInt(port));
    System.out.println("tableName:" + tableName);
    System.out.println("columnFamily:" + columnFamily);

    SparkConf sparkConf = new SparkConf();
    sparkConf.set("spark.cleaner.ttl", "120000");

    JavaSparkContext jsc = new JavaSparkContext(master, "JavaHBaseBulkPutExample");
    jsc.addJar("SparkHBase.jar");

    JavaStreamingContext jssc = new JavaStreamingContext(jsc, new Duration(1000));

    JavaReceiverInputDStream<String> javaDstream =
            jssc.socketTextStream(host, Integer.parseInt(port));

    Configuration conf = HBaseConfiguration.create();
    conf.addResource(new Path("/etc/hbase/conf/core-site.xml"));
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);

    hbaseContext.streamBulkPut(javaDstream, tableName, new PutFunction(), true);
}
Example #8
Source File: SparkRunnerStreamingContextFactory.java From beam with Apache License 2.0

@Override
public JavaStreamingContext call() throws Exception {
    LOG.info("Creating a new Spark Streaming Context");
    // validate unbounded read properties.
    checkArgument(
            options.getMinReadTimeMillis() < options.getBatchIntervalMillis(),
            "Minimum read time has to be less than batch time.");
    checkArgument(
            options.getReadTimePercentage() > 0 && options.getReadTimePercentage() < 1,
            "Read time percentage is bound to (0, 1).");

    SparkPipelineTranslator translator =
            new StreamingTransformTranslator.Translator(new TransformTranslator.Translator());
    Duration batchDuration = new Duration(options.getBatchIntervalMillis());
    LOG.info("Setting Spark streaming batchDuration to {} msec", batchDuration.milliseconds());

    JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
    JavaStreamingContext jssc = new JavaStreamingContext(jsc, batchDuration);

    // We must first init accumulators since translators expect them to be instantiated.
    SparkRunner.initAccumulators(options, jsc);
    // do not need to create a MetricsPusher instance here because it is called in SparkRunner.run()

    EvaluationContext ctxt = new EvaluationContext(jsc, pipeline, options, jssc);
    // update cache candidates
    SparkRunner.updateCacheCandidates(pipeline, translator, ctxt);
    pipeline.traverseTopologically(new SparkRunner.Evaluator(translator, ctxt));
    ctxt.computeOutputs();

    checkpoint(jssc, checkpointDir);

    return jssc;
}
Example #9
Source File: SparkGroupAlsoByWindowViaWindowSet.java From beam with Apache License 2.0

private static void checkpointIfNeeded(
        final DStream<Tuple2<ByteArray, Tuple2<StateAndTimers, List<byte[]>>>> firedStream,
        final SerializablePipelineOptions options) {

    final Long checkpointDurationMillis = getBatchDuration(options);

    if (checkpointDurationMillis > 0) {
        firedStream.checkpoint(new Duration(checkpointDurationMillis));
    }
}
Example #10
Source File: KafkaInput.java From envelope with Apache License 2.0

@Override
public JavaDStream<?> getDStream() throws Exception {
    if (dStream == null) {
        JavaStreamingContext jssc = Contexts.getJavaStreamingContext();
        Map<TopicPartition, Long> lastOffsets = null;
        if (doesRecordProgress(config) && !usingKafkaManagedOffsets(config)) {
            lastOffsets = getLastOffsets();
        }

        if (lastOffsets != null) {
            dStream = KafkaUtils.createDirectStream(jssc, LocationStrategies.PreferConsistent(),
                    ConsumerStrategies.Subscribe(topics, kafkaParams, lastOffsets));
        } else {
            dStream = KafkaUtils.createDirectStream(jssc, LocationStrategies.PreferConsistent(),
                    ConsumerStrategies.Subscribe(topics, kafkaParams));
        }

        if (ConfigUtils.getOrElse(config, WINDOW_ENABLED_CONFIG, false)) {
            int windowDuration = config.getInt(WINDOW_MILLISECONDS_CONFIG);
            if (config.hasPath(WINDOW_SLIDE_MILLISECONDS_CONFIG)) {
                int slideDuration = config.getInt(WINDOW_SLIDE_MILLISECONDS_CONFIG);
                dStream = dStream.window(new Duration(windowDuration), new Duration(slideDuration));
            } else {
                dStream = dStream.window(new Duration(windowDuration));
            }
        }
    }

    return dStream;
}
Example #11
Source File: SparkStreamingFromNetworkExample.java From SparkOnALog with Apache License 2.0

public static void main(String[] args) {
    if (args.length < 3) {
        System.err.println("Usage: NetworkWordCount <master> <hostname> <port>\n" +
                "In local mode, <master> should be 'local[n]' with n > 1");
        System.exit(1);
    }

    // Create the context with a 5 second batch size
    JavaStreamingContext ssc = new JavaStreamingContext(args[0], "NetworkWordCount",
            new Duration(5000), System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));

    // Create a NetworkInputDStream on target ip:port and count the
    // words in input stream of \n delimited text (eg. generated by 'nc')
    JavaDStream<String> lines = ssc.socketTextStream(args[1], Integer.parseInt(args[2]));

    lines.map(new Function<String, String>() {
        @Override
        public String call(String arg0) throws Exception {
            System.out.println("arg0" + arg0);
            return arg0;
        }
    }).print();

    lines.print();

    ssc.start();
}
Example #12
Source File: JavaCustomReceiver.java From SparkDemo with MIT License

public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaCustomReceiver <hostname> <port>");
        System.exit(1);
    }

    StreamingExamples.setStreamingLogLevels();

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaCustomReceiver");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000));

    // Create an input stream with the custom receiver on target ip:port and count the
    // words in input stream of \n delimited text (eg. generated by 'nc')
    JavaReceiverInputDStream<String> lines = ssc.receiverStream(
            new JavaCustomReceiver(args[0], Integer.parseInt(args[1])));
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Arrays.asList(SPACE.split(x)).iterator();
        }
    });
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
            new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<>(s, 1);
                }
            }).reduceByKey(new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer i1, Integer i2) {
                    return i1 + i2;
                }
            });

    wordCounts.print();
    ssc.start();
    ssc.awaitTermination();
}
Example #13
Source File: SparkStreamingJob.java From zipkin-sparkstreaming with Apache License 2.0

@Memoized
JavaStreamingContext jsc() {
    SparkConf conf = new SparkConf(true)
            .setMaster(master())
            .setAppName(getClass().getName());
    if (!jars().isEmpty()) conf.setJars(jars().toArray(new String[0]));
    for (Map.Entry<String, String> entry : conf().entrySet()) {
        conf.set(entry.getKey(), entry.getValue());
    }
    return new JavaStreamingContext(conf, new Duration(batchDuration()));
}
Example #14
Source File: SparkStreamingSqlEngine.java From sylph with Apache License 2.0

private static Serializable compile(String jobId, SqlFlow sqlFlow, ConnectorStore connectorStore,
        SparkJobConfig sparkJobConfig, URLClassLoader jobClassLoader)
        throws JVMException {
    int batchDuration = sparkJobConfig.getSparkStreamingBatchDuration();
    final AtomicBoolean isCompile = new AtomicBoolean(true);
    final Supplier<StreamingContext> appGetter = (Supplier<StreamingContext> & Serializable) () -> {
        logger.info("========create spark StreamingContext mode isCompile = " + isCompile.get() + "============");
        SparkConf sparkConf = isCompile.get() ?
                new SparkConf().setMaster("local[*]").setAppName("sparkCompile")
                : new SparkConf();
        SparkSession sparkSession = SparkSession.builder().config(sparkConf).getOrCreate();
        StreamingContext ssc = new StreamingContext(sparkSession.sparkContext(), Duration.apply(batchDuration));

        //build sql
        SqlAnalyse analyse = new SparkStreamingSqlAnalyse(ssc, connectorStore, isCompile.get());
        try {
            buildSql(analyse, jobId, sqlFlow);
        }
        catch (Exception e) {
            throwsException(e);
        }
        return ssc;
    };

    JVMLauncher<Boolean> launcher = JVMLaunchers.<Boolean>newJvm()
            .setConsole((line) -> System.out.println(new Ansi().fg(YELLOW).a("[" + jobId + "] ").fg(GREEN).a(line).reset()))
            .setCallable(() -> {
                System.out.println("************ job start ***************");
                appGetter.get();
                return true;
            })
            .addUserURLClassLoader(jobClassLoader)
            .setClassLoader(jobClassLoader)
            .notDepThisJvmClassPath()
            .build();
    launcher.startAndGet();
    isCompile.set(false);
    return (Serializable) appGetter;
}
Example #15
Source File: SparkScheduler.java From oodt with Apache License 2.0

public SparkScheduler(JobQueue queue) {
    SparkConf conf = new SparkConf();
    conf.setMaster(System.getProperty("resource.runner.spark.host", "local"));
    conf.setAppName("OODT Spark Job");

    URL location = SparkScheduler.class.getResource('/' + SparkScheduler.class.getName().replace('.', '/') + ".class");
    conf.setJars(new String[] { "../lib/cas-resource-0.8-SNAPSHOT.jar" });
    sc = new SparkContext(conf);
    ssc = new StreamingContext(sc, new Duration(10000));
    this.queue = queue;
}
Example #16
Source File: ReaderWriterExample.java From spliceengine with GNU Affero General Public License v3.0

public static void main(String[] args) throws Exception {
    final String dbUrl = args[0];
    final String hostname = args[1];
    final String port = args[2];
    final String inTargetSchema = args[3];
    final String inTargetTable = args[4];

    SparkConf conf = new SparkConf();

    JavaStreamingContext ssc = new JavaStreamingContext(conf, new Duration(500));

    JavaReceiverInputDStream<String> stream = ssc.socketTextStream(hostname, Integer.parseInt(port));

    SparkSession spark = SparkSession.builder().getOrCreate();

    // Create a SplicemachineContext based on the provided DB connection
    SplicemachineContext splicemachineContext = new SplicemachineContext(dbUrl);

    // Set target tablename and schemaname
    final String table = inTargetSchema + "." + inTargetTable;

    stream.foreachRDD((VoidFunction<JavaRDD<String>>) rdd -> {
        JavaRDD<Row> rowRDD = rdd.map((Function<String, Row>) s -> RowFactory.create(s));

        Dataset<Row> df = spark.createDataFrame(rowRDD, splicemachineContext.getSchema(table));

        splicemachineContext.insert(df, table);
    });

    ssc.start();
    ssc.awaitTermination();
}
Example #17
Source File: ReaderWriterExample.java From spliceengine with GNU Affero General Public License v3.0

public static void main(String[] args) throws Exception {
    final String dbUrl = args[0];
    final String hostname = args[1];
    final String port = args[2];
    final String inTargetSchema = args[3];
    final String inTargetTable = args[4];

    SparkConf conf = new SparkConf();

    JavaStreamingContext ssc = new JavaStreamingContext(conf, new Duration(500));
    SpliceSpark.setContext(ssc.sparkContext());

    SparkSession spark = SpliceSpark.getSessionUnsafe();

    JavaReceiverInputDStream<String> stream = ssc.socketTextStream(hostname, Integer.parseInt(port));

    // Create a SplicemachineContext based on the provided DB connection
    SplicemachineContext splicemachineContext = new SplicemachineContext(dbUrl);

    // Set target tablename and schemaname
    final String table = inTargetSchema + "." + inTargetTable;

    stream.foreachRDD((VoidFunction<JavaRDD<String>>) rdd -> {
        JavaRDD<Row> rowRDD = rdd.map((Function<String, Row>) s -> RowFactory.create(s));

        Dataset<Row> df = spark.createDataFrame(rowRDD, splicemachineContext.getSchema(table));

        splicemachineContext.insert(df, table);
    });

    ssc.start();
    ssc.awaitTermination();
}
Example #18
Source File: SparkUnboundedSource.java From beam with Apache License 2.0

private static void checkpointStream(JavaDStream<?> dStream, SparkPipelineOptions options) {
    long checkpointDurationMillis = options.getCheckpointDurationMillis();
    if (checkpointDurationMillis > 0) {
        dStream.checkpoint(new Duration(checkpointDurationMillis));
    }
}
Example #19
Source File: KafkaStreamRestHandler.java From elasticsearch-rest-command with The Unlicense

@Override
protected void handleRequest(RestRequest request, RestChannel channel, Client client) throws Exception {
    final String topic = request.param("topic", "");
    final boolean schema = request.paramAsBoolean("schema", false);
    final String master = request.param("masterAddress", "local");
    final String hdfs = request.param("hdfs", "hdfs://localhost:50070");
    final String memory = request.param("memory", "2g");
    final String appName = request.param("appName", "appName-" + topic);
    final int duration = request.paramAsInt("duration", 1000);

    Thread exec = new Thread(new Runnable() {
        @Override
        public void run() {
            SparkConf sparkConf = new SparkConf().setAppName(appName).setMaster(master)
                    .set("spark.executor.memory", memory);
            JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(duration));

            Map<String, Integer> topicMap = new HashMap<String, Integer>();
            topicMap.put(topic, 3);

            JavaPairReceiverInputDStream<String, byte[]> kafkaStream =
                    KafkaUtils.createStream(jssc, String.class, byte[].class,
                            kafka.serializer.DefaultDecoder.class, kafka.serializer.DefaultDecoder.class,
                            null, topicMap, StorageLevel.MEMORY_ONLY());

            //JobConf confHadoop = new JobConf();
            //confHadoop.set("mapred.output.compress", "true");
            //confHadoop.set("mapred.output.compression.codec", "com.hadoop.compression.lzo.LzopCodec");

            kafkaStream.saveAsHadoopFiles(hdfs, "seq", Text.class, BytesWritable.class,
                    KafkaStreamSeqOutputFormat.class);

            topicContextMap.put(topic, jssc);
            jssc.start();
            jssc.awaitTermination();
        }
    });

    exec.start();

    channel.sendResponse(new BytesRestResponse(RestStatus.OK,
            String.format("{\"topic\":\"%s\"}", topic)));
}
Example #20
Source File: Throughput.java From flink-perf with Apache License 2.0

public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("throughput").setMaster("local[8]");
    JavaStreamingContext ssc = new JavaStreamingContext(conf, new Duration(2000));

    JavaReceiverInputDStream<Tuple4<Long, Integer, Long, byte[]>> source =
            ssc.receiverStream(new Source(StorageLevel.MEMORY_ONLY()));

    JavaPairDStream<Long, Tuple3<Integer, Long, byte[]>> kvsource = source.mapToPair(
            new PairFunction<Tuple4<Long, Integer, Long, byte[]>, Long, Tuple3<Integer, Long, byte[]>>() {
        @Override
        public Tuple2<Long, Tuple3<Integer, Long, byte[]>> call(
                Tuple4<Long, Integer, Long, byte[]> longIntegerLongTuple4) throws Exception {
            return new Tuple2<Long, Tuple3<Integer, Long, byte[]>>(longIntegerLongTuple4._1(),
                    new Tuple3<Integer, Long, byte[]>(longIntegerLongTuple4._2(),
                            longIntegerLongTuple4._3(), longIntegerLongTuple4._4()));
        }
    });

    JavaDStream<Long> res = kvsource.repartition(3).mapPartitions(
            new FlatMapFunction<Iterator<Tuple2<Long, Tuple3<Integer, Long, byte[]>>>, Long>() {
        @Override
        public Iterable<Long> call(Iterator<Tuple2<Long, Tuple3<Integer, Long, byte[]>>> tuple2Iterator)
                throws Exception {
            long start = System.currentTimeMillis();
            long received = 0;
            while (tuple2Iterator.hasNext()) {
                received++;
                Tuple2<Long, Tuple3<Integer, Long, byte[]>> el = tuple2Iterator.next();
                if (el._2()._2() != 0) {
                    long lat = System.currentTimeMillis() - el._2()._2();
                    System.out.println("Latency " + lat + " ms");
                }
            }
            long sinceMs = (System.currentTimeMillis() - start);
            System.out.println("Finished Batch. Processed " + received + " elements in " + sinceMs + " ms.");

            return new Iterable<Long>() {
                @Override
                public Iterator<Long> iterator() {
                    return new Iterator<Long>() {
                        @Override
                        public boolean hasNext() {
                            return false;
                        }

                        @Override
                        public Long next() {
                            return 1L;
                        }

                        @Override
                        public void remove() {
                        }
                    };
                }
            };
        }

        /*
        @Override
        public Long call(Tuple2<Long, Tuple3<Integer, Long, byte[]>> v1) throws Exception {
            // System.out.println("Recevied " + v1);
            if (start == 0) {
            }
            received++;
            if (received % logfreq == 0) {
                if (sinceSec == 0) {
                    System.out.println("received " + received + " elements since 0");
                    return 0L;
                }
                System.out.println("Received " + received + " elements since " + sinceSec + ". " +
                        "Elements per second " + received / sinceSec + ", GB received " +
                        ((received * (8 + 4 + 12)) / 1024 / 1024 / 1024));
            }
            if (v1._2()._2() != 0) {
                long lat = System.currentTimeMillis() - v1._2()._2();
                System.out.println("Latency " + lat + " ms");
            }
            return received;
        }
        */
    });

    //res.print();

    /*res.foreachRDD(new Function2<JavaRDD<Long>, Time, Void>() {
        @Override
        public Void call(JavaRDD<Long> integerJavaRDD, Time t) throws Exception {
            integerJavaRDD.saveAsTextFile("/home/robert/flink-workdir/flink-perf/out/" + t.toString());
            return null;
        }
    });
    */

    res.print();

    // res.print();

    ssc.start();
}
Example #21
Source File: SparkStreamingFromFlumeToHBaseExample.java From SparkOnALog with Apache License 2.0

public static void main(String[] args) {
    if (args.length == 0) {
        System.err.println(
                "Usage: SparkStreamingFromFlumeToHBaseExample {master} {host} {port} {table} {columnFamily}");
        System.exit(1);
    }

    String master = args[0];
    String host = args[1];
    int port = Integer.parseInt(args[2]);
    String tableName = args[3];
    String columnFamily = args[4];

    Duration batchInterval = new Duration(2000);

    JavaStreamingContext sc = new JavaStreamingContext(master, "FlumeEventCount",
            batchInterval, System.getenv("SPARK_HOME"), "/home/cloudera/SparkOnALog.jar");

    final Broadcast<String> broadcastTableName = sc.sparkContext().broadcast(tableName);
    final Broadcast<String> broadcastColumnFamily = sc.sparkContext().broadcast(columnFamily);

    //JavaDStream<SparkFlumeEvent> flumeStream = sc.flumeStream(host, port);
    JavaDStream<SparkFlumeEvent> flumeStream = FlumeUtils.createStream(sc, host, port);

    JavaPairDStream<String, Integer> lastCounts = flumeStream
            .flatMap(new FlatMapFunction<SparkFlumeEvent, String>() {
                @Override
                public Iterable<String> call(SparkFlumeEvent event) throws Exception {
                    String bodyString = new String(event.event().getBody().array(), "UTF-8");
                    return Arrays.asList(bodyString.split(" "));
                }
            }).map(new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String str) throws Exception {
                    return new Tuple2(str, 1);
                }
            }).reduceByKey(new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer x, Integer y) throws Exception {
                    return x.intValue() + y.intValue();
                }
            });

    lastCounts.foreach(new Function2<JavaPairRDD<String, Integer>, Time, Void>() {
        @Override
        public Void call(JavaPairRDD<String, Integer> values, Time time) throws Exception {
            values.foreach(new VoidFunction<Tuple2<String, Integer>>() {
                @Override
                public void call(Tuple2<String, Integer> tuple) throws Exception {
                    HBaseCounterIncrementor incrementor =
                            HBaseCounterIncrementor.getInstance(broadcastTableName.value(),
                                    broadcastColumnFamily.value());
                    incrementor.incerment("Counter", tuple._1(), tuple._2());
                    System.out.println("Counter:" + tuple._1() + "," + tuple._2());
                }
            });
            return null;
        }
    });

    sc.start();
}
Example #22
Source File: SparkStreamingFromFlumeToHBaseWindowingExample.java From SparkOnALog with Apache License 2.0

public static void main(String[] args) {
    if (args.length == 0) {
        System.err.println(
                "Usage: SparkStreamingFromFlumeToHBaseWindowingExample {master} {host} {port} {table} {columnFamily} {windowInSeconds} {slideInSeconds}");
        System.exit(1);
    }

    String master = args[0];
    String host = args[1];
    int port = Integer.parseInt(args[2]);
    String tableName = args[3];
    String columnFamily = args[4];
    int windowInSeconds = Integer.parseInt(args[5]);
    int slideInSeconds = Integer.parseInt(args[6]);

    Duration batchInterval = new Duration(2000);
    Duration windowInterval = new Duration(windowInSeconds * 1000);
    Duration slideInterval = new Duration(slideInSeconds * 1000);

    JavaStreamingContext sc = new JavaStreamingContext(master, "FlumeEventCount",
            batchInterval, System.getenv("SPARK_HOME"), "/home/cloudera/SparkOnALog.jar");

    final Broadcast<String> broadcastTableName = sc.sparkContext().broadcast(tableName);
    final Broadcast<String> broadcastColumnFamily = sc.sparkContext().broadcast(columnFamily);

    //JavaDStream<SparkFlumeEvent> flumeStream = sc.flumeStream(host, port);
    JavaDStream<SparkFlumeEvent> flumeStream = FlumeUtils.createStream(sc, host, port);

    JavaPairDStream<String, Integer> lastCounts = flumeStream
            .flatMap(new FlatMapFunction<SparkFlumeEvent, String>() {
                @Override
                public Iterable<String> call(SparkFlumeEvent event) throws Exception {
                    String bodyString = new String(event.event().getBody().array(), "UTF-8");
                    return Arrays.asList(bodyString.split(" "));
                }
            }).map(new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String str) throws Exception {
                    return new Tuple2(str, 1);
                }
            }).reduceByKeyAndWindow(new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer x, Integer y) throws Exception {
                    return x.intValue() + y.intValue();
                }
            }, windowInterval, slideInterval);

    lastCounts.foreach(new Function2<JavaPairRDD<String, Integer>, Time, Void>() {
        @Override
        public Void call(JavaPairRDD<String, Integer> values, Time time) throws Exception {
            values.foreach(new VoidFunction<Tuple2<String, Integer>>() {
                @Override
                public void call(Tuple2<String, Integer> tuple) throws Exception {
                    HBaseCounterIncrementor incrementor =
                            HBaseCounterIncrementor.getInstance(broadcastTableName.value(),
                                    broadcastColumnFamily.value());
                    incrementor.incerment("Counter", tuple._1(), tuple._2());
                    System.out.println("Counter:" + tuple._1() + "," + tuple._2());
                }
            });
            return null;
        }
    });

    sc.start();
}
Example #23
Source File: SparkUnboundedSource.java From beam with Apache License 2.0

@Override
public Duration slideDuration() {
    return parent.slideDuration();
}
Example #24
Source File: SparkRunner.java From jaeger-analytics-java with Apache License 2.0

public static void main(String[] args) throws InterruptedException, IOException {
    HTTPServer server = new HTTPServer(Integer.valueOf(getPropOrEnv("PROMETHEUS_PORT", "9111")));

    SparkConf sparkConf = new SparkConf()
            .setAppName("Trace DSL")
            .setMaster(getPropOrEnv("SPARK_MASTER", "local[*]"));

    JavaSparkContext sc = new JavaSparkContext(sparkConf);
    JavaStreamingContext ssc = new JavaStreamingContext(sc,
            new Duration(Integer.parseInt(getPropOrEnv("SPARK_STREAMING_BATCH_DURATION", "5000"))));

    Set<String> topics = Collections.singleton(getPropOrEnv("KAFKA_JAEGER_TOPIC", "jaeger-spans"));
    Map<String, Object> kafkaParams = new HashMap<>();
    kafkaParams.put("bootstrap.servers", getPropOrEnv("KAFKA_BOOTSTRAP_SERVER", "localhost:9092"));
    kafkaParams.put("key.deserializer", StringDeserializer.class);
    kafkaParams.put("value.deserializer", ProtoSpanDeserializer.class);
    // hack to start always from beginning
    kafkaParams.put("group.id", "jaeger-trace-aggregation-" + System.currentTimeMillis());

    if (Boolean.parseBoolean(getPropOrEnv("KAFKA_START_FROM_BEGINNING", "true"))) {
        kafkaParams.put("auto.offset.reset", "earliest");
        kafkaParams.put("enable.auto.commit", false);
        kafkaParams.put("startingOffsets", "earliest");
    }

    JavaInputDStream<ConsumerRecord<String, Span>> messages =
            KafkaUtils.createDirectStream(
                    ssc,
                    LocationStrategies.PreferConsistent(),
                    ConsumerStrategies.Subscribe(topics, kafkaParams));

    JavaPairDStream<String, Span> traceIdSpanTuple = messages.mapToPair(record -> {
        return new Tuple2<>(record.value().traceId, record.value());
    });

    JavaDStream<Trace> tracesStream = traceIdSpanTuple.groupByKey().map(traceIdSpans -> {
        System.out.printf("traceID: %s\n", traceIdSpans._1);
        Iterable<Span> spans = traceIdSpans._2();
        Trace trace = new Trace();
        trace.traceId = traceIdSpans._1();
        trace.spans = StreamSupport.stream(spans.spliterator(), false)
                .collect(Collectors.toList());
        return trace;
    });

    MinimumClientVersion minimumClientVersion = MinimumClientVersion.builder()
            .withJavaVersion(getPropOrEnv("TRACE_QUALITY_JAVA_VERSION", "1.0.0"))
            .withGoVersion(getPropOrEnv("TRACE_QUALITY_GO_VERSION", "2.22.0"))
            .withNodeVersion(getPropOrEnv("TRACE_QUALITY_NODE_VERSION", "3.17.1"))
            .withPythonVersion(getPropOrEnv("TRACE_QUALITY_PYTHON_VERSION", "4.0.0"))
            .build();

    List<ModelRunner> modelRunner = Arrays.asList(
            new TraceHeight(),
            new ServiceDepth(),
            new ServiceHeight(),
            new NetworkLatency(),
            new NumberOfErrors(),
            new DirectDependencies(),
            // trace quality
            minimumClientVersion,
            new HasClientServerSpans(),
            new UniqueSpanId());

    tracesStream.foreachRDD((traceRDD, time) -> {
        traceRDD.foreach(trace -> {
            Graph graph = GraphCreator.create(trace);
            for (ModelRunner model : modelRunner) {
                model.runWithMetrics(graph);
            }
        });
    });

    ssc.start();
    ssc.awaitTermination();
}
Example #25
Source File: Flags.java From SparkToParquet with Apache License 2.0

public Duration getSlideInterval() {
    return slideInterval;
}
Example #26
Source File: Flags.java From SparkToParquet with Apache License 2.0

public Duration getWindowLength() {
    return windowLength;
}
Example #27
Source File: KafkaReceiverWordCountJava.java From Building-Data-Streaming-Applications-with-Apache-Kafka with MIT License

public static void main(String[] args) throws Exception {
    String zkQuorum = "localhost:2181";
    String groupName = "stream";
    int numThreads = 3;
    String topicsName = "test1";
    SparkConf sparkConf = new SparkConf().setAppName("WordCountKafkaStream");

    JavaStreamingContext javaStreamingContext = new JavaStreamingContext(sparkConf, new Duration(5000));

    Map<String, Integer> topicToBeUsedBySpark = new HashMap<>();
    String[] topics = topicsName.split(",");
    for (String topic : topics) {
        topicToBeUsedBySpark.put(topic, numThreads);
    }

    JavaPairReceiverInputDStream<String, String> streamMessages =
            KafkaUtils.createStream(javaStreamingContext, zkQuorum, groupName, topicToBeUsedBySpark);

    JavaDStream<String> lines = streamMessages.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    });

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Arrays.asList(WORD_DELIMETER.split(x)).iterator();
        }
    });

    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
            new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<>(s, 1);
                }
            }).reduceByKey(new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer i1, Integer i2) {
                    return i1 + i2;
                }
            });

    wordCounts.print();
    javaStreamingContext.start();
    javaStreamingContext.awaitTermination();
}
Example #28
Source File: JavaKafkaWordCount.java From SparkDemo with MIT License

public static void main(String[] args) throws Exception {
    if (args.length < 4) {
        System.err.println("Usage: JavaKafkaWordCount <zkQuorum> <group> <topics> <numThreads>");
        System.exit(1);
    }

    StreamingExamples.setStreamingLogLevels();
    SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount");
    // Create the context with 2 seconds batch size
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(2000));

    int numThreads = Integer.parseInt(args[3]);
    Map<String, Integer> topicMap = new HashMap<>();
    String[] topics = args[2].split(",");
    for (String topic : topics) {
        topicMap.put(topic, numThreads);
    }

    JavaPairReceiverInputDStream<String, String> messages =
            KafkaUtils.createStream(jssc, args[0], args[1], topicMap);

    JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    });

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Arrays.asList(SPACE.split(x)).iterator();
        }
    });

    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
            new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<>(s, 1);
                }
            }).reduceByKey(new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer i1, Integer i2) {
                    return i1 + i2;
                }
            });

    wordCounts.print();
    jssc.start();
    jssc.awaitTermination();
}
Example #29
Source File: JavaStreamingTestExample.java From SparkDemo with MIT License

public static void main(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: JavaStreamingTestExample " +
                "<dataDir> <batchDuration> <numBatchesTimeout>");
        System.exit(1);
    }

    String dataDir = args[0];
    Duration batchDuration = Seconds.apply(Long.parseLong(args[1]));
    int numBatchesTimeout = Integer.parseInt(args[2]);

    SparkConf conf = new SparkConf().setMaster("local").setAppName("StreamingTestExample");
    JavaStreamingContext ssc = new JavaStreamingContext(conf, batchDuration);

    ssc.checkpoint(Utils.createTempDir(System.getProperty("java.io.tmpdir"), "spark").toString());

    // $example on$
    JavaDStream<BinarySample> data = ssc.textFileStream(dataDir).map(
            new Function<String, BinarySample>() {
                @Override
                public BinarySample call(String line) {
                    String[] ts = line.split(",");
                    boolean label = Boolean.parseBoolean(ts[0]);
                    double value = Double.parseDouble(ts[1]);
                    return new BinarySample(label, value);
                }
            });

    StreamingTest streamingTest = new StreamingTest()
            .setPeacePeriod(0)
            .setWindowSize(0)
            .setTestMethod("welch");

    JavaDStream<StreamingTestResult> out = streamingTest.registerStream(data);
    out.print();
    // $example off$

    // Stop processing if test becomes significant or we time out
    timeoutCounter = numBatchesTimeout;

    out.foreachRDD(new VoidFunction<JavaRDD<StreamingTestResult>>() {
        @Override
        public void call(JavaRDD<StreamingTestResult> rdd) {
            timeoutCounter -= 1;

            boolean anySignificant = !rdd.filter(new Function<StreamingTestResult, Boolean>() {
                @Override
                public Boolean call(StreamingTestResult v) {
                    return v.pValue() < 0.05;
                }
            }).isEmpty();

            if (timeoutCounter <= 0 || anySignificant) {
                rdd.context().stop();
            }
        }
    });

    ssc.start();
    ssc.awaitTermination();
}
Example #30
Source File: SylphKafkaOffset.java From sylph with Apache License 2.0

@Override
public Duration slideDuration() {
    return parent.slideDuration();
}