org.apache.flink.api.common.functions.MapFunction Java Examples
The following examples show how to use
org.apache.flink.api.common.functions.MapFunction.
The source file and originating project are noted above each example.
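As a quick reference before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below; the class name and data are illustrative) showing MapFunction implemented both as an anonymous inner class and as a lambda on a DataStream:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class MapFunctionSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<String> lines = env.fromElements("1", "2", "3");

        // Anonymous-class form: each input element is mapped to exactly one output element.
        DataStream<Integer> parsed = lines.map(new MapFunction<String, Integer>() {
            @Override
            public Integer map(String value) throws Exception {
                return Integer.parseInt(value);
            }
        });

        // Lambda form of the same one-in, one-out transformation.
        DataStream<Integer> doubled = parsed.map(value -> value * 2);

        doubled.print();
        env.execute("MapFunction sketch");
    }
}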
Example #1
Source File: NiFiSourceMain.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    SiteToSiteClientConfig clientConfig = new SiteToSiteClient.Builder()
            .url("http://localhost:8080/nifi")
            .portName("Data for Flink")
            .requestBatchCount(5)
            .buildConfig();

    SourceFunction<NiFiDataPacket> nifiSource = new NiFiSource(clientConfig);
    DataStream<NiFiDataPacket> streamSource = env.addSource(nifiSource).setParallelism(2);

    DataStream<String> dataStream = streamSource.map(new MapFunction<NiFiDataPacket, String>() {
        @Override
        public String map(NiFiDataPacket value) throws Exception {
            return new String(value.getContent(), Charset.defaultCharset());
        }
    });

    dataStream.print();

    env.execute();
}
Example #2
Source File: BaseComQueue.java From Alink with Apache License 2.0
private DataSet<byte[]> clearObjs(DataSet<byte[]> raw) {
    final int localSessionId = sessionId;
    DataSet<byte[]> clear = expandDataSet2MaxParallelism(
        BatchOperator
            .getExecutionEnvironmentFromDataSets(raw)
            .fromElements(0))
        .mapPartition(new MapPartitionFunction<Integer, byte[]>() {
            @Override
            public void mapPartition(Iterable<Integer> values, Collector<byte[]> out) {
                SessionSharedObjs.clear(localSessionId);
            }
        });

    return raw
        .map(new MapFunction<byte[], byte[]>() {
            @Override
            public byte[] map(byte[] value) {
                return value;
            }
        })
        .withBroadcastSet(clear, "barrier")
        .name("clearReturn");
}
Example #3
Source File: Main.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // set parallelism to 1
    env.setParallelism(1);
    // env.setParallelism(4);

    SingleOutputStreamOperator<Word> data = env.socketTextStream("localhost", 9001)
            .map(new MapFunction<String, Word>() {
                @Override
                public Word map(String value) throws Exception {
                    String[] split = value.split(",");
                    return new Word(split[0], Integer.valueOf(split[1]), Long.valueOf(split[2]));
                }
            });

    // Punctuated Watermark
    data.assignTimestampsAndWatermarks(new WordPunctuatedWatermark());

    data.print();
    env.execute("watermark demo");
}
Example #4
Source File: NoRestartStrategyMain.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(ParameterTool.fromArgs(args));
    env.setRestartStrategy(RestartStrategies.noRestart());

    env.addSource(new SourceFunction<Long>() {
        @Override
        public void run(SourceContext<Long> sourceContext) throws Exception {
            while (true) {
                sourceContext.collect(null);
            }
        }

        @Override
        public void cancel() {
        }
    })
            .map((MapFunction<Long, Long>) aLong -> aLong / 1)
            .print();

    env.execute("zhisheng no Restart Strategy example");
}
Example #5
Source File: Main.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);
    DataStreamSource<String> data = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC),   // this Kafka topic must match the topic used by the utility class above
            new SimpleStringSchema(),
            props));

    data.map(new MapFunction<String, Object>() {
        @Override
        public Object map(String string) throws Exception {
            writeEventToHbase(string, parameterTool);
            return string;
        }
    }).print();

    env.execute("flink learning connectors hbase");
}
Example #6
Source File: Graph.java From flink with Apache License 2.0
/**
 * Apply a function to the attribute of each edge in the graph.
 *
 * @param mapper the map function to apply.
 * @return a new graph
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public <NV> Graph<K, VV, NV> mapEdges(final MapFunction<Edge<K, EV>, NV> mapper) {
    TypeInformation<K> keyType = ((TupleTypeInfo<?>) edges.getType()).getTypeAt(0);

    TypeInformation<NV> valueType;
    if (mapper instanceof ResultTypeQueryable) {
        valueType = ((ResultTypeQueryable) mapper).getProducedType();
    } else {
        valueType = TypeExtractor.createTypeInfo(MapFunction.class, mapper.getClass(), 1, edges.getType(), null);
    }

    TypeInformation<Edge<K, NV>> returnType = (TypeInformation<Edge<K, NV>>) new TupleTypeInfo(
        Edge.class, keyType, keyType, valueType);

    return mapEdges(mapper, returnType);
}
Example #7
Source File: StatisticsHelper.java From Alink with Apache License 2.0
/**
 * Calculates the Pearson correlation. The result is a Tuple2: f0 is the summary, f1 is the correlation.
 */
public static DataSet<Tuple2<BaseVectorSummary, CorrelationResult>> vectorPearsonCorrelation(BatchOperator in, String selectedColName) {
    return vectorSummarizer(in, selectedColName, true)
        .map(new MapFunction<BaseVectorSummarizer, Tuple2<BaseVectorSummary, CorrelationResult>>() {
            @Override
            public Tuple2<BaseVectorSummary, CorrelationResult> map(BaseVectorSummarizer summarizer) {
                return Tuple2.of(summarizer.toSummary(), summarizer.correlation());
            }
        });
}
Example #8
Source File: MapCancelingITCase.java From flink with Apache License 2.0
public void executeTask(MapFunction<Integer, Integer> mapper) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    env
        .createInput(new InfiniteIntegerInputFormat(false))
        .map(mapper)
        .output(new DiscardingOutputFormat<Integer>());

    env.setParallelism(PARALLELISM);

    runAndCancelJob(env.createProgramPlan(), 5 * 1000, 10 * 1000);
}
Example #9
Source File: UdfAnalyzerTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testPutStaticException() {
    try {
        final UdfAnalyzer ua = new UdfAnalyzer(MapFunction.class, PutStaticMapper.class, "operator",
            BasicTypeInfo.STRING_TYPE_INFO, null, BasicTypeInfo.STRING_TYPE_INFO, null, null, true);
        ua.analyze();
        Assert.fail();
    } catch (CodeErrorException e) {
        // ok
    }
}
Example #10
Source File: WindowTriangles.java From gelly-streaming with Apache License 2.0
@SuppressWarnings("serial") private static SimpleEdgeStream<Long, NullValue> getGraphStream(StreamExecutionEnvironment env) { if (fileOutput) { return new SimpleEdgeStream<>(env.readTextFile(edgeInputPath) .map(new MapFunction<String, Edge<Long, Long>>() { @Override public Edge<Long, Long> map(String s) { String[] fields = s.split("\\s"); long src = Long.parseLong(fields[0]); long trg = Long.parseLong(fields[1]); long timestamp = Long.parseLong(fields[2]); return new Edge<>(src, trg, timestamp); } }), new EdgeValueTimestampExtractor(), env).mapEdges(new RemoveEdgeValue()); } return new SimpleEdgeStream<>(env.generateSequence(1, 10).flatMap( new FlatMapFunction<Long, Edge<Long, Long>>() { @Override public void flatMap(Long key, Collector<Edge<Long, Long>> out) throws Exception { for (int i = 1; i < 3; i++) { long target = key + i; out.collect(new Edge<>(key, target, key*100 + (i-1)*50)); } } }), new EdgeValueTimestampExtractor(), env).mapEdges(new RemoveEdgeValue()); }
Example #11
Source File: TypeExtractorTest.java From flink with Apache License 2.0
@SuppressWarnings({ "unchecked", "rawtypes" }) @Test public void testInputMismatchWithRawFuntion() { MapFunction<?, ?> function = new MapWithResultTypeQueryable(); TypeInformation<?> ti = TypeExtractor.getMapReturnTypes((MapFunction)function, BasicTypeInfo.INT_TYPE_INFO); Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, ti); }
Example #12
Source File: SimpleEdgeStream.java From gelly-streaming with Apache License 2.0
/**
 * Apply a function to the attribute of each edge in the graph stream.
 *
 * @param mapper the map function to apply.
 * @return a new graph stream.
 */
public <NV> SimpleEdgeStream<K, NV> mapEdges(final MapFunction<Edge<K, EV>, NV> mapper) {
    TypeInformation<K> keyType = ((TupleTypeInfo<?>) edges.getType()).getTypeAt(0);
    DataStream<Edge<K, NV>> mappedEdges = edges.map(new ApplyMapperToEdgeWithType<>(mapper, keyType));
    return new SimpleEdgeStream<>(mappedEdges, this.context);
}
Example #13
Source File: GroupCombineITCase.java From flink with Apache License 2.0
@Test
public void testPartialReduceWithDifferentInputOutputType() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // data
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);

    DataSet<Tuple2<Long, Tuple3<Integer, Long, String>>> dsWrapped = ds
            // wrap values as Kv pairs with the grouping key as key
            .map(new Tuple3KvWrapper());

    List<Tuple2<Integer, Long>> result = dsWrapped
            .groupBy(0)
            // reduce partially
            .combineGroup(new Tuple3toTuple2GroupReduce())
            .groupBy(0)
            // reduce fully to check result
            .reduceGroup(new Tuple2toTuple2GroupReduce())
            // unwrap
            .map(new MapFunction<Tuple2<Long, Tuple2<Integer, Long>>, Tuple2<Integer, Long>>() {
                @Override
                public Tuple2<Integer, Long> map(Tuple2<Long, Tuple2<Integer, Long>> value) throws Exception {
                    return value.f1;
                }
            }).collect();

    String expected = "1,3\n" +
            "5,20\n" +
            "15,58\n" +
            "34,52\n" +
            "65,70\n" +
            "111,96\n";

    compareResultAsTuples(result, expected);
}
Example #14
Source File: ConsumePipelinedAndBlockingResultITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    DataSet<Tuple1<Long>> pipelinedSource = env.fromElements(new Tuple1<Long>(1L));

    DataSet<Tuple1<Long>> slowBlockingSource = env.generateSequence(0, 10).map(
            new MapFunction<Long, Tuple1<Long>>() {
                @Override
                public Tuple1<Long> map(Long value) throws Exception {
                    Thread.sleep(200);
                    return new Tuple1<Long>(value);
                }
            }
    );

    slowBlockingSource.join(slowBlockingSource)
            .where(0).equalTo(0).output(new DiscardingOutputFormat<Tuple2<Tuple1<Long>, Tuple1<Long>>>());

    // Join the slow blocking and the pipelined source. This test should verify that this works
    // w/o problems and the blocking result is not requested too early.
    pipelinedSource.join(slowBlockingSource)
            .where(0).equalTo(0)
            .output(new DiscardingOutputFormat<Tuple2<Tuple1<Long>, Tuple1<Long>>>());

    env.execute("Consume one pipelined and one blocking result test job");
}
Example #15
Source File: GroupCombineITCase.java From flink with Apache License 2.0
@Test
public void testPartialReduceWithIdenticalInputOutputType() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // data
    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);

    DataSet<Tuple2<Long, Tuple3<Integer, Long, String>>> dsWrapped = ds
            // wrap values as Kv pairs with the grouping key as key
            .map(new Tuple3KvWrapper());

    List<Tuple3<Integer, Long, String>> result = dsWrapped
            .groupBy(0)
            // reduce partially
            .combineGroup(new Tuple3toTuple3GroupReduce())
            .groupBy(0)
            // reduce fully to check result
            .reduceGroup(new Tuple3toTuple3GroupReduce())
            // unwrap
            .map(new MapFunction<Tuple2<Long, Tuple3<Integer, Long, String>>, Tuple3<Integer, Long, String>>() {
                @Override
                public Tuple3<Integer, Long, String> map(Tuple2<Long, Tuple3<Integer, Long, String>> value) throws Exception {
                    return value.f1;
                }
            }).collect();

    String expected = "1,1,combined\n" +
            "5,4,combined\n" +
            "15,9,combined\n" +
            "34,16,combined\n" +
            "65,25,combined\n" +
            "111,36,combined\n";

    compareResultAsTuples(result, expected);
}
Example #16
Source File: VertexDegree.java From flink with Apache License 2.0
@Override
public DataSet<Vertex<K, LongValue>> runInternal(Graph<K, VV, EV> input) throws Exception {
    MapFunction<Edge<K, EV>, Vertex<K, LongValue>> mapEdgeToId = reduceOnTargetId.get()
        ? new MapEdgeToTargetId<>()
        : new MapEdgeToSourceId<>();

    // v
    DataSet<Vertex<K, LongValue>> vertexIds = input
        .getEdges()
        .map(mapEdgeToId)
        .setParallelism(parallelism)
        .name("Edge to vertex ID");

    // v, deg(v)
    DataSet<Vertex<K, LongValue>> degree = vertexIds
        .groupBy(0)
        .reduce(new DegreeCount<>())
        .setCombineHint(CombineHint.HASH)
        .setParallelism(parallelism)
        .name("Degree count");

    if (includeZeroDegreeVertices.get()) {
        degree = input
            .getVertices()
            .leftOuterJoin(degree)
            .where(0)
            .equalTo(0)
            .with(new JoinVertexWithVertexDegree<>())
            .setParallelism(parallelism)
            .name("Zero degree vertices");
    }

    return degree;
}
Example #17
Source File: StreamingJobGraphGeneratorTest.java From Flink-CEPplus with Apache License 2.0
/**
 * Verifies that the chain start/end is correctly set.
 */
@Test
public void testChainStartEndSetting() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // fromElements -> CHAIN(Map -> Print)
    env.fromElements(1, 2, 3)
        .map(new MapFunction<Integer, Integer>() {
            @Override
            public Integer map(Integer value) throws Exception {
                return value;
            }
        })
        .print();
    JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

    List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
    JobVertex sourceVertex = verticesSorted.get(0);
    JobVertex mapPrintVertex = verticesSorted.get(1);

    assertEquals(ResultPartitionType.PIPELINED_BOUNDED, sourceVertex.getProducedDataSets().get(0).getResultType());
    assertEquals(ResultPartitionType.PIPELINED_BOUNDED, mapPrintVertex.getInputs().get(0).getSource().getResultType());

    StreamConfig sourceConfig = new StreamConfig(sourceVertex.getConfiguration());
    StreamConfig mapConfig = new StreamConfig(mapPrintVertex.getConfiguration());
    Map<Integer, StreamConfig> chainedConfigs = mapConfig.getTransitiveChainedTaskConfigs(getClass().getClassLoader());
    StreamConfig printConfig = chainedConfigs.values().iterator().next();

    assertTrue(sourceConfig.isChainStart());
    assertTrue(sourceConfig.isChainEnd());

    assertTrue(mapConfig.isChainStart());
    assertFalse(mapConfig.isChainEnd());

    assertFalse(printConfig.isChainStart());
    assertTrue(printConfig.isChainEnd());
}
Example #18
Source File: TypeExtractorTest.java From Flink-CEPplus with Apache License 2.0
@SuppressWarnings({ "unchecked", "rawtypes" }) @Test public void testInputInference1() { EdgeMapper<String, Double> em = new EdgeMapper<String, Double>(); TypeInformation<?> ti = TypeExtractor.getMapReturnTypes((MapFunction) em, TypeInformation.of(new TypeHint<Tuple3<String, String, Double>>(){})); Assert.assertTrue(ti.isTupleType()); Assert.assertEquals(3, ti.getArity()); TupleTypeInfo<?> tti = (TupleTypeInfo<?>) ti; Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tti.getTypeAt(0)); Assert.assertEquals(BasicTypeInfo.STRING_TYPE_INFO, tti.getTypeAt(1)); Assert.assertEquals(BasicTypeInfo.DOUBLE_TYPE_INFO, tti.getTypeAt(2)); }
Example #19
Source File: ProjectOperator.java From flink with Apache License 2.0
@Override
protected org.apache.flink.api.common.operators.base.MapOperatorBase<IN, OUT, MapFunction<IN, OUT>> translateToDataFlow(Operator<IN> input) {
    String name = getName() != null ? getName() : "Projection " + Arrays.toString(fields);
    // create operator
    PlanProjectOperator<IN, OUT> ppo = new PlanProjectOperator<IN, OUT>(fields, name, getInputType(), getResultType(), context.getConfig());
    // set input
    ppo.setInput(input);
    // set parallelism
    ppo.setParallelism(this.getParallelism());
    ppo.setSemanticProperties(SemanticPropUtil.createProjectionPropertiesSingle(fields, (CompositeType<?>) getInputType()));
    return ppo;
}
Example #20
Source File: LambdaExtractionTest.java From flink with Apache License 2.0
@Test
public void testMapLambda() {
    MapFunction<Tuple2<Tuple1<Integer>, Boolean>, Tuple2<Tuple1<Integer>, String>> f = (i) -> null;

    TypeInformation<?> ti = TypeExtractor.getMapReturnTypes(f, NESTED_TUPLE_BOOLEAN_TYPE, null, true);
    if (!(ti instanceof MissingTypeInfo)) {
        assertTrue(ti.isTupleType());
        assertEquals(2, ti.getArity());
        assertTrue(((TupleTypeInfo<?>) ti).getTypeAt(0).isTupleType());
        assertEquals(((TupleTypeInfo<?>) ti).getTypeAt(1), BasicTypeInfo.STRING_TYPE_INFO);
    }
}
Example #21
Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0
private JobGraph createJobGraphForManagedMemoryFractionTest(
        final List<ResourceSpec> resourceSpecs,
        @Nullable final List<Integer> managedMemoryWeights) throws Exception {

    final Method opMethod = getSetResourcesMethodAndSetAccessible(SingleOutputStreamOperator.class);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStream<Integer> source = env.addSource(new ParallelSourceFunction<Integer>() {
        @Override
        public void run(SourceContext<Integer> ctx) {
        }

        @Override
        public void cancel() {
        }
    });
    opMethod.invoke(source, resourceSpecs.get(0));

    // CHAIN(source -> map1) in default slot sharing group
    final DataStream<Integer> map1 = source.map((MapFunction<Integer, Integer>) value -> value);
    opMethod.invoke(map1, resourceSpecs.get(1));

    // CHAIN(map2) in default slot sharing group
    final DataStream<Integer> map2 = map1.rebalance().map((MapFunction<Integer, Integer>) value -> value);
    opMethod.invoke(map2, resourceSpecs.get(2));

    // CHAIN(map3) in test slot sharing group
    final DataStream<Integer> map3 = map2.rebalance().map(value -> value).slotSharingGroup("test");
    opMethod.invoke(map3, resourceSpecs.get(3));

    if (managedMemoryWeights != null) {
        source.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(0));
        map1.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(1));
        map2.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(2));
        map3.getTransformation().setManagedMemoryWeight(managedMemoryWeights.get(3));
    }

    return StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());
}
Example #22
Source File: LambdaExtractionTest.java From flink with Apache License 2.0
@SuppressWarnings("rawtypes") @Test public void testLambdaTypeErasure() { MapFunction<Tuple1<Integer>, Tuple1> f = (i) -> null; TypeInformation<?> ti = TypeExtractor.getMapReturnTypes(f, new TypeHint<Tuple1<Integer>>(){}.getTypeInfo(), null, true); assertTrue(ti instanceof MissingTypeInfo); }
Example #23
Source File: CoGroupConnectedComponentsITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple1<Long>> initialVertices = env.readCsvFile(verticesPath).fieldDelimiter(" ").types(Long.class).name("Vertices");

    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class).name("Edges");

    DataSet<Tuple2<Long, Long>> verticesWithId = initialVertices.map(new MapFunction<Tuple1<Long>, Tuple2<Long, Long>>() {
        @Override
        public Tuple2<Long, Long> map(Tuple1<Long> value) throws Exception {
            return new Tuple2<>(value.f0, value.f0);
        }
    }).name("Assign Vertex Ids");

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = verticesWithId.iterateDelta(verticesWithId, MAX_ITERATIONS, 0);

    JoinOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> joinWithNeighbors = iteration.getWorkset()
            .join(edges).where(0).equalTo(0)
            .with(new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                @Override
                public Tuple2<Long, Long> join(Tuple2<Long, Long> first, Tuple2<Long, Long> second) throws Exception {
                    return new Tuple2<>(second.f1, first.f1);
                }
            })
            .name("Join Candidate Id With Neighbor");

    CoGroupOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> minAndUpdate = joinWithNeighbors
            .coGroup(iteration.getSolutionSet()).where(0).equalTo(0)
            .with(new MinIdAndUpdate())
            .name("min Id and Update");

    iteration.closeWith(minAndUpdate, minAndUpdate).writeAsCsv(resultPath, "\n", " ").name("Result");

    env.execute("Workset Connected Components");
}
Example #24
Source File: DataStreamAllroundTestJobFactory.java From Flink-CEPplus with Apache License 2.0
static <IN, OUT, STATE> ArtificialKeyedStateMapper<IN, OUT> createArtificialKeyedStateMapper(
        MapFunction<IN, OUT> mapFunction,
        JoinFunction<IN, STATE, STATE> inputAndOldStateToNewState,
        List<TypeSerializer<STATE>> stateSerializers,
        List<Class<STATE>> stateClasses) {

    List<ArtificialStateBuilder<IN>> artificialStateBuilders = new ArrayList<>(stateSerializers.size());
    for (TypeSerializer<STATE> typeSerializer : stateSerializers) {
        artificialStateBuilders.add(createValueStateBuilder(
            inputAndOldStateToNewState,
            new ValueStateDescriptor<>(
                "valueState-" + typeSerializer.getClass().getSimpleName(),
                typeSerializer)));

        artificialStateBuilders.add(createListStateBuilder(
            inputAndOldStateToNewState,
            new ListStateDescriptor<>(
                "listState-" + typeSerializer.getClass().getSimpleName(),
                typeSerializer)));
    }

    for (Class<STATE> stateClass : stateClasses) {
        artificialStateBuilders.add(createValueStateBuilder(
            inputAndOldStateToNewState,
            new ValueStateDescriptor<>(
                "valueState-" + stateClass.getSimpleName(),
                stateClass)));

        artificialStateBuilders.add(createListStateBuilder(
            inputAndOldStateToNewState,
            new ListStateDescriptor<>(
                "listState-" + stateClass.getSimpleName(),
                stateClass)));
    }

    return new ArtificialKeyedStateMapper<>(mapFunction, artificialStateBuilders);
}
Example #25
Source File: SequenceStreamingFileSinkITCase.java From flink with Apache License 2.0
@Test
public void testWriteSequenceFile() throws Exception {
    final File folder = TEMPORARY_FOLDER.newFolder();
    final Path testPath = Path.fromLocalFile(folder);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.enableCheckpointing(100);

    DataStream<Tuple2<Long, String>> stream = env.addSource(
            new FiniteTestSource<>(testData),
            TypeInformation.of(new TypeHint<Tuple2<Long, String>>() {
            })
    );

    stream.map(new MapFunction<Tuple2<Long, String>, Tuple2<LongWritable, Text>>() {
        @Override
        public Tuple2<LongWritable, Text> map(Tuple2<Long, String> value) throws Exception {
            return new Tuple2<>(new LongWritable(value.f0), new Text(value.f1));
        }
    }).addSink(
        StreamingFileSink.forBulkFormat(
            testPath,
            new SequenceFileWriterFactory<>(configuration, LongWritable.class, Text.class, "BZip2")
        ).build());

    env.execute();

    validateResults(folder, testData);
}
Example #26
Source File: Main4.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(2);

    DataStreamSource<String> data = env.socketTextStream("localhost", 9001);

    data.map(new MapFunction<String, Tuple2<String, Long>>() {
        @Override
        public Tuple2<String, Long> map(String s) throws Exception {
            String[] split = s.split(",");
            return new Tuple2<>(split[0], Long.valueOf(split[1]));
        }
    }).assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<Tuple2<String, Long>>() {

        private long currentTimestamp;

        @Nullable
        @Override
        public Watermark getCurrentWatermark() {
            return new Watermark(currentTimestamp);
        }

        @Override
        public long extractTimestamp(Tuple2<String, Long> tuple2, long l) {
            long timestamp = tuple2.f1;
            currentTimestamp = Math.max(timestamp, currentTimestamp);
            return timestamp;
        }
    }).keyBy(0)
            .window(EventTimeSessionWindows.withGap(Time.minutes(5)))
            .sum(1)
            .print("session ");

    System.out.println(env.getExecutionPlan());
    env.execute();
}
Example #27
Source File: Main2.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // set parallelism to 1
    env.setParallelism(1);
    // env.setParallelism(4);

    SingleOutputStreamOperator<Word> data = env.socketTextStream("localhost", 9001)
            .map(new MapFunction<String, Word>() {
                @Override
                public Word map(String value) throws Exception {
                    String[] split = value.split(",");
                    return new Word(split[0], Integer.valueOf(split[1]), Long.valueOf(split[2]));
                }
            });

    // BoundedOutOfOrdernessTimestampExtractor
    data.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<Word>(Time.seconds(10)) {
        @Override
        public long extractTimestamp(Word element) {
            return element.getTimestamp();
        }
    });

    data.print();
    env.execute("watermark demo");
}
Example #28
Source File: Main4.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // set parallelism to 1
    env.setParallelism(1);
    // env.setParallelism(4);

    OutputTag<Word> lateDataTag = new OutputTag<Word>("late") {
    };

    SingleOutputStreamOperator<Word> data = env.socketTextStream("localhost", 9001)
            .map(new MapFunction<String, Word>() {
                @Override
                public Word map(String value) throws Exception {
                    String[] split = value.split(",");
                    return new Word(split[0], Integer.valueOf(split[1]), Long.valueOf(split[2]));
                }
            }).assignTimestampsAndWatermarks(new WordPeriodicWatermark());

    SingleOutputStreamOperator<Word> sum = data.keyBy(0)
            .timeWindow(Time.seconds(10))
            // .allowedLateness(Time.milliseconds(2))
            .sideOutputLateData(lateDataTag)
            .sum(1);

    sum.print();
    sum.getSideOutput(lateDataTag)
            .print();

    env.execute("watermark demo");
}
Example #29
Source File: ConnectedComponents.java From Flink-CEPplus with Apache License 2.0
private static DataSet<Long> getVertexDataSet(ExecutionEnvironment env, ParameterTool params) {
    if (params.has("vertices")) {
        return env.readCsvFile(params.get("vertices")).types(Long.class).map(
                new MapFunction<Tuple1<Long>, Long>() {
                    public Long map(Tuple1<Long> value) {
                        return value.f0;
                    }
                });
    } else {
        System.out.println("Executing Connected Components example with default vertices data set.");
        System.out.println("Use --vertices to specify file input.");
        return ConnectedComponentsData.getDefaultVertexDataSet(env);
    }
}