Java Code Examples for org.apache.flink.api.java.ExecutionEnvironment#fromElements()
The following examples show how to use org.apache.flink.api.java.ExecutionEnvironment#fromElements().
Follow the links above each example to view the original project or source file.
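Before the examples, here is a minimal, self-contained sketch (illustrative only, not taken from any of the projects below) of how fromElements() is typically used to build a small in-memory DataSet for testing or experimentation:

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class FromElementsSketch {
    public static void main(String[] args) throws Exception {
        // Obtain the batch execution environment.
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Create a DataSet directly from a fixed set of values.
        // All elements must share one type, which Flink infers from the first argument.
        DataSet<String> words = env.fromElements("to", "be", "or", "not", "to", "be");

        // print() triggers execution of the program and writes the elements to stdout.
        words.print();
    }
}

Note that the elements are shipped with the job, so fromElements() is intended for small, fixed test data rather than large inputs.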
Example 1
Source File: IterationWithAllReducerITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    DataSet<String> initialInput = env.fromElements("1", "1", "1", "1", "1", "1", "1", "1");

    IterativeDataSet<String> iteration = initialInput.iterate(5).name("Loop");

    DataSet<String> sumReduce = iteration.reduce(new ReduceFunction<String>() {
        @Override
        public String reduce(String value1, String value2) throws Exception {
            return value1;
        }
    }).name("Compute sum (Reduce)");

    List<String> result = iteration.closeWith(sumReduce).collect();

    compareResultAsText(result, EXPECTED);
}
Example 2
Source File: SemanticPropertiesTranslationTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testUnaryFunctionMovingForwardedAnnotation() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L));
    input.map(new ShufflingMapper<Long>()).output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

    Plan plan = env.createProgramPlan();

    GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
    MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput();

    SingleInputSemanticProperties semantics = mapper.getSemanticProperties();

    FieldSet fw1 = semantics.getForwardingTargetFields(0, 0);
    FieldSet fw2 = semantics.getForwardingTargetFields(0, 1);
    FieldSet fw3 = semantics.getForwardingTargetFields(0, 2);
    assertNotNull(fw1);
    assertNotNull(fw2);
    assertNotNull(fw3);
    assertTrue(fw1.contains(2));
    assertTrue(fw2.contains(0));
    assertTrue(fw3.contains(1));
}
Example 3
Source File: ConnectedComponentsWithRandomisedEdgesITCase.java From Flink-CEPplus with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Long> vertexIds = env.generateSequence(1, NUM_VERTICES);
    DataSet<String> edgeString = env.fromElements(
        ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n"));

    DataSet<Edge<Long, NullValue>> edges = edgeString.map(new EdgeParser());

    DataSet<Vertex<Long, Long>> initialVertices = vertexIds.map(new IdAssigner());

    Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);

    DataSet<Vertex<Long, Long>> result = graph.run(new ConnectedComponents<>(100));

    result.writeAsCsv(resultPath, "\n", " ");
    env.execute();
}
Example 4
Source File: ConnectedComponentsWithRandomisedEdgesITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Long> vertexIds = env.generateSequence(1, NUM_VERTICES);
    DataSet<String> edgeString = env.fromElements(
        ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n"));

    DataSet<Edge<Long, NullValue>> edges = edgeString.map(new EdgeParser());

    DataSet<Vertex<Long, Long>> initialVertices = vertexIds.map(new IdAssigner());

    Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);

    DataSet<Vertex<Long, Long>> result = graph.run(new ConnectedComponents<>(100));

    result.writeAsCsv(resultPath, "\n", " ");
    env.execute();
}
Example 5
Source File: Java8WordCount.java From flink-examples with MIT License
public static void main(String[] args) throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSource<String> lines = env.fromElements(
        "Apache Flink is a community-driven open source framework for distributed big data analytics,",
        "like Hadoop and Spark. The core of Apache Flink is a distributed streaming dataflow engine written",
        " in Java and Scala.[1][2] It aims to bridge the gap between MapReduce-like systems and shared-nothing",
        "parallel database systems. Therefore, Flink executes arbitrary dataflow programs in a data-parallel and",
        "pipelined manner.[3] Flink's pipelined runtime system enables the execution of bulk/batch and stream",
        "processing programs.[4][5] Furthermore, Flink's runtime supports the execution of iterative algorithms natively.[6]"
    );

    lines.flatMap((line, out) -> {
            String[] words = line.split("\\W+");
            for (String word : words) {
                out.collect(new Tuple2<>(word, 1));
            }
        })
        .returns(new TupleTypeInfo(TypeInformation.of(String.class), TypeInformation.of(Integer.class)))
        .groupBy(0)
        .sum(1)
        .print();
}
Example 6
Source File: SemanticPropertiesTranslationTest.java From flink with Apache License 2.0
@Test
public void testUnaryFunctionAllForwardedExceptAnnotation() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L));
    input.map(new AllForwardedExceptMapper<Tuple3<Long, Long, Long>>()).output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

    Plan plan = env.createProgramPlan();

    GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
    MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput();

    SingleInputSemanticProperties semantics = mapper.getSemanticProperties();

    FieldSet fw1 = semantics.getForwardingTargetFields(0, 0);
    FieldSet fw2 = semantics.getForwardingTargetFields(0, 2);
    assertNotNull(fw1);
    assertNotNull(fw2);
    assertTrue(fw1.contains(0));
    assertTrue(fw2.contains(2));
}
Example 7
Source File: IterationWithAllReducerITCase.java From Flink-CEPplus with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    DataSet<String> initialInput = env.fromElements("1", "1", "1", "1", "1", "1", "1", "1");

    IterativeDataSet<String> iteration = initialInput.iterate(5).name("Loop");

    DataSet<String> sumReduce = iteration.reduce(new ReduceFunction<String>() {
        @Override
        public String reduce(String value1, String value2) throws Exception {
            return value1;
        }
    }).name("Compute sum (Reduce)");

    List<String> result = iteration.closeWith(sumReduce).collect();

    compareResultAsText(result, EXPECTED);
}
Example 8
Source File: GraphOperationsITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testNumberOfEdges() throws Exception {
    /*
     * Test numberOfEdges()
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    Graph<Long, Long, Long> graph = Graph.fromDataSet(TestGraphUtils.getLongLongVertexData(env),
        TestGraphUtils.getLongLongEdgeData(env), env);

    DataSet<Long> data = env.fromElements(graph.numberOfEdges());

    List<Long> result = data.collect();

    expectedResult = "7";

    compareResultAsText(result, expectedResult);
}
Example 9
Source File: BroadcastVariablePipelinebreakerTest.java From flink with Apache License 2.0
@Test
public void testNoBreakerForIndependentVariable() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<String> source1 = env.fromElements("test");
        DataSet<String> source2 = env.fromElements("test");

        source1.map(new IdentityMapper<String>()).withBroadcastSet(source2, "some name")
                .output(new DiscardingOutputFormat<String>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource();

        assertEquals(TempMode.NONE, mapper.getInput().getTempMode());
        assertEquals(TempMode.NONE, mapper.getBroadcastInputs().get(0).getTempMode());

        assertEquals(DataExchangeMode.PIPELINED, mapper.getInput().getDataExchangeMode());
        assertEquals(DataExchangeMode.PIPELINED, mapper.getBroadcastInputs().get(0).getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 10
Source File: GroupReduceITCase.java From flink with Apache License 2.0
/**
 * Fix for FLINK-2019.
 *
 * @throws Exception
 */
@Test
public void testJodatimeDateTimeWithKryo() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Integer, DateTime>> ds = env.fromElements(new Tuple2<>(1, DateTime.now()));

    DataSet<Tuple2<Integer, DateTime>> reduceDs = ds.groupBy("f1").sum(0).project(0);

    List<Tuple2<Integer, DateTime>> result = reduceDs.collect();

    String expected = "1\n";

    compareResultAsTuples(result, expected);
}
Example 11
Source File: StaticData.java From flink with Apache License 2.0
public static DataSet<String> getDefaultTextLineDataSet(ExecutionEnvironment env) {
    return env.fromElements(
        "To be, or not to be,--that is the question:--",
        "Whether 'tis nobler in the mind to suffer",
        "The slings and arrows of outrageous fortune",
        "Or to take arms against a sea of troubles,"
    );
}
Example 12
Source File: ValueStateTransformationTest.java From bravo with Apache License 2.0
private Path transformLastSavepoint() throws IOException, Exception {
    ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment();
    Savepoint savepoint = getLastSavepoint();
    OperatorStateReader reader = new OperatorStateReader(environment, savepoint, "hello");

    DataSet<Tuple2<Integer, Integer>> countState = reader.readKeyedStates(
            KeyedStateReader.forValueStateKVPairs("Count", new TypeHint<Tuple2<Integer, Integer>>() {}));

    DataSet<Tuple2<Integer, Integer>> newCountsToAdd = environment
            .fromElements(Tuple2.of(0, 100), Tuple2.of(3, 1000), Tuple2.of(1, 100), Tuple2.of(2, 1000));

    DataSet<Tuple2<Integer, Integer>> newStates = countState.join(newCountsToAdd).where(0).equalTo(0)
            .map(new SumValues());

    Path newCheckpointBasePath = new Path(getCheckpointDir(), "new");
    OperatorStateWriter operatorStateWriter = new OperatorStateWriter(savepoint, "hello", newCheckpointBasePath);

    operatorStateWriter.addValueState("Count",
            countState.map(t -> Tuple2.of(t.f0, t.f1 * 2)).returns(new TypeHint<Tuple2<Integer, Integer>>() {}));

    operatorStateWriter.createNewValueState("Count2", newStates, IntSerializer.INSTANCE);
    operatorStateWriter.addKeyedStateRows(reader.getAllUnreadKeyedStateRows());

    OperatorState newOpState = operatorStateWriter.writeAll();
    Savepoint newSavepoint = StateMetadataUtils.createNewSavepoint(savepoint, newOpState);
    StateMetadataUtils.writeSavepointMetadata(newCheckpointBasePath, newSavepoint);
    return newCheckpointBasePath;
}
Example 13
Source File: ConsumePipelinedAndBlockingResultITCase.java From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    env.setParallelism(1);

    DataSet<Tuple1<Long>> pipelinedSource = env.fromElements(new Tuple1<Long>(1L));

    DataSet<Tuple1<Long>> slowBlockingSource = env.generateSequence(0, 10).map(
            new MapFunction<Long, Tuple1<Long>>() {
                @Override
                public Tuple1<Long> map(Long value) throws Exception {
                    Thread.sleep(200);
                    return new Tuple1<Long>(value);
                }
            }
    );

    slowBlockingSource.join(slowBlockingSource)
            .where(0).equalTo(0)
            .output(new DiscardingOutputFormat<Tuple2<Tuple1<Long>, Tuple1<Long>>>());

    // Join the slow blocking and the pipelined source. This test should verify that this works
    // w/o problems and the blocking result is not requested too early.
    pipelinedSource.join(slowBlockingSource)
            .where(0).equalTo(0)
            .output(new DiscardingOutputFormat<Tuple2<Tuple1<Long>, Tuple1<Long>>>());

    env.execute("Consume one pipelined and one blocking result test job");
}
Example 14
Source File: AggregateTranslationTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void translateAggregate() {
    try {
        final int parallelism = 8;
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

        @SuppressWarnings("unchecked")
        DataSet<Tuple3<Double, StringValue, Long>> initialData =
                env.fromElements(new Tuple3<Double, StringValue, Long>(3.141592, new StringValue("foobar"), Long.valueOf(77)));

        initialData.groupBy(0).aggregate(Aggregations.MIN, 1).and(Aggregations.SUM, 2)
                .output(new DiscardingOutputFormat<Tuple3<Double, StringValue, Long>>());

        Plan p = env.createProgramPlan();

        GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

        GroupReduceOperatorBase<?, ?, ?> reducer = (GroupReduceOperatorBase<?, ?, ?>) sink.getInput();

        // check keys
        assertEquals(1, reducer.getKeyColumns(0).length);
        assertEquals(0, reducer.getKeyColumns(0)[0]);

        assertEquals(-1, reducer.getParallelism());
        assertTrue(reducer.isCombinable());

        assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test caused an error: " + e.getMessage());
    }
}
Example 15
Source File: DataSetConversionUtilTest.java From Alink with Apache License 2.0
@Test
public void testBasicConvert() throws Exception {
    ExecutionEnvironment env = MLEnvironmentFactory.getDefault().getExecutionEnvironment();
    DataSet<Row> input = env.fromElements(Row.of("a"));

    Table table1 = DataSetConversionUtil.toTable(MLEnvironmentFactory.DEFAULT_ML_ENVIRONMENT_ID, input, new String[] {"word"});
    Assert.assertEquals(
        new TableSchema(new String[] {"word"}, new TypeInformation[] {TypeInformation.of(String.class)}),
        table1.getSchema()
    );

    List<Row> list = DataSetConversionUtil.fromTable(MLEnvironmentFactory.DEFAULT_ML_ENVIRONMENT_ID, table1).collect();
    Assert.assertEquals(Collections.singletonList(Row.of("a")), list);
}
Example 16
Source File: JoinOperatorTest.java From flink with Apache License 2.0
@Test(expected = InvalidProgramException.class)
public void testJoinKeyInvalidAtomic2() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
    DataSet<Integer> ds2 = env.fromElements(0, 0, 0);

    ds1.join(ds2).where(0).equalTo("*", "invalidKey");
}
Example 17
Source File: PartitionOperatorTest.java From flink with Apache License 2.0
@Test(expected = IllegalArgumentException.class)
public void testRangePartitionWithEmptyIndicesKey() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final DataSource<Tuple2<Tuple2<Integer, Integer>, Integer>> ds = env.fromElements(
        new Tuple2<>(new Tuple2<>(1, 1), 1),
        new Tuple2<>(new Tuple2<>(2, 2), 2),
        new Tuple2<>(new Tuple2<>(2, 2), 2)
    );
    ds.partitionByRange(new int[]{});
}
Example 18
Source File: IterationsCompilerTest.java From flink with Apache License 2.0
@Test
public void testTwoIterationsDirectlyChained() throws Exception {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        DataSet<Tuple2<Long, Long>> verticesWithInitialId = env.fromElements(new Tuple2<Long, Long>(1L, 2L));

        DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));

        DataSet<Tuple2<Long, Long>> bulkResult = doBulkIteration(verticesWithInitialId, edges);

        DataSet<Tuple2<Long, Long>> depResult = doDeltaIteration(bulkResult, edges);

        depResult.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        assertEquals(1, op.getDataSinks().size());
        assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof WorksetIterationPlanNode);

        WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
        BulkIterationPlanNode bipn = (BulkIterationPlanNode) wipn.getInput1().getSource();

        // the hash partitioning has been pushed out of the delta iteration into the bulk iteration
        assertEquals(ShipStrategyType.FORWARD, wipn.getInput1().getShipStrategy());

        // the input of the root step function is the last operator of the step function
        // since the work has been pushed out of the bulk iteration, it has to guarantee the hash partitioning
        for (Channel c : bipn.getRootOfStepFunction().getInputs()) {
            assertEquals(ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
        }

        assertEquals(DataExchangeMode.BATCH, wipn.getInput1().getDataExchangeMode());
        assertEquals(DataExchangeMode.BATCH, wipn.getInput2().getDataExchangeMode());

        assertEquals(TempMode.NONE, wipn.getInput1().getTempMode());
        assertEquals(TempMode.NONE, wipn.getInput2().getTempMode());

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 19
Source File: PregelCompilerTest.java From Flink-CEPplus with Apache License 2.0
@SuppressWarnings("serial") @Test public void testPregelCompilerWithBroadcastVariable() { final String broadcastSetName = "broadcast"; ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); // compose test program { DataSet<Long> bcVar = env.fromElements(1L); DataSet<Vertex<Long, Long>> initialVertices = env.fromElements( new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)) .map(new Tuple2ToVertexMap<>()); DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)) .map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() { public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) { return new Edge<>(edge.f0, edge.f1, NullValue.getInstance()); } }); Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env); VertexCentricConfiguration parameters = new VertexCentricConfiguration(); parameters.addBroadcastSet(broadcastSetName, bcVar); DataSet<Vertex<Long, Long>> result = graph.runVertexCentricIteration( new CCCompute(), null, 100, parameters) .getVertices(); result.output(new DiscardingOutputFormat<>()); } Plan p = env.createProgramPlan("Pregel Connected Components"); OptimizedPlan op = compileNoStats(p); // check the sink SinkPlanNode sink = op.getDataSinks().iterator().next(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); assertEquals(DEFAULT_PARALLELISM, sink.getParallelism()); // check the iteration WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource(); assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism()); // check the solution set delta PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode(); assertTrue(ssDelta instanceof SingleInputPlanNode); SingleInputPlanNode ssFlatMap = (SingleInputPlanNode) ((SingleInputPlanNode) (ssDelta)).getInput().getSource(); assertEquals(DEFAULT_PARALLELISM, ssFlatMap.getParallelism()); assertEquals(ShipStrategyType.FORWARD, ssFlatMap.getInput().getShipStrategy()); // check the computation coGroup DualInputPlanNode computationCoGroup = (DualInputPlanNode) (ssFlatMap.getInput().getSource()); assertEquals(DEFAULT_PARALLELISM, computationCoGroup.getParallelism()); assertEquals(ShipStrategyType.FORWARD, computationCoGroup.getInput1().getShipStrategy()); assertEquals(ShipStrategyType.PARTITION_HASH, computationCoGroup.getInput2().getShipStrategy()); assertTrue(computationCoGroup.getInput2().getTempMode().isCached()); assertEquals(new FieldList(0), computationCoGroup.getInput2().getShipStrategyKeys()); // check that the initial partitioning is pushed out of the loop assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy()); assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys()); }
Example 20
Source File: HBaseWriteExample.java From flink with Apache License 2.0
private static DataSet<String> getDefaultTextLineDataSet(ExecutionEnvironment env) {
    return env.fromElements(WORDS);
}