org.apache.flink.api.common.io.ReplicatingInputFormat Java Examples
The following examples show how to use
org.apache.flink.api.common.io.ReplicatingInputFormat.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ReplicatingDataSourceTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * Tests compiler fail for join program with replicated data source behind reduce. */ @Test(expected = CompilerException.class) public void checkJoinWithReplicatedSourceInputBehindReduce() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .reduce(new LastReduce()) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); }
Example #2
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 6 votes |
/** * Tests compiler fail for join program with replicated data source and changing parallelism. */ @Test(expected = CompilerException.class) public void checkJoinWithReplicatedSourceInputChangingparallelism() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .join(source2).where("*").equalTo("*").setParallelism(DEFAULT_PARALLELISM+2) .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); }
Example #3
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 6 votes |
/** * Tests compiler fail for join program with replicated data source behind map and changing parallelism. */ @Test(expected = CompilerException.class) public void checkJoinWithReplicatedSourceInputBehindMapChangingparallelism() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .map(new IdMap()).setParallelism(DEFAULT_PARALLELISM+1) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); }
Example #4
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 6 votes |
/** * Tests compiler fail for join program with replicated data source behind reduce. */ @Test(expected = CompilerException.class) public void checkJoinWithReplicatedSourceInputBehindReduce() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .reduce(new LastReduce()) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); }
Example #5
Source File: ReplicatingDataSourceITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test public void testReplicatedSourceToJoin() throws Exception { /* * Test replicated source going into join */ final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple1<Long>> source1 = env.createInput(new ReplicatingInputFormat<Long, GenericInputSplit> (new ParallelIteratorInputFormat<Long>(new NumberSequenceIterator(0L, 1000L))), BasicTypeInfo.LONG_TYPE_INFO) .map(new ToTuple()); DataSet<Tuple1<Long>> source2 = env.generateSequence(0L, 1000L).map(new ToTuple()); DataSet<Tuple> pairs = source1.join(source2).where(0).equalTo(0) .projectFirst(0) .sum(0); List<Tuple> result = pairs.collect(); String expectedResult = "(500500)"; compareResultAsText(result, expectedResult); }
Example #6
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 6 votes |
/** * Tests compiler fail for join program with replicated data source behind rebalance. */ @Test(expected = CompilerException.class) public void checkJoinWithReplicatedSourceInputBehindRebalance() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .rebalance() .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); }
Example #7
Source File: ReplicatingDataSourceITCase.java From flink with Apache License 2.0 | 6 votes |
@Test public void testReplicatedSourceToJoin() throws Exception { /* * Test replicated source going into join */ final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple1<Long>> source1 = env.createInput(new ReplicatingInputFormat<Long, GenericInputSplit> (new ParallelIteratorInputFormat<Long>(new NumberSequenceIterator(0L, 1000L))), BasicTypeInfo.LONG_TYPE_INFO) .map(new ToTuple()); DataSet<Tuple1<Long>> source2 = env.generateSequence(0L, 1000L).map(new ToTuple()); DataSet<Tuple> pairs = source1.join(source2).where(0).equalTo(0) .projectFirst(0) .sum(0); List<Tuple> result = pairs.collect(); String expectedResult = "(500500)"; compareResultAsText(result, expectedResult); }
Example #8
Source File: ReplicatingDataSourceITCase.java From flink with Apache License 2.0 | 6 votes |
@Test public void testReplicatedSourceToJoin() throws Exception { /* * Test replicated source going into join */ final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple1<Long>> source1 = env.createInput(new ReplicatingInputFormat<Long, GenericInputSplit> (new ParallelIteratorInputFormat<Long>(new NumberSequenceIterator(0L, 1000L))), BasicTypeInfo.LONG_TYPE_INFO) .map(new ToTuple()); DataSet<Tuple1<Long>> source2 = env.generateSequence(0L, 1000L).map(new ToTuple()); DataSet<Tuple> pairs = source1.join(source2).where(0).equalTo(0) .projectFirst(0) .sum(0); List<Tuple> result = pairs.collect(); String expectedResult = "(500500)"; compareResultAsText(result, expectedResult); }
Example #9
Source File: ReplicatingDataSourceTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * Tests compiler fail for join program with replicated data source behind rebalance. */ @Test(expected = CompilerException.class) public void checkJoinWithReplicatedSourceInputBehindRebalance() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .rebalance() .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); }
Example #10
Source File: ReplicatingDataSourceTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * Tests compiler fail for join program with replicated data source behind map and changing parallelism. */ @Test(expected = CompilerException.class) public void checkJoinWithReplicatedSourceInputBehindMapChangingparallelism() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .map(new IdMap()).setParallelism(DEFAULT_PARALLELISM+1) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); }
Example #11
Source File: ReplicatingDataSourceTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * Tests compiler fail for join program with replicated data source and changing parallelism. */ @Test(expected = CompilerException.class) public void checkJoinWithReplicatedSourceInputChangingparallelism() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .join(source2).where("*").equalTo("*").setParallelism(DEFAULT_PARALLELISM+2) .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); }
Example #12
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 6 votes |
/** * Tests compiler fail for join program with replicated data source and changing parallelism. */ @Test(expected = CompilerException.class) public void checkJoinWithReplicatedSourceInputChangingparallelism() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .join(source2).where("*").equalTo("*").setParallelism(DEFAULT_PARALLELISM+2) .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); }
Example #13
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 6 votes |
/** * Tests compiler fail for join program with replicated data source behind map and changing parallelism. */ @Test(expected = CompilerException.class) public void checkJoinWithReplicatedSourceInputBehindMapChangingparallelism() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .map(new IdMap()).setParallelism(DEFAULT_PARALLELISM+1) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); }
Example #14
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 6 votes |
/** * Tests compiler fail for join program with replicated data source behind reduce. */ @Test(expected = CompilerException.class) public void checkJoinWithReplicatedSourceInputBehindReduce() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .reduce(new LastReduce()) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); }
Example #15
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 6 votes |
/** * Tests compiler fail for join program with replicated data source behind rebalance. */ @Test(expected = CompilerException.class) public void checkJoinWithReplicatedSourceInputBehindRebalance() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .rebalance() .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); }
Example #16
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests join program with replicated data source behind flatMap. */ @Test public void checkJoinWithReplicatedSourceInputBehindFlatMap() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .flatMap(new IdFlatMap()) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when join should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy(); ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2); }
Example #17
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests join program with replicated data source behind multiple map ops. */ @Test public void checkJoinWithReplicatedSourceInputBehindMultiMaps() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .filter(new NoFilter()) .mapPartition(new IdPMap()) .flatMap(new IdFlatMap()) .map(new IdMap()) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when join should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy(); ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2); }
Example #18
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests join program with replicated data source behind map partition. */ @Test public void checkJoinWithReplicatedSourceInputBehindMapPartition() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .mapPartition(new IdPMap()) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when join should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy(); ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2); }
Example #19
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests join program with replicated data source behind multiple map ops. */ @Test public void checkJoinWithReplicatedSourceInputBehindMultiMaps() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .filter(new NoFilter()) .mapPartition(new IdPMap()) .flatMap(new IdFlatMap()) .map(new IdMap()) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when join should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy(); ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2); }
Example #20
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests join program with replicated data source behind filter. */ @Test public void checkJoinWithReplicatedSourceInputBehindFilter() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .filter(new NoFilter()) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when join should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy(); ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2); }
Example #21
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests join program with replicated data source behind map. */ @Test public void checkJoinWithReplicatedSourceInputBehindMap() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .map(new IdMap()) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when join should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy(); ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2); }
Example #22
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests join program with replicated data source. */ @Test public void checkJoinWithReplicatedSourceInput() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when join should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy(); ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2); }
Example #23
Source File: DataSourceNode.java From flink with Apache License 2.0 | 5 votes |
/** * Creates a new DataSourceNode for the given contract. * * @param pactContract * The data source contract object. */ public DataSourceNode(GenericDataSourceBase<?, ?> pactContract) { super(pactContract); if (pactContract.getUserCodeWrapper().getUserCodeClass() == null) { throw new IllegalArgumentException("Input format has not been set."); } if (NonParallelInput.class.isAssignableFrom(pactContract.getUserCodeWrapper().getUserCodeClass())) { setParallelism(1); this.sequentialInput = true; } else { this.sequentialInput = false; } this.replicatedInput = ReplicatingInputFormat.class.isAssignableFrom( pactContract.getUserCodeWrapper().getUserCodeClass()); this.gprops = new GlobalProperties(); this.lprops = new LocalProperties(); SplitDataProperties<?> splitProps = pactContract.getSplitDataProperties(); if(replicatedInput) { this.gprops.setFullyReplicated(); this.lprops = new LocalProperties(); } else if (splitProps != null) { // configure data properties of data source using split properties setDataPropertiesFromSplitProperties(splitProps); } }
Example #24
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests cross program with replicated data source. */ @Test public void checkCrossWithReplicatedSourceInput() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .cross(source2) .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when cross should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode crossNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType crossIn1 = crossNode.getInput1().getShipStrategy(); ShipStrategyType crossIn2 = crossNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, crossIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, crossIn2); }
Example #25
Source File: ReplicatingDataSourceTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests cross program with replicated data source behind map and filter. */ @Test public void checkCrossWithReplicatedSourceInputBehindMap() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .map(new IdMap()) .filter(new NoFilter()) .cross(source2) .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when cross should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode crossNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType crossIn1 = crossNode.getInput1().getShipStrategy(); ShipStrategyType crossIn2 = crossNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, crossIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, crossIn2); }
Example #26
Source File: ReplicatingDataSourceTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Tests cross program with replicated data source behind map and filter. */ @Test public void checkCrossWithReplicatedSourceInputBehindMap() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .map(new IdMap()) .filter(new NoFilter()) .cross(source2) .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when cross should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode crossNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType crossIn1 = crossNode.getInput1().getShipStrategy(); ShipStrategyType crossIn2 = crossNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, crossIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, crossIn2); }
Example #27
Source File: DataSourceNode.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Creates a new DataSourceNode for the given contract. * * @param pactContract * The data source contract object. */ public DataSourceNode(GenericDataSourceBase<?, ?> pactContract) { super(pactContract); if (pactContract.getUserCodeWrapper().getUserCodeClass() == null) { throw new IllegalArgumentException("Input format has not been set."); } if (NonParallelInput.class.isAssignableFrom(pactContract.getUserCodeWrapper().getUserCodeClass())) { setParallelism(1); this.sequentialInput = true; } else { this.sequentialInput = false; } this.replicatedInput = ReplicatingInputFormat.class.isAssignableFrom( pactContract.getUserCodeWrapper().getUserCodeClass()); this.gprops = new GlobalProperties(); this.lprops = new LocalProperties(); SplitDataProperties<?> splitProps = pactContract.getSplitDataProperties(); if(replicatedInput) { this.gprops.setFullyReplicated(); this.lprops = new LocalProperties(); } else if (splitProps != null) { // configure data properties of data source using split properties setDataPropertiesFromSplitProperties(splitProps); } }
Example #28
Source File: ReplicatingDataSourceTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Tests join program with replicated data source. */ @Test public void checkJoinWithReplicatedSourceInput() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when join should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy(); ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2); }
Example #29
Source File: ReplicatingDataSourceTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Tests join program with replicated data source behind map. */ @Test public void checkJoinWithReplicatedSourceInputBehindMap() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .map(new IdMap()) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when join should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy(); ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2); }
Example #30
Source File: ReplicatingDataSourceTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Tests join program with replicated data source behind filter. */ @Test public void checkJoinWithReplicatedSourceInputBehindFilter() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .filter(new NoFilter()) .join(source2).where("*").equalTo("*") .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); // check the optimized Plan // when join should have forward strategy on both sides SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next(); DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor(); ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy(); ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy(); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1); Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2); }