org.apache.flink.api.common.operators.GenericDataSourceBase Java Examples
The following examples show how to use
org.apache.flink.api.common.operators.GenericDataSourceBase.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DataSource.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
protected GenericDataSourceBase<OUT, ?> translateToDataFlow() { String name = this.name != null ? this.name : "at " + dataSourceLocationName + " (" + inputFormat.getClass().getName() + ")"; if (name.length() > 150) { name = name.substring(0, 150); } @SuppressWarnings({"unchecked", "rawtypes"}) GenericDataSourceBase<OUT, ?> source = new GenericDataSourceBase(this.inputFormat, new OperatorInformation<OUT>(getType()), name); source.setParallelism(parallelism); if (this.parameters != null) { source.getParameters().addAll(this.parameters); } if (this.splitDataProperties != null) { source.setSplitDataProperties(this.splitDataProperties); } return source; }
Example #2
Source File: DataSource.java From flink with Apache License 2.0 | 6 votes |
protected GenericDataSourceBase<OUT, ?> translateToDataFlow() { String name = this.name != null ? this.name : "at " + dataSourceLocationName + " (" + inputFormat.getClass().getName() + ")"; if (name.length() > 150) { name = name.substring(0, 150); } @SuppressWarnings({"unchecked", "rawtypes"}) GenericDataSourceBase<OUT, ?> source = new GenericDataSourceBase(this.inputFormat, new OperatorInformation<OUT>(getType()), name); source.setParallelism(parallelism); if (this.parameters != null) { source.getParameters().addAll(this.parameters); } if (this.splitDataProperties != null) { source.setSplitDataProperties(this.splitDataProperties); } return source; }
Example #3
Source File: DataSource.java From flink with Apache License 2.0 | 6 votes |
protected GenericDataSourceBase<OUT, ?> translateToDataFlow() { String name = this.name != null ? this.name : "at " + dataSourceLocationName + " (" + inputFormat.getClass().getName() + ")"; if (name.length() > 150) { name = name.substring(0, 150); } @SuppressWarnings({"unchecked", "rawtypes"}) GenericDataSourceBase<OUT, ?> source = new GenericDataSourceBase(this.inputFormat, new OperatorInformation<OUT>(getType()), name); source.setParallelism(parallelism); if (this.parameters != null) { source.getParameters().addAll(this.parameters); } if (this.splitDataProperties != null) { source.setSplitDataProperties(this.splitDataProperties); } return source; }
Example #4
Source File: AggregateTranslationTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void translateAggregate() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); @SuppressWarnings("unchecked") DataSet<Tuple3<Double, StringValue, Long>> initialData = env.fromElements(new Tuple3<Double, StringValue, Long>(3.141592, new StringValue("foobar"), Long.valueOf(77))); initialData.groupBy(0).aggregate(Aggregations.MIN, 1).and(Aggregations.SUM, 2).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, Long>>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); GroupReduceOperatorBase<?, ?, ?> reducer = (GroupReduceOperatorBase<?, ?, ?>) sink.getInput(); // check keys assertEquals(1, reducer.getKeyColumns(0).length); assertEquals(0, reducer.getKeyColumns(0)[0]); assertEquals(-1, reducer.getParallelism()); assertTrue(reducer.isCombinable()); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
Example #5
Source File: DistinctTranslationTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void translateDistinctPlain() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env); initialData.distinct().output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); // currently distinct is translated to a Reduce ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertArrayEquals(new int[] {0, 1, 2}, reducer.getKeyColumns(0)); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
Example #6
Source File: DataSourceNode.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Creates a new DataSourceNode for the given contract. * * @param pactContract * The data source contract object. */ public DataSourceNode(GenericDataSourceBase<?, ?> pactContract) { super(pactContract); if (pactContract.getUserCodeWrapper().getUserCodeClass() == null) { throw new IllegalArgumentException("Input format has not been set."); } if (NonParallelInput.class.isAssignableFrom(pactContract.getUserCodeWrapper().getUserCodeClass())) { setParallelism(1); this.sequentialInput = true; } else { this.sequentialInput = false; } this.replicatedInput = ReplicatingInputFormat.class.isAssignableFrom( pactContract.getUserCodeWrapper().getUserCodeClass()); this.gprops = new GlobalProperties(); this.lprops = new LocalProperties(); SplitDataProperties<?> splitProps = pactContract.getSplitDataProperties(); if(replicatedInput) { this.gprops.setFullyReplicated(); this.lprops = new LocalProperties(); } else if (splitProps != null) { // configure data properties of data source using split properties setDataPropertiesFromSplitProperties(splitProps); } }
Example #7
Source File: CompilerTestBase.java From flink with Apache License 2.0 | 5 votes |
@Override public boolean preVisit(Operator<?> visitable) { if(visitable instanceof GenericDataSourceBase) { sources.add((GenericDataSourceBase<?, ?>) visitable); } else if(visitable instanceof BulkIterationBase) { ((BulkIterationBase<?>) visitable).getNextPartialSolution().accept(this); } return true; }
Example #8
Source File: AggregateTranslationTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void translateAggregate() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); @SuppressWarnings("unchecked") DataSet<Tuple3<Double, StringValue, Long>> initialData = env.fromElements(new Tuple3<Double, StringValue, Long>(3.141592, new StringValue("foobar"), Long.valueOf(77))); initialData.groupBy(0).aggregate(Aggregations.MIN, 1).and(Aggregations.SUM, 2).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, Long>>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); GroupReduceOperatorBase<?, ?, ?> reducer = (GroupReduceOperatorBase<?, ?, ?>) sink.getInput(); // check keys assertEquals(1, reducer.getKeyColumns(0).length); assertEquals(0, reducer.getKeyColumns(0)[0]); assertEquals(-1, reducer.getParallelism()); assertTrue(reducer.isCombinable()); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
Example #9
Source File: DistinctTranslationTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void translateDistinctPosition() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env); initialData.distinct(1, 2).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); // currently distinct is translated to a Reduce ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertArrayEquals(new int[] {1, 2}, reducer.getKeyColumns(0)); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
Example #10
Source File: ReduceTranslationTests.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void translateNonGroupedReduce() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env); initialData.reduce(new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() { public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) { return value1; } }).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertTrue(reducer.getKeyColumns(0) == null || reducer.getKeyColumns(0).length == 0); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == ExecutionConfig.PARALLELISM_DEFAULT); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
Example #11
Source File: DistinctTranslationTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void translateDistinctExpressionKey() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<CustomType> initialData = getSourcePojoDataSet(env); initialData.distinct("myInt").output(new DiscardingOutputFormat<CustomType>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); // currently distinct is translated to a Reduce ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertArrayEquals(new int[] {0}, reducer.getKeyColumns(0)); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
Example #12
Source File: JavaApiPostPass.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@SuppressWarnings("unchecked") private static <T> TypeInformation<T> getTypeInfoFromSource(SourcePlanNode node) { Operator<?> op = node.getOptimizerNode().getOperator(); if (op instanceof GenericDataSourceBase) { return ((GenericDataSourceBase<T, ?>) op).getOperatorInfo().getOutputType(); } else { throw new RuntimeException("Wrong operator type found in post pass."); } }
Example #13
Source File: DistinctTranslationTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void translateDistinctPlain2() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<CustomType> initialData = getSourcePojoDataSet(env); initialData.distinct().output(new DiscardingOutputFormat<CustomType>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); // currently distinct is translated to a Reduce ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertArrayEquals(new int[] {0}, reducer.getKeyColumns(0)); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
Example #14
Source File: DistinctTranslationTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void translateDistinctPosition() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env); initialData.distinct(1, 2).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); // currently distinct is translated to a Reduce ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertArrayEquals(new int[] {1, 2}, reducer.getKeyColumns(0)); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
Example #15
Source File: DistinctTranslationTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void translateDistinctPlain2() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<CustomType> initialData = getSourcePojoDataSet(env); initialData.distinct().output(new DiscardingOutputFormat<CustomType>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); // currently distinct is translated to a Reduce ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertArrayEquals(new int[] {0}, reducer.getKeyColumns(0)); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
Example #16
Source File: ReduceTranslationTests.java From flink with Apache License 2.0 | 5 votes |
@Test public void translateNonGroupedReduce() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env); initialData.reduce(new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() { public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) { return value1; } }).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertTrue(reducer.getKeyColumns(0) == null || reducer.getKeyColumns(0).length == 0); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == ExecutionConfig.PARALLELISM_DEFAULT); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
Example #17
Source File: DistinctTranslationTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void translateDistinctPlain() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env); initialData.distinct().output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); // currently distinct is translated to a Reduce ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertArrayEquals(new int[] {0, 1, 2}, reducer.getKeyColumns(0)); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
Example #18
Source File: KMeansSingleStepTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testCompileKMeansSingleStepWithStats() { Plan p = getKMeansPlan(); p.setExecutionConfig(new ExecutionConfig()); // set the statistics OperatorResolver cr = getContractResolver(p); GenericDataSourceBase<?, ?> pointsSource = cr.getNode(DATAPOINTS); GenericDataSourceBase<?, ?> centersSource = cr.getNode(CENTERS); setSourceStatistics(pointsSource, 100L * 1024 * 1024 * 1024, 32f); setSourceStatistics(centersSource, 1024 * 1024, 32f); OptimizedPlan plan = compileWithStats(p); checkPlan(plan); }
Example #19
Source File: AggregateTranslationTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void translateAggregate() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); @SuppressWarnings("unchecked") DataSet<Tuple3<Double, StringValue, Long>> initialData = env.fromElements(new Tuple3<Double, StringValue, Long>(3.141592, new StringValue("foobar"), Long.valueOf(77))); initialData.groupBy(0).aggregate(Aggregations.MIN, 1).and(Aggregations.SUM, 2).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, Long>>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); GroupReduceOperatorBase<?, ?, ?> reducer = (GroupReduceOperatorBase<?, ?, ?>) sink.getInput(); // check keys assertEquals(1, reducer.getKeyColumns(0).length); assertEquals(0, reducer.getKeyColumns(0)[0]); assertEquals(-1, reducer.getParallelism()); assertTrue(reducer.isCombinable()); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
Example #20
Source File: DistinctTranslationTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void translateDistinctPlain() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env); initialData.distinct().output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); // currently distinct is translated to a Reduce ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertArrayEquals(new int[] {0, 1, 2}, reducer.getKeyColumns(0)); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
Example #21
Source File: DistinctTranslationTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void translateDistinctPlain2() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<CustomType> initialData = getSourcePojoDataSet(env); initialData.distinct().output(new DiscardingOutputFormat<CustomType>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); // currently distinct is translated to a Reduce ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertArrayEquals(new int[] {0}, reducer.getKeyColumns(0)); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
Example #22
Source File: DistinctTranslationTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void translateDistinctPosition() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env); initialData.distinct(1, 2).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); // currently distinct is translated to a Reduce ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertArrayEquals(new int[] {1, 2}, reducer.getKeyColumns(0)); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
Example #23
Source File: KMeansSingleStepTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void testCompileKMeansSingleStepWithStats() { Plan p = getKMeansPlan(); p.setExecutionConfig(new ExecutionConfig()); // set the statistics OperatorResolver cr = getContractResolver(p); GenericDataSourceBase<?, ?> pointsSource = cr.getNode(DATAPOINTS); GenericDataSourceBase<?, ?> centersSource = cr.getNode(CENTERS); setSourceStatistics(pointsSource, 100L * 1024 * 1024 * 1024, 32f); setSourceStatistics(centersSource, 1024 * 1024, 32f); OptimizedPlan plan = compileWithStats(p); checkPlan(plan); }
Example #24
Source File: DistinctTranslationTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void translateDistinctExpressionKey() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<CustomType> initialData = getSourcePojoDataSet(env); initialData.distinct("myInt").output(new DiscardingOutputFormat<CustomType>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); // currently distinct is translated to a Reduce ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertArrayEquals(new int[] {0}, reducer.getKeyColumns(0)); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
Example #25
Source File: JavaApiPostPass.java From flink with Apache License 2.0 | 5 votes |
@SuppressWarnings("unchecked") private static <T> TypeInformation<T> getTypeInfoFromSource(SourcePlanNode node) { Operator<?> op = node.getOptimizerNode().getOperator(); if (op instanceof GenericDataSourceBase) { return ((GenericDataSourceBase<T, ?>) op).getOperatorInfo().getOutputType(); } else { throw new RuntimeException("Wrong operator type found in post pass."); } }
Example #26
Source File: DataSourceNode.java From flink with Apache License 2.0 | 5 votes |
/** * Creates a new DataSourceNode for the given contract. * * @param pactContract * The data source contract object. */ public DataSourceNode(GenericDataSourceBase<?, ?> pactContract) { super(pactContract); if (pactContract.getUserCodeWrapper().getUserCodeClass() == null) { throw new IllegalArgumentException("Input format has not been set."); } if (NonParallelInput.class.isAssignableFrom(pactContract.getUserCodeWrapper().getUserCodeClass())) { setParallelism(1); this.sequentialInput = true; } else { this.sequentialInput = false; } this.replicatedInput = ReplicatingInputFormat.class.isAssignableFrom( pactContract.getUserCodeWrapper().getUserCodeClass()); this.gprops = new GlobalProperties(); this.lprops = new LocalProperties(); SplitDataProperties<?> splitProps = pactContract.getSplitDataProperties(); if(replicatedInput) { this.gprops.setFullyReplicated(); this.lprops = new LocalProperties(); } else if (splitProps != null) { // configure data properties of data source using split properties setDataPropertiesFromSplitProperties(splitProps); } }
Example #27
Source File: ReduceTranslationTests.java From flink with Apache License 2.0 | 5 votes |
@Test public void translateNonGroupedReduce() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env); initialData.reduce(new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() { public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) { return value1; } }).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertTrue(reducer.getKeyColumns(0) == null || reducer.getKeyColumns(0).length == 0); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == ExecutionConfig.PARALLELISM_DEFAULT); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
Example #28
Source File: CompilerTestBase.java From flink with Apache License 2.0 | 5 votes |
@Override public boolean preVisit(Operator<?> visitable) { if(visitable instanceof GenericDataSourceBase) { sources.add((GenericDataSourceBase<?, ?>) visitable); } else if(visitable instanceof BulkIterationBase) { ((BulkIterationBase<?>) visitable).getNextPartialSolution().accept(this); } return true; }
Example #29
Source File: CachedMatchStrategyCompilerTest.java From flink with Apache License 2.0 | 5 votes |
/** * This test simulates a join of a big left side with a small right side inside of an iteration, where the small side is on a static path. * Currently the best execution plan is a HYBRIDHASH_BUILD_SECOND_CACHED, where the small side is hashed and cached. * This test also makes sure that all relevant plans are correctly enumerated by the optimizer. */ @Test public void testCorrectChoosing() { try { Plan plan = getTestPlanRightStatic(""); SourceCollectorVisitor sourceCollector = new SourceCollectorVisitor(); plan.accept(sourceCollector); for(GenericDataSourceBase<?, ?> s : sourceCollector.getSources()) { if(s.getName().equals("bigFile")) { this.setSourceStatistics(s, 10000000, 1000); } else if(s.getName().equals("smallFile")) { this.setSourceStatistics(s, 100, 100); } } OptimizedPlan oPlan = compileNoStats(plan); OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan); DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner"); // verify correct join strategy assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND_CACHED, innerJoin.getDriverStrategy()); assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode()); assertEquals(TempMode.NONE, innerJoin.getInput2().getTempMode()); new JobGraphGenerator().compileJobGraph(oPlan); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test errored: " + e.getMessage()); } }
Example #30
Source File: KMeansSingleStepTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testCompileKMeansSingleStepWithStats() throws Exception { Plan p = getKMeansPlan(); p.setExecutionConfig(new ExecutionConfig()); // set the statistics OperatorResolver cr = getContractResolver(p); GenericDataSourceBase<?, ?> pointsSource = cr.getNode(DATAPOINTS); GenericDataSourceBase<?, ?> centersSource = cr.getNode(CENTERS); setSourceStatistics(pointsSource, 100L * 1024 * 1024 * 1024, 32f); setSourceStatistics(centersSource, 1024 * 1024, 32f); OptimizedPlan plan = compileWithStats(p); checkPlan(plan); }