org.apache.flink.api.java.operators.Operator Java Examples
The following examples show how to use org.apache.flink.api.java.operators.Operator.
Each example is drawn from an open-source project; the source file, project, and license are noted above each snippet.
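Before the project-specific examples, here is a minimal, self-contained sketch of where Operator's parallelism API surfaces in the DataSet API. It is not taken from any of the projects below; the class name and input data are hypothetical, and it assumes Flink's legacy DataSet API (flink-java) is on the classpath.

// A minimal sketch, assuming the legacy DataSet API; class name and data are hypothetical.
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.Operator;

public class OperatorParallelismSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Every concrete DataSet transformation result (MapOperator and friends)
        // extends Operator, which carries the parallelism setting.
        DataSet<Integer> doubled = env.fromElements(1, 2, 3)
                .map(new MapFunction<Integer, Integer>() {
                    @Override
                    public Integer map(Integer value) {
                        return value * 2;
                    }
                })
                .setParallelism(4);

        // Casting back to Operator exposes the configured value; this is the
        // pattern most of the examples below rely on.
        int parallelism = ((Operator<?, ?>) doubled).getParallelism();
        System.out.println("configured parallelism: " + parallelism); // prints 4

        doubled.print(); // print() also triggers execution of the plan
    }
}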
Example #1
Source File: OperatorTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testConfigurationOfParallelism() {
    Operator operator = new MockOperator();

    // verify explicit change in parallelism
    int parallelism = 36;
    operator.setParallelism(parallelism);
    assertEquals(parallelism, operator.getParallelism());

    // verify that parallelism is reset to default flag value
    // (ExecutionConfig.PARALLELISM_DEFAULT is the -1 sentinel meaning
    // "defer to the execution environment")
    parallelism = ExecutionConfig.PARALLELISM_DEFAULT;
    operator.setParallelism(parallelism);
    assertEquals(parallelism, operator.getParallelism());
}
Example #2
Source File: OperatorTest.java From flink with Apache License 2.0
@Test
public void testConfigurationOfParallelism() {
    Operator operator = new MockOperator();

    // verify explicit change in parallelism
    int parallelism = 36;
    operator.setParallelism(parallelism);
    assertEquals(parallelism, operator.getParallelism());

    // verify that parallelism is reset to default flag value
    parallelism = ExecutionConfig.PARALLELISM_DEFAULT;
    operator.setParallelism(parallelism);
    assertEquals(parallelism, operator.getParallelism());
}
Example #3
Source File: FlinkFlowStep.java From cascading-flink with Apache License 2.0
private DataSet<Tuple> translateMerge(List<DataSet<Tuple>> inputs, FlowNode node) {

    DataSet<Tuple> unioned = null;
    TypeInformation<Tuple> type = null;

    // track the maximum parallelism across all merge inputs
    int maxDop = -1;

    for (DataSet<Tuple> input : inputs) {
        maxDop = Math.max(maxDop, ((Operator) input).getParallelism());
        if (unioned == null) {
            unioned = input;
            type = input.getType();
        } else {
            unioned = unioned.union(input);
        }
    }

    // an identity map gives the union a single downstream operator whose
    // parallelism can be pinned to the widest input
    return unioned.map(new IdMapper())
            .returns(type)
            .setParallelism(maxDop);
}
Example #4
Source File: OperatorTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testConfigurationOfResource() throws Exception {
    Operator operator = new MockOperator();

    Method opMethod = Operator.class.getDeclaredMethod(
            "setResources", ResourceSpec.class, ResourceSpec.class);
    opMethod.setAccessible(true);

    // verify explicit change in resources
    ResourceSpec minResources = ResourceSpec.newBuilder()
            .setCpuCores(1.0)
            .setHeapMemoryInMB(100)
            .build();
    ResourceSpec preferredResources = ResourceSpec.newBuilder()
            .setCpuCores(2.0)
            .setHeapMemoryInMB(200)
            .build();
    opMethod.invoke(operator, minResources, preferredResources);

    assertEquals(minResources, operator.getMinResources());
    assertEquals(preferredResources, operator.getPreferredResources());
}
Example #5
Source File: OperatorTest.java From flink with Apache License 2.0
@Test
public void testConfigurationOfResource() throws Exception {
    Operator operator = new MockOperator();

    Method opMethod = Operator.class.getDeclaredMethod(
            "setResources", ResourceSpec.class, ResourceSpec.class);
    opMethod.setAccessible(true);

    // verify explicit change in resources
    ResourceSpec minResources = ResourceSpec.newBuilder()
            .setCpuCores(1.0)
            .setHeapMemoryInMB(100)
            .build();
    ResourceSpec preferredResources = ResourceSpec.newBuilder()
            .setCpuCores(2.0)
            .setHeapMemoryInMB(200)
            .build();
    opMethod.invoke(operator, minResources, preferredResources);

    assertEquals(minResources, operator.getMinResources());
    assertEquals(preferredResources, operator.getPreferredResources());
}
Example #6
Source File: FlinkFlowStep.java From cascading-flink with Apache License 2.0
private void translateSink(FlowProcess flowProcess, DataSet<Tuple> input, FlowNode node) {

    Tap tap = this.getSingle(node.getSinkTaps());
    Configuration sinkConfig = this.getNodeConfig(node);
    tap.sinkConfInit(flowProcess, sinkConfig);

    int desiredDop = tap.getScheme().getNumSinkParts();
    int inputDop = ((Operator) input).getParallelism();
    int dop;

    if (inputDop == 1) {
        // input operators have dop 1, probably because they perform a non-keyed reduce or coGroup
        dop = 1;
    } else {
        if (desiredDop > 0) {
            // output dop explicitly set
            if (input instanceof GroupReduceOperator) {
                // input is a reduce and we must preserve its sorting;
                // we must set the desired dop also for reduce and related operators
                adjustDopOfReduceOrCoGroup((GroupReduceOperator) input, desiredDop);
            }
            dop = desiredDop;
        } else {
            dop = inputDop;
        }
    }

    input
        .output(new TapOutputFormat(node))
        .name(tap.getIdentifier())
        .setParallelism(dop)
        .withParameters(FlinkConfigConverter.toFlinkConfig(sinkConfig));
}
Example #7
Source File: FlinkFlowStep.java From cascading-flink with Apache License 2.0
private DataSet<Tuple> translateMap(DataSet<Tuple> input, FlowNode node) {

    Fields outFields = getOutScope(node).getOutValuesFields();
    registerKryoTypes(outFields);

    int dop = ((Operator) input).getParallelism();

    return input
            .mapPartition(new EachMapper(node))
            .returns(new TupleTypeInfo(outFields))
            .withParameters(this.getFlinkNodeConfig(node))
            .setParallelism(dop)
            .name("map-" + node.getID());
}
Example #8
Source File: FlinkFlowStep.java From cascading-flink with Apache License 2.0
private DataSet<Tuple2<Tuple, Tuple[]>> prepareInnerCrossInput(
        List<DataSet<Tuple>> inputs, FlowNode node, Fields[] inputFields, int dop) {

    int numJoinInputs = inputs.size();

    TypeInformation<Tuple2<Tuple, Tuple[]>> tupleJoinListsTypeInfo =
            new org.apache.flink.api.java.typeutils.TupleTypeInfo<>(
                    new TupleTypeInfo(Fields.UNKNOWN),
                    new TupleArrayTypeInfo(numJoinInputs, Arrays.copyOf(inputFields, 1))
            );

    int mapDop = ((Operator) inputs.get(0)).getParallelism();

    // prepare tuple list for join
    DataSet<Tuple2<Tuple, Tuple[]>> tupleJoinLists = inputs.get(0)
            .map(new JoinPrepareMapper(numJoinInputs, null, null))
            .returns(tupleJoinListsTypeInfo)
            .setParallelism(mapDop)
            .name("coGroup-" + node.getID());

    for (int i = 1; i < inputs.size(); i++) {
        tupleJoinListsTypeInfo =
                new org.apache.flink.api.java.typeutils.TupleTypeInfo<>(
                        new TupleTypeInfo(Fields.UNKNOWN),
                        new TupleArrayTypeInfo(numJoinInputs, Arrays.copyOf(inputFields, i + 1))
                );

        tupleJoinLists = tupleJoinLists.crossWithTiny(inputs.get(i))
                .with(new TupleAppendCrosser(i))
                .returns(tupleJoinListsTypeInfo)
                .setParallelism(dop)
                .name("coGroup-" + node.getID());
    }

    return tupleJoinLists;
}
Example #9
Source File: OperatorTest.java From flink with Apache License 2.0
@Test
public void testConfigurationOfResource() throws Exception {
    Operator operator = new MockOperator();

    Method opMethod = Operator.class.getDeclaredMethod(
            "setResources", ResourceSpec.class, ResourceSpec.class);
    opMethod.setAccessible(true);

    // verify explicit change in resources
    ResourceSpec minResources = ResourceSpec.newBuilder(1.0, 100).build();
    ResourceSpec preferredResources = ResourceSpec.newBuilder(2.0, 200).build();
    opMethod.invoke(operator, minResources, preferredResources);

    assertEquals(minResources, operator.getMinResources());
    assertEquals(preferredResources, operator.getPreferredResources());
}
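Note the API difference from Examples #4 and #5 above: this version of the test builds each ResourceSpec with a single ResourceSpec.newBuilder(cpuCores, heapMemoryMB) call instead of chaining setCpuCores()/setHeapMemoryInMB() on an empty builder, apparently reflecting a change to the builder API between the Flink releases the two snippets were taken from.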
Example #10
Source File: BootstrapTransformationTest.java From flink with Apache License 2.0
private static <T> int getParallelism(DataSet<T> dataSet) {
    // All concrete implementations of DataSet are operators, so this should always be safe.
    return ((Operator) dataSet).getParallelism();
}
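The comment above asserts an invariant rather than checking it. If you would rather have the cast verified at run time, a hedged variant might look like this (my sketch, not part of the Flink tests; the method name is hypothetical):

// Defensive variant of the helper above: check the invariant instead of assuming it.
private static <T> int getParallelismChecked(DataSet<T> dataSet) {
    if (dataSet instanceof Operator) {
        return ((Operator<?, ?>) dataSet).getParallelism();
    }
    // Unknown DataSet subclass: report the "defer to the environment" sentinel.
    return ExecutionConfig.PARALLELISM_DEFAULT;
}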
Example #11
Source File: FlinkFlowStep.java From cascading-flink with Apache License 2.0
private DataSet<Tuple3<Tuple, Integer, Tuple>> prepareBufferCoGroupInput(
        List<DataSet<Tuple>> inputs, FlowNode node,
        Fields[] inputFields, Fields[] keyFields, String[][] flinkKeys, int dop) {

    DataSet<Tuple3<Tuple, Integer, Tuple>> coGroupInput = null;

    for (int i = 0; i < inputs.size(); i++) {

        // get Flink DataSet
        DataSet<Tuple> input = inputs.get(i);

        // get keys
        int[] keyPos = inputFields[i].getPos(keyFields[i]);

        if (keyFields[i].isNone()) {
            // set default key
            keyFields[i] = new Fields("defaultKey");
        }

        TupleTypeInfo keysTypeInfo = inputFields[i].isDefined()
                ? new TupleTypeInfo(inputFields[i].select(keyFields[i]))
                : new TupleTypeInfo(Fields.UNKNOWN);

        TypeInformation<Tuple3<Tuple, Integer, Tuple>> keyedType =
                new org.apache.flink.api.java.typeutils.TupleTypeInfo<>(
                        keysTypeInfo,
                        BasicTypeInfo.INT_TYPE_INFO,
                        new TupleTypeInfo(inputFields[i])
                );

        int inputDop = ((Operator) input).getParallelism();

        // add mapper
        DataSet<Tuple3<Tuple, Integer, Tuple>> keyedInput = input
                .map(new BufferJoinKeyExtractor(i, keyPos))
                .returns(keyedType)
                .setParallelism(inputDop)
                .name("coGroup-" + node.getID());

        // add to groupByInput
        if (coGroupInput == null) {
            coGroupInput = keyedInput;
        } else {
            coGroupInput = coGroupInput.union(keyedInput);
        }
    }

    return coGroupInput;
}
Example #12
Source File: FlinkFlowStep.java From cascading-flink with Apache License 2.0
private DataSet<Tuple> translateInnerHashJoin(FlowNode node, List<DataSet<Tuple>> inputs,
        Fields[] inputFields, Fields[] keyFields, String[][] flinkKeys) {

    int numJoinInputs = inputs.size();

    // get out fields of node
    Scope outScope = getOutScope(node);
    Fields outFields;
    if (outScope.isEvery()) {
        outFields = outScope.getOutGroupingFields();
    } else {
        outFields = outScope.getOutValuesFields();
    }
    registerKryoTypes(outFields);

    int probeSideDOP = ((Operator) inputs.get(0)).getParallelism();

    if (numJoinInputs == 2) {
        // binary join
        return inputs.get(0).join(inputs.get(1), JoinHint.BROADCAST_HASH_SECOND)
                .where(flinkKeys[0]).equalTo(flinkKeys[1])
                .with(new BinaryHashJoinJoiner(node, inputFields[0], keyFields[0]))
                .withParameters(this.getFlinkNodeConfig(node))
                .setParallelism(probeSideDOP)
                .returns(new TupleTypeInfo(outFields))
                .name("hashjoin-" + node.getID());
    } else {
        // nary join
        TupleTypeInfo keysTypeInfo = inputFields[0].isDefined()
                ? new TupleTypeInfo(inputFields[0].select(keyFields[0]))
                : new TupleTypeInfo(Fields.UNKNOWN);
        keysTypeInfo.registerKeyFields(keyFields[0]);

        TypeInformation<Tuple2<Tuple, Tuple[]>> tupleJoinListsTypeInfo =
                new org.apache.flink.api.java.typeutils.TupleTypeInfo<>(
                        keysTypeInfo,
                        new TupleArrayTypeInfo(numJoinInputs - 1, Arrays.copyOf(inputFields, 1))
                );

        int mapDop = ((Operator) inputs.get(0)).getParallelism();

        // prepare tuple list for join
        DataSet<Tuple2<Tuple, Tuple[]>> tupleJoinLists = inputs.get(0)
                .map(new JoinPrepareMapper(numJoinInputs - 1, inputFields[0], keyFields[0]))
                .returns(tupleJoinListsTypeInfo)
                .setParallelism(mapDop)
                .name("hashjoin-" + node.getID());

        for (int i = 0; i < flinkKeys[0].length; i++) {
            flinkKeys[0][i] = "f0." + i;
        }

        // join all inputs except last
        for (int i = 1; i < inputs.size() - 1; i++) {

            tupleJoinListsTypeInfo =
                    new org.apache.flink.api.java.typeutils.TupleTypeInfo<>(
                            keysTypeInfo,
                            new TupleArrayTypeInfo(numJoinInputs - 1, Arrays.copyOf(inputFields, i + 1))
                    );

            tupleJoinLists = tupleJoinLists.join(inputs.get(i), JoinHint.BROADCAST_HASH_SECOND)
                    .where(flinkKeys[0]).equalTo(flinkKeys[i])
                    .with(new TupleAppendJoiner(i))
                    .returns(tupleJoinListsTypeInfo)
                    .withForwardedFieldsFirst(flinkKeys[0])
                    .setParallelism(probeSideDOP)
                    .name("hashjoin-" + node.getID());
        }

        // join last input
        return tupleJoinLists.join(inputs.get(numJoinInputs - 1), JoinHint.BROADCAST_HASH_SECOND)
                .where(flinkKeys[0]).equalTo(flinkKeys[numJoinInputs - 1])
                .with(new NaryHashJoinJoiner(node, numJoinInputs))
                .withParameters(this.getFlinkNodeConfig(node))
                .setParallelism(probeSideDOP)
                .returns(new TupleTypeInfo(outFields))
                .name("hashjoin-" + node.getID());
    }
}
Example #13
Source File: FlinkFlowStep.java From cascading-flink with Apache License 2.0
private DataSet<Tuple> translateLeftHashJoin(FlowNode node, List<DataSet<Tuple>> inputs,
        Fields[] inputFields, Fields[] keyFields, String[][] flinkKeys) {

    int numJoinInputs = inputs.size();

    // get out fields of node
    Scope outScope = getOutScope(node);
    Fields outFields;
    if (outScope.isEvery()) {
        outFields = outScope.getOutGroupingFields();
    } else {
        outFields = outScope.getOutValuesFields();
    }
    registerKryoTypes(outFields);

    int probeSideDOP = ((Operator) inputs.get(0)).getParallelism();

    if (numJoinInputs == 2) {
        // binary join
        return inputs.get(0)
                .leftOuterJoin(inputs.get(1), JoinHint.BROADCAST_HASH_SECOND)
                .where(flinkKeys[0]).equalTo(flinkKeys[1])
                .with(new BinaryHashJoinJoiner(node, inputFields[0], keyFields[0]))
                .withParameters(this.getFlinkNodeConfig(node))
                .setParallelism(probeSideDOP)
                .returns(new TupleTypeInfo(outFields))
                .name("hashjoin-" + node.getID());
    } else {
        // nary join
        TupleTypeInfo keysTypeInfo = inputFields[0].isDefined()
                ? new TupleTypeInfo(inputFields[0].select(keyFields[0]))
                : new TupleTypeInfo(Fields.UNKNOWN);
        keysTypeInfo.registerKeyFields(keyFields[0]);

        TypeInformation<Tuple2<Tuple, Tuple[]>> tupleJoinListsTypeInfo =
                new org.apache.flink.api.java.typeutils.TupleTypeInfo<>(
                        keysTypeInfo,
                        new TupleArrayTypeInfo(numJoinInputs - 1, Arrays.copyOf(inputFields, 1))
                );

        // prepare tuple list for join
        DataSet<Tuple2<Tuple, Tuple[]>> tupleJoinLists = inputs.get(0)
                .map(new JoinPrepareMapper(numJoinInputs - 1, inputFields[0], keyFields[0]))
                .returns(tupleJoinListsTypeInfo)
                .setParallelism(probeSideDOP)
                .name("hashjoin-" + node.getID());

        for (int i = 0; i < flinkKeys[0].length; i++) {
            flinkKeys[0][i] = "f0." + i;
        }

        // join all inputs except last
        for (int i = 1; i < inputs.size() - 1; i++) {

            tupleJoinListsTypeInfo =
                    new org.apache.flink.api.java.typeutils.TupleTypeInfo<>(
                            keysTypeInfo,
                            new TupleArrayTypeInfo(numJoinInputs - 1, Arrays.copyOf(inputFields, i + 1))
                    );

            tupleJoinLists = tupleJoinLists
                    .join(inputs.get(i), JoinHint.BROADCAST_HASH_SECOND)
                    .where(flinkKeys[0]).equalTo(flinkKeys[i])
                    .with(new TupleAppendJoiner(i))
                    .returns(tupleJoinListsTypeInfo)
                    .withForwardedFieldsFirst(flinkKeys[0])
                    .setParallelism(probeSideDOP)
                    .name("hashjoin-" + node.getID());
        }

        // join last input
        return tupleJoinLists
                .leftOuterJoin(inputs.get(numJoinInputs - 1), JoinHint.BROADCAST_HASH_SECOND)
                .where(flinkKeys[0]).equalTo(flinkKeys[numJoinInputs - 1])
                .with(new NaryHashJoinJoiner(node, numJoinInputs))
                .withParameters(this.getFlinkNodeConfig(node))
                .setParallelism(probeSideDOP)
                .returns(new TupleTypeInfo(outFields))
                .name("hashjoin-" + node.getID());
    }
}
Example #14
Source File: FlinkFlowStep.java From cascading-flink with Apache License 2.0
private DataSet<Tuple> translateInnerCrossProduct(FlowNode node, List<DataSet<Tuple>> inputs) {

    int numJoinInputs = inputs.size();

    // get out fields of node
    Scope outScope = getOutScope(node);
    Fields outFields;
    if (outScope.isEvery()) {
        outFields = outScope.getOutGroupingFields();
    } else {
        outFields = outScope.getOutValuesFields();
    }
    registerKryoTypes(outFields);

    int probeSideDOP = ((Operator) inputs.get(0)).getParallelism();

    TypeInformation<Tuple2<Tuple, Tuple[]>> tupleJoinListsTypeInfo =
            new org.apache.flink.api.java.typeutils.TupleTypeInfo<>(
                    new TupleTypeInfo(Fields.UNKNOWN),
                    ObjectArrayTypeInfo.getInfoFor(new TupleTypeInfo(Fields.UNKNOWN))
            );

    // prepare tuple list for join
    DataSet<Tuple2<Tuple, Tuple[]>> tupleJoinLists = inputs.get(0)
            .map(new JoinPrepareMapper(numJoinInputs, null, null))
            .returns(tupleJoinListsTypeInfo)
            .setParallelism(probeSideDOP)
            .name("hashjoin-" + node.getID());

    for (int i = 1; i < inputs.size(); i++) {
        tupleJoinLists = tupleJoinLists.crossWithTiny(inputs.get(i))
                .with(new TupleAppendCrosser(i))
                .returns(tupleJoinListsTypeInfo)
                .setParallelism(probeSideDOP)
                .name("hashjoin-" + node.getID());
    }

    return tupleJoinLists
            .mapPartition(new HashJoinMapper(node))
            .withParameters(this.getFlinkNodeConfig(node))
            .setParallelism(probeSideDOP)
            .returns(new TupleTypeInfo(outFields))
            .name("hashjoin-" + node.getID());
}