org.apache.flink.api.common.functions.util.CopyingListCollector Java Examples
The following examples show how to use
org.apache.flink.api.common.functions.util.CopyingListCollector.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: MapPartitionOperatorBase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Runs the map-partition user function over an in-memory list.
 *
 * <p>The input is exposed to the user function through a {@link CopyingIterator} and the
 * emitted records are stored through a {@link CopyingListCollector}; both are backed by
 * serializers created from this operator's type information.
 *
 * <p>The user function is closed on the failure path as well, so that whatever it acquired
 * in {@code open} is released even if {@code mapPartition} throws.
 *
 * @param inputData the elements forming the single input partition
 * @param ctx the runtime context handed to the user function
 * @param executionConfig the configuration used to create the serializers
 * @return the records emitted by the user function
 * @throws Exception if opening, running or closing the user function fails
 */
@Override
protected List<OUT> executeOnCollections(List<IN> inputData, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
    MapPartitionFunction<IN, OUT> function = this.userFunction.getUserCodeObject();

    FunctionUtils.setFunctionRuntimeContext(function, ctx);
    FunctionUtils.openFunction(function, this.parameters);

    // Initial capacity is a quarter of the input size; the list is trimmed once the UDF is done.
    ArrayList<OUT> result = new ArrayList<OUT>(inputData.size() / 4);

    TypeSerializer<IN> inSerializer = getOperatorInfo().getInputType().createSerializer(executionConfig);
    TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig);

    CopyingIterator<IN> source = new CopyingIterator<IN>(inputData.iterator(), inSerializer);
    CopyingListCollector<OUT> resultCollector = new CopyingListCollector<OUT>(result, outSerializer);

    try {
        function.mapPartition(source, resultCollector);
    } catch (Throwable failure) {
        // Tear the UDF down anyway, but keep the original failure as the primary exception.
        try {
            FunctionUtils.closeFunction(function);
        } catch (Throwable closeFailure) {
            failure.addSuppressed(closeFailure);
        }
        throw failure;
    }

    result.trimToSize();
    FunctionUtils.closeFunction(function);
    return result;
}
Example #2
Source File: FlatMapOperatorBase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Runs the flat-map user function over an in-memory list.
 *
 * <p>Each input element is copied with the input serializer before it is passed to the
 * user function, and emitted records are stored through a {@link CopyingListCollector}.
 *
 * <p>The user function is closed on the failure path as well, so that whatever it acquired
 * in {@code open} is released even if {@code flatMap} throws.
 *
 * @param input the input elements, processed in list order
 * @param ctx the runtime context handed to the user function
 * @param executionConfig the configuration used to create the serializers
 * @return the records emitted by the user function
 * @throws Exception if opening, running or closing the user function fails
 */
@Override
protected List<OUT> executeOnCollections(List<IN> input, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
    FlatMapFunction<IN, OUT> function = userFunction.getUserCodeObject();

    FunctionUtils.setFunctionRuntimeContext(function, ctx);
    FunctionUtils.openFunction(function, parameters);

    ArrayList<OUT> result = new ArrayList<OUT>(input.size());

    TypeSerializer<IN> inSerializer = getOperatorInfo().getInputType().createSerializer(executionConfig);
    TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig);

    CopyingListCollector<OUT> resultCollector = new CopyingListCollector<OUT>(result, outSerializer);

    try {
        for (IN element : input) {
            IN inCopy = inSerializer.copy(element);
            function.flatMap(inCopy, resultCollector);
        }
    } catch (Throwable failure) {
        // Tear the UDF down anyway, but keep the original failure as the primary exception.
        try {
            FunctionUtils.closeFunction(function);
        } catch (Throwable closeFailure) {
            failure.addSuppressed(closeFailure);
        }
        throw failure;
    }

    FunctionUtils.closeFunction(function);
    return result;
}
Example #3
Source File: MapPartitionOperatorBase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Runs the map-partition user function over an in-memory list.
 *
 * <p>The input is exposed to the user function through a {@link CopyingIterator} and the
 * emitted records are stored through a {@link CopyingListCollector}; both are backed by
 * serializers created from this operator's type information.
 *
 * <p>The user function is closed on the failure path as well, so that whatever it acquired
 * in {@code open} is released even if {@code mapPartition} throws.
 *
 * @param inputData the elements forming the single input partition
 * @param ctx the runtime context handed to the user function
 * @param executionConfig the configuration used to create the serializers
 * @return the records emitted by the user function
 * @throws Exception if opening, running or closing the user function fails
 */
@Override
protected List<OUT> executeOnCollections(List<IN> inputData, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
    MapPartitionFunction<IN, OUT> function = this.userFunction.getUserCodeObject();

    FunctionUtils.setFunctionRuntimeContext(function, ctx);
    FunctionUtils.openFunction(function, this.parameters);

    // Initial capacity is a quarter of the input size; the list is trimmed once the UDF is done.
    ArrayList<OUT> result = new ArrayList<OUT>(inputData.size() / 4);

    TypeSerializer<IN> inSerializer = getOperatorInfo().getInputType().createSerializer(executionConfig);
    TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig);

    CopyingIterator<IN> source = new CopyingIterator<IN>(inputData.iterator(), inSerializer);
    CopyingListCollector<OUT> resultCollector = new CopyingListCollector<OUT>(result, outSerializer);

    try {
        function.mapPartition(source, resultCollector);
    } catch (Throwable failure) {
        // Tear the UDF down anyway, but keep the original failure as the primary exception.
        try {
            FunctionUtils.closeFunction(function);
        } catch (Throwable closeFailure) {
            failure.addSuppressed(closeFailure);
        }
        throw failure;
    }

    result.trimToSize();
    FunctionUtils.closeFunction(function);
    return result;
}
Example #4
Source File: FlatMapOperatorBase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Runs the flat-map user function over an in-memory list.
 *
 * <p>Each input element is copied with the input serializer before it is passed to the
 * user function, and emitted records are stored through a {@link CopyingListCollector}.
 *
 * <p>The user function is closed on the failure path as well, so that whatever it acquired
 * in {@code open} is released even if {@code flatMap} throws.
 *
 * @param input the input elements, processed in list order
 * @param ctx the runtime context handed to the user function
 * @param executionConfig the configuration used to create the serializers
 * @return the records emitted by the user function
 * @throws Exception if opening, running or closing the user function fails
 */
@Override
protected List<OUT> executeOnCollections(List<IN> input, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
    FlatMapFunction<IN, OUT> function = userFunction.getUserCodeObject();

    FunctionUtils.setFunctionRuntimeContext(function, ctx);
    FunctionUtils.openFunction(function, parameters);

    ArrayList<OUT> result = new ArrayList<OUT>(input.size());

    TypeSerializer<IN> inSerializer = getOperatorInfo().getInputType().createSerializer(executionConfig);
    TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig);

    CopyingListCollector<OUT> resultCollector = new CopyingListCollector<OUT>(result, outSerializer);

    try {
        for (IN element : input) {
            IN inCopy = inSerializer.copy(element);
            function.flatMap(inCopy, resultCollector);
        }
    } catch (Throwable failure) {
        // Tear the UDF down anyway, but keep the original failure as the primary exception.
        try {
            FunctionUtils.closeFunction(function);
        } catch (Throwable closeFailure) {
            failure.addSuppressed(closeFailure);
        }
        throw failure;
    }

    FunctionUtils.closeFunction(function);
    return result;
}
Example #5
Source File: MapPartitionOperatorBase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Runs the map-partition user function over an in-memory list.
 *
 * <p>The input is exposed to the user function through a {@link CopyingIterator} and the
 * emitted records are stored through a {@link CopyingListCollector}; both are backed by
 * serializers created from this operator's type information.
 *
 * <p>The user function is closed on the failure path as well, so that whatever it acquired
 * in {@code open} is released even if {@code mapPartition} throws.
 *
 * @param inputData the elements forming the single input partition
 * @param ctx the runtime context handed to the user function
 * @param executionConfig the configuration used to create the serializers
 * @return the records emitted by the user function
 * @throws Exception if opening, running or closing the user function fails
 */
@Override
protected List<OUT> executeOnCollections(List<IN> inputData, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
    MapPartitionFunction<IN, OUT> function = this.userFunction.getUserCodeObject();

    FunctionUtils.setFunctionRuntimeContext(function, ctx);
    FunctionUtils.openFunction(function, this.parameters);

    // Initial capacity is a quarter of the input size; the list is trimmed once the UDF is done.
    ArrayList<OUT> result = new ArrayList<OUT>(inputData.size() / 4);

    TypeSerializer<IN> inSerializer = getOperatorInfo().getInputType().createSerializer(executionConfig);
    TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig);

    CopyingIterator<IN> source = new CopyingIterator<IN>(inputData.iterator(), inSerializer);
    CopyingListCollector<OUT> resultCollector = new CopyingListCollector<OUT>(result, outSerializer);

    try {
        function.mapPartition(source, resultCollector);
    } catch (Throwable failure) {
        // Tear the UDF down anyway, but keep the original failure as the primary exception.
        try {
            FunctionUtils.closeFunction(function);
        } catch (Throwable closeFailure) {
            failure.addSuppressed(closeFailure);
        }
        throw failure;
    }

    result.trimToSize();
    FunctionUtils.closeFunction(function);
    return result;
}
Example #6
Source File: FlatMapOperatorBase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Runs the flat-map user function over an in-memory list.
 *
 * <p>Each input element is copied with the input serializer before it is passed to the
 * user function, and emitted records are stored through a {@link CopyingListCollector}.
 *
 * <p>The user function is closed on the failure path as well, so that whatever it acquired
 * in {@code open} is released even if {@code flatMap} throws.
 *
 * @param input the input elements, processed in list order
 * @param ctx the runtime context handed to the user function
 * @param executionConfig the configuration used to create the serializers
 * @return the records emitted by the user function
 * @throws Exception if opening, running or closing the user function fails
 */
@Override
protected List<OUT> executeOnCollections(List<IN> input, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
    FlatMapFunction<IN, OUT> function = userFunction.getUserCodeObject();

    FunctionUtils.setFunctionRuntimeContext(function, ctx);
    FunctionUtils.openFunction(function, parameters);

    ArrayList<OUT> result = new ArrayList<OUT>(input.size());

    TypeSerializer<IN> inSerializer = getOperatorInfo().getInputType().createSerializer(executionConfig);
    TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig);

    CopyingListCollector<OUT> resultCollector = new CopyingListCollector<OUT>(result, outSerializer);

    try {
        for (IN element : input) {
            IN inCopy = inSerializer.copy(element);
            function.flatMap(inCopy, resultCollector);
        }
    } catch (Throwable failure) {
        // Tear the UDF down anyway, but keep the original failure as the primary exception.
        try {
            FunctionUtils.closeFunction(function);
        } catch (Throwable closeFailure) {
            failure.addSuppressed(closeFailure);
        }
        throw failure;
    }

    FunctionUtils.closeFunction(function);
    return result;
}
Example #7
Source File: OuterJoinOperatorBase.java From flink with Apache License 2.0 | 5 votes |
@Override protected List<OUT> executeOnCollections(List<IN1> leftInput, List<IN2> rightInput, RuntimeContext runtimeContext, ExecutionConfig executionConfig) throws Exception { TypeInformation<IN1> leftInformation = getOperatorInfo().getFirstInputType(); TypeInformation<IN2> rightInformation = getOperatorInfo().getSecondInputType(); TypeInformation<OUT> outInformation = getOperatorInfo().getOutputType(); TypeComparator<IN1> leftComparator = buildComparatorFor(0, executionConfig, leftInformation); TypeComparator<IN2> rightComparator = buildComparatorFor(1, executionConfig, rightInformation); TypeSerializer<IN1> leftSerializer = leftInformation.createSerializer(executionConfig); TypeSerializer<IN2> rightSerializer = rightInformation.createSerializer(executionConfig); OuterJoinListIterator<IN1, IN2> outerJoinIterator = new OuterJoinListIterator<>(leftInput, leftSerializer, leftComparator, rightInput, rightSerializer, rightComparator, outerJoinType); // -------------------------------------------------------------------- // Run UDF // -------------------------------------------------------------------- FlatJoinFunction<IN1, IN2, OUT> function = userFunction.getUserCodeObject(); FunctionUtils.setFunctionRuntimeContext(function, runtimeContext); FunctionUtils.openFunction(function, this.parameters); List<OUT> result = new ArrayList<>(); Collector<OUT> collector = new CopyingListCollector<>(result, outInformation.createSerializer(executionConfig)); while (outerJoinIterator.next()) { IN1 left = outerJoinIterator.getLeft(); IN2 right = outerJoinIterator.getRight(); function.join(left == null ? null : leftSerializer.copy(left), right == null ? null : rightSerializer.copy(right), collector); } FunctionUtils.closeFunction(function); return result; }
Example #8
Source File: OuterJoinOperatorBase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Override protected List<OUT> executeOnCollections(List<IN1> leftInput, List<IN2> rightInput, RuntimeContext runtimeContext, ExecutionConfig executionConfig) throws Exception { TypeInformation<IN1> leftInformation = getOperatorInfo().getFirstInputType(); TypeInformation<IN2> rightInformation = getOperatorInfo().getSecondInputType(); TypeInformation<OUT> outInformation = getOperatorInfo().getOutputType(); TypeComparator<IN1> leftComparator = buildComparatorFor(0, executionConfig, leftInformation); TypeComparator<IN2> rightComparator = buildComparatorFor(1, executionConfig, rightInformation); TypeSerializer<IN1> leftSerializer = leftInformation.createSerializer(executionConfig); TypeSerializer<IN2> rightSerializer = rightInformation.createSerializer(executionConfig); OuterJoinListIterator<IN1, IN2> outerJoinIterator = new OuterJoinListIterator<>(leftInput, leftSerializer, leftComparator, rightInput, rightSerializer, rightComparator, outerJoinType); // -------------------------------------------------------------------- // Run UDF // -------------------------------------------------------------------- FlatJoinFunction<IN1, IN2, OUT> function = userFunction.getUserCodeObject(); FunctionUtils.setFunctionRuntimeContext(function, runtimeContext); FunctionUtils.openFunction(function, this.parameters); List<OUT> result = new ArrayList<>(); Collector<OUT> collector = new CopyingListCollector<>(result, outInformation.createSerializer(executionConfig)); while (outerJoinIterator.next()) { IN1 left = outerJoinIterator.getLeft(); IN2 right = outerJoinIterator.getRight(); function.join(left == null ? null : leftSerializer.copy(left), right == null ? null : rightSerializer.copy(right), collector); } FunctionUtils.closeFunction(function); return result; }
Example #9
Source File: OuterJoinOperatorBase.java From flink with Apache License 2.0 | 5 votes |
@Override protected List<OUT> executeOnCollections(List<IN1> leftInput, List<IN2> rightInput, RuntimeContext runtimeContext, ExecutionConfig executionConfig) throws Exception { TypeInformation<IN1> leftInformation = getOperatorInfo().getFirstInputType(); TypeInformation<IN2> rightInformation = getOperatorInfo().getSecondInputType(); TypeInformation<OUT> outInformation = getOperatorInfo().getOutputType(); TypeComparator<IN1> leftComparator = buildComparatorFor(0, executionConfig, leftInformation); TypeComparator<IN2> rightComparator = buildComparatorFor(1, executionConfig, rightInformation); TypeSerializer<IN1> leftSerializer = leftInformation.createSerializer(executionConfig); TypeSerializer<IN2> rightSerializer = rightInformation.createSerializer(executionConfig); OuterJoinListIterator<IN1, IN2> outerJoinIterator = new OuterJoinListIterator<>(leftInput, leftSerializer, leftComparator, rightInput, rightSerializer, rightComparator, outerJoinType); // -------------------------------------------------------------------- // Run UDF // -------------------------------------------------------------------- FlatJoinFunction<IN1, IN2, OUT> function = userFunction.getUserCodeObject(); FunctionUtils.setFunctionRuntimeContext(function, runtimeContext); FunctionUtils.openFunction(function, this.parameters); List<OUT> result = new ArrayList<>(); Collector<OUT> collector = new CopyingListCollector<>(result, outInformation.createSerializer(executionConfig)); while (outerJoinIterator.next()) { IN1 left = outerJoinIterator.getLeft(); IN2 right = outerJoinIterator.getRight(); function.join(left == null ? null : leftSerializer.copy(left), right == null ? null : rightSerializer.copy(right), collector); } FunctionUtils.closeFunction(function); return result; }
Example #10
Source File: InPlaceMutableHashTableTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test public void testWithLengthChangingReduceFunction() throws Exception { Random rnd = new Random(RANDOM_SEED); final int numKeys = 10000; final int numVals = 10; final int numRecords = numKeys * numVals; StringPairSerializer serializer = new StringPairSerializer(); StringPairComparator comparator = new StringPairComparator(); ReduceFunction<StringPair> reducer = new ConcatReducer(); // Create the InPlaceMutableHashTableWithJavaHashMap, which will provide the correct output. List<StringPair> expectedOutput = new ArrayList<>(); InPlaceMutableHashTableWithJavaHashMap<StringPair, String> reference = new InPlaceMutableHashTableWithJavaHashMap<>( serializer, comparator, reducer, new CopyingListCollector<>(expectedOutput, serializer)); // Create the InPlaceMutableHashTable to test final int numMemPages = numRecords * 10 / PAGE_SIZE; List<StringPair> actualOutput = new ArrayList<>(); InPlaceMutableHashTable<StringPair> table = new InPlaceMutableHashTable<>(serializer, comparator, getMemory(numMemPages, PAGE_SIZE)); InPlaceMutableHashTable<StringPair>.ReduceFacade reduceFacade = table.new ReduceFacade(reducer, new CopyingListCollector<>(actualOutput, serializer), true); // The loop is for checking the feature that multiple open / close are possible. 
for(int j = 0; j < 3; j++) { table.open(); // Test emit when table is empty reduceFacade.emit(); // Process some manual stuff reference.updateTableEntryWithReduce(serializer.copy(new StringPair("foo", "bar")), "foo"); reference.updateTableEntryWithReduce(serializer.copy(new StringPair("foo", "baz")), "foo"); reference.updateTableEntryWithReduce(serializer.copy(new StringPair("alma", "xyz")), "alma"); reduceFacade.updateTableEntryWithReduce(serializer.copy(new StringPair("foo", "bar"))); reduceFacade.updateTableEntryWithReduce(serializer.copy(new StringPair("foo", "baz"))); reduceFacade.updateTableEntryWithReduce(serializer.copy(new StringPair("alma", "xyz"))); for (int i = 0; i < 5; i++) { reduceFacade.updateTableEntryWithReduce(serializer.copy(new StringPair("korte", "abc"))); reference.updateTableEntryWithReduce(serializer.copy(new StringPair("korte", "abc")), "korte"); } reference.emitAndReset(); reduceFacade.emitAndReset(); // Generate some input UniformStringPairGenerator gen = new UniformStringPairGenerator(numKeys, numVals, true); List<StringPair> input = new ArrayList<>(); StringPair cur = new StringPair(); while (gen.next(cur) != null) { input.add(serializer.copy(cur)); } Collections.shuffle(input, rnd); // Process the generated input final int numIntermingledEmits = 5; for (StringPair record : input) { reference.updateTableEntryWithReduce(serializer.copy(record), record.getKey()); reduceFacade.updateTableEntryWithReduce(serializer.copy(record)); if (rnd.nextDouble() < 1.0 / ((double) numRecords / numIntermingledEmits)) { // this will fire approx. 
numIntermingledEmits times reference.emitAndReset(); reduceFacade.emitAndReset(); } } reference.emitAndReset(); reduceFacade.emit(); table.close(); // Check results assertEquals(expectedOutput.size(), actualOutput.size()); String[] expectedValues = new String[expectedOutput.size()]; for (int i = 0; i < expectedOutput.size(); i++) { expectedValues[i] = expectedOutput.get(i).getValue(); } String[] actualValues = new String[actualOutput.size()]; for (int i = 0; i < actualOutput.size(); i++) { actualValues[i] = actualOutput.get(i).getValue(); } Arrays.sort(expectedValues, Ordering.<String>natural()); Arrays.sort(actualValues, Ordering.<String>natural()); assertArrayEquals(expectedValues, actualValues); expectedOutput.clear(); actualOutput.clear(); } }
Example #11
Source File: CoGroupRawOperatorBase.java From flink with Apache License 2.0 | 4 votes |
/**
 * Runs the co-group user function over two in-memory lists.
 *
 * <p>Each input is wrapped in a {@link SimpleListIterable} together with a comparator over
 * that input's key columns and its serializer; the user function is invoked exactly once
 * with both iterables.
 *
 * <p>The user function is closed on the failure path as well, so that whatever it acquired
 * in {@code open} is released even if {@code coGroup} throws.
 *
 * @param input1 the elements of the first input
 * @param input2 the elements of the second input
 * @param ctx the runtime context handed to the user function
 * @param executionConfig the configuration used to create serializers and comparators
 * @return the records emitted by the user function
 * @throws Exception if opening, running or closing the user function fails
 */
@Override
protected List<OUT> executeOnCollections(List<IN1> input1, List<IN2> input2, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
    // --------------------------------------------------------------------
    // Setup
    // --------------------------------------------------------------------
    TypeInformation<IN1> inputType1 = getOperatorInfo().getFirstInputType();
    TypeInformation<IN2> inputType2 = getOperatorInfo().getSecondInputType();

    int[] inputKeys1 = getKeyColumns(0);
    int[] inputKeys2 = getKeyColumns(1);

    // One direction flag per key column, all set to true
    // (presumably "ascending" -- confirm against getTypeComparator).
    boolean[] inputSortDirections1 = new boolean[inputKeys1.length];
    boolean[] inputSortDirections2 = new boolean[inputKeys2.length];
    Arrays.fill(inputSortDirections1, true);
    Arrays.fill(inputSortDirections2, true);

    final TypeSerializer<IN1> inputSerializer1 = inputType1.createSerializer(executionConfig);
    final TypeSerializer<IN2> inputSerializer2 = inputType2.createSerializer(executionConfig);

    final TypeComparator<IN1> inputComparator1 =
            getTypeComparator(executionConfig, inputType1, inputKeys1, inputSortDirections1);
    final TypeComparator<IN2> inputComparator2 =
            getTypeComparator(executionConfig, inputType2, inputKeys2, inputSortDirections2);

    SimpleListIterable<IN1> iterator1 = new SimpleListIterable<IN1>(input1, inputComparator1, inputSerializer1);
    SimpleListIterable<IN2> iterator2 = new SimpleListIterable<IN2>(input2, inputComparator2, inputSerializer2);

    // --------------------------------------------------------------------
    // Run UDF
    // --------------------------------------------------------------------
    CoGroupFunction<IN1, IN2, OUT> function = userFunction.getUserCodeObject();

    FunctionUtils.setFunctionRuntimeContext(function, ctx);
    FunctionUtils.openFunction(function, parameters);

    List<OUT> result = new ArrayList<OUT>();
    Collector<OUT> resultCollector = new CopyingListCollector<OUT>(
            result, getOperatorInfo().getOutputType().createSerializer(executionConfig));

    try {
        function.coGroup(iterator1, iterator2, resultCollector);
    } catch (Throwable failure) {
        // Tear the UDF down anyway, but keep the original failure as the primary exception.
        try {
            FunctionUtils.closeFunction(function);
        } catch (Throwable closeFailure) {
            failure.addSuppressed(closeFailure);
        }
        throw failure;
    }

    FunctionUtils.closeFunction(function);
    return result;
}
Example #12
Source File: InPlaceMutableHashTableTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test public void testWithIntPair() throws Exception { Random rnd = new Random(RANDOM_SEED); // varying the keyRange between 1000 and 1000000 can make a 5x speed difference // (because of cache misses (also in the segment arrays)) final int keyRange = 1000000; final int valueRange = 10; final int numRecords = 1000000; final IntPairSerializer serializer = new IntPairSerializer(); final TypeComparator<IntPair> comparator = new IntPairComparator(); final ReduceFunction<IntPair> reducer = new SumReducer(); // Create the InPlaceMutableHashTableWithJavaHashMap, which will provide the correct output. List<IntPair> expectedOutput = new ArrayList<>(); InPlaceMutableHashTableWithJavaHashMap<IntPair, Integer> reference = new InPlaceMutableHashTableWithJavaHashMap<>( serializer, comparator, reducer, new CopyingListCollector<>(expectedOutput, serializer)); // Create the InPlaceMutableHashTable to test final int numMemPages = keyRange * 32 / PAGE_SIZE; // memory use is proportional to the number of different keys List<IntPair> actualOutput = new ArrayList<>(); InPlaceMutableHashTable<IntPair> table = new InPlaceMutableHashTable<>( serializer, comparator, getMemory(numMemPages, PAGE_SIZE)); InPlaceMutableHashTable<IntPair>.ReduceFacade reduceFacade = table.new ReduceFacade(reducer, new CopyingListCollector<>(actualOutput, serializer), true); table.open(); // Generate some input final List<IntPair> input = new ArrayList<>(); for(int i = 0; i < numRecords; i++) { input.add(new IntPair(rnd.nextInt(keyRange), rnd.nextInt(valueRange))); } //System.out.println("start"); //long start = System.currentTimeMillis(); // Process the generated input final int numIntermingledEmits = 5; for (IntPair record: input) { reduceFacade.updateTableEntryWithReduce(serializer.copy(record)); reference.updateTableEntryWithReduce(serializer.copy(record), record.getKey()); if(rnd.nextDouble() < 1.0 / ((double)numRecords / numIntermingledEmits)) { // this will fire approx. 
numIntermingledEmits times reference.emitAndReset(); reduceFacade.emitAndReset(); } } reference.emitAndReset(); reduceFacade.emit(); table.close(); //long end = System.currentTimeMillis(); //System.out.println("stop, time: " + (end - start)); // Check results assertEquals(expectedOutput.size(), actualOutput.size()); Integer[] expectedValues = new Integer[expectedOutput.size()]; for (int i = 0; i < expectedOutput.size(); i++) { expectedValues[i] = expectedOutput.get(i).getValue(); } Integer[] actualValues = new Integer[actualOutput.size()]; for (int i = 0; i < actualOutput.size(); i++) { actualValues[i] = actualOutput.get(i).getValue(); } Arrays.sort(expectedValues, Ordering.<Integer>natural()); Arrays.sort(actualValues, Ordering.<Integer>natural()); assertArrayEquals(expectedValues, actualValues); }
Example #13
Source File: InPlaceMutableHashTableTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void testWithIntPair() throws Exception { Random rnd = new Random(RANDOM_SEED); // varying the keyRange between 1000 and 1000000 can make a 5x speed difference // (because of cache misses (also in the segment arrays)) final int keyRange = 1000000; final int valueRange = 10; final int numRecords = 1000000; final IntPairSerializer serializer = new IntPairSerializer(); final TypeComparator<IntPair> comparator = new IntPairComparator(); final ReduceFunction<IntPair> reducer = new SumReducer(); // Create the InPlaceMutableHashTableWithJavaHashMap, which will provide the correct output. List<IntPair> expectedOutput = new ArrayList<>(); InPlaceMutableHashTableWithJavaHashMap<IntPair, Integer> reference = new InPlaceMutableHashTableWithJavaHashMap<>( serializer, comparator, reducer, new CopyingListCollector<>(expectedOutput, serializer)); // Create the InPlaceMutableHashTable to test final int numMemPages = keyRange * 32 / PAGE_SIZE; // memory use is proportional to the number of different keys List<IntPair> actualOutput = new ArrayList<>(); InPlaceMutableHashTable<IntPair> table = new InPlaceMutableHashTable<>( serializer, comparator, getMemory(numMemPages, PAGE_SIZE)); InPlaceMutableHashTable<IntPair>.ReduceFacade reduceFacade = table.new ReduceFacade(reducer, new CopyingListCollector<>(actualOutput, serializer), true); table.open(); // Generate some input final List<IntPair> input = new ArrayList<>(); for(int i = 0; i < numRecords; i++) { input.add(new IntPair(rnd.nextInt(keyRange), rnd.nextInt(valueRange))); } //System.out.println("start"); //long start = System.currentTimeMillis(); // Process the generated input final int numIntermingledEmits = 5; for (IntPair record: input) { reduceFacade.updateTableEntryWithReduce(serializer.copy(record)); reference.updateTableEntryWithReduce(serializer.copy(record), record.getKey()); if(rnd.nextDouble() < 1.0 / ((double)numRecords / numIntermingledEmits)) { // this will fire approx. 
numIntermingledEmits times reference.emitAndReset(); reduceFacade.emitAndReset(); } } reference.emitAndReset(); reduceFacade.emit(); table.close(); //long end = System.currentTimeMillis(); //System.out.println("stop, time: " + (end - start)); // Check results assertEquals(expectedOutput.size(), actualOutput.size()); Integer[] expectedValues = new Integer[expectedOutput.size()]; for (int i = 0; i < expectedOutput.size(); i++) { expectedValues[i] = expectedOutput.get(i).getValue(); } Integer[] actualValues = new Integer[actualOutput.size()]; for (int i = 0; i < actualOutput.size(); i++) { actualValues[i] = actualOutput.get(i).getValue(); } Arrays.sort(expectedValues, Ordering.<Integer>natural()); Arrays.sort(actualValues, Ordering.<Integer>natural()); assertArrayEquals(expectedValues, actualValues); }
Example #14
Source File: InPlaceMutableHashTableTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void testWithLengthChangingReduceFunction() throws Exception { Random rnd = new Random(RANDOM_SEED); final int numKeys = 10000; final int numVals = 10; final int numRecords = numKeys * numVals; StringPairSerializer serializer = new StringPairSerializer(); StringPairComparator comparator = new StringPairComparator(); ReduceFunction<StringPair> reducer = new ConcatReducer(); // Create the InPlaceMutableHashTableWithJavaHashMap, which will provide the correct output. List<StringPair> expectedOutput = new ArrayList<>(); InPlaceMutableHashTableWithJavaHashMap<StringPair, String> reference = new InPlaceMutableHashTableWithJavaHashMap<>( serializer, comparator, reducer, new CopyingListCollector<>(expectedOutput, serializer)); // Create the InPlaceMutableHashTable to test final int numMemPages = numRecords * 10 / PAGE_SIZE; List<StringPair> actualOutput = new ArrayList<>(); InPlaceMutableHashTable<StringPair> table = new InPlaceMutableHashTable<>(serializer, comparator, getMemory(numMemPages, PAGE_SIZE)); InPlaceMutableHashTable<StringPair>.ReduceFacade reduceFacade = table.new ReduceFacade(reducer, new CopyingListCollector<>(actualOutput, serializer), true); // The loop is for checking the feature that multiple open / close are possible. 
for(int j = 0; j < 3; j++) { table.open(); // Test emit when table is empty reduceFacade.emit(); // Process some manual stuff reference.updateTableEntryWithReduce(serializer.copy(new StringPair("foo", "bar")), "foo"); reference.updateTableEntryWithReduce(serializer.copy(new StringPair("foo", "baz")), "foo"); reference.updateTableEntryWithReduce(serializer.copy(new StringPair("alma", "xyz")), "alma"); reduceFacade.updateTableEntryWithReduce(serializer.copy(new StringPair("foo", "bar"))); reduceFacade.updateTableEntryWithReduce(serializer.copy(new StringPair("foo", "baz"))); reduceFacade.updateTableEntryWithReduce(serializer.copy(new StringPair("alma", "xyz"))); for (int i = 0; i < 5; i++) { reduceFacade.updateTableEntryWithReduce(serializer.copy(new StringPair("korte", "abc"))); reference.updateTableEntryWithReduce(serializer.copy(new StringPair("korte", "abc")), "korte"); } reference.emitAndReset(); reduceFacade.emitAndReset(); // Generate some input UniformStringPairGenerator gen = new UniformStringPairGenerator(numKeys, numVals, true); List<StringPair> input = new ArrayList<>(); StringPair cur = new StringPair(); while (gen.next(cur) != null) { input.add(serializer.copy(cur)); } Collections.shuffle(input, rnd); // Process the generated input final int numIntermingledEmits = 5; for (StringPair record : input) { reference.updateTableEntryWithReduce(serializer.copy(record), record.getKey()); reduceFacade.updateTableEntryWithReduce(serializer.copy(record)); if (rnd.nextDouble() < 1.0 / ((double) numRecords / numIntermingledEmits)) { // this will fire approx. 
numIntermingledEmits times reference.emitAndReset(); reduceFacade.emitAndReset(); } } reference.emitAndReset(); reduceFacade.emit(); table.close(); // Check results assertEquals(expectedOutput.size(), actualOutput.size()); String[] expectedValues = new String[expectedOutput.size()]; for (int i = 0; i < expectedOutput.size(); i++) { expectedValues[i] = expectedOutput.get(i).getValue(); } String[] actualValues = new String[actualOutput.size()]; for (int i = 0; i < actualOutput.size(); i++) { actualValues[i] = actualOutput.get(i).getValue(); } Arrays.sort(expectedValues, Ordering.<String>natural()); Arrays.sort(actualValues, Ordering.<String>natural()); assertArrayEquals(expectedValues, actualValues); expectedOutput.clear(); actualOutput.clear(); } }
Example #15
Source File: CoGroupRawOperatorBase.java From flink with Apache License 2.0 | 4 votes |
/**
 * Runs the co-group user function over two in-memory lists.
 *
 * <p>Each input is wrapped in a {@link SimpleListIterable} together with a comparator over
 * that input's key columns and its serializer; the user function is invoked exactly once
 * with both iterables.
 *
 * <p>The user function is closed on the failure path as well, so that whatever it acquired
 * in {@code open} is released even if {@code coGroup} throws.
 *
 * @param input1 the elements of the first input
 * @param input2 the elements of the second input
 * @param ctx the runtime context handed to the user function
 * @param executionConfig the configuration used to create serializers and comparators
 * @return the records emitted by the user function
 * @throws Exception if opening, running or closing the user function fails
 */
@Override
protected List<OUT> executeOnCollections(List<IN1> input1, List<IN2> input2, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
    // --------------------------------------------------------------------
    // Setup
    // --------------------------------------------------------------------
    TypeInformation<IN1> inputType1 = getOperatorInfo().getFirstInputType();
    TypeInformation<IN2> inputType2 = getOperatorInfo().getSecondInputType();

    int[] inputKeys1 = getKeyColumns(0);
    int[] inputKeys2 = getKeyColumns(1);

    // One direction flag per key column, all set to true
    // (presumably "ascending" -- confirm against getTypeComparator).
    boolean[] inputSortDirections1 = new boolean[inputKeys1.length];
    boolean[] inputSortDirections2 = new boolean[inputKeys2.length];
    Arrays.fill(inputSortDirections1, true);
    Arrays.fill(inputSortDirections2, true);

    final TypeSerializer<IN1> inputSerializer1 = inputType1.createSerializer(executionConfig);
    final TypeSerializer<IN2> inputSerializer2 = inputType2.createSerializer(executionConfig);

    final TypeComparator<IN1> inputComparator1 =
            getTypeComparator(executionConfig, inputType1, inputKeys1, inputSortDirections1);
    final TypeComparator<IN2> inputComparator2 =
            getTypeComparator(executionConfig, inputType2, inputKeys2, inputSortDirections2);

    SimpleListIterable<IN1> iterator1 = new SimpleListIterable<IN1>(input1, inputComparator1, inputSerializer1);
    SimpleListIterable<IN2> iterator2 = new SimpleListIterable<IN2>(input2, inputComparator2, inputSerializer2);

    // --------------------------------------------------------------------
    // Run UDF
    // --------------------------------------------------------------------
    CoGroupFunction<IN1, IN2, OUT> function = userFunction.getUserCodeObject();

    FunctionUtils.setFunctionRuntimeContext(function, ctx);
    FunctionUtils.openFunction(function, parameters);

    List<OUT> result = new ArrayList<OUT>();
    Collector<OUT> resultCollector = new CopyingListCollector<OUT>(
            result, getOperatorInfo().getOutputType().createSerializer(executionConfig));

    try {
        function.coGroup(iterator1, iterator2, resultCollector);
    } catch (Throwable failure) {
        // Tear the UDF down anyway, but keep the original failure as the primary exception.
        try {
            FunctionUtils.closeFunction(function);
        } catch (Throwable closeFailure) {
            failure.addSuppressed(closeFailure);
        }
        throw failure;
    }

    FunctionUtils.closeFunction(function);
    return result;
}
Example #16
Source File: CoGroupRawOperatorBase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Runs the co-group user function over two in-memory lists.
 *
 * <p>Each input is wrapped in a {@link SimpleListIterable} together with a comparator over
 * that input's key columns and its serializer; the user function is invoked exactly once
 * with both iterables.
 *
 * <p>The user function is closed on the failure path as well, so that whatever it acquired
 * in {@code open} is released even if {@code coGroup} throws.
 *
 * @param input1 the elements of the first input
 * @param input2 the elements of the second input
 * @param ctx the runtime context handed to the user function
 * @param executionConfig the configuration used to create serializers and comparators
 * @return the records emitted by the user function
 * @throws Exception if opening, running or closing the user function fails
 */
@Override
protected List<OUT> executeOnCollections(List<IN1> input1, List<IN2> input2, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
    // --------------------------------------------------------------------
    // Setup
    // --------------------------------------------------------------------
    TypeInformation<IN1> inputType1 = getOperatorInfo().getFirstInputType();
    TypeInformation<IN2> inputType2 = getOperatorInfo().getSecondInputType();

    int[] inputKeys1 = getKeyColumns(0);
    int[] inputKeys2 = getKeyColumns(1);

    // One direction flag per key column, all set to true
    // (presumably "ascending" -- confirm against getTypeComparator).
    boolean[] inputSortDirections1 = new boolean[inputKeys1.length];
    boolean[] inputSortDirections2 = new boolean[inputKeys2.length];
    Arrays.fill(inputSortDirections1, true);
    Arrays.fill(inputSortDirections2, true);

    final TypeSerializer<IN1> inputSerializer1 = inputType1.createSerializer(executionConfig);
    final TypeSerializer<IN2> inputSerializer2 = inputType2.createSerializer(executionConfig);

    final TypeComparator<IN1> inputComparator1 =
            getTypeComparator(executionConfig, inputType1, inputKeys1, inputSortDirections1);
    final TypeComparator<IN2> inputComparator2 =
            getTypeComparator(executionConfig, inputType2, inputKeys2, inputSortDirections2);

    SimpleListIterable<IN1> iterator1 = new SimpleListIterable<IN1>(input1, inputComparator1, inputSerializer1);
    SimpleListIterable<IN2> iterator2 = new SimpleListIterable<IN2>(input2, inputComparator2, inputSerializer2);

    // --------------------------------------------------------------------
    // Run UDF
    // --------------------------------------------------------------------
    CoGroupFunction<IN1, IN2, OUT> function = userFunction.getUserCodeObject();

    FunctionUtils.setFunctionRuntimeContext(function, ctx);
    FunctionUtils.openFunction(function, parameters);

    List<OUT> result = new ArrayList<OUT>();
    Collector<OUT> resultCollector = new CopyingListCollector<OUT>(
            result, getOperatorInfo().getOutputType().createSerializer(executionConfig));

    try {
        function.coGroup(iterator1, iterator2, resultCollector);
    } catch (Throwable failure) {
        // Tear the UDF down anyway, but keep the original failure as the primary exception.
        try {
            FunctionUtils.closeFunction(function);
        } catch (Throwable closeFailure) {
            failure.addSuppressed(closeFailure);
        }
        throw failure;
    }

    FunctionUtils.closeFunction(function);
    return result;
}
Example #17
Source File: InPlaceMutableHashTableTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void testWithIntPair() throws Exception { Random rnd = new Random(RANDOM_SEED); // varying the keyRange between 1000 and 1000000 can make a 5x speed difference // (because of cache misses (also in the segment arrays)) final int keyRange = 1000000; final int valueRange = 10; final int numRecords = 1000000; final IntPairSerializer serializer = new IntPairSerializer(); final TypeComparator<IntPair> comparator = new IntPairComparator(); final ReduceFunction<IntPair> reducer = new SumReducer(); // Create the InPlaceMutableHashTableWithJavaHashMap, which will provide the correct output. List<IntPair> expectedOutput = new ArrayList<>(); InPlaceMutableHashTableWithJavaHashMap<IntPair, Integer> reference = new InPlaceMutableHashTableWithJavaHashMap<>( serializer, comparator, reducer, new CopyingListCollector<>(expectedOutput, serializer)); // Create the InPlaceMutableHashTable to test final int numMemPages = keyRange * 32 / PAGE_SIZE; // memory use is proportional to the number of different keys List<IntPair> actualOutput = new ArrayList<>(); InPlaceMutableHashTable<IntPair> table = new InPlaceMutableHashTable<>( serializer, comparator, getMemory(numMemPages, PAGE_SIZE)); InPlaceMutableHashTable<IntPair>.ReduceFacade reduceFacade = table.new ReduceFacade(reducer, new CopyingListCollector<>(actualOutput, serializer), true); table.open(); // Generate some input final List<IntPair> input = new ArrayList<>(); for(int i = 0; i < numRecords; i++) { input.add(new IntPair(rnd.nextInt(keyRange), rnd.nextInt(valueRange))); } //System.out.println("start"); //long start = System.currentTimeMillis(); // Process the generated input final int numIntermingledEmits = 5; for (IntPair record: input) { reduceFacade.updateTableEntryWithReduce(serializer.copy(record)); reference.updateTableEntryWithReduce(serializer.copy(record), record.getKey()); if(rnd.nextDouble() < 1.0 / ((double)numRecords / numIntermingledEmits)) { // this will fire approx. 
numIntermingledEmits times reference.emitAndReset(); reduceFacade.emitAndReset(); } } reference.emitAndReset(); reduceFacade.emit(); table.close(); //long end = System.currentTimeMillis(); //System.out.println("stop, time: " + (end - start)); // Check results assertEquals(expectedOutput.size(), actualOutput.size()); Integer[] expectedValues = new Integer[expectedOutput.size()]; for (int i = 0; i < expectedOutput.size(); i++) { expectedValues[i] = expectedOutput.get(i).getValue(); } Integer[] actualValues = new Integer[actualOutput.size()]; for (int i = 0; i < actualOutput.size(); i++) { actualValues[i] = actualOutput.get(i).getValue(); } Arrays.sort(expectedValues, Ordering.<Integer>natural()); Arrays.sort(actualValues, Ordering.<Integer>natural()); assertArrayEquals(expectedValues, actualValues); }
Example #18
Source File: InPlaceMutableHashTableTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void testWithLengthChangingReduceFunction() throws Exception { Random rnd = new Random(RANDOM_SEED); final int numKeys = 10000; final int numVals = 10; final int numRecords = numKeys * numVals; StringPairSerializer serializer = new StringPairSerializer(); StringPairComparator comparator = new StringPairComparator(); ReduceFunction<StringPair> reducer = new ConcatReducer(); // Create the InPlaceMutableHashTableWithJavaHashMap, which will provide the correct output. List<StringPair> expectedOutput = new ArrayList<>(); InPlaceMutableHashTableWithJavaHashMap<StringPair, String> reference = new InPlaceMutableHashTableWithJavaHashMap<>( serializer, comparator, reducer, new CopyingListCollector<>(expectedOutput, serializer)); // Create the InPlaceMutableHashTable to test final int numMemPages = numRecords * 10 / PAGE_SIZE; List<StringPair> actualOutput = new ArrayList<>(); InPlaceMutableHashTable<StringPair> table = new InPlaceMutableHashTable<>(serializer, comparator, getMemory(numMemPages, PAGE_SIZE)); InPlaceMutableHashTable<StringPair>.ReduceFacade reduceFacade = table.new ReduceFacade(reducer, new CopyingListCollector<>(actualOutput, serializer), true); // The loop is for checking the feature that multiple open / close are possible. 
for(int j = 0; j < 3; j++) { table.open(); // Test emit when table is empty reduceFacade.emit(); // Process some manual stuff reference.updateTableEntryWithReduce(serializer.copy(new StringPair("foo", "bar")), "foo"); reference.updateTableEntryWithReduce(serializer.copy(new StringPair("foo", "baz")), "foo"); reference.updateTableEntryWithReduce(serializer.copy(new StringPair("alma", "xyz")), "alma"); reduceFacade.updateTableEntryWithReduce(serializer.copy(new StringPair("foo", "bar"))); reduceFacade.updateTableEntryWithReduce(serializer.copy(new StringPair("foo", "baz"))); reduceFacade.updateTableEntryWithReduce(serializer.copy(new StringPair("alma", "xyz"))); for (int i = 0; i < 5; i++) { reduceFacade.updateTableEntryWithReduce(serializer.copy(new StringPair("korte", "abc"))); reference.updateTableEntryWithReduce(serializer.copy(new StringPair("korte", "abc")), "korte"); } reference.emitAndReset(); reduceFacade.emitAndReset(); // Generate some input UniformStringPairGenerator gen = new UniformStringPairGenerator(numKeys, numVals, true); List<StringPair> input = new ArrayList<>(); StringPair cur = new StringPair(); while (gen.next(cur) != null) { input.add(serializer.copy(cur)); } Collections.shuffle(input, rnd); // Process the generated input final int numIntermingledEmits = 5; for (StringPair record : input) { reference.updateTableEntryWithReduce(serializer.copy(record), record.getKey()); reduceFacade.updateTableEntryWithReduce(serializer.copy(record)); if (rnd.nextDouble() < 1.0 / ((double) numRecords / numIntermingledEmits)) { // this will fire approx. 
numIntermingledEmits times reference.emitAndReset(); reduceFacade.emitAndReset(); } } reference.emitAndReset(); reduceFacade.emit(); table.close(); // Check results assertEquals(expectedOutput.size(), actualOutput.size()); String[] expectedValues = new String[expectedOutput.size()]; for (int i = 0; i < expectedOutput.size(); i++) { expectedValues[i] = expectedOutput.get(i).getValue(); } String[] actualValues = new String[actualOutput.size()]; for (int i = 0; i < actualOutput.size(); i++) { actualValues[i] = actualOutput.get(i).getValue(); } Arrays.sort(expectedValues, Ordering.<String>natural()); Arrays.sort(actualValues, Ordering.<String>natural()); assertArrayEquals(expectedValues, actualValues); expectedOutput.clear(); actualOutput.clear(); } }