Java Code Examples for org.apache.flink.util.MutableObjectIterator#next()
The following examples show how to use org.apache.flink.util.MutableObjectIterator#next(). Each example notes the source file, the project it comes from, and its license.
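Before the examples, here is a minimal, self-contained sketch of the call pattern that every example below relies on: the caller hands next() a reusable instance, re-assigns the variable from the return value (the iterator may hand back a different object), and stops when null is returned. Reusing one record object this way avoids allocating a new object per record. The ArrayIterator class, the int[] record type, and the class name MutableObjectIteratorSketch are illustrative assumptions, not part of any Flink API.

import org.apache.flink.util.MutableObjectIterator;

import java.io.IOException;

public class MutableObjectIteratorSketch {

    // Hypothetical iterator that serves values from an int array via a reused int[] holder.
    static final class ArrayIterator implements MutableObjectIterator<int[]> {
        private final int[] values;
        private int pos;

        ArrayIterator(int[] values) {
            this.values = values;
        }

        @Override
        public int[] next(int[] reuse) throws IOException {
            if (pos >= values.length) {
                return null; // signals that the iterator is exhausted
            }
            reuse[0] = values[pos++];
            return reuse;
        }

        @Override
        public int[] next() throws IOException {
            // non-reuse variant: allocate a fresh holder per record
            return pos < values.length ? new int[] {values[pos++]} : null;
        }
    }

    public static void main(String[] args) throws IOException {
        MutableObjectIterator<int[]> iter = new ArrayIterator(new int[] {1, 2, 3});

        int[] record = new int[1];
        int sum = 0;
        // canonical consumption loop: re-assign from next(...) and stop on null
        while ((record = iter.next(record)) != null) {
            sum += record[0];
        }
        System.out.println("sum = " + sum); // prints 6
    }
}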
Example 1
Source File: BinaryMergeIteratorTest.java From flink with Apache License 2.0
@Test
public void testOneStream() throws Exception {
    // a single sorted input stream
    List<MutableObjectIterator<BinaryRow>> iterators = new ArrayList<>();
    iterators.add(newIterator(
            new int[]{1, 2, 4, 5, 10},
            new String[]{"1", "2", "4", "5", "10"}));

    final int[] expected = new int[]{1, 2, 4, 5, 10};

    MutableObjectIterator<BinaryRow> iterator = new BinaryMergeIterator<>(
            iterators,
            Collections.singletonList(serializer.createInstance()),
            (o1, o2) -> this.comparator.compare(o1, o2));

    // merging a single stream must reproduce it unchanged and in order
    BinaryRow row = serializer.createInstance();
    int pos = 0;
    while ((row = iterator.next(row)) != null) {
        Assert.assertEquals(expected[pos++], row.getInt(0));
    }
}
Example 2
Source File: AbstractSortMergeOuterJoinIteratorITCase.java From flink with Apache License 2.0
private Map<Integer, Collection<String>> collectData(MutableObjectIterator<Tuple2<Integer, String>> iter)
        throws Exception {
    final Map<Integer, Collection<String>> map = new HashMap<>();
    Tuple2<Integer, String> pair = new Tuple2<>();

    // drain the iterator, grouping the values by key
    while ((pair = iter.next(pair)) != null) {
        final Integer key = pair.getField(0);

        if (!map.containsKey(key)) {
            map.put(key, new ArrayList<String>());
        }

        Collection<String> values = map.get(key);
        final String value = pair.getField(1);
        values.add(value);
    }

    return map;
}
Example 3
Source File: BinaryHashTableTest.java From flink with Apache License 2.0
private int join(
        BinaryHashTable table,
        MutableObjectIterator<BinaryRow> buildInput,
        MutableObjectIterator<BinaryRow> probeInput,
        boolean buildOuterJoin) throws IOException {
    int count = 0;

    // insert the complete build side into the hash table
    BinaryRow reuseBuildSizeRow = buildSideSerializer.createInstance();
    BinaryRow buildRow;
    while ((buildRow = buildInput.next(reuseBuildSizeRow)) != null) {
        table.putBuildRow(buildRow);
    }
    table.endBuild();

    // probe the table and count the joined records
    BinaryRow probeRow = probeSideSerializer.createInstance();
    while ((probeRow = probeInput.next(probeRow)) != null) {
        if (table.tryProbe(probeRow)) {
            count += joinWithNextKey(table, buildOuterJoin);
        }
    }

    // process any remaining (spilled) partitions
    while (table.nextMatching()) {
        count += joinWithNextKey(table, buildOuterJoin);
    }
    return count;
}
Example 4
Source File: DataSourceTaskTest.java From Flink-CEPplus with Apache License 2.0
public static void prepareInputFile(MutableObjectIterator<Record> inIt, File inputFile, boolean insertInvalidData)
        throws IOException {
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(inputFile))) {
        if (insertInvalidData) {
            bw.write("####_I_AM_INVALID_########\n");
        }

        // write each record as "key_value" on its own line
        Record rec = new Record();
        while ((rec = inIt.next(rec)) != null) {
            IntValue key = rec.getField(0, IntValue.class);
            IntValue value = rec.getField(1, IntValue.class);

            bw.write(key.getValue() + "_" + value.getValue() + "\n");
        }

        if (insertInvalidData) {
            bw.write("####_I_AM_INVALID_########\n");
        }

        bw.flush();
    }
}
Example 5
Source File: IterationHeadTask.java From Flink-CEPplus with Apache License 2.0
private void streamOutFinalOutputBulk(MutableObjectIterator<X> results) throws IOException {
    final Collector<X> out = this.finalOutputCollector;

    // forward every result record to the final output collector
    X record = this.solutionTypeSerializer.getSerializer().createInstance();
    while ((record = results.next(record)) != null) {
        out.collect(record);
    }
}
Example 6
Source File: IterationHeadTask.java From Flink-CEPplus with Apache License 2.0
private void readInitialSolutionSet(JoinHashMap<X> solutionSet, MutableObjectIterator<X> solutionSetInput) throws IOException {
    TypeSerializer<X> serializer = solutionTypeSerializer.getSerializer();

    // a fresh instance is passed for every call, because the hash map keeps the objects
    X next;
    while ((next = solutionSetInput.next(serializer.createInstance())) != null) {
        solutionSet.insertOrReplace(next);
    }
}
Example 7
Source File: MutableHashTableTestBase.java From flink with Apache License 2.0
@Test
public void testEntryIterator() throws Exception {
    final int NUM_MEM_PAGES = SIZE * NUM_LISTS / PAGE_SIZE;
    AbstractMutableHashTable<IntList> table = getHashTable(serializerV, comparatorV, getMemory(NUM_MEM_PAGES));

    final Random rnd = new Random(RANDOM_SEED);
    final IntList[] lists = getRandomizedIntLists(NUM_LISTS, rnd);

    // insert all lists and remember the expected key sum
    table.open();
    int result = 0;
    for (int i = 0; i < NUM_LISTS; i++) {
        table.insert(lists[i]);
        result += lists[i].getKey();
    }

    // the entry iterator must visit every inserted element exactly once
    MutableObjectIterator<IntList> iter = table.getEntryIterator();
    IntList target = new IntList();

    int sum = 0;
    while ((target = iter.next(target)) != null) {
        sum += target.getKey();
    }
    table.close();

    assertTrue(sum == result);
    assertEquals("Memory lost", NUM_MEM_PAGES, table.getFreeMemory().size());
}
Example 8
Source File: ExternalSortITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testInMemorySort() {
    try {
        // comparator
        final TypeComparator<Integer> keyComparator = new IntComparator(true);

        final TestData.TupleGenerator generator = new TestData.TupleGenerator(
                SEED, KEY_MAX, VALUE_LENGTH, KeyMode.RANDOM, ValueMode.CONSTANT, VAL);
        final MutableObjectIterator<Tuple2<Integer, String>> source =
                new TestData.TupleGeneratorIterator(generator, NUM_PAIRS);

        // merge iterator
        LOG.debug("Initializing sortmerger...");

        Sorter<Tuple2<Integer, String>> merger = new UnilateralSortMerger<>(
                this.memoryManager, this.ioManager, source, this.parentTask,
                this.pactRecordSerializer, this.pactRecordComparator,
                (double) 64 / 78, 2, 0.9f, true /*use large record handler*/, true);

        // emit data
        LOG.debug("Reading and sorting data...");

        // check order
        MutableObjectIterator<Tuple2<Integer, String>> iterator = merger.getIterator();

        LOG.debug("Checking results...");
        int pairsEmitted = 1;

        Tuple2<Integer, String> rec1 = new Tuple2<>();
        Tuple2<Integer, String> rec2 = new Tuple2<>();

        Assert.assertTrue((rec1 = iterator.next(rec1)) != null);
        while ((rec2 = iterator.next(rec2)) != null) {
            pairsEmitted++;

            Assert.assertTrue(keyComparator.compare(rec1.f0, rec2.f0) <= 0);

            Tuple2<Integer, String> tmp = rec1;
            rec1 = rec2;
            rec2 = tmp;
        }
        Assert.assertTrue(NUM_PAIRS == pairsEmitted);

        merger.close();
        testSuccess = true;
    }
    catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example 9
Source File: HashTableITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testSpillingHashJoinOneRecursionPerformanceIntPair() throws IOException {
    final int NUM_KEYS = 1000000;
    final int BUILD_VALS_PER_KEY = 3;
    final int PROBE_VALS_PER_KEY = 10;

    // create a build input that gives 3 million pairs with 3 values sharing the same key
    MutableObjectIterator<IntPair> buildInput = new UniformIntPairGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);

    // create a probe input that gives 10 million pairs with 10 values sharing a key
    MutableObjectIterator<IntPair> probeInput = new UniformIntPairGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);

    // allocate the memory for the HashTable
    List<MemorySegment> memSegments;
    try {
        memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
    }
    catch (MemoryAllocationException maex) {
        fail("Memory for the Join could not be provided.");
        return;
    }

    // ----------------------------------------------------------------------------------------

    final MutableHashTable<IntPair, IntPair> join = new MutableHashTable<IntPair, IntPair>(
            this.pairBuildSideAccesssor, this.pairProbeSideAccesssor,
            this.pairBuildSideComparator, this.pairProbeSideComparator, this.pairComparator,
            memSegments, ioManager);
    join.open(buildInput, probeInput);

    final IntPair recordReuse = new IntPair();
    int numRecordsInJoinResult = 0;

    while (join.nextRecord()) {
        MutableObjectIterator<IntPair> buildSide = join.getBuildSideIterator();
        while (buildSide.next(recordReuse) != null) {
            numRecordsInJoinResult++;
        }
    }
    Assert.assertEquals("Wrong number of records in join result.",
            NUM_KEYS * BUILD_VALS_PER_KEY * PROBE_VALS_PER_KEY, numRecordsInJoinResult);

    join.close();

    // ----------------------------------------------------------------------------------------

    this.memManager.release(join.getFreedMemory());
}
Example 10
Source File: HashTableITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testSparseProbeSpillingWithOuterJoin() throws IOException, MemoryAllocationException {
    final int NUM_BUILD_KEYS = 1000000;
    final int NUM_BUILD_VALS = 1;
    final int NUM_PROBE_KEYS = 20;
    final int NUM_PROBE_VALS = 1;

    MutableObjectIterator<Record> buildInput = new UniformRecordGenerator(
            NUM_BUILD_KEYS, NUM_BUILD_VALS, false);

    // allocate the memory for the HashTable
    List<MemorySegment> memSegments;
    try {
        memSegments = this.memManager.allocatePages(MEM_OWNER, 96);
    }
    catch (MemoryAllocationException maex) {
        fail("Memory for the Join could not be provided.");
        return;
    }

    final MutableHashTable<Record, Record> join = new MutableHashTable<Record, Record>(
            this.recordBuildSideAccesssor, this.recordProbeSideAccesssor,
            this.recordBuildSideComparator, this.recordProbeSideComparator, this.pactRecordComparator,
            memSegments, ioManager);
    join.open(buildInput, new UniformRecordGenerator(NUM_PROBE_KEYS, NUM_PROBE_VALS, true), true);

    int expectedNumResults = (Math.max(NUM_PROBE_KEYS, NUM_BUILD_KEYS) * NUM_BUILD_VALS) * NUM_PROBE_VALS;

    final Record recordReuse = new Record();
    int numRecordsInJoinResult = 0;

    while (join.nextRecord()) {
        MutableObjectIterator<Record> buildSide = join.getBuildSideIterator();
        while (buildSide.next(recordReuse) != null) {
            numRecordsInJoinResult++;
        }
    }
    Assert.assertEquals("Wrong number of records in join result.", expectedNumResults, numRecordsInJoinResult);

    join.close();
    this.memManager.release(join.getFreedMemory());
}
Example 11
Source File: SumHashAggTestOperator.java From flink with Apache License 2.0
public void endInput() throws Exception {
    StreamRecord<BaseRow> outElement = new StreamRecord<>(null);
    JoinedRow hashAggOutput = new JoinedRow();
    GenericRow aggValueOutput = new GenericRow(1);

    if (sorter == null) {
        // no spilling, output by iterating aggregate map.
        MutableObjectIterator<BytesHashMap.Entry> iter = aggregateMap.getEntryIterator();

        BinaryRow reuseAggMapKey = new BinaryRow(1);
        BinaryRow reuseAggBuffer = new BinaryRow(1);
        BytesHashMap.Entry reuseAggMapEntry = new BytesHashMap.Entry(reuseAggMapKey, reuseAggBuffer);

        while (iter.next(reuseAggMapEntry) != null) {
            // set result and output
            aggValueOutput.setField(0, reuseAggBuffer.isNullAt(0) ? null : reuseAggBuffer.getLong(0));
            hashAggOutput.replace(reuseAggMapKey, aggValueOutput);
            getOutput().collect(outElement.replace(hashAggOutput));
        }
    } else {
        // spill the last part of the input's aggregation output buffer
        sorter.sortAndSpill(
                aggregateMap.getRecordAreaMemorySegments(),
                aggregateMap.getNumElements(),
                new BytesHashMapSpillMemorySegmentPool(aggregateMap.getBucketAreaMemorySegments()));

        // only release non-data memory in advance.
        aggregateMap.free(true);

        // fall back to sort based aggregation
        BinaryRow lastKey = null;
        JoinedRow fallbackInput = new JoinedRow();
        boolean aggSumIsNull = false;
        long aggSum = -1;

        // free hash map memory, but not release back to memory manager
        MutableObjectIterator<Tuple2<BinaryRow, BinaryRow>> iterator = sorter.getKVIterator();
        Tuple2<BinaryRow, BinaryRow> kv;
        while ((kv = iterator.next()) != null) {
            BinaryRow key = kv.f0;
            BinaryRow value = kv.f1;

            // prepare input
            fallbackInput.replace(key, value);
            if (lastKey == null) {
                // found first key group
                lastKey = key.copy();
                aggSumIsNull = true;
                aggSum = -1L;
            } else if (key.getSizeInBytes() != lastKey.getSizeInBytes() ||
                    !(BinaryRowUtil.byteArrayEquals(
                            key.getSegments()[0].getArray(),
                            lastKey.getSegments()[0].getArray(),
                            key.getSizeInBytes()))) {

                // output current group aggregate result
                aggValueOutput.setField(0, aggSumIsNull ? null : aggSum);
                hashAggOutput.replace(lastKey, aggValueOutput);
                getOutput().collect(outElement.replace(hashAggOutput));

                // found new group
                lastKey = key.copy();
                aggSumIsNull = true;
                aggSum = -1L;
            }

            if (!fallbackInput.isNullAt(1)) {
                long sumInput = fallbackInput.getLong(1);
                if (aggSumIsNull) {
                    aggSum = sumInput;
                } else {
                    aggSum = aggSum + sumInput;
                }
                aggSumIsNull = false;
            }
        }

        // output last key group aggregate result
        aggValueOutput.setField(0, aggSumIsNull ? null : aggSum);
        hashAggOutput.replace(lastKey, aggValueOutput);
        getOutput().collect(outElement.replace(hashAggOutput));
    }
}
Example 12
Source File: NormalizedKeySorterTest.java From flink with Apache License 2.0
@Test
public void testSortShortStringKeys() throws Exception {
    final int numSegments = MEMORY_SIZE / MEMORY_PAGE_SIZE;
    final List<MemorySegment> memory = this.memoryManager.allocatePages(new DummyInvokable(), numSegments);

    @SuppressWarnings("unchecked")
    TypeComparator<Tuple2<Integer, String>> accessors =
            TestData.getIntStringTupleTypeInfo().createComparator(new int[]{1}, new boolean[]{true}, 0, null);
    NormalizedKeySorter<Tuple2<Integer, String>> sorter =
            new NormalizedKeySorter<>(TestData.getIntStringTupleSerializer(), accessors, memory);

    TestData.TupleGenerator generator = new TestData.TupleGenerator(SEED, KEY_MAX, 5, KeyMode.RANDOM, ValueMode.FIX_LENGTH);

    // write the records
    Tuple2<Integer, String> record = new Tuple2<>();
    do {
        generator.next(record);
    }
    while (sorter.write(record));

    QuickSort qs = new QuickSort();
    qs.sort(sorter);

    MutableObjectIterator<Tuple2<Integer, String>> iter = sorter.getIterator();
    Tuple2<Integer, String> readTarget = new Tuple2<>();

    iter.next(readTarget);
    String last = readTarget.f1;

    while ((readTarget = iter.next(readTarget)) != null) {
        String current = readTarget.f1;

        final int cmp = last.compareTo(current);
        if (cmp > 0) {
            Assert.fail("Next value is not larger or equal to previous value.");
        }

        last = current;
    }

    // release the memory occupied by the buffers
    sorter.dispose();
    this.memoryManager.release(memory);
}
Example 13
Source File: HashTableITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testSpillingHashJoinOneRecursionPerformance() throws IOException {
    final int NUM_KEYS = 1000000;
    final int BUILD_VALS_PER_KEY = 3;
    final int PROBE_VALS_PER_KEY = 10;

    // create a build input that gives 3 million pairs with 3 values sharing the same key
    MutableObjectIterator<Record> buildInput = new UniformRecordGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);

    // create a probe input that gives 10 million pairs with 10 values sharing a key
    MutableObjectIterator<Record> probeInput = new UniformRecordGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);

    // allocate the memory for the HashTable
    List<MemorySegment> memSegments;
    try {
        memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
    }
    catch (MemoryAllocationException maex) {
        fail("Memory for the Join could not be provided.");
        return;
    }

    // ----------------------------------------------------------------------------------------

    final MutableHashTable<Record, Record> join = new MutableHashTable<Record, Record>(
            this.recordBuildSideAccesssor, this.recordProbeSideAccesssor,
            this.recordBuildSideComparator, this.recordProbeSideComparator, this.pactRecordComparator,
            memSegments, ioManager);
    join.open(buildInput, probeInput);

    final Record recordReuse = new Record();
    int numRecordsInJoinResult = 0;

    while (join.nextRecord()) {
        MutableObjectIterator<Record> buildSide = join.getBuildSideIterator();
        while (buildSide.next(recordReuse) != null) {
            numRecordsInJoinResult++;
        }
    }
    Assert.assertEquals("Wrong number of records in join result.",
            NUM_KEYS * BUILD_VALS_PER_KEY * PROBE_VALS_PER_KEY, numRecordsInJoinResult);

    join.close();

    // ----------------------------------------------------------------------------------------

    this.memManager.release(join.getFreedMemory());
}
Example 14
Source File: ExternalSortITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testSpillingSortWithIntermediateMergeIntPair() {
    try {
        // amount of pairs
        final int PAIRS = 50000000;

        // comparator
        final RandomIntPairGenerator generator = new RandomIntPairGenerator(12345678, PAIRS);
        final TypeSerializerFactory<IntPair> serializerFactory = new IntPairSerializer.IntPairSerializerFactory();
        final TypeComparator<IntPair> comparator = new TestData.IntPairComparator();

        // merge iterator
        LOG.debug("Initializing sortmerger...");

        Sorter<IntPair> merger = new UnilateralSortMerger<IntPair>(this.memoryManager, this.ioManager,
                generator, this.parentTask, serializerFactory, comparator,
                (double) 64 / 78, 4, 0.7f, true /*use large record handler*/, true);

        // emit data
        LOG.debug("Emitting data...");

        // check order
        MutableObjectIterator<IntPair> iterator = merger.getIterator();

        LOG.debug("Checking results...");
        int pairsRead = 1;
        int nextStep = PAIRS / 20;

        IntPair rec1 = new IntPair();
        IntPair rec2 = new IntPair();

        Assert.assertTrue((rec1 = iterator.next(rec1)) != null);
        while ((rec2 = iterator.next(rec2)) != null) {
            final int k1 = rec1.getKey();
            final int k2 = rec2.getKey();
            pairsRead++;

            Assert.assertTrue(k1 - k2 <= 0);

            IntPair tmp = rec1;
            rec1 = rec2;
            rec2 = tmp;

            // log
            if (pairsRead == nextStep) {
                nextStep += PAIRS / 20;
            }
        }
        Assert.assertEquals("Not all pairs were read back in.", PAIRS, pairsRead);

        merger.close();
        testSuccess = true;
    }
    catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example 15
Source File: HashTableTest.java From flink with Apache License 2.0
/**
 * Tests that the MutableHashTable spills its partitions when creating the initial table
 * without overflow segments in the partitions. This means that the records are large.
 */
@Test
public void testSpillingWhenBuildingTableWithoutOverflow() throws Exception {
    try (final IOManager ioMan = new IOManagerAsync()) {
        final TypeSerializer<byte[]> serializer = BytePrimitiveArraySerializer.INSTANCE;
        final TypeComparator<byte[]> buildComparator = new BytePrimitiveArrayComparator(true);
        final TypeComparator<byte[]> probeComparator = new BytePrimitiveArrayComparator(true);

        @SuppressWarnings("unchecked")
        final TypePairComparator<byte[], byte[]> pairComparator = new GenericPairComparator<>(
                new BytePrimitiveArrayComparator(true), new BytePrimitiveArrayComparator(true));

        final int pageSize = 128;
        final int numSegments = 33;

        List<MemorySegment> memory = getMemory(numSegments, pageSize);

        MutableHashTable<byte[], byte[]> table = new MutableHashTable<byte[], byte[]>(
                serializer,
                serializer,
                buildComparator,
                probeComparator,
                pairComparator,
                memory,
                ioMan,
                1,
                false);

        int numElements = 9;

        table.open(
                new CombiningIterator<byte[]>(
                        new ByteArrayIterator(numElements, 128, (byte) 0),
                        new ByteArrayIterator(numElements, 128, (byte) 1)),
                new CombiningIterator<byte[]>(
                        new ByteArrayIterator(1, 128, (byte) 0),
                        new ByteArrayIterator(1, 128, (byte) 1)));

        while (table.nextRecord()) {
            MutableObjectIterator<byte[]> iterator = table.getBuildSideIterator();

            int counter = 0;

            while (iterator.next() != null) {
                counter++;
            }

            // check that we retrieve all our elements
            Assert.assertEquals(numElements, counter);
        }

        table.close();
    }
}
Example 16
Source File: HashTableITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testFailingHashJoinTooManyRecursions() throws IOException {
    // the following two values are known to have a hash-code collision on the first recursion level.
    // we use them to make sure one partition grows over-proportionally large
    final int REPEATED_VALUE_1 = 40559;
    final int REPEATED_VALUE_2 = 92882;
    final int REPEATED_VALUE_COUNT = 3000000;

    final int NUM_KEYS = 1000000;
    final int BUILD_VALS_PER_KEY = 3;
    final int PROBE_VALS_PER_KEY = 10;

    // create a build input that gives 3 million pairs with 3 values sharing the same key, plus 400k pairs with two colliding keys
    MutableObjectIterator<Record> build1 = new UniformRecordGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);
    MutableObjectIterator<Record> build2 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
    MutableObjectIterator<Record> build3 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
    List<MutableObjectIterator<Record>> builds = new ArrayList<MutableObjectIterator<Record>>();
    builds.add(build1);
    builds.add(build2);
    builds.add(build3);
    MutableObjectIterator<Record> buildInput = new UnionIterator<Record>(builds);

    // create a probe input that gives 10 million pairs with 10 values sharing a key
    MutableObjectIterator<Record> probe1 = new UniformRecordGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);
    MutableObjectIterator<Record> probe2 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
    MutableObjectIterator<Record> probe3 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
    List<MutableObjectIterator<Record>> probes = new ArrayList<MutableObjectIterator<Record>>();
    probes.add(probe1);
    probes.add(probe2);
    probes.add(probe3);
    MutableObjectIterator<Record> probeInput = new UnionIterator<Record>(probes);

    // allocate the memory for the HashTable
    List<MemorySegment> memSegments;
    try {
        memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
    }
    catch (MemoryAllocationException maex) {
        fail("Memory for the Join could not be provided.");
        return;
    }

    // ----------------------------------------------------------------------------------------

    final MutableHashTable<Record, Record> join = new MutableHashTable<Record, Record>(
            this.recordBuildSideAccesssor, this.recordProbeSideAccesssor,
            this.recordBuildSideComparator, this.recordProbeSideComparator, this.pactRecordComparator,
            memSegments, ioManager);
    join.open(buildInput, probeInput);

    final Record recordReuse = new Record();

    try {
        while (join.nextRecord()) {
            MutableObjectIterator<Record> buildSide = join.getBuildSideIterator();
            if (buildSide.next(recordReuse) == null) {
                fail("No build side values found for a probe key.");
            }
            while (buildSide.next(recordReuse) != null);
        }

        fail("Hash Join must have failed due to too many recursions.");
    } catch (Exception ex) {
        // expected
    }

    join.close();

    // ----------------------------------------------------------------------------------------

    this.memManager.release(join.getFreedMemory());
}
Example 17
Source File: LongHashTableTest.java From flink with Apache License 2.0
@Test
public void testSpillingHashJoinWithTwoRecursions() throws IOException {
    // the following two values are known to have a hash-code collision on the first recursion level.
    // we use them to make sure one partition grows over-proportionally large
    final int repeatedValue1 = 40559;
    final int repeatedValue2 = 92882;
    final int repeatedValueCountBuild = 200000;
    final int repeatedValueCountProbe = 5;

    final int numKeys = 1000000;
    final int buildValsPerKey = 3;
    final int probeValsPerKey = 10;

    // create a build input that gives 3 million pairs with 3 values sharing the same key, plus 400k pairs with two colliding keys
    MutableObjectIterator<BinaryRow> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
    MutableObjectIterator<BinaryRow> build2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountBuild);
    MutableObjectIterator<BinaryRow> build3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountBuild);
    List<MutableObjectIterator<BinaryRow>> builds = new ArrayList<>();
    builds.add(build1);
    builds.add(build2);
    builds.add(build3);
    MutableObjectIterator<BinaryRow> buildInput = new UnionIterator<>(builds);

    // create a probe input that gives 10 million pairs with 10 values sharing a key
    MutableObjectIterator<BinaryRow> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
    MutableObjectIterator<BinaryRow> probe2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, 5);
    MutableObjectIterator<BinaryRow> probe3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, 5);
    List<MutableObjectIterator<BinaryRow>> probes = new ArrayList<>();
    probes.add(probe1);
    probes.add(probe2);
    probes.add(probe3);
    MutableObjectIterator<BinaryRow> probeInput = new UnionIterator<>(probes);

    // create the map for validating the results
    HashMap<Integer, Long> map = new HashMap<>(numKeys);

    final MyHashTable table = new MyHashTable(896 * PAGE_SIZE);

    BinaryRow buildRow = buildSideSerializer.createInstance();
    while ((buildRow = buildInput.next(buildRow)) != null) {
        table.putBuildRow(buildRow);
    }
    table.endBuild();

    BinaryRow probeRow = probeSideSerializer.createInstance();
    while ((probeRow = probeInput.next(probeRow)) != null) {
        if (table.tryProbe(probeRow)) {
            testJoin(table, map);
        }
    }

    while (table.nextMatching()) {
        testJoin(table, map);
    }

    table.close();

    Assert.assertEquals("Wrong number of keys", numKeys, map.size());
    for (Map.Entry<Integer, Long> entry : map.entrySet()) {
        long val = entry.getValue();
        int key = entry.getKey();

        Assert.assertEquals("Wrong number of values in per-key cross product for key " + key,
                (key == repeatedValue1 || key == repeatedValue2) ?
                        (probeValsPerKey + repeatedValueCountProbe) * (buildValsPerKey + repeatedValueCountBuild) :
                        probeValsPerKey * buildValsPerKey, val);
    }

    // ----------------------------------------------------------------------------------------

    table.free();
}
Example 18
Source File: ExternalSortITCase.java From flink with Apache License 2.0
@Test
public void testSpillingSortWithIntermediateMerge() {
    try {
        // amount of pairs
        final int PAIRS = 10000000;

        // comparator
        final TypeComparator<Integer> keyComparator = new IntComparator(true);

        final TestData.TupleGenerator generator = new TestData.TupleGenerator(
                SEED, KEY_MAX, VALUE_LENGTH, KeyMode.RANDOM, ValueMode.FIX_LENGTH);
        final MutableObjectIterator<Tuple2<Integer, String>> source =
                new TestData.TupleGeneratorIterator(generator, PAIRS);

        // merge iterator
        LOG.debug("Initializing sortmerger...");

        Sorter<Tuple2<Integer, String>> merger = new UnilateralSortMerger<>(this.memoryManager, this.ioManager,
                source, this.parentTask, this.pactRecordSerializer, this.pactRecordComparator,
                (double) 64 / 78, 16, 0.7f, true /*use large record handler*/, false);

        // emit data
        LOG.debug("Emitting data...");

        // check order
        MutableObjectIterator<Tuple2<Integer, String>> iterator = merger.getIterator();

        LOG.debug("Checking results...");
        int pairsRead = 1;
        int nextStep = PAIRS / 20;

        Tuple2<Integer, String> rec1 = new Tuple2<>();
        Tuple2<Integer, String> rec2 = new Tuple2<>();

        Assert.assertTrue((rec1 = iterator.next(rec1)) != null);
        while ((rec2 = iterator.next(rec2)) != null) {
            pairsRead++;

            Assert.assertTrue(keyComparator.compare(rec1.f0, rec2.f0) <= 0);

            Tuple2<Integer, String> tmp = rec1;
            rec1 = rec2;
            rec2 = tmp;

            // log
            if (pairsRead == nextStep) {
                nextStep += PAIRS / 20;
            }
        }
        Assert.assertEquals("Not all pairs were read back in.", PAIRS, pairsRead);

        merger.close();
        testSuccess = true;
    }
    catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example 19
Source File: HashTableITCase.java From flink with Apache License 2.0
@Test
public void testSpillingHashJoinOneRecursionPerformance() throws IOException {
    final int NUM_KEYS = 1000000;
    final int BUILD_VALS_PER_KEY = 3;
    final int PROBE_VALS_PER_KEY = 10;

    // create a build input that gives 3 million pairs with 3 values sharing the same key
    MutableObjectIterator<Record> buildInput = new UniformRecordGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);

    // create a probe input that gives 10 million pairs with 10 values sharing a key
    MutableObjectIterator<Record> probeInput = new UniformRecordGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);

    // allocate the memory for the HashTable
    List<MemorySegment> memSegments;
    try {
        memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
    }
    catch (MemoryAllocationException maex) {
        fail("Memory for the Join could not be provided.");
        return;
    }

    // ----------------------------------------------------------------------------------------

    final MutableHashTable<Record, Record> join = new MutableHashTable<Record, Record>(
            this.recordBuildSideAccesssor, this.recordProbeSideAccesssor,
            this.recordBuildSideComparator, this.recordProbeSideComparator, this.pactRecordComparator,
            memSegments, ioManager);
    join.open(buildInput, probeInput);

    final Record recordReuse = new Record();
    int numRecordsInJoinResult = 0;

    while (join.nextRecord()) {
        MutableObjectIterator<Record> buildSide = join.getBuildSideIterator();
        while (buildSide.next(recordReuse) != null) {
            numRecordsInJoinResult++;
        }
    }
    Assert.assertEquals("Wrong number of records in join result.",
            NUM_KEYS * BUILD_VALS_PER_KEY * PROBE_VALS_PER_KEY, numRecordsInJoinResult);

    join.close();

    // ----------------------------------------------------------------------------------------

    this.memManager.release(join.getFreedMemory());
}
Example 20
Source File: CombiningUnilateralSortMergerITCase.java From flink with Apache License 2.0
@Test
public void testSortAndValidate() throws Exception {
    final Hashtable<Integer, Integer> countTable = new Hashtable<>(KEY_MAX);
    for (int i = 1; i <= KEY_MAX; i++) {
        countTable.put(i, 0);
    }

    // comparator
    final TypeComparator<Integer> keyComparator = new IntComparator(true);

    // reader
    TestData.MockTuple2Reader<Tuple2<Integer, String>> reader = TestData.getIntStringTupleReader();

    // merge iterator
    LOG.debug("initializing sortmerger");

    TestCountCombiner2 comb = new TestCountCombiner2();

    Sorter<Tuple2<Integer, String>> merger = new CombiningUnilateralSortMerger<>(comb,
            this.memoryManager, this.ioManager, reader, this.parentTask, this.serializerFactory1,
            this.comparator1, 0.25, 2, 0.7f, true /* use large record handler */, false);

    // emit data
    LOG.debug("emitting data");
    TestData.TupleGenerator generator = new TestData.TupleGenerator(SEED, KEY_MAX, VALUE_LENGTH, KeyMode.RANDOM, ValueMode.FIX_LENGTH);
    Tuple2<Integer, String> rec = new Tuple2<>();

    for (int i = 0; i < NUM_PAIRS; i++) {
        Assert.assertTrue((rec = generator.next(rec)) != null);
        final Integer key = rec.f0;
        rec.setField("1", 1);
        reader.emit(rec);

        countTable.put(key, countTable.get(key) + 1);
    }
    reader.close();

    // check order
    MutableObjectIterator<Tuple2<Integer, String>> iterator = merger.getIterator();

    LOG.debug("checking results");

    Tuple2<Integer, String> rec1 = new Tuple2<>();
    Tuple2<Integer, String> rec2 = new Tuple2<>();

    Assert.assertTrue((rec1 = iterator.next(rec1)) != null);
    countTable.put(rec1.f0, countTable.get(rec1.f0) - (Integer.parseInt(rec1.f1)));

    while ((rec2 = iterator.next(rec2)) != null) {
        int k1 = rec1.f0;
        int k2 = rec2.f0;

        Assert.assertTrue(keyComparator.compare(k1, k2) <= 0);

        countTable.put(k2, countTable.get(k2) - (Integer.parseInt(rec2.f1)));
        rec1 = rec2;
    }

    for (Integer cnt : countTable.values()) {
        Assert.assertTrue(cnt == 0);
    }

    merger.close();

    // if the combiner was opened, it must have been closed
    Assert.assertTrue(comb.opened == comb.closed);
}