org.apache.flink.runtime.operators.testutils.UnionIterator Java Examples

Source File: ReOpenableHashTableITCase.java From flink with Apache License 2.0

5 votes

/**
 * Assembles the probe-side input from three tuple sources and hands them, in order,
 * to a single {@link UnionIterator}.
 *
 * @param numKeys first argument of the {@code UniformIntTupleGenerator}
 * @param probeValsPerKey second argument of the {@code UniformIntTupleGenerator}
 * @param repeatedValue1 first argument of the first constant-tuple iterator
 * @param repeatedValue2 first argument of the second constant-tuple iterator
 * @return the union iterator over the three sources
 */
private MutableObjectIterator<Tuple2<Integer, Integer>> getProbeInput(
		final int numKeys,
		final int probeValsPerKey,
		final int repeatedValue1,
		final int repeatedValue2) {
	// The list is passed straight to UnionIterator, so a plain mutable ArrayList is kept.
	final List<MutableObjectIterator<Tuple2<Integer, Integer>>> sources = new ArrayList<>();
	sources.add(new UniformIntTupleGenerator(numKeys, probeValsPerKey, true));
	sources.add(new TestData.ConstantIntIntTuplesIterator(repeatedValue1, 17, 5));
	sources.add(new TestData.ConstantIntIntTuplesIterator(repeatedValue2, 23, 5));
	return new UnionIterator<>(sources);
}

Source File: ReOpenableHashTableITCase.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Creates the probe input as a {@link UnionIterator} over one generated tuple source
 * and two constant-tuple sources, added in that order.
 *
 * @param numKeys first argument of the {@code UniformIntTupleGenerator}
 * @param probeValsPerKey second argument of the {@code UniformIntTupleGenerator}
 * @param repeatedValue1 first argument of the first constant-tuple iterator
 * @param repeatedValue2 first argument of the second constant-tuple iterator
 * @return the union iterator over the three sources
 */
private MutableObjectIterator<Tuple2<Integer, Integer>> getProbeInput(
		final int numKeys,
		final int probeValsPerKey,
		final int repeatedValue1,
		final int repeatedValue2) {
	final MutableObjectIterator<Tuple2<Integer, Integer>> generatedTuples =
			new UniformIntTupleGenerator(numKeys, probeValsPerKey, true);
	final MutableObjectIterator<Tuple2<Integer, Integer>> firstConstantRun =
			new TestData.ConstantIntIntTuplesIterator(repeatedValue1, 17, 5);
	final MutableObjectIterator<Tuple2<Integer, Integer>> secondConstantRun =
			new TestData.ConstantIntIntTuplesIterator(repeatedValue2, 23, 5);

	final List<MutableObjectIterator<Tuple2<Integer, Integer>>> inputs = new ArrayList<>();
	inputs.add(generatedTuples);
	inputs.add(firstConstantRun);
	inputs.add(secondConstantRun);

	return new UnionIterator<>(inputs);
}

Source File: ReOpenableHashTableITCase.java From flink with Apache License 2.0

5 votes

/**
 * Returns a {@link UnionIterator} wrapping, in order, a {@code UniformIntTupleGenerator}
 * and two {@code ConstantIntIntTuplesIterator} instances.
 *
 * @param numKeys first argument of the {@code UniformIntTupleGenerator}
 * @param probeValsPerKey second argument of the {@code UniformIntTupleGenerator}
 * @param repeatedValue1 first argument of the first constant-tuple iterator
 * @param repeatedValue2 first argument of the second constant-tuple iterator
 * @return the union iterator over the three sources
 */
private MutableObjectIterator<Tuple2<Integer, Integer>> getProbeInput(
		final int numKeys, final int probeValsPerKey,
		final int repeatedValue1, final int repeatedValue2) {

	final List<MutableObjectIterator<Tuple2<Integer, Integer>>> probeSources = new ArrayList<>();

	// generated tuples first
	probeSources.add(new UniformIntTupleGenerator(numKeys, probeValsPerKey, true));

	// then the two constant-tuple sources
	probeSources.add(new TestData.ConstantIntIntTuplesIterator(repeatedValue1, 17, 5));
	probeSources.add(new TestData.ConstantIntIntTuplesIterator(repeatedValue2, 23, 5));

	final MutableObjectIterator<Tuple2<Integer, Integer>> union = new UnionIterator<>(probeSources);
	return union;
}

Source File: CombinerOversizedRecordsTest.java From flink with Apache License 2.0

4 votes

/**
 * Feeds a {@code GroupReduceCombineDriver} an input in which three records carrying a
 * 10,000,000-character string are interleaved with two generated runs of small records,
 * then checks the driver's oversized-record count and the size of the output list.
 */
@Test
public void testOversizedRecordCombineTask() {
	try {
		final int keyCnt = 100;
		final int valCnt = 20;

		// build the heavy payload: 10,000,000 random lowercase letters
		StringBuilder payloadBuilder = new StringBuilder(10 * 1024 * 1024);
		Random random = new Random();
		for (int remaining = 10000000; remaining > 0; remaining--) {
			payloadBuilder.append((char) (random.nextInt(26) + 'a'));
		}
		String longString = payloadBuilder.toString();
		// drop the builder reference; only the finished string is needed from here on
		payloadBuilder = null;

		// The input is the union of:
		//   long string, generated values, long string, generated values, long string

		MutableObjectIterator<Tuple2<Integer, Integer>> firstGenerated =
				new UniformIntTupleGenerator(keyCnt, valCnt, false);
		MutableObjectIterator<Tuple2<Integer, Integer>> secondGenerated =
				new UniformIntTupleGenerator(keyCnt, valCnt, false);

		// each oversized record gets its own tuple instance
		MutableObjectIterator<Tuple3<Integer, Integer, String>> oversizedHead =
				new SingleValueIterator<Tuple3<Integer, Integer, String>>(
						new Tuple3<Integer, Integer, String>(-1, -1, longString));
		MutableObjectIterator<Tuple3<Integer, Integer, String>> decoratedFirst =
				new StringIteratorDecorator(firstGenerated);
		MutableObjectIterator<Tuple3<Integer, Integer, String>> oversizedMiddle =
				new SingleValueIterator<Tuple3<Integer, Integer, String>>(
						new Tuple3<Integer, Integer, String>(-1, -1, longString));
		MutableObjectIterator<Tuple3<Integer, Integer, String>> decoratedSecond =
				new StringIteratorDecorator(secondGenerated);
		MutableObjectIterator<Tuple3<Integer, Integer, String>> oversizedTail =
				new SingleValueIterator<Tuple3<Integer, Integer, String>>(
						new Tuple3<Integer, Integer, String>(-1, -1, longString));

		@SuppressWarnings("unchecked")
		MutableObjectIterator<Tuple3<Integer, Integer, String>> input =
				new UnionIterator<Tuple3<Integer, Integer, String>>(
						oversizedHead,
						decoratedFirst,
						oversizedMiddle,
						decoratedSecond,
						oversizedTail);

		// wire up the test harness
		setInput(input, serializer);
		addDriverComparator(this.comparator);
		addDriverComparator(this.comparator);
		setOutput(this.outList, this.outSerializer);

		getTaskConfig().setDriverStrategy(DriverStrategy.SORTED_GROUP_COMBINE);
		getTaskConfig().setRelativeMemoryDriver(combine_frac);
		getTaskConfig().setFilehandlesDriver(2);

		GroupReduceCombineDriver<Tuple3<Integer, Integer, String>, Tuple3<Integer, Double, String>> driver =
				new GroupReduceCombineDriver<Tuple3<Integer, Integer, String>, Tuple3<Integer, Double, String>>();

		testDriver(driver, TestCombiner.class);

		// three oversized records went in
		assertEquals(3, driver.getOversizedRecordCount());

		// either of the two output sizes is accepted
		final int produced = outList.size();
		assertTrue(produced == keyCnt + 3 || produced == 2 * keyCnt + 3);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}

Source File: NonReusingHashJoinIteratorITCase.java From flink with Apache License 2.0

4 votes

/**
 * Runs a {@code NonReusingBuildFirstHashJoinIterator} over two inputs that both contain
 * a block of records sharing one key, and fails if any per-key collection in the
 * expected-matches map is still non-empty afterwards.
 */
@Test
public void testBuildFirstWithHighNumberOfCommonKeys()
{
	// number of generated records on the left and right input
	final int leftSize = 200;
	final int rightSize = 100;

	// number of additional records with the shared key on each input
	final int leftDuplicates = 10;
	final int rightDuplicates = 2000;
	final int sharedKey = 13;

	try {
		TupleGenerator leftGenerator = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TupleGenerator rightGenerator = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);

		final TestData.TupleGeneratorIterator leftGenerated = new TestData.TupleGeneratorIterator(leftGenerator, leftSize);
		final TestData.TupleGeneratorIterator rightGenerated = new TestData.TupleGeneratorIterator(rightGenerator, rightSize);

		final TestData.TupleConstantValueIterator leftConstant =
				new TestData.TupleConstantValueIterator(sharedKey, "LEFT String for Duplicate Keys", leftDuplicates);
		final TestData.TupleConstantValueIterator rightConstant =
				new TestData.TupleConstantValueIterator(sharedKey, "RIGHT String for Duplicate Keys", rightDuplicates);

		final List<MutableObjectIterator<Tuple2<Integer, String>>> leftSources = new ArrayList<>();
		leftSources.add(leftGenerated);
		leftSources.add(leftConstant);

		final List<MutableObjectIterator<Tuple2<Integer, String>>> rightSources = new ArrayList<>();
		rightSources.add(rightGenerated);
		rightSources.add(rightConstant);

		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(leftSources);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(rightSources);

		// first pass over the data: compute the expected matches
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));

		// second pass: rewind every generator and iterator ...
		leftGenerator.reset();
		rightGenerator.reset();
		leftConstant.reset();
		rightConstant.reset();
		leftGenerated.reset();
		rightGenerated.reset();

		// ... refill the source lists ...
		leftSources.clear();
		leftSources.add(leftGenerated);
		leftSources.add(leftConstant);

		rightSources.clear();
		rightSources.add(rightGenerated);
		rightSources.add(rightConstant);

		// ... and build fresh union iterators for the join under test
		input1 = new UnionIterator<>(leftSources);
		input2 = new UnionIterator<>(rightSources);

		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		final NonReusingBuildFirstHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
				new NonReusingBuildFirstHashJoinIterator<>(
					input1, input2, this.recordSerializer, this.record1Comparator,
					this.recordSerializer, this.record2Comparator, this.recordPairComparator,
					this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);

		iterator.open();

		// keep calling until the iterator reports that no further key is available
		boolean hasMore;
		do {
			hasMore = iterator.callWithNextKey(matcher, collector);
		} while (hasMore);

		iterator.close();

		// every per-key collection of expected matches must be empty by now
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (entry.getValue().isEmpty()) {
				continue;
			}
			Assert.fail("Collection for key " + entry.getKey() + " is not empty");
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}

Source File: NonReusingHashJoinIteratorITCase.java From flink with Apache License 2.0

4 votes

/**
 * Runs a {@code NonReusingBuildSecondHashJoinIterator} over two inputs that both contain
 * a block of records sharing one key, and fails if any per-key collection in the
 * expected-matches map is still non-empty afterwards.
 */
@Test
public void testBuildSecondWithHighNumberOfCommonKeys()
{
	// number of generated records on the left and right input
	final int leftSize = 200;
	final int rightSize = 100;

	// number of additional records with the shared key on each input
	final int leftDuplicates = 10;
	final int rightDuplicates = 2000;
	final int sharedKey = 13;

	try {
		TupleGenerator leftGenerator = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TupleGenerator rightGenerator = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);

		final TestData.TupleGeneratorIterator leftGenerated = new TestData.TupleGeneratorIterator(leftGenerator, leftSize);
		final TestData.TupleGeneratorIterator rightGenerated = new TestData.TupleGeneratorIterator(rightGenerator, rightSize);

		final TestData.TupleConstantValueIterator leftConstant =
				new TestData.TupleConstantValueIterator(sharedKey, "LEFT String for Duplicate Keys", leftDuplicates);
		final TestData.TupleConstantValueIterator rightConstant =
				new TestData.TupleConstantValueIterator(sharedKey, "RIGHT String for Duplicate Keys", rightDuplicates);

		final List<MutableObjectIterator<Tuple2<Integer, String>>> leftSources = new ArrayList<>();
		leftSources.add(leftGenerated);
		leftSources.add(leftConstant);

		final List<MutableObjectIterator<Tuple2<Integer, String>>> rightSources = new ArrayList<>();
		rightSources.add(rightGenerated);
		rightSources.add(rightConstant);

		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(leftSources);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(rightSources);

		// first pass over the data: compute the expected matches
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));

		// second pass: rewind every generator and iterator ...
		leftGenerator.reset();
		rightGenerator.reset();
		leftConstant.reset();
		rightConstant.reset();
		leftGenerated.reset();
		rightGenerated.reset();

		// ... refill the source lists ...
		leftSources.clear();
		leftSources.add(leftGenerated);
		leftSources.add(leftConstant);

		rightSources.clear();
		rightSources.add(rightGenerated);
		rightSources.add(rightConstant);

		// ... and build fresh union iterators for the join under test
		input1 = new UnionIterator<>(leftSources);
		input2 = new UnionIterator<>(rightSources);

		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		final NonReusingBuildSecondHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
			new NonReusingBuildSecondHashJoinIterator<>(
				input1, input2, this.recordSerializer, this.record1Comparator,
				this.recordSerializer, this.record2Comparator, this.recordPairComparator,
				this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);

		iterator.open();

		// keep calling until the iterator reports that no further key is available
		boolean hasMore;
		do {
			hasMore = iterator.callWithNextKey(matcher, collector);
		} while (hasMore);

		iterator.close();

		// every per-key collection of expected matches must be empty by now
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (entry.getValue().isEmpty()) {
				continue;
			}
			Assert.fail("Collection for key " + entry.getKey() + " is not empty");
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}

Source File: LongHashTableTest.java From flink with Apache License 2.0

4 votes

/**
 * Builds a {@code MyHashTable} from a build input that contains two heavily repeated keys,
 * probes it, and checks the per-key counts collected in the validation map (the map is
 * passed to {@code testJoin}, which presumably fills it).
 *
 * @throws IOException declared because the iterator and hash-table calls may throw it
 */
@Test
public void testSpillingHashJoinWithMassiveCollisions() throws IOException {
	// the following two values are known to have a hash-code collision on the initial level.
	// we use them to make sure one partition grows over-proportionally large
	final int repeatedValue1 = 40559;
	final int repeatedValue2 = 92882;
	final int repeatedValueCountBuild = 200000;
	final int repeatedValueCountProbe = 5;

	final int numKeys = 1000000;
	final int buildValsPerKey = 3;
	final int probeValsPerKey = 10;

	// create a build input that gives 3 million pairs with 3 values sharing the same key, plus 400k pairs with two colliding keys
	MutableObjectIterator<BinaryRowData> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
	MutableObjectIterator<BinaryRowData> build2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountBuild);
	MutableObjectIterator<BinaryRowData> build3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountBuild);
	List<MutableObjectIterator<BinaryRowData>> builds = new ArrayList<>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<BinaryRowData> buildInput = new UnionIterator<>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key.
	// the repeated keys use repeatedValueCountProbe so that the input stays in sync
	// with the expected value computed for the assertion below
	MutableObjectIterator<BinaryRowData> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
	MutableObjectIterator<BinaryRowData> probe2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountProbe);
	MutableObjectIterator<BinaryRowData> probe3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountProbe);
	List<MutableObjectIterator<BinaryRowData>> probes = new ArrayList<>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<BinaryRowData> probeInput = new UnionIterator<>(probes);

	// create the map for validating the results
	HashMap<Integer, Long> map = new HashMap<>(numKeys);

	final MyHashTable table = new MyHashTable(896 * PAGE_SIZE);

	// build phase
	BinaryRowData buildRow = buildSideSerializer.createInstance();
	while ((buildRow = buildInput.next(buildRow)) != null) {
		table.putBuildRow(buildRow);
	}
	table.endBuild();

	// probe phase
	BinaryRowData probeRow = probeSideSerializer.createInstance();
	while ((probeRow = probeInput.next(probeRow)) != null) {
		if (table.tryProbe(probeRow)) {
			testJoin(table, map);
		}
	}

	while (table.nextMatching()) {
		testJoin(table, map);
	}

	table.close();

	// expected per-key counts; they do not depend on the entry, so compute them once
	final long expectedForRepeatedKey =
			(probeValsPerKey + repeatedValueCountProbe) * (buildValsPerKey + repeatedValueCountBuild);
	final long expectedForRegularKey = probeValsPerKey * buildValsPerKey;

	Assert.assertEquals("Wrong number of keys", numKeys, map.size());
	for (Map.Entry<Integer, Long> entry : map.entrySet()) {
		long val = entry.getValue();
		int key = entry.getKey();

		Assert.assertEquals("Wrong number of values in per-key cross product for key " + key,
				(key == repeatedValue1 || key == repeatedValue2) ?
						expectedForRepeatedKey :
						expectedForRegularKey, val);
	}

	// ----------------------------------------------------------------------------------------

	table.free();
}

Source File: LongHashTableTest.java From flink with Apache License 2.0

4 votes

/**
 * Builds a {@code MyHashTable} from a build input that contains two heavily repeated keys,
 * probes it, and checks the per-key counts collected in the validation map (the map is
 * passed to {@code testJoin}, which presumably fills it).
 *
 * @throws IOException declared because the iterator and hash-table calls may throw it
 */
@Test
public void testSpillingHashJoinWithTwoRecursions() throws IOException {
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int repeatedValue1 = 40559;
	final int repeatedValue2 = 92882;
	final int repeatedValueCountBuild = 200000;
	final int repeatedValueCountProbe = 5;

	final int numKeys = 1000000;
	final int buildValsPerKey = 3;
	final int probeValsPerKey = 10;

	// create a build input that gives 3 million pairs with 3 values sharing the same key, plus 400k pairs with two colliding keys
	MutableObjectIterator<BinaryRowData> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
	MutableObjectIterator<BinaryRowData> build2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountBuild);
	MutableObjectIterator<BinaryRowData> build3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountBuild);
	List<MutableObjectIterator<BinaryRowData>> builds = new ArrayList<>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<BinaryRowData> buildInput = new UnionIterator<>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key.
	// the repeated keys use repeatedValueCountProbe so that the input stays in sync
	// with the expected value computed for the assertion below
	MutableObjectIterator<BinaryRowData> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
	MutableObjectIterator<BinaryRowData> probe2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountProbe);
	MutableObjectIterator<BinaryRowData> probe3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountProbe);
	List<MutableObjectIterator<BinaryRowData>> probes = new ArrayList<>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<BinaryRowData> probeInput = new UnionIterator<>(probes);

	// create the map for validating the results
	HashMap<Integer, Long> map = new HashMap<>(numKeys);

	final MyHashTable table = new MyHashTable(896 * PAGE_SIZE);

	// build phase
	BinaryRowData buildRow = buildSideSerializer.createInstance();
	while ((buildRow = buildInput.next(buildRow)) != null) {
		table.putBuildRow(buildRow);
	}
	table.endBuild();

	// probe phase
	BinaryRowData probeRow = probeSideSerializer.createInstance();
	while ((probeRow = probeInput.next(probeRow)) != null) {
		if (table.tryProbe(probeRow)) {
			testJoin(table, map);
		}
	}

	while (table.nextMatching()) {
		testJoin(table, map);
	}

	table.close();

	// expected per-key counts; they do not depend on the entry, so compute them once
	final long expectedForRepeatedKey =
			(probeValsPerKey + repeatedValueCountProbe) * (buildValsPerKey + repeatedValueCountBuild);
	final long expectedForRegularKey = probeValsPerKey * buildValsPerKey;

	Assert.assertEquals("Wrong number of keys", numKeys, map.size());
	for (Map.Entry<Integer, Long> entry : map.entrySet()) {
		long val = entry.getValue();
		int key = entry.getKey();

		Assert.assertEquals("Wrong number of values in per-key cross product for key " + key,
				(key == repeatedValue1 || key == repeatedValue2) ?
						expectedForRepeatedKey :
						expectedForRegularKey, val);
	}

	// ----------------------------------------------------------------------------------------

	table.free();
}

Source File: LongHashTableTest.java From flink with Apache License 2.0

4 votes

/**
 * Expects {@code join(...)} on a {@code MyHashTable} to throw when both inputs carry
 * 3,000,000 rows for each of two keys that are known to collide.
 *
 * @throws IOException declared because the hash-table calls may throw it
 */
@Test
public void testFailingHashJoinTooManyRecursions() throws IOException {
	// the two keys below are known to collide in their hash code on the first recursion level;
	// they are used to make a single partition grow disproportionately large
	final int repeatedValue1 = 40559;
	final int repeatedValue2 = 92882;
	final int repeatedValueCount = 3000000;

	final int numKeys = 1000000;
	final int buildValsPerKey = 3;
	final int probeValsPerKey = 10;

	// build side: numKeys keys with buildValsPerKey values each,
	// followed by repeatedValueCount rows for each of the two colliding keys
	List<MutableObjectIterator<BinaryRowData>> buildSources = new ArrayList<>();
	buildSources.add(new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false));
	buildSources.add(new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCount));
	buildSources.add(new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCount));
	MutableObjectIterator<BinaryRowData> buildInput = new UnionIterator<>(buildSources);

	// probe side: numKeys keys with probeValsPerKey values each,
	// followed by repeatedValueCount rows for each of the two colliding keys
	List<MutableObjectIterator<BinaryRowData>> probeSources = new ArrayList<>();
	probeSources.add(new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true));
	probeSources.add(new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCount));
	probeSources.add(new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCount));
	MutableObjectIterator<BinaryRowData> probeInput = new UnionIterator<>(probeSources);

	final MyHashTable table = new MyHashTable(896 * PAGE_SIZE);

	try {
		join(table, buildInput, probeInput);
		fail("Hash Join must have failed due to too many recursions.");
	} catch (Exception expected) {
		// the join is supposed to throw; nothing to do here
	}

	table.close();

	// ----------------------------------------------------------------------------------------

	table.free();
}

Source File: BinaryHashTableTest.java From flink with Apache License 2.0

4 votes

/**
 * Builds a {@code BinaryHashTable} from a build input that contains two heavily repeated
 * keys, probes it, and checks the per-key counts collected in the validation map (the map
 * is passed to {@code testJoin}, which presumably fills it).
 *
 * @throws IOException declared because the iterator and hash-table calls may throw it
 */
@Test
public void testSpillingHashJoinWithMassiveCollisions() throws IOException {
	// the following two values are known to have a hash-code collision on the initial level.
	// we use them to make sure one partition grows over-proportionally large
	final int repeatedValue1 = 40559;
	final int repeatedValue2 = 92882;
	final int repeatedValueCountBuild = 200000;
	final int repeatedValueCountProbe = 5;

	final int numKeys = 1000000;
	final int buildValsPerKey = 3;
	final int probeValsPerKey = 10;

	// create a build input that gives 3 million pairs with 3 values sharing the same key, plus 400k pairs with two colliding keys
	MutableObjectIterator<BinaryRowData> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
	MutableObjectIterator<BinaryRowData> build2 = new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountBuild);
	MutableObjectIterator<BinaryRowData> build3 = new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountBuild);
	List<MutableObjectIterator<BinaryRowData>> builds = new ArrayList<>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<BinaryRowData> buildInput = new UnionIterator<>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key.
	// the repeated keys use repeatedValueCountProbe so that the input stays in sync
	// with the expected value computed for the assertion below
	MutableObjectIterator<BinaryRowData> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
	MutableObjectIterator<BinaryRowData> probe2 = new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountProbe);
	MutableObjectIterator<BinaryRowData> probe3 = new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountProbe);
	List<MutableObjectIterator<BinaryRowData>> probes = new ArrayList<>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<BinaryRowData> probeInput = new UnionIterator<>(probes);

	// create the map for validating the results
	HashMap<Integer, Long> map = new HashMap<>(numKeys);
	MemoryManager memManager = MemoryManagerBuilder.newBuilder().setMemorySize(896 * PAGE_SIZE).build();
	// ----------------------------------------------------------------------------------------

	final BinaryHashTable table = newBinaryHashTable(
			this.buildSideSerializer, this.probeSideSerializer,
			new MyProjection(), new MyProjection(), memManager,
			896 * PAGE_SIZE, ioManager);

	// build phase
	BinaryRowData buildRow = buildSideSerializer.createInstance();
	while ((buildRow = buildInput.next(buildRow)) != null) {
		table.putBuildRow(buildRow);
	}
	table.endBuild();

	// probe phase
	BinaryRowData probeRow = probeSideSerializer.createInstance();
	while ((probeRow = probeInput.next(probeRow)) != null) {
		if (table.tryProbe(probeRow)){
			testJoin(table, map);
		}
	}

	while (table.nextMatching()){
		testJoin(table, map);
	}

	table.close();

	// expected per-key counts; they do not depend on the entry, so compute them once
	final long expectedForRepeatedKey =
			(probeValsPerKey + repeatedValueCountProbe) * (buildValsPerKey + repeatedValueCountBuild);
	final long expectedForRegularKey = probeValsPerKey * buildValsPerKey;

	Assert.assertEquals("Wrong number of keys", numKeys, map.size());
	for (Map.Entry<Integer, Long> entry : map.entrySet()) {
		long val = entry.getValue();
		int key = entry.getKey();

		Assert.assertEquals("Wrong number of values in per-key cross product for key " + key,
				(key == repeatedValue1 || key == repeatedValue2) ?
						expectedForRepeatedKey :
						expectedForRegularKey, val);
	}

	// ----------------------------------------------------------------------------------------

	table.free();
}

Source File: BinaryHashTableTest.java From flink with Apache License 2.0

4 votes

/**
 * Builds a {@code BinaryHashTable} from a build input that contains two heavily repeated
 * keys, probes it, and checks the per-key counts collected in the validation map (the map
 * is passed to {@code testJoin}, which presumably fills it).
 *
 * @throws IOException declared because the iterator and hash-table calls may throw it
 */
@Test
public void testSpillingHashJoinWithTwoRecursions() throws IOException {
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int repeatedValue1 = 40559;
	final int repeatedValue2 = 92882;
	final int repeatedValueCountBuild = 200000;
	final int repeatedValueCountProbe = 5;

	final int numKeys = 1000000;
	final int buildValsPerKey = 3;
	final int probeValsPerKey = 10;

	// create a build input that gives 3 million pairs with 3 values sharing the same key, plus 400k pairs with two colliding keys
	MutableObjectIterator<BinaryRowData> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
	MutableObjectIterator<BinaryRowData> build2 = new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountBuild);
	MutableObjectIterator<BinaryRowData> build3 = new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountBuild);
	List<MutableObjectIterator<BinaryRowData>> builds = new ArrayList<>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<BinaryRowData> buildInput = new UnionIterator<>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key.
	// the repeated keys use repeatedValueCountProbe so that the input stays in sync
	// with the expected value computed for the assertion below
	MutableObjectIterator<BinaryRowData> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
	MutableObjectIterator<BinaryRowData> probe2 = new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountProbe);
	MutableObjectIterator<BinaryRowData> probe3 = new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountProbe);
	List<MutableObjectIterator<BinaryRowData>> probes = new ArrayList<>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<BinaryRowData> probeInput = new UnionIterator<>(probes);

	// create the map for validating the results
	HashMap<Integer, Long> map = new HashMap<>(numKeys);

	// ----------------------------------------------------------------------------------------
	MemoryManager memManager = MemoryManagerBuilder.newBuilder().setMemorySize(896 * PAGE_SIZE).build();
	final BinaryHashTable table = newBinaryHashTable(
			this.buildSideSerializer, this.probeSideSerializer,
			new MyProjection(), new MyProjection(), memManager,
			896 * PAGE_SIZE, ioManager);

	// build phase
	BinaryRowData buildRow = buildSideSerializer.createInstance();
	while ((buildRow = buildInput.next(buildRow)) != null) {
		table.putBuildRow(buildRow);
	}
	table.endBuild();

	// probe phase
	BinaryRowData probeRow = probeSideSerializer.createInstance();
	while ((probeRow = probeInput.next(probeRow)) != null) {
		if (table.tryProbe(probeRow)){
			testJoin(table, map);
		}
	}

	while (table.nextMatching()){
		testJoin(table, map);
	}

	table.close();

	// expected per-key counts; they do not depend on the entry, so compute them once
	final long expectedForRepeatedKey =
			(probeValsPerKey + repeatedValueCountProbe) * (buildValsPerKey + repeatedValueCountBuild);
	final long expectedForRegularKey = probeValsPerKey * buildValsPerKey;

	Assert.assertEquals("Wrong number of keys", numKeys, map.size());
	for (Map.Entry<Integer, Long> entry : map.entrySet()) {
		long val = entry.getValue();
		int key = entry.getKey();

		Assert.assertEquals("Wrong number of values in per-key cross product for key " + key,
				(key == repeatedValue1 || key == repeatedValue2) ?
						expectedForRepeatedKey :
						expectedForRegularKey, val);
	}

	// ----------------------------------------------------------------------------------------

	table.free();
}

Source File: BinaryHashTableTest.java From flink with Apache License 2.0

4 votes

/**
 * Expects {@code join(...)} on a {@code BinaryHashTable} to throw when both inputs carry
 * 3,000,000 rows for each of two keys that are known to collide.
 *
 * @throws IOException declared because the hash-table calls may throw it
 */
@Test
public void testFailingHashJoinTooManyRecursions() throws IOException {
	// the two keys below are known to collide in their hash code on the first recursion level;
	// they are used to make a single partition grow disproportionately large
	final int repeatedValue1 = 40559;
	final int repeatedValue2 = 92882;
	final int repeatedValueCount = 3000000;

	final int numKeys = 1000000;
	final int buildValsPerKey = 3;
	final int probeValsPerKey = 10;

	// build side: numKeys keys with buildValsPerKey values each,
	// followed by repeatedValueCount rows for each of the two colliding keys
	List<MutableObjectIterator<BinaryRowData>> buildSources = new ArrayList<>();
	buildSources.add(new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false));
	buildSources.add(new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCount));
	buildSources.add(new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCount));
	MutableObjectIterator<BinaryRowData> buildInput = new UnionIterator<>(buildSources);

	// probe side: numKeys keys with probeValsPerKey values each,
	// followed by repeatedValueCount rows for each of the two colliding keys
	List<MutableObjectIterator<BinaryRowData>> probeSources = new ArrayList<>();
	probeSources.add(new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true));
	probeSources.add(new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCount));
	probeSources.add(new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCount));
	MutableObjectIterator<BinaryRowData> probeInput = new UnionIterator<>(probeSources);

	// ----------------------------------------------------------------------------------------
	MemoryManager memManager = MemoryManagerBuilder.newBuilder().setMemorySize(896 * PAGE_SIZE).build();
	final BinaryHashTable table = newBinaryHashTable(
			this.buildSideSerializer, this.probeSideSerializer,
			new MyProjection(), new MyProjection(), memManager,
			896 * PAGE_SIZE, ioManager);

	try {
		join(table, buildInput, probeInput);
		fail("Hash Join must have failed due to too many recursions.");
	} catch (Exception expected) {
		// the join is supposed to throw; nothing to do here
	}

	table.close();

	// ----------------------------------------------------------------------------------------

	table.free();
}

Source File: HashTableITCase.java From flink with Apache License 2.0

4 votes

/**
 * Expects a {@code MutableHashTable} join to throw while iterating when both inputs carry
 * 3,000,000 records for each of two keys that are known to collide.
 *
 * @throws IOException declared because the hash-table calls may throw it
 */
@Test
public void testFailingHashJoinTooManyRecursions() throws IOException
{
	// the two keys below are known to collide in their hash code on the first recursion level;
	// they are used to make a single partition grow disproportionately large
	final int repeatedValue1 = 40559;
	final int repeatedValue2 = 92882;
	final int repeatedValueCount = 3000000;

	final int numKeys = 1000000;
	final int buildValsPerKey = 3;
	final int probeValsPerKey = 10;

	// build side: numKeys keys with buildValsPerKey values each,
	// followed by repeatedValueCount records for each of the two colliding keys
	List<MutableObjectIterator<Record>> buildSources = new ArrayList<MutableObjectIterator<Record>>();
	buildSources.add(new UniformRecordGenerator(numKeys, buildValsPerKey, false));
	buildSources.add(new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCount));
	buildSources.add(new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCount));
	MutableObjectIterator<Record> buildInput = new UnionIterator<Record>(buildSources);

	// probe side: numKeys keys with probeValsPerKey values each,
	// followed by repeatedValueCount records for each of the two colliding keys
	List<MutableObjectIterator<Record>> probeSources = new ArrayList<MutableObjectIterator<Record>>();
	probeSources.add(new UniformRecordGenerator(numKeys, probeValsPerKey, true));
	probeSources.add(new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCount));
	probeSources.add(new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCount));
	MutableObjectIterator<Record> probeInput = new UnionIterator<Record>(probeSources);

	// reserve 896 pages for the hash table
	List<MemorySegment> memSegments;
	try {
		memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
	}
	catch (MemoryAllocationException maex) {
		fail("Memory for the Join could not be provided.");
		return;
	}

	// ----------------------------------------------------------------------------------------

	final MutableHashTable<Record, Record> hashJoin = new MutableHashTable<Record, Record>(
			this.recordBuildSideAccesssor, this.recordProbeSideAccesssor,
			this.recordBuildSideComparator, this.recordProbeSideComparator, this.pactRecordComparator,
			memSegments, ioManager);
	hashJoin.open(buildInput, probeInput);

	final Record recordReuse = new Record();

	try {
		while (hashJoin.nextRecord()) {
			MutableObjectIterator<Record> buildSide = hashJoin.getBuildSideIterator();

			// there has to be at least one build-side record
			Record current = buildSide.next(recordReuse);
			if (current == null) {
				fail("No build side values found for a probe key.");
			}

			// drain the remaining build-side records
			do {
				current = buildSide.next(recordReuse);
			} while (current != null);
		}

		fail("Hash Join must have failed due to too many recursions.");
	}
	catch (Exception expected) {
		// the join is supposed to throw; nothing to do here
	}

	hashJoin.close();

	// ----------------------------------------------------------------------------------------

	this.memManager.release(hashJoin.getFreedMemory());
}

Source File: ReusingHashJoinIteratorITCase.java From flink with Apache License 2.0

4 votes

/**
 * Runs a {@code ReusingBuildFirstHashJoinIterator} over two inputs that share one heavily
 * duplicated key and checks the produced matches against a precomputed expectation.
 *
 * <p>Each input is the union of a random tuple generator and a constant-key iterator that
 * repeats {@code DUPLICATE_KEY}. The expected matches are computed with {@code joinTuples}
 * before the generators and iterators are reset for the real run; the matcher is built over
 * that map and the test finally asserts that every per-key collection in it is empty.
 */
@Test
public void testBuildFirstWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TestData.TupleGenerator generator1 = new TestData.TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TestData.TupleGenerator generator2 = new TestData.TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		// concrete type instead of a raw FlatJoinFunction, so the call below is type-checked
		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		ReusingBuildFirstHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
				new ReusingBuildFirstHashJoinIterator<>(
					input1, input2, this.recordSerializer, this.record1Comparator, 
					this.recordSerializer, this.record2Comparator, this.recordPairComparator,
					this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);

		iterator.open();
		try {
			while (iterator.callWithNextKey(matcher, collector)) {
				// nothing to do here: the matcher is invoked inside callWithNextKey
			}
		}
		finally {
			// close the iterator even when the join throws
			iterator.close();
		}

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		// fail the test but keep the original exception as the cause
		throw new AssertionError("An exception occurred during the test: " + e.getMessage(), e);
	}
}

Source File: ReusingHashJoinIteratorITCase.java From flink with Apache License 2.0

4 votes

/**
 * Runs a {@code ReusingBuildSecondHashJoinIterator} over two inputs that share one heavily
 * duplicated key and checks the produced matches against a precomputed expectation.
 *
 * <p>Each input is the union of a random tuple generator and a constant-key iterator that
 * repeats {@code DUPLICATE_KEY}. The expected matches are computed with {@code joinTuples}
 * before the generators and iterators are reset for the real run; the matcher is built over
 * that map and the test finally asserts that every per-key collection in it is empty.
 */
@Test
public void testBuildSecondWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TestData.TupleGenerator generator1 = new TestData.TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TestData.TupleGenerator generator2 = new TestData.TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		// concrete type instead of a raw FlatJoinFunction, so the call below is type-checked
		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		ReusingBuildSecondHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
			new ReusingBuildSecondHashJoinIterator<>(
				input1, input2, this.recordSerializer, this.record1Comparator, 
				this.recordSerializer, this.record2Comparator, this.recordPairComparator,
				this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);
		
		iterator.open();
		try {
			while (iterator.callWithNextKey(matcher, collector)) {
				// nothing to do here: the matcher is invoked inside callWithNextKey
			}
		}
		finally {
			// close the iterator even when the join throws
			iterator.close();
		}

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		// fail the test but keep the original exception as the cause
		throw new AssertionError("An exception occurred during the test: " + e.getMessage(), e);
	}
}

Source File: HashTableITCase.java From flink with Apache License 2.0

4 votes

/**
 * Expects a {@code MutableHashTable} join over {@code Record} inputs to throw when both
 * inputs contain two heavily repeated keys.
 *
 * <p>The test fails if the join runs to completion or if a probe record has no build-side
 * match. The hash table is closed and its memory handed back to the memory manager on
 * every exit path once the table has been opened.
 *
 * @throws IOException declared for {@code join.open(...)}
 */
@Test
public void testFailingHashJoinTooManyRecursions() throws IOException
{
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int REPEATED_VALUE_1 = 40559;
	final int REPEATED_VALUE_2 = 92882;
	final int REPEATED_VALUE_COUNT = 3000000; 
	
	final int NUM_KEYS = 1000000;
	final int BUILD_VALS_PER_KEY = 3;
	final int PROBE_VALS_PER_KEY = 10;
	
	// create a build input that gives 3 million pairs with 3 values sharing the same key,
	// plus 6 million pairs (2 x REPEATED_VALUE_COUNT) for the two colliding keys
	MutableObjectIterator<Record> build1 = new UniformRecordGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);
	MutableObjectIterator<Record> build2 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<Record> build3 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<Record>> builds = new ArrayList<MutableObjectIterator<Record>>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<Record> buildInput = new UnionIterator<Record>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus 6 million pairs (2 x REPEATED_VALUE_COUNT) for the two colliding keys
	MutableObjectIterator<Record> probe1 = new UniformRecordGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);
	MutableObjectIterator<Record> probe2 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<Record> probe3 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<Record>> probes = new ArrayList<MutableObjectIterator<Record>>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<Record> probeInput = new UnionIterator<Record>(probes);
	
	// allocate the memory for the HashTable
	List<MemorySegment> memSegments;
	try {
		memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
	}
	catch (MemoryAllocationException maex) {
		fail("Memory for the Join could not be provided.");
		return;
	}
	
	// ----------------------------------------------------------------------------------------
	
	final MutableHashTable<Record, Record> join = new MutableHashTable<Record, Record>(
			this.recordBuildSideAccesssor, this.recordProbeSideAccesssor, 
			this.recordBuildSideComparator, this.recordProbeSideComparator, this.pactRecordComparator,
			memSegments, ioManager);
	join.open(buildInput, probeInput);
	
	final Record recordReuse = new Record();

	try {
		while (join.nextRecord()) {
			MutableObjectIterator<Record> buildSide = join.getBuildSideIterator();
			if (buildSide.next(recordReuse) == null) {
				fail("No build side values found for a probe key.");
			}
			while (buildSide.next(recordReuse) != null) {
				// drain the remaining build-side records for this probe record
			}
		}
		
		fail("Hash Join must have failed due to too many recursions.");
	}
	catch (Exception ex) {
		// expected
	}
	finally {
		// fail(...) throws an AssertionError, which the catch above does not handle;
		// cleaning up here keeps the table from staying open when the test fails
		join.close();
		
		// ----------------------------------------------------------------------------------------
		
		this.memManager.release(join.getFreedMemory());
	}
}

Source File: HashTableITCase.java From flink with Apache License 2.0

4 votes

/**
 * Expects a {@code MutableHashTable} join over {@code IntPair} inputs to throw when both
 * inputs contain two heavily repeated keys.
 *
 * <p>The test fails if the join runs to completion or if a probe record has no build-side
 * match. The hash table is closed and its memory handed back to the memory manager on
 * every exit path once the table has been opened.
 *
 * @throws IOException declared for {@code join.open(...)}
 */
@Test
public void testFailingHashJoinTooManyRecursionsIntPair() throws IOException
{
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int REPEATED_VALUE_1 = 40559;
	final int REPEATED_VALUE_2 = 92882;
	final int REPEATED_VALUE_COUNT = 3000000; 
	
	final int NUM_KEYS = 1000000;
	final int BUILD_VALS_PER_KEY = 3;
	final int PROBE_VALS_PER_KEY = 10;
	
	// create a build input that gives 3 million pairs with 3 values sharing the same key,
	// plus 6 million pairs (2 x REPEATED_VALUE_COUNT) for the two colliding keys
	MutableObjectIterator<IntPair> build1 = new UniformIntPairGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);
	MutableObjectIterator<IntPair> build2 = new ConstantsIntPairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<IntPair> build3 = new ConstantsIntPairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<IntPair>> builds = new ArrayList<MutableObjectIterator<IntPair>>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<IntPair> buildInput = new UnionIterator<IntPair>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus 6 million pairs (2 x REPEATED_VALUE_COUNT) for the two colliding keys
	MutableObjectIterator<IntPair> probe1 = new UniformIntPairGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);
	MutableObjectIterator<IntPair> probe2 = new ConstantsIntPairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<IntPair> probe3 = new ConstantsIntPairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<IntPair>> probes = new ArrayList<MutableObjectIterator<IntPair>>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<IntPair> probeInput = new UnionIterator<IntPair>(probes);
	
	// allocate the memory for the HashTable
	List<MemorySegment> memSegments;
	try {
		memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
	}
	catch (MemoryAllocationException maex) {
		fail("Memory for the Join could not be provided.");
		return;
	}
	
	// ----------------------------------------------------------------------------------------
	
	final MutableHashTable<IntPair, IntPair> join = new MutableHashTable<IntPair, IntPair>(
			this.pairBuildSideAccesssor, this.pairProbeSideAccesssor, 
			this.pairBuildSideComparator, this.pairProbeSideComparator, this.pairComparator,
			memSegments, ioManager);
	join.open(buildInput, probeInput);
	
	final IntPair recordReuse = new IntPair();

	try {
		while (join.nextRecord()) {
			MutableObjectIterator<IntPair> buildSide = join.getBuildSideIterator();
			if (buildSide.next(recordReuse) == null) {
				fail("No build side values found for a probe key.");
			}
			while (buildSide.next(recordReuse) != null) {
				// drain the remaining build-side records for this probe record
			}
		}
		
		fail("Hash Join must have failed due to too many recursions.");
	}
	catch (Exception ex) {
		// expected
	}
	finally {
		// fail(...) throws an AssertionError, which the catch above does not handle;
		// cleaning up here keeps the table from staying open when the test fails
		join.close();
		
		// ----------------------------------------------------------------------------------------
		
		this.memManager.release(join.getFreedMemory());
	}
}

Source File: NonReusingHashJoinIteratorITCase.java From flink with Apache License 2.0

4 votes

/**
 * Runs a {@code NonReusingBuildFirstHashJoinIterator} over two inputs that share one heavily
 * duplicated key and checks the produced matches against a precomputed expectation.
 *
 * <p>Each input is the union of a random tuple generator and a constant-key iterator that
 * repeats {@code DUPLICATE_KEY}. The expected matches are computed with {@code joinTuples}
 * before the generators and iterators are reset for the real run; the matcher is built over
 * that map and the test finally asserts that every per-key collection in it is empty.
 */
@Test
public void testBuildFirstWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TupleGenerator generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TupleGenerator generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		NonReusingBuildFirstHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
				new NonReusingBuildFirstHashJoinIterator<>(
					input1, input2, this.recordSerializer, this.record1Comparator, 
					this.recordSerializer, this.record2Comparator, this.recordPairComparator,
					this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);

		iterator.open();
		try {
			while (iterator.callWithNextKey(matcher, collector)) {
				// nothing to do here: the matcher is invoked inside callWithNextKey
			}
		}
		finally {
			// close the iterator even when the join throws
			iterator.close();
		}

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		// fail the test but keep the original exception as the cause
		throw new AssertionError("An exception occurred during the test: " + e.getMessage(), e);
	}
}

Source File: NonReusingHashJoinIteratorITCase.java From flink with Apache License 2.0

4 votes

/**
 * Runs a {@code NonReusingBuildSecondHashJoinIterator} over two inputs that share one heavily
 * duplicated key and checks the produced matches against a precomputed expectation.
 *
 * <p>Each input is the union of a random tuple generator and a constant-key iterator that
 * repeats {@code DUPLICATE_KEY}. The expected matches are computed with {@code joinTuples}
 * before the generators and iterators are reset for the real run; the matcher is built over
 * that map and the test finally asserts that every per-key collection in it is empty.
 */
@Test
public void testBuildSecondWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TupleGenerator generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TupleGenerator generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		NonReusingBuildSecondHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
			new NonReusingBuildSecondHashJoinIterator<>(
				input1, input2, this.recordSerializer, this.record1Comparator, 
				this.recordSerializer, this.record2Comparator, this.recordPairComparator,
				this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);
		
		iterator.open();
		try {
			while (iterator.callWithNextKey(matcher, collector)) {
				// nothing to do here: the matcher is invoked inside callWithNextKey
			}
		}
		finally {
			// close the iterator even when the join throws
			iterator.close();
		}

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		// fail the test but keep the original exception as the cause
		throw new AssertionError("An exception occurred during the test: " + e.getMessage(), e);
	}
}

Source File: LongHashTableTest.java From flink with Apache License 2.0

4 votes

/**
 * Expects {@code join(table, buildInput, probeInput)} to throw when both {@code BinaryRow}
 * inputs contain two heavily repeated keys.
 *
 * <p>The test fails if the join completes without an exception. The table is closed and
 * freed on every exit path after it has been created.
 *
 * @throws IOException declared by the original test signature
 */
@Test
public void testFailingHashJoinTooManyRecursions() throws IOException {
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int repeatedValue1 = 40559;
	final int repeatedValue2 = 92882;
	final int repeatedValueCount = 3000000;

	final int numKeys = 1000000;
	final int buildValsPerKey = 3;
	final int probeValsPerKey = 10;

	// create a build input that gives 3 million pairs with 3 values sharing the same key,
	// plus 6 million pairs (2 x repeatedValueCount) for the two colliding keys
	MutableObjectIterator<BinaryRow> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
	MutableObjectIterator<BinaryRow> build2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCount);
	MutableObjectIterator<BinaryRow> build3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCount);
	List<MutableObjectIterator<BinaryRow>> builds = new ArrayList<>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<BinaryRow> buildInput = new UnionIterator<>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus 6 million pairs (2 x repeatedValueCount) for the two colliding keys
	MutableObjectIterator<BinaryRow> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
	MutableObjectIterator<BinaryRow> probe2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCount);
	MutableObjectIterator<BinaryRow> probe3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCount);
	List<MutableObjectIterator<BinaryRow>> probes = new ArrayList<>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<BinaryRow> probeInput = new UnionIterator<>(probes);
	final MyHashTable table = new MyHashTable(896 * PAGE_SIZE);

	try {
		join(table, buildInput, probeInput);
		fail("Hash Join must have failed due to too many recursions.");
	} catch (Exception ex) {
		// expected
	} finally {
		// fail(...) throws an AssertionError, which the catch above does not handle;
		// cleaning up here keeps the table from staying open when the test fails
		table.close();

		// ----------------------------------------------------------------------------------------

		table.free();
	}
}

Source File: ReusingHashJoinIteratorITCase.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Runs a {@code ReusingBuildFirstHashJoinIterator} over two inputs that share one heavily
 * duplicated key and checks the produced matches against a precomputed expectation.
 *
 * <p>Each input is the union of a random tuple generator and a constant-key iterator that
 * repeats {@code DUPLICATE_KEY}. The expected matches are computed with {@code joinTuples}
 * before the generators and iterators are reset for the real run; the matcher is built over
 * that map and the test finally asserts that every per-key collection in it is empty.
 */
@Test
public void testBuildFirstWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TestData.TupleGenerator generator1 = new TestData.TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TestData.TupleGenerator generator2 = new TestData.TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		// concrete type instead of a raw FlatJoinFunction, so the call below is type-checked
		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		ReusingBuildFirstHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
				new ReusingBuildFirstHashJoinIterator<>(
					input1, input2, this.recordSerializer, this.record1Comparator, 
					this.recordSerializer, this.record2Comparator, this.recordPairComparator,
					this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);

		iterator.open();
		try {
			while (iterator.callWithNextKey(matcher, collector)) {
				// nothing to do here: the matcher is invoked inside callWithNextKey
			}
		}
		finally {
			// close the iterator even when the join throws
			iterator.close();
		}

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		// fail the test but keep the original exception as the cause
		throw new AssertionError("An exception occurred during the test: " + e.getMessage(), e);
	}
}

Source File: ReusingHashJoinIteratorITCase.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Runs a {@code ReusingBuildSecondHashJoinIterator} over two inputs that share one heavily
 * duplicated key and checks the produced matches against a precomputed expectation.
 *
 * <p>Each input is the union of a random tuple generator and a constant-key iterator that
 * repeats {@code DUPLICATE_KEY}. The expected matches are computed with {@code joinTuples}
 * before the generators and iterators are reset for the real run; the matcher is built over
 * that map and the test finally asserts that every per-key collection in it is empty.
 */
@Test
public void testBuildSecondWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TestData.TupleGenerator generator1 = new TestData.TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TestData.TupleGenerator generator2 = new TestData.TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		// concrete type instead of a raw FlatJoinFunction, so the call below is type-checked
		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		ReusingBuildSecondHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
			new ReusingBuildSecondHashJoinIterator<>(
				input1, input2, this.recordSerializer, this.record1Comparator, 
				this.recordSerializer, this.record2Comparator, this.recordPairComparator,
				this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);
		
		iterator.open();
		try {
			while (iterator.callWithNextKey(matcher, collector)) {
				// nothing to do here: the matcher is invoked inside callWithNextKey
			}
		}
		finally {
			// close the iterator even when the join throws
			iterator.close();
		}

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		// fail the test but keep the original exception as the cause
		throw new AssertionError("An exception occurred during the test: " + e.getMessage(), e);
	}
}

Source File: HashTableITCase.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Expects a {@code MutableHashTable} join over {@code Record} inputs to throw when both
 * inputs contain two heavily repeated keys.
 *
 * <p>The test fails if the join runs to completion or if a probe record has no build-side
 * match. The hash table is closed and its memory handed back to the memory manager on
 * every exit path once the table has been opened.
 *
 * @throws IOException declared for {@code join.open(...)}
 */
@Test
public void testFailingHashJoinTooManyRecursions() throws IOException
{
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int REPEATED_VALUE_1 = 40559;
	final int REPEATED_VALUE_2 = 92882;
	final int REPEATED_VALUE_COUNT = 3000000; 
	
	final int NUM_KEYS = 1000000;
	final int BUILD_VALS_PER_KEY = 3;
	final int PROBE_VALS_PER_KEY = 10;
	
	// create a build input that gives 3 million pairs with 3 values sharing the same key,
	// plus 6 million pairs (2 x REPEATED_VALUE_COUNT) for the two colliding keys
	MutableObjectIterator<Record> build1 = new UniformRecordGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);
	MutableObjectIterator<Record> build2 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<Record> build3 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<Record>> builds = new ArrayList<MutableObjectIterator<Record>>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<Record> buildInput = new UnionIterator<Record>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus 6 million pairs (2 x REPEATED_VALUE_COUNT) for the two colliding keys
	MutableObjectIterator<Record> probe1 = new UniformRecordGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);
	MutableObjectIterator<Record> probe2 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<Record> probe3 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<Record>> probes = new ArrayList<MutableObjectIterator<Record>>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<Record> probeInput = new UnionIterator<Record>(probes);
	
	// allocate the memory for the HashTable
	List<MemorySegment> memSegments;
	try {
		memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
	}
	catch (MemoryAllocationException maex) {
		fail("Memory for the Join could not be provided.");
		return;
	}
	
	// ----------------------------------------------------------------------------------------
	
	final MutableHashTable<Record, Record> join = new MutableHashTable<Record, Record>(
			this.recordBuildSideAccesssor, this.recordProbeSideAccesssor, 
			this.recordBuildSideComparator, this.recordProbeSideComparator, this.pactRecordComparator,
			memSegments, ioManager);
	join.open(buildInput, probeInput);
	
	final Record recordReuse = new Record();

	try {
		while (join.nextRecord()) {
			MutableObjectIterator<Record> buildSide = join.getBuildSideIterator();
			if (buildSide.next(recordReuse) == null) {
				fail("No build side values found for a probe key.");
			}
			while (buildSide.next(recordReuse) != null) {
				// drain the remaining build-side records for this probe record
			}
		}
		
		fail("Hash Join must have failed due to too many recursions.");
	}
	catch (Exception ex) {
		// expected
	}
	finally {
		// fail(...) throws an AssertionError, which the catch above does not handle;
		// cleaning up here keeps the table from staying open when the test fails
		join.close();
		
		// ----------------------------------------------------------------------------------------
		
		this.memManager.release(join.getFreedMemory());
	}
}

Source File: HashTableITCase.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Checks that the hash join aborts with an exception instead of completing when both
 * inputs contain two heavily repeated keys that are known to collide in the hash function.
 *
 * <p>Build and probe side each consist of a uniform key range plus
 * {@code REPEATED_VALUE_COUNT} extra pairs for each of the two colliding keys. The test
 * passes only if iterating the join result throws an exception; finishing the iteration
 * is reported as a failure.
 *
 * @throws IOException declared for the hash table calls made outside the guarded probe loop
 */
@Test
public void testFailingHashJoinTooManyRecursionsIntPair() throws IOException
{
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int REPEATED_VALUE_1 = 40559;
	final int REPEATED_VALUE_2 = 92882;
	final int REPEATED_VALUE_COUNT = 3000000; 
	
	final int NUM_KEYS = 1000000;
	final int BUILD_VALS_PER_KEY = 3;
	final int PROBE_VALS_PER_KEY = 10;
	
	// create a build input that gives 3 million pairs with 3 values sharing the same key,
	// plus 6 million pairs (3 million each) for the two colliding keys
	MutableObjectIterator<IntPair> build1 = new UniformIntPairGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);
	MutableObjectIterator<IntPair> build2 = new ConstantsIntPairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<IntPair> build3 = new ConstantsIntPairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<IntPair>> builds = new ArrayList<MutableObjectIterator<IntPair>>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<IntPair> buildInput = new UnionIterator<IntPair>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus 3 million pairs for each of the two colliding keys
	MutableObjectIterator<IntPair> probe1 = new UniformIntPairGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);
	MutableObjectIterator<IntPair> probe2 = new ConstantsIntPairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<IntPair> probe3 = new ConstantsIntPairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<IntPair>> probes = new ArrayList<MutableObjectIterator<IntPair>>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<IntPair> probeInput = new UnionIterator<IntPair>(probes);
	
	// allocate the memory for the HashTable
	List<MemorySegment> memSegments;
	try {
		memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
	}
	catch (MemoryAllocationException maex) {
		fail("Memory for the Join could not be provided.");
		return;
	}
	
	// ----------------------------------------------------------------------------------------
	
	final MutableHashTable<IntPair, IntPair> join = new MutableHashTable<IntPair, IntPair>(
			this.pairBuildSideAccesssor, this.pairProbeSideAccesssor, 
			this.pairBuildSideComparator, this.pairProbeSideComparator, this.pairComparator,
			memSegments, ioManager);
	join.open(buildInput, probeInput);
	
	final IntPair recordReuse = new IntPair();

	try {
		while (join.nextRecord())
		{	
			MutableObjectIterator<IntPair> buildSide = join.getBuildSideIterator();
			if (buildSide.next(recordReuse) == null) {
				fail("No build side values found for a probe key.");
			}
			while (buildSide.next(recordReuse) != null) {
				// drain the remaining build side matches for this probe record
			}
		}
		
		fail("Hash Join must have failed due to too many recursions.");
	}
	catch (Exception ex) {
		// expected: the join is supposed to abort.
		// fail(...) above is presumably JUnit's, which throws an AssertionError (not an
		// Exception), so a failed check would not be swallowed here — confirm the static import.
	}
	finally {
		// clean up even when an assertion error escapes, so the table is closed and
		// its memory is handed back to the memory manager on every path
		join.close();
		
		// ----------------------------------------------------------------------------------------
		
		this.memManager.release(join.getFreedMemory());
	}
}

Source File: NonReusingHashJoinIteratorITCase.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Joins two generated inputs that additionally share one heavily duplicated key, using the
 * build-first hash join iterator, and checks that every expected match was produced.
 *
 * <p>The inputs are consumed twice: once to compute the expected matches and, after
 * resetting the generators and iterators, once more by the join iterator under test.
 */
@Test
public void testBuildFirstWithHighNumberOfCommonKeys()
{
	// number of generated tuples on the left and right side
	final int leftSize = 200;
	final int rightSize = 100;
	
	// number of additional tuples carrying the shared key on each side
	final int leftDuplicates = 10;
	final int rightDuplicates = 2000;
	final int sharedKey = 13;
	
	try {
		final TupleGenerator leftGenerator = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		final TupleGenerator rightGenerator = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator leftGenerated = new TestData.TupleGeneratorIterator(leftGenerator, leftSize);
		final TestData.TupleGeneratorIterator rightGenerated = new TestData.TupleGeneratorIterator(rightGenerator, rightSize);
		
		final TestData.TupleConstantValueIterator leftConstants = new TestData.TupleConstantValueIterator(sharedKey, "LEFT String for Duplicate Keys", leftDuplicates);
		final TestData.TupleConstantValueIterator rightConstants = new TestData.TupleConstantValueIterator(sharedKey, "RIGHT String for Duplicate Keys", rightDuplicates);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> leftSources = new ArrayList<>();
		leftSources.add(leftGenerated);
		leftSources.add(leftConstants);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> rightSources = new ArrayList<>();
		rightSources.add(rightGenerated);
		rightSources.add(rightConstants);
		
		MutableObjectIterator<Tuple2<Integer, String>> leftInput = new UnionIterator<>(leftSources);
		MutableObjectIterator<Tuple2<Integer, String>> rightInput = new UnionIterator<>(rightSources);
		
		// first pass over the data: compute the matches the join is expected to produce
		final Map<Integer, Collection<TupleMatch>> expectedMatches = joinTuples(
				collectTupleData(leftInput),
				collectTupleData(rightInput));
		
		// second pass: rewind everything so the join sees the same data again
		leftGenerator.reset();
		rightGenerator.reset();
		leftConstants.reset();
		rightConstants.reset();
		leftGenerated.reset();
		rightGenerated.reset();
		
		// the source lists are repopulated before building the new unions
		leftSources.clear();
		leftSources.add(leftGenerated);
		leftSources.add(leftConstants);
		
		rightSources.clear();
		rightSources.add(rightGenerated);
		rightSources.add(rightConstants);

		leftInput = new UnionIterator<>(leftSources);
		rightInput = new UnionIterator<>(rightSources);
		
		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatches);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		NonReusingBuildFirstHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joinIterator =
				new NonReusingBuildFirstHashJoinIterator<>(
					leftInput, rightInput, this.recordSerializer, this.record1Comparator, 
					this.recordSerializer, this.record2Comparator, this.recordPairComparator,
					this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);

		joinIterator.open();
		
		// process key after key until the iterator reports that nothing is left
		boolean moreKeys;
		do {
			moreKeys = joinIterator.callWithNextKey(matcher, collector);
		} while (moreKeys);
		
		joinIterator.close();

		// every key must have had all of its expected matches consumed
		for (Entry<Integer, Collection<TupleMatch>> remaining : expectedMatches.entrySet()) {
			if (remaining.getValue().isEmpty()) {
				continue;
			}
			Assert.fail("Collection for key " + remaining.getKey() + " is not empty");
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}

Source File: NonReusingHashJoinIteratorITCase.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Joins two generated inputs that additionally share one heavily duplicated key, using the
 * build-second hash join iterator, and checks that every expected match was produced.
 *
 * <p>The inputs are consumed twice: once to compute the expected matches and, after
 * resetting the generators and iterators, once more by the join iterator under test.
 */
@Test
public void testBuildSecondWithHighNumberOfCommonKeys()
{
	// number of generated tuples on the left and right side
	final int leftSize = 200;
	final int rightSize = 100;
	
	// number of additional tuples carrying the shared key on each side
	final int leftDuplicates = 10;
	final int rightDuplicates = 2000;
	final int sharedKey = 13;
	
	try {
		final TupleGenerator leftGenerator = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		final TupleGenerator rightGenerator = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator leftGenerated = new TestData.TupleGeneratorIterator(leftGenerator, leftSize);
		final TestData.TupleGeneratorIterator rightGenerated = new TestData.TupleGeneratorIterator(rightGenerator, rightSize);
		
		final TestData.TupleConstantValueIterator leftConstants = new TestData.TupleConstantValueIterator(sharedKey, "LEFT String for Duplicate Keys", leftDuplicates);
		final TestData.TupleConstantValueIterator rightConstants = new TestData.TupleConstantValueIterator(sharedKey, "RIGHT String for Duplicate Keys", rightDuplicates);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> leftSources = new ArrayList<>();
		leftSources.add(leftGenerated);
		leftSources.add(leftConstants);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> rightSources = new ArrayList<>();
		rightSources.add(rightGenerated);
		rightSources.add(rightConstants);
		
		MutableObjectIterator<Tuple2<Integer, String>> leftInput = new UnionIterator<>(leftSources);
		MutableObjectIterator<Tuple2<Integer, String>> rightInput = new UnionIterator<>(rightSources);
		
		// first pass over the data: compute the matches the join is expected to produce
		final Map<Integer, Collection<TupleMatch>> expectedMatches = joinTuples(
				collectTupleData(leftInput),
				collectTupleData(rightInput));
		
		// second pass: rewind everything so the join sees the same data again
		leftGenerator.reset();
		rightGenerator.reset();
		leftConstants.reset();
		rightConstants.reset();
		leftGenerated.reset();
		rightGenerated.reset();
		
		// the source lists are repopulated before building the new unions
		leftSources.clear();
		leftSources.add(leftGenerated);
		leftSources.add(leftConstants);
		
		rightSources.clear();
		rightSources.add(rightGenerated);
		rightSources.add(rightConstants);

		leftInput = new UnionIterator<>(leftSources);
		rightInput = new UnionIterator<>(rightSources);
		
		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatches);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		NonReusingBuildSecondHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joinIterator =
			new NonReusingBuildSecondHashJoinIterator<>(
				leftInput, rightInput, this.recordSerializer, this.record1Comparator, 
				this.recordSerializer, this.record2Comparator, this.recordPairComparator,
				this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);
		
		joinIterator.open();
		
		// process key after key until the iterator reports that nothing is left
		boolean moreKeys;
		do {
			moreKeys = joinIterator.callWithNextKey(matcher, collector);
		} while (moreKeys);
		
		joinIterator.close();

		// every key must have had all of its expected matches consumed
		for (Entry<Integer, Collection<TupleMatch>> remaining : expectedMatches.entrySet()) {
			if (remaining.getValue().isEmpty()) {
				continue;
			}
			Assert.fail("Collection for key " + remaining.getKey() + " is not empty");
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}

Source File: LongHashTableTest.java From flink with Apache License 2.0

4 votes

/**
 * Joins a uniform build and probe input that additionally contain two heavily repeated,
 * colliding keys, and verifies the size of the per-key cross product for every key.
 *
 * <p>All build rows are inserted first, then every probe row is probed; matches are passed
 * to {@code testJoin}, which is assumed to accumulate per-key match counts into the
 * validation map (its implementation is not visible here — confirm).
 *
 * @throws IOException declared for the hash table operations used by this test
 */
@Test
public void testSpillingHashJoinWithMassiveCollisions() throws IOException {
	// the following two values are known to have a hash-code collision on the initial level.
	// we use them to make sure one partition grows over-proportionally large
	final int repeatedValue1 = 40559;
	final int repeatedValue2 = 92882;
	final int repeatedValueCountBuild = 200000;
	final int repeatedValueCountProbe = 5;

	final int numKeys = 1000000;
	final int buildValsPerKey = 3;
	final int probeValsPerKey = 10;

	// create a build input that gives 3 million pairs with 3 values sharing the same key, plus 400k pairs with two colliding keys
	MutableObjectIterator<BinaryRow> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
	MutableObjectIterator<BinaryRow> build2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountBuild);
	MutableObjectIterator<BinaryRow> build3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountBuild);
	List<MutableObjectIterator<BinaryRow>> builds = new ArrayList<>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<BinaryRow> buildInput = new UnionIterator<>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus repeatedValueCountProbe extra pairs for each of the two colliding keys.
	// The named constant is used so the inputs cannot drift from the expected counts below.
	MutableObjectIterator<BinaryRow> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
	MutableObjectIterator<BinaryRow> probe2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountProbe);
	MutableObjectIterator<BinaryRow> probe3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountProbe);
	List<MutableObjectIterator<BinaryRow>> probes = new ArrayList<>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<BinaryRow> probeInput = new UnionIterator<>(probes);

	// create the map for validating the results
	HashMap<Integer, Long> map = new HashMap<>(numKeys);

	final MyHashTable table = new MyHashTable(896 * PAGE_SIZE);

	// build phase: insert every build row
	BinaryRow buildRow = buildSideSerializer.createInstance();
	while ((buildRow = buildInput.next(buildRow)) != null) {
		table.putBuildRow(buildRow);
	}
	table.endBuild();

	// probe phase: rows that can be probed right away are joined immediately
	BinaryRow probeRow = probeSideSerializer.createInstance();
	while ((probeRow = probeInput.next(probeRow)) != null) {
		if (table.tryProbe(probeRow)) {
			testJoin(table, map);
		}
	}

	// process whatever matches the table still reports after the probe input is exhausted
	while (table.nextMatching()) {
		testJoin(table, map);
	}

	table.close();

	// expected cross-product sizes do not depend on the entry, so compute them once
	final long expectedForRepeatedKey =
			(probeValsPerKey + repeatedValueCountProbe) * (buildValsPerKey + repeatedValueCountBuild);
	final long expectedForRegularKey = probeValsPerKey * buildValsPerKey;

	Assert.assertEquals("Wrong number of keys", numKeys, map.size());
	for (Map.Entry<Integer, Long> entry : map.entrySet()) {
		long val = entry.getValue();
		int key = entry.getKey();

		Assert.assertEquals("Wrong number of values in per-key cross product for key " + key,
				(key == repeatedValue1 || key == repeatedValue2) ?
						expectedForRepeatedKey :
						expectedForRegularKey, val);
	}

	// ----------------------------------------------------------------------------------------

	table.free();
}

Source File: LongHashTableTest.java From flink with Apache License 2.0

4 votes

/**
 * Joins a uniform build and probe input that additionally contain two heavily repeated
 * keys colliding on the first recursion level, and verifies the size of the per-key cross
 * product for every key.
 *
 * <p>All build rows are inserted first, then every probe row is probed; matches are passed
 * to {@code testJoin}, which is assumed to accumulate per-key match counts into the
 * validation map (its implementation is not visible here — confirm).
 *
 * @throws IOException declared for the hash table operations used by this test
 */
@Test
public void testSpillingHashJoinWithTwoRecursions() throws IOException {
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int repeatedValue1 = 40559;
	final int repeatedValue2 = 92882;
	final int repeatedValueCountBuild = 200000;
	final int repeatedValueCountProbe = 5;

	final int numKeys = 1000000;
	final int buildValsPerKey = 3;
	final int probeValsPerKey = 10;

	// create a build input that gives 3 million pairs with 3 values sharing the same key, plus 400k pairs with two colliding keys
	MutableObjectIterator<BinaryRow> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
	MutableObjectIterator<BinaryRow> build2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountBuild);
	MutableObjectIterator<BinaryRow> build3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountBuild);
	List<MutableObjectIterator<BinaryRow>> builds = new ArrayList<>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<BinaryRow> buildInput = new UnionIterator<>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus repeatedValueCountProbe extra pairs for each of the two colliding keys.
	// The named constant is used so the inputs cannot drift from the expected counts below.
	MutableObjectIterator<BinaryRow> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
	MutableObjectIterator<BinaryRow> probe2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountProbe);
	MutableObjectIterator<BinaryRow> probe3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountProbe);
	List<MutableObjectIterator<BinaryRow>> probes = new ArrayList<>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<BinaryRow> probeInput = new UnionIterator<>(probes);

	// create the map for validating the results
	HashMap<Integer, Long> map = new HashMap<>(numKeys);

	final MyHashTable table = new MyHashTable(896 * PAGE_SIZE);

	// build phase: insert every build row
	BinaryRow buildRow = buildSideSerializer.createInstance();
	while ((buildRow = buildInput.next(buildRow)) != null) {
		table.putBuildRow(buildRow);
	}
	table.endBuild();

	// probe phase: rows that can be probed right away are joined immediately
	BinaryRow probeRow = probeSideSerializer.createInstance();
	while ((probeRow = probeInput.next(probeRow)) != null) {
		if (table.tryProbe(probeRow)) {
			testJoin(table, map);
		}
	}

	// process whatever matches the table still reports after the probe input is exhausted
	while (table.nextMatching()) {
		testJoin(table, map);
	}

	table.close();

	// expected cross-product sizes do not depend on the entry, so compute them once
	final long expectedForRepeatedKey =
			(probeValsPerKey + repeatedValueCountProbe) * (buildValsPerKey + repeatedValueCountBuild);
	final long expectedForRegularKey = probeValsPerKey * buildValsPerKey;

	Assert.assertEquals("Wrong number of keys", numKeys, map.size());
	for (Map.Entry<Integer, Long> entry : map.entrySet()) {
		long val = entry.getValue();
		int key = entry.getKey();

		Assert.assertEquals("Wrong number of values in per-key cross product for key " + key,
				(key == repeatedValue1 || key == repeatedValue2) ?
						expectedForRepeatedKey :
						expectedForRegularKey, val);
	}

	// ----------------------------------------------------------------------------------------

	table.free();
}

Source File: HashTableITCase.java From flink with Apache License 2.0

4 votes

/**
 * Checks that the hash join aborts with an exception instead of completing when both
 * inputs contain two heavily repeated keys that are known to collide in the hash function.
 *
 * <p>Build and probe side each consist of a uniform key range plus
 * {@code REPEATED_VALUE_COUNT} extra pairs for each of the two colliding keys. The test
 * passes only if iterating the join result throws an exception; finishing the iteration
 * is reported as a failure.
 *
 * @throws IOException declared for the hash table calls made outside the guarded probe loop
 */
@Test
public void testFailingHashJoinTooManyRecursionsIntPair() throws IOException
{
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int REPEATED_VALUE_1 = 40559;
	final int REPEATED_VALUE_2 = 92882;
	final int REPEATED_VALUE_COUNT = 3000000; 
	
	final int NUM_KEYS = 1000000;
	final int BUILD_VALS_PER_KEY = 3;
	final int PROBE_VALS_PER_KEY = 10;
	
	// create a build input that gives 3 million pairs with 3 values sharing the same key,
	// plus 6 million pairs (3 million each) for the two colliding keys
	MutableObjectIterator<IntPair> build1 = new UniformIntPairGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);
	MutableObjectIterator<IntPair> build2 = new ConstantsIntPairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<IntPair> build3 = new ConstantsIntPairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<IntPair>> builds = new ArrayList<MutableObjectIterator<IntPair>>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<IntPair> buildInput = new UnionIterator<IntPair>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus 3 million pairs for each of the two colliding keys
	MutableObjectIterator<IntPair> probe1 = new UniformIntPairGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);
	MutableObjectIterator<IntPair> probe2 = new ConstantsIntPairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<IntPair> probe3 = new ConstantsIntPairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<IntPair>> probes = new ArrayList<MutableObjectIterator<IntPair>>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<IntPair> probeInput = new UnionIterator<IntPair>(probes);
	
	// allocate the memory for the HashTable
	List<MemorySegment> memSegments;
	try {
		memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
	}
	catch (MemoryAllocationException maex) {
		fail("Memory for the Join could not be provided.");
		return;
	}
	
	// ----------------------------------------------------------------------------------------
	
	final MutableHashTable<IntPair, IntPair> join = new MutableHashTable<IntPair, IntPair>(
			this.pairBuildSideAccesssor, this.pairProbeSideAccesssor, 
			this.pairBuildSideComparator, this.pairProbeSideComparator, this.pairComparator,
			memSegments, ioManager);
	join.open(buildInput, probeInput);
	
	final IntPair recordReuse = new IntPair();

	try {
		while (join.nextRecord())
		{	
			MutableObjectIterator<IntPair> buildSide = join.getBuildSideIterator();
			if (buildSide.next(recordReuse) == null) {
				fail("No build side values found for a probe key.");
			}
			while (buildSide.next(recordReuse) != null) {
				// drain the remaining build side matches for this probe record
			}
		}
		
		fail("Hash Join must have failed due to too many recursions.");
	}
	catch (Exception ex) {
		// expected: the join is supposed to abort.
		// fail(...) above is presumably JUnit's, which throws an AssertionError (not an
		// Exception), so a failed check would not be swallowed here — confirm the static import.
	}
	finally {
		// clean up even when an assertion error escapes, so the table is closed and
		// its memory is handed back to the memory manager on every path
		join.close();
		
		// ----------------------------------------------------------------------------------------
		
		this.memManager.release(join.getFreedMemory());
	}
}

Source File: BinaryHashTableTest.java From flink with Apache License 2.0

4 votes

/**
 * Joins a uniform build and probe input that additionally contain two heavily repeated,
 * colliding keys, and verifies the size of the per-key cross product for every key.
 *
 * <p>All build rows are inserted first, then every probe row is probed; matches are passed
 * to {@code testJoin}, which is assumed to accumulate per-key match counts into the
 * validation map (its implementation is not visible here — confirm).
 *
 * @throws IOException declared for the hash table operations used by this test
 */
@Test
public void testSpillingHashJoinWithMassiveCollisions() throws IOException {
	// the following two values are known to have a hash-code collision on the initial level.
	// we use them to make sure one partition grows over-proportionally large
	final int repeatedValue1 = 40559;
	final int repeatedValue2 = 92882;
	final int repeatedValueCountBuild = 200000;
	final int repeatedValueCountProbe = 5;

	final int numKeys = 1000000;
	final int buildValsPerKey = 3;
	final int probeValsPerKey = 10;

	// create a build input that gives 3 million pairs with 3 values sharing the same key, plus 400k pairs with two colliding keys
	MutableObjectIterator<BinaryRow> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
	MutableObjectIterator<BinaryRow> build2 = new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountBuild);
	MutableObjectIterator<BinaryRow> build3 = new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountBuild);
	List<MutableObjectIterator<BinaryRow>> builds = new ArrayList<>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<BinaryRow> buildInput = new UnionIterator<>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus repeatedValueCountProbe extra pairs for each of the two colliding keys.
	// The named constant is used so the inputs cannot drift from the expected counts below.
	MutableObjectIterator<BinaryRow> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
	MutableObjectIterator<BinaryRow> probe2 = new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountProbe);
	MutableObjectIterator<BinaryRow> probe3 = new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountProbe);
	List<MutableObjectIterator<BinaryRow>> probes = new ArrayList<>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<BinaryRow> probeInput = new UnionIterator<>(probes);

	// create the map for validating the results
	HashMap<Integer, Long> map = new HashMap<>(numKeys);
	MemoryManager memManager = new MemoryManager(896 * PAGE_SIZE, 1);
	// ----------------------------------------------------------------------------------------

	final BinaryHashTable table = newBinaryHashTable(
			this.buildSideSerializer, this.probeSideSerializer,
			new MyProjection(), new MyProjection(), memManager,
			896 * PAGE_SIZE, ioManager);

	// build phase: insert every build row
	BinaryRow buildRow = buildSideSerializer.createInstance();
	while ((buildRow = buildInput.next(buildRow)) != null) {
		table.putBuildRow(buildRow);
	}
	table.endBuild();

	// probe phase: rows that can be probed right away are joined immediately
	BinaryRow probeRow = probeSideSerializer.createInstance();
	while ((probeRow = probeInput.next(probeRow)) != null) {
		if (table.tryProbe(probeRow)){
			testJoin(table, map);
		}
	}

	// process whatever matches the table still reports after the probe input is exhausted
	while (table.nextMatching()){
		testJoin(table, map);
	}

	table.close();

	// expected cross-product sizes do not depend on the entry, so compute them once
	final long expectedForRepeatedKey =
			(probeValsPerKey + repeatedValueCountProbe) * (buildValsPerKey + repeatedValueCountBuild);
	final long expectedForRegularKey = probeValsPerKey * buildValsPerKey;

	Assert.assertEquals("Wrong number of keys", numKeys, map.size());
	for (Map.Entry<Integer, Long> entry : map.entrySet()) {
		long val = entry.getValue();
		int key = entry.getKey();

		Assert.assertEquals("Wrong number of values in per-key cross product for key " + key,
				(key == repeatedValue1 || key == repeatedValue2) ?
						expectedForRepeatedKey :
						expectedForRegularKey, val);
	}

	// ----------------------------------------------------------------------------------------

	table.free();
}

org.apache.flink.runtime.operators.testutils.UnionIterator Java Examples