Java Code Examples for org.apache.hadoop.util.hash.Hash#MURMUR_HASH
The following examples show how to use
org.apache.hadoop.util.hash.Hash#MURMUR_HASH.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestBloomFilters.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Runs the common Bloom filter test strategies against a {@link BloomFilter}
 * and a {@link RetouchedBloomFilter}, both configured with the Murmur hash.
 */
@Test
public void testFiltersWithMurmurHash() {
  final int murmur = Hash.MURMUR_HASH;

  // Both filters share the same bit size, hash-function count and hash type.
  final BloomFilter plainFilter = new BloomFilter(bitSize, hashFunctionNumber, murmur);
  final RetouchedBloomFilter retouchedFilter =
      new RetouchedBloomFilter(bitSize, hashFunctionNumber, murmur);

  BloomFilterCommonTester.of(murmur, numInsertions)
      .withFilterInstance(plainFilter)
      .withFilterInstance(retouchedFilter)
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.FILTER_OR_STRATEGY,
          BloomFilterTestStrategy.FILTER_AND_STRATEGY,
          BloomFilterTestStrategy.FILTER_XOR_STRATEGY))
      .test();
}
Example 2
Source File: BloomFilterCommonTester.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Creates a tester for the given hash type and number of insertions, and
 * installs a {@link PreAssertionHelper} that supplies the false positives
 * for each known hash type.
 *
 * @param hashId hash type identifier, stored in {@code hashType}
 * @param numInsertions number of insertions, stored in {@code numInsertions}
 */
private BloomFilterCommonTester(int hashId, int numInsertions) {
  this.numInsertions = numInsertions;
  this.hashType = hashId;
  this.preAssertionHelper = new PreAssertionHelper() {
    @Override
    public ImmutableSet<Integer> falsePositives(int hashId) {
      // False positives for odd and even under 1000, per hash type.
      if (hashId == Hash.JENKINS_HASH) {
        return ImmutableSet.of(99, 963);
      }
      if (hashId == Hash.MURMUR_HASH) {
        return ImmutableSet.of(769, 772, 810, 874);
      }
      // Unknown hash type: fail fast. The return below only satisfies the compiler.
      Assert.assertFalse("unknown hash error", true);
      return ImmutableSet.of();
    }
  };
}
Example 3
Source File: TestBloomFilters.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Exercises {@link BloomFilter} and {@link RetouchedBloomFilter} with the
 * Murmur hash through every strategy of the common Bloom filter tester.
 */
@Test
public void testFiltersWithMurmurHash() {
  final int murmur = Hash.MURMUR_HASH;

  // Build the two filters under test with identical sizing parameters.
  final BloomFilter plainFilter = new BloomFilter(bitSize, hashFunctionNumber, murmur);
  final RetouchedBloomFilter retouchedFilter =
      new RetouchedBloomFilter(bitSize, hashFunctionNumber, murmur);

  BloomFilterCommonTester.of(murmur, numInsertions)
      .withFilterInstance(plainFilter)
      .withFilterInstance(retouchedFilter)
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.FILTER_OR_STRATEGY,
          BloomFilterTestStrategy.FILTER_AND_STRATEGY,
          BloomFilterTestStrategy.FILTER_XOR_STRATEGY))
      .test();
}
Example 4
Source File: BloomFilterCommonTester.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Builds a tester bound to one hash type and insertion count, together with
 * a {@link PreAssertionHelper} that returns the false positives for the
 * known hash types.
 *
 * @param hashId hash type identifier, stored in {@code hashType}
 * @param numInsertions number of insertions, stored in {@code numInsertions}
 */
private BloomFilterCommonTester(int hashId, int numInsertions) {
  this.numInsertions = numInsertions;
  this.hashType = hashId;
  this.preAssertionHelper = new PreAssertionHelper() {
    @Override
    public ImmutableSet<Integer> falsePositives(int hashId) {
      // False positives for odd and even under 1000, per hash type.
      if (hashId == Hash.JENKINS_HASH) {
        return ImmutableSet.of(99, 963);
      }
      if (hashId == Hash.MURMUR_HASH) {
        return ImmutableSet.of(769, 772, 810, 874);
      }
      // Unknown hash type: fail fast. The return below only satisfies the compiler.
      Assert.assertFalse("unknown hash error", true);
      return ImmutableSet.of();
    }
  };
}
Example 5
Source File: BloomFilterUtils.java From incubator-hivemall with Apache License 2.0 | 5 votes |
@Nonnull public static BloomFilter newBloomFilter(@Nonnegative final int expectedNumberOfElements, @Nonnegative final float errorRate, @Nonnegative final int nbHash) { // vector size should be `-kn / (ln(1 - c^(1/k)))` bits for // single key, where `k` is the number of hash functions, // `n` is the number of keys and `c` is the desired max error rate. int vectorSize = (int) Math.ceil((-nbHash * expectedNumberOfElements) / Math.log(1.d - Math.pow(errorRate, 1.d / nbHash))); return new BloomFilter(vectorSize, nbHash, Hash.MURMUR_HASH); }
Example 6
Source File: BloomFilterUtils.java From incubator-hivemall with Apache License 2.0 | 5 votes |
/**
 * Creates a {@link DynamicBloomFilter} that uses the Murmur hash and whose
 * vector size is derived from the expected key count and the desired error rate.
 *
 * @param expectedNumberOfElements the number of keys {@code n}; also passed
 *        unchanged as the last constructor argument of the dynamic filter
 * @param errorRate the desired max error rate {@code c}
 * @param nbHash the number of hash functions {@code k}
 * @return a new dynamic Bloom filter with the computed vector size
 */
@Nonnull
public static DynamicBloomFilter newDynamicBloomFilter(
        @Nonnegative final int expectedNumberOfElements, @Nonnegative final float errorRate,
        @Nonnegative final int nbHash) {
    // Vector size is `-kn / (ln(1 - c^(1/k)))` bits, where `k` is the number of
    // hash functions, `n` the number of keys and `c` the desired max error rate.
    //
    // The product k*n is computed in double on purpose: as an int product it
    // silently wraps around for large inputs and yields a wrong vector size.
    final double negatedKn = -nbHash * (double) expectedNumberOfElements;
    final double logTerm = Math.log(1.d - Math.pow(errorRate, 1.d / nbHash));
    final int vectorSize = (int) Math.ceil(negatedKn / logTerm);
    return new DynamicBloomFilter(vectorSize, nbHash, Hash.MURMUR_HASH,
        expectedNumberOfElements);
}
Example 7
Source File: BloomFilterFactory.java From hudi with Apache License 2.0 | 5 votes |
/**
 * Builds the {@link BloomFilter} implementation selected by the given type code.
 * All filters created here use the Murmur hash.
 *
 * @param numEntries total number of entries
 * @param errorRate max allowed error rate
 * @param maxNumberOfEntries passed to the dynamic bounded filter; not used for the simple filter
 * @param bloomFilterTypeCode bloom filter type code, matched case-insensitively
 *        against the {@link BloomFilterTypeCode} names
 * @return the {@link BloomFilter} thus created
 * @throws IllegalArgumentException if the type code matches no known filter type
 */
public static BloomFilter createBloomFilter(int numEntries, double errorRate, int maxNumberOfEntries,
    String bloomFilterTypeCode) {
  if (bloomFilterTypeCode.equalsIgnoreCase(BloomFilterTypeCode.SIMPLE.name())) {
    return new SimpleBloomFilter(numEntries, errorRate, Hash.MURMUR_HASH);
  }
  if (bloomFilterTypeCode.equalsIgnoreCase(BloomFilterTypeCode.DYNAMIC_V0.name())) {
    return new HoodieDynamicBoundedBloomFilter(numEntries, errorRate, Hash.MURMUR_HASH,
        maxNumberOfEntries);
  }
  // Neither known type code matched.
  throw new IllegalArgumentException("Bloom Filter type code not recognizable " + bloomFilterTypeCode);
}
Example 8
Source File: TestInternalDynamicBloomFilter.java From hudi with Apache License 2.0 | 5 votes |
/**
 * Adds several batches of random keys to a {@link HoodieDynamicBoundedBloomFilter}
 * and checks its serialized length after each batch: the length must grow from
 * one batch to the next up to and including batch index {@code indexForMaxGrowth},
 * and must stay unchanged for every batch after that.
 */
@Test
public void testBoundedSize() {
  final int[] batchSizes = {1000, 10000, 10000, 100000, 100000, 10000};
  final int indexForMaxGrowth = 3;
  final int maxSize = batchSizes[0] * 100;
  final BloomFilter filter =
      new HoodieDynamicBoundedBloomFilter(batchSizes[0], 0.000001, Hash.MURMUR_HASH, maxSize);

  int lastKnownBloomSize = 0;
  for (int index = 0; index < batchSizes.length; index++) {
    // Fill the filter with one batch of random keys.
    for (int added = 0; added < batchSizes[index]; added++) {
      filter.add(UUID.randomUUID().toString());
    }

    final int curLength = filter.serializeToString().length();
    if (index > indexForMaxGrowth) {
      assertEquals(curLength, lastKnownBloomSize, "Length should not increase after hitting max entries");
    } else if (index != 0) {
      // The first batch has no previous size to compare against.
      assertTrue(curLength > lastKnownBloomSize, "Length should increase until max entries are reached");
    }
    lastKnownBloomSize = curLength;
  }
}
Example 9
Source File: DistinctAggregator.java From compiler with Apache License 2.0 | 5 votes |
/** {@inheritDoc} */
@Override
public void start(final EmitKey key) {
	super.start(key);

	// Replace the filter with a fresh Murmur-hash dynamic Bloom filter,
	// configured from this aggregator's vector size and its argument.
	final int size = this.vectorSize;
	final int arg = (int) this.getArg();
	this.filter = new DynamicBloomFilter(size, HASH_COUNT, Hash.MURMUR_HASH, arg);
}
Example 10
Source File: BuildBloomBase.java From spork with Apache License 2.0 | 5 votes |
/**
 * Maps a textual hash type onto the matching {@link Hash} constant.
 * The match is case-insensitive and by substring: any value containing
 * "jenkins" selects the Jenkins hash, otherwise any value containing
 * "murmur" selects the Murmur hash.
 *
 * @param hashType the hash type name
 * @return {@code Hash.JENKINS_HASH} or {@code Hash.MURMUR_HASH}
 * @throws RuntimeException if the value names neither hash
 */
private int convertHashType(String hashType) {
    // Lower-case with a fixed locale: with the default locale a Turkish
    // environment turns 'I' into a dotless 'ı', so "JENKINS" would never match.
    final String normalized = hashType.toLowerCase(java.util.Locale.ROOT);
    if (normalized.contains("jenkins")) {
        return Hash.JENKINS_HASH;
    }
    if (normalized.contains("murmur")) {
        return Hash.MURMUR_HASH;
    }
    throw new RuntimeException("Unknown hash type " + hashType
        + ". Valid values are jenkins and murmur.");
}