com.google.common.hash.BloomFilter Java Examples
The following examples show how to use
com.google.common.hash.BloomFilter.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BloomFilterUnitTest.java From tutorials with MIT License | 6 votes |
@Test public void givenBloomFilter_whenAddNStringsToIt_thenShouldNotReturnAnyFalsePositive() { //when BloomFilter<Integer> filter = BloomFilter.create( Funnels.integerFunnel(), 500, 0.01); //when filter.put(1); filter.put(2); filter.put(3); //then // the probability that it returns true, but is actually false is 1% assertThat(filter.mightContain(1)).isTrue(); assertThat(filter.mightContain(2)).isTrue(); assertThat(filter.mightContain(3)).isTrue(); assertThat(filter.mightContain(100)).isFalse(); }
Example #2
Source File: BlacklistPasswordPolicyProviderFactory.java From keycloak with Apache License 2.0 | 6 votes |
/**
 * Loads the referenced blacklist into a {@link BloomFilter}.
 *
 * <p>The filter is created for {@code getPasswordCount()} expected insertions and then receives
 * one entry per line read from {@code path}.
 *
 * @return the {@link BloomFilter} backing a password blacklist
 * @throws RuntimeException wrapping the {@link IOException} if the blacklist cannot be loaded
 */
private BloomFilter<String> load() {
    try {
        LOG.infof("Loading blacklist with name %s from %s - start", name, path);

        long expectedInsertions = getPasswordCount();
        BloomFilter<String> blacklist =
            BloomFilter.create(
                Funnels.stringFunnel(StandardCharsets.UTF_8),
                expectedInsertions,
                FALSE_POSITIVE_PROBABILITY);

        // The reader is closed as soon as every line has been added to the filter.
        try (BufferedReader reader = newReader(path)) {
            reader.lines().forEach(password -> blacklist.put(password));
        }

        LOG.infof("Loading blacklist with name %s from %s - end", name, path);
        return blacklist;
    } catch (IOException e) {
        throw new RuntimeException("Could not load password blacklist from path: " + path, e);
    }
}
Example #3
Source File: TestBloom.java From XRTB with Apache License 2.0 | 6 votes |
/**
 * Checks that the Bloom filter registered under {@code $test} reports every line of the file it
 * was built from as possibly present.
 *
 * @throws Exception if the data file cannot be opened or read.
 */
@Test
public void testBloom() throws Exception {
    new Bloom("$test", "data/c1x_cookies.csv");
    BloomFilter b = (BloomFilter) LookingGlass.get("$test");
    assertNotNull(b);

    int nP = 0; // lines the filter reports as possibly contained
    int k = 0;  // total lines read
    // try-with-resources: the reader is closed even when an assertion or read fails.
    try (BufferedReader br = new BufferedReader(new FileReader("data/c1x_cookies.csv"))) {
        String line;
        while ((line = br.readLine()) != null) {
            if (b.mightContain(line)) {
                nP++;
            }
            k++;
        }
    }
    // Every line read must have been reported by the filter.
    assertTrue(k == nP);
}
Example #4
Source File: ScalableBloomFilter.java From nexus-public with Eclipse Public License 1.0 | 6 votes |
/** * @return the probability of encountering a false positive. */ public double expectedFpp() { double probabilitySum = 0.0; double combinatorialAnd = 0.0; List<Double> probabilities = filters.stream().mapToDouble(BloomFilter::expectedFpp).boxed().collect(toList()); for (int i = 0; i < probabilities.size(); i++) { Double probability = probabilities.get(i); probabilitySum += probability; for (int j = i + 1; j < probabilities.size(); j++) { combinatorialAnd += (probability * probabilities.get(j)); } } double andProbability = filters.stream().mapToDouble(BloomFilter::expectedFpp) .reduce((a , b) -> a * b) .getAsDouble(); // These events are not mutually exclusive so the formula for calculating the probability is // P(A) + P(B) + P(C) ... - P(A and B) - P(A and C) - P(B and C) ... + P (A and B and C...) return probabilitySum - combinatorialAnd + andProbability; }
Example #5
Source File: ProbableIntersectionCursorState.java From fdb-record-layer with Apache License 2.0 | 6 votes |
/**
 * Builds a cursor state from a continuation.
 *
 * <p>When the continuation carries no serialized Bloom filter bytes a new filter is created from
 * {@code expectedInsertions} and {@code falsePositiveRate}; otherwise the filter is read back from
 * those bytes. When the continuation reports that the child has ended, the state wraps an empty
 * cursor; otherwise the child cursor is obtained from {@code cursorFunction}.
 *
 * @param cursorFunction produces the child cursor from the child continuation bytes
 * @param continuation the continuation to resume from
 * @param comparisonKeyFunction extracts the comparison key from a record
 * @param expectedInsertions expected insertion count used when a new filter is created
 * @param falsePositiveRate false-positive probability used when a new filter is created
 * @return the cursor state for this child
 * @throws RecordCoreException if the serialized Bloom filter cannot be read
 */
@Nonnull
static <T> ProbableIntersectionCursorState<T> from(
        @Nonnull Function<byte[], RecordCursor<T>> cursorFunction,
        @Nonnull BloomFilterCursorContinuation continuation,
        @Nonnull Function<? super T, ? extends List<Object>> comparisonKeyFunction,
        long expectedInsertions, double falsePositiveRate) {
    BloomFilter<List<Object>> bloomFilter;
    if (continuation.getBloomBytes() != null) {
        try {
            bloomFilter = BloomFilter.readFrom(continuation.getBloomBytes().newInput(), KeyFunnel.VERSION_0);
        } catch (IOException e) {
            throw new RecordCoreException("unable to deserialize bloom filter", e);
        }
    } else {
        bloomFilter = BloomFilter.create(KeyFunnel.VERSION_0, expectedInsertions, falsePositiveRate);
    }

    if (!continuation.isChildEnd()) {
        // The last argument is true only when no Bloom filter bytes were carried over.
        return new ProbableIntersectionCursorState<>(
                cursorFunction.apply(continuation.getChild().toBytes()),
                continuation,
                comparisonKeyFunction,
                bloomFilter,
                new HashSet<>(),
                continuation.getBloomBytes() == null);
    }

    return new ProbableIntersectionCursorState<>(
            RecordCursor.empty(),
            continuation,
            comparisonKeyFunction,
            bloomFilter,
            Collections.emptySet(),
            false);
}
Example #6
Source File: BloomFilterUtil.java From datawave with Apache License 2.0 | 6 votes |
/** * Create a BloomFilter based on a multi-map of fields * * @param fields * The fields and their values with which to create a bloom filter * @return a wrapped BloomFilter based on a multi-map of fields */ public BloomFilterWrapper newMultimapBasedFilter(final Multimap<String,NormalizedContentInterface> fields) { // Declare the return value final BloomFilter<String> filter; // Handle a non-null map of fields int fieldsApplied = 0; if (null != fields) { filter = MemberShipTest.create(fields.size()); for (final Entry<String,NormalizedContentInterface> e : fields.entries()) { MemberShipTest.update(filter, e.getValue().getIndexedFieldValue()); fieldsApplied++; } } // Handle a null set of fields else { filter = MemberShipTest.create(fieldsApplied); } final BloomFilterWrapper wrapper = new BloomFilterWrapper(filter); wrapper.setFieldValuesAppliedToFilter(fieldsApplied); return wrapper; }
Example #7
Source File: TestBloom.java From bidder with Apache License 2.0 | 6 votes |
/**
 * Checks that the Bloom filter registered under {@code $test} reports every line of the file it
 * was built from as possibly present.
 *
 * @throws Exception if the data file cannot be opened or read.
 */
@Test
public void testBloom() throws Exception {
    new Bloom("$test", "data/c1x_cookies.csv");
    BloomFilter b = (BloomFilter) LookingGlass.get("$test");
    assertNotNull(b);

    int nP = 0; // lines the filter reports as possibly contained
    int k = 0;  // total lines read
    // try-with-resources: the reader is closed even when an assertion or read fails.
    try (BufferedReader br = new BufferedReader(new FileReader("data/c1x_cookies.csv"))) {
        String line;
        while ((line = br.readLine()) != null) {
            if (b.mightContain(line)) {
                nP++;
            }
            k++;
        }
    }
    // Every line read must have been reported by the filter.
    assertTrue(k == nP);
}
Example #8
Source File: XpTrackerService.java From runelite with BSD 2-Clause "Simplified" License | 6 votes |
/**
 * Creates a new string Bloom filter and adds every entry currently in
 * {@code usernameUpdateQueue} to it.
 *
 * @return the newly created filter
 */
private BloomFilter<String> createFilter()
{
	final BloomFilter<String> filter = BloomFilter.create(
		Funnels.stringFunnel(Charset.defaultCharset()),
		BLOOMFILTER_EXPECTED_INSERTIONS
	);

	// Hold the queue's monitor while it is being traversed.
	synchronized (usernameUpdateQueue)
	{
		usernameUpdateQueue.forEach(filter::put);
	}

	return filter;
}
Example #9
Source File: PremiumList.java From nomulus with Apache License 2.0 | 6 votes |
/** Returns a new PremiumListRevision for the given key and premium list map. */ @VisibleForTesting public static PremiumListRevision create(PremiumList parent, Set<String> premiumLabels) { PremiumListRevision revision = new PremiumListRevision(); revision.parent = Key.create(parent); revision.revisionId = allocateId(); // All premium list labels are already punycoded, so don't perform any further character // encoding on them. revision.probablePremiumLabels = BloomFilter.create(unencodedCharsFunnel(), premiumLabels.size()); premiumLabels.forEach(revision.probablePremiumLabels::put); try { ByteArrayOutputStream bos = new ByteArrayOutputStream(); revision.probablePremiumLabels.writeTo(bos); checkArgument( bos.size() <= MAX_BLOOM_FILTER_BYTES, "Too many premium labels were specified; Bloom filter exceeds max entity size"); } catch (IOException e) { throw new IllegalStateException("Could not serialize premium labels Bloom filter", e); } return revision; }
Example #10
Source File: ProbableIntersectionCursorState.java From fdb-record-layer with Apache License 2.0 | 5 votes |
/**
 * Creates a cursor state; the superclass receives the cursor, the continuation's child and the
 * comparison key function, and the remaining arguments are stored on this instance.
 *
 * @param cursor the child cursor
 * @param continuation the continuation whose child is handed to the superclass
 * @param comparisonKeyFunction extracts the comparison key from a record
 * @param bloomFilter the Bloom filter kept by this state
 * @param seenSet the set kept by this state
 * @param firstIteration the first-iteration flag kept by this state
 */
private ProbableIntersectionCursorState(
        @Nonnull RecordCursor<T> cursor,
        @Nonnull BloomFilterCursorContinuation continuation,
        @Nonnull Function<? super T, ? extends List<Object>> comparisonKeyFunction,
        @Nonnull BloomFilter<List<Object>> bloomFilter,
        @Nonnull Set<List<Object>> seenSet,
        boolean firstIteration) {
    super(cursor, continuation.getChild(), comparisonKeyFunction);
    this.firstIteration = firstIteration;
    this.seenSet = seenSet;
    this.bloomFilter = bloomFilter;
}
Example #11
Source File: AbstractNGramTokenizationStrategy.java From datawave with Apache License 2.0 | 5 votes |
/** * Constructor * * @param filter * Updated with n-grams tokenized from normalized content */ public AbstractNGramTokenizationStrategy(final BloomFilter<String> filter) { if (null == filter) { this.log.warn("Cannot create n-grams for bloom filter", new IllegalArgumentException("BloomFilter is null")); } this.setFilter(filter); }
Example #12
Source File: TestBloomFiltersSpeed.java From count-db with MIT License | 5 votes |
/**
 * Times membership queries against the given filter for every third value in
 * {@code [0, NUM_OF_VALUES)}.
 *
 * @param bloomFilter2 the filter to query; the query results are discarded
 * @return the elapsed wall-clock time in milliseconds
 */
private static long readValues(BloomFilter<Long> bloomFilter2) {
    final long startedAt = System.currentTimeMillis();
    for (int value = 0; value < NUM_OF_VALUES; value++) {
        if (value % 3 != 0) {
            continue;
        }
        bloomFilter2.mightContain((long) value);
    }
    final long finishedAt = System.currentTimeMillis();
    return finishedAt - startedAt;
}
Example #13
Source File: TestBloomFiltersSpeed.java From count-db with MIT License | 5 votes |
/**
 * Creates three filters for {@code NUM_OF_VALUES} values with a 0.001 false-positive rate and
 * logs the value returned by {@code putValues} and then {@code readValues} for each of them.
 *
 * @param args unused
 */
public static void main(String[] args) {
    final double falsePositiveRate = 0.001;

    LongBloomFilter bloomFilter1 = new LongBloomFilter(NUM_OF_VALUES, falsePositiveRate);
    BloomFilter<Long> bloomFilter2 =
            BloomFilter.create((value, sink) -> sink.putLong(value), NUM_OF_VALUES, falsePositiveRate);
    LongCountsBloomFilter bloomFilter3 = new LongCountsBloomFilter(NUM_OF_VALUES, falsePositiveRate);

    // Write phase
    long written1 = putValues(bloomFilter1);
    Log.i("Writing values for filter 1 took " + written1);
    long written2 = putValues(bloomFilter2);
    Log.i("Writing values for filter 2 took " + written2);
    long written3 = putValues(bloomFilter3);
    Log.i("Writing values for filter 3 took " + written3);

    // Read phase
    long read1 = readValues(bloomFilter1);
    Log.i("Reading values for filter 1 took " + read1);
    long read2 = readValues(bloomFilter2);
    Log.i("Reading values for filter 2 took " + read2);
    long read3 = readValues(bloomFilter3);
    Log.i("Reading values for filter 3 took " + read3);
}
Example #14
Source File: SpillMap.java From phoenix with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Initializes a page: stores its index, size threshold and spill file, clears the page map,
 * creates a byte-array Bloom filter for {@code pageInserts} expected insertions and resets the
 * bookkeeping fields.
 *
 * @param id the page index
 * @param thresholdBytes size threshold of a page
 * @param pageInserts expected insertions passed to the Bloom filter
 * @param spillFile the spill file kept by this page
 */
public MappedByteBufferMap(int id, int thresholdBytes, int pageInserts, SpillFile spillFile) {
    this.pageIndex = id;
    this.spillFile = spillFile;
    // size threshold of a page
    this.thresholdBytes = thresholdBytes;

    pageMap.clear();
    bFilter = BloomFilter.create(Funnels.byteArrayFunnel(), pageInserts);

    // Initial bookkeeping values
    totalResultSize = 0;
    localDepth = 1;
    pagedIn = true;
    dirtyPage = true;
}
Example #15
Source File: NGramTokenizationStrategy.java From datawave with Apache License 2.0 | 5 votes |
/**
 * Applies a tokenized n-gram to the BloomFilter based on the specified normalized content
 *
 * <p>The superclass is consulted first; only when it reports no update is the n-gram applied to
 * this strategy's filter, and only if both the n-gram and the filter are non-null.
 *
 * @param ngram
 *            An n-gram generated from the specified normalized content
 * @param content
 *            A normalized field name and value
 * @return true, if the n-gram was applied to the strategy's BloomFilter
 * @throws TokenizationException
 *             declared by this method; the code here does not throw it directly
 */
protected boolean updateFilter(final String ngram, final NormalizedContentInterface content) throws TokenizationException {
    if (super.updateFilter(ngram, content)) {
        return true;
    }
    
    final BloomFilter<String> filter = this.getFilter();
    if ((null == ngram) || (null == filter)) {
        return false;
    }
    
    MemberShipTest.update(filter, ngram);
    return true;
}
Example #16
Source File: UniqueTransform.java From datawave with Apache License 2.0 | 5 votes |
public UniqueTransform(Set<String> fields) { this.fields = deconstruct(fields); this.bloom = BloomFilter.create(new ByteFunnel(), 500000, 1e-15); if (DEBUG) { this.seen = new HashSet<>(); } if (log.isTraceEnabled()) log.trace("unique fields: " + this.fields); }
Example #17
Source File: ListsTest.java From spring-boot-cookbook with Apache License 2.0 | 5 votes |
/** * https://mp.weixin.qq.com/s/VGtH_DXI6paogOekrodixA */ @Test public void testBloomFilterWithGuava() { StopWatch stopWatch = new StopWatch("guavaBloomFilterDemo"); stopWatch.start("init"); int num = 10000000; BloomFilter<Integer> bloomFilter = BloomFilter.create(Funnels.integerFunnel(), num, 0.01); for (int i = 0; i < num; i++) { bloomFilter.put(i); } stopWatch.stop(); stopWatch.start("check-1"); assertThat(bloomFilter.mightContain(1)).isTrue(); stopWatch.stop(); stopWatch.start("check-9999"); assertThat(bloomFilter.mightContain(9999)).isTrue(); stopWatch.stop(); stopWatch.start("check-1234567"); assertThat(bloomFilter.mightContain(1234567)).isTrue(); stopWatch.stop(); System.out.println(stopWatch.prettyPrint()); // ----------------------------------------- // ms % Task name // ----------------------------------------- // 04443 099% init // 00037 001% check-1 // 00000 000% check-9999 // 00000 000% check-1234567 }
Example #18
Source File: BloomFilters.java From quarantyne with Apache License 2.0 | 5 votes |
/**
 * Rebuilds a string Bloom filter (UTF-8 funnel) from the bytes exposed by an asset.
 *
 * @param asset the asset whose {@code getBytes()} supplies the serialized filter
 * @return a {@link BloomFilter}
 * @throws AssetException wrapping the {@link IOException} raised while reading the filter
 */
public static BloomFilter<String> deserialize(Asset asset) throws AssetException {
  final BloomFilter<String> restored;
  try {
    restored = BloomFilter.readFrom(asset.getBytes(), Funnels.stringFunnel(Charsets.UTF_8));
  } catch (IOException readFailure) {
    throw new AssetException(readFailure);
  }
  return restored;
}
Example #19
Source File: CompromisedPasswordClassifierTest.java From quarantyne with Apache License 2.0 | 5 votes |
@Test public void testClassifier() { BloomFilter<String> bloom = BloomFilter.create(Funnels.stringFunnel(Charsets.UTF_8), 3); bloom.put("alpha"); bloom.put("bravo"); bloom.put("charlie"); Supplier<Config> configSupplier = () -> Config.builder() .loginAction(new QIdentityAction("/login", "email", "password")) .registerAction(new QIdentityAction("/register", "email", "password")) .build(); CompromisedPasswordClassifier classifier = new CompromisedPasswordClassifier(bloom, configSupplier); HttpRequest defaultRequest = TestHttpRequest.REQ(); // null empty assertThat(classifier.classify(defaultRequest, null)).isEqualTo(Label.NONE); assertThat(classifier.classify(defaultRequest, TestHttpRequestBody.EMPTY)).isEqualTo(Label.NONE); // no key matches password assertThat(classifier.classify(defaultRequest, TestHttpRequestBody.make(new JsonObject().put("name", "john")))).isEqualTo(Label.NONE); // a key matches password but password is not in bloomf assertThat(classifier.classify(defaultRequest, TestHttpRequestBody.make(new JsonObject().put("password", "delta")))).isEqualTo(Label.NONE); // match HttpRequest requestOnPath = new TestHttpRequest.Builder().setPath("/login").build(); assertThat(classifier.classify(requestOnPath, TestHttpRequestBody.make(new JsonObject().put("password", "bravo")))).isEqualTo( Label.COMPROMISED_PASSWORD); }
Example #20
Source File: DisposableEmailClassifierTest.java From quarantyne with Apache License 2.0 | 5 votes |
@Test public void testClassifier() { BloomFilter<String> bloom = BloomFilter.create(Funnels.stringFunnel(Charsets.UTF_8), 2); bloom.put("disposable.com"); bloom.put("junk.com"); Supplier<Config> configSupplier = () -> Config .builder() .emailParamKeys(Sets.newHashSet("email")) .registerAction(new QIdentityAction("/register", "email", "password")) .build(); DisposableEmailClassifier classifier = new DisposableEmailClassifier(bloom, configSupplier); HttpRequest defaultRequest = TestHttpRequest.REQ(); // null empty assertThat(classifier.classify(defaultRequest, null)).isEqualTo(Label.NONE); assertThat(classifier.classify(defaultRequest, TestHttpRequestBody.EMPTY)).isEqualTo(Label.NONE); // no key matches password assertThat(classifier.classify(defaultRequest, TestHttpRequestBody.make(new JsonObject().put("name", "john")))).isEqualTo(Label.NONE); // a key matches password but password is not in bloomf assertThat(classifier.classify(defaultRequest, TestHttpRequestBody.make(new JsonObject().put("email", "[email protected]")))).isEqualTo(Label.NONE); // match HttpRequest req = new TestHttpRequest.Builder().setPath("/register").build(); assertThat(classifier.classify(req, TestHttpRequestBody.make(new JsonObject().put("email", "[email protected]")))).isEqualTo( Label.DISPOSABLE_EMAIL); }
Example #21
Source File: BloomFilters.java From quarantyne with Apache License 2.0 | 5 votes |
/**
 * Make a BF from its serialized form
 *
 * <p>The resource is read fully into memory and the filter is decoded from that byte array, so no
 * additional buffering is needed; the stream is closed before returning.
 *
 * @param resourceName name of the resource holding the serialized filter, resolved with
 *     {@code Resources.getResource}
 * @return a {@link BloomFilter}
 * @throws IOException if the resource cannot be read or does not decode as a Bloom filter
 */
public static BloomFilter<String> deserialize(String resourceName) throws IOException {
  byte[] serialized = Resources.toByteArray(Resources.getResource(resourceName));
  try (InputStream is = new ByteArrayInputStream(serialized)) {
    return BloomFilter.readFrom(is, FUNNEL);
  }
}
Example #22
Source File: BloomFilterConverter.java From nomulus with Apache License 2.0 | 5 votes |
/**
 * Serializes a Bloom filter to the byte array stored in the database column.
 *
 * @param entity the filter to serialize, may be {@code null}
 * @return the serialized bytes, or {@code null} when {@code entity} is {@code null}
 * @throws UncheckedIOException if writing the filter fails
 */
@Override
@Nullable
public byte[] convertToDatabaseColumn(@Nullable BloomFilter<String> entity) {
  if (entity == null) {
    return null;
  }
  try {
    ByteArrayOutputStream serialized = new ByteArrayOutputStream();
    entity.writeTo(serialized);
    return serialized.toByteArray();
  } catch (IOException e) {
    throw new UncheckedIOException("Error saving Bloom filter data", e);
  }
}
Example #23
Source File: RecordSet.java From db with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Retrains the bloom filter. Should be called intermittently.
 *
 * <p>Does nothing unless {@code bloomChanged} is set. Otherwise the filter is replaced by a new
 * one and the id of every record in {@code recordSet} is added to it.
 *
 * <p>NOTE(review): {@code bloomChanged} is not cleared here - confirm it is reset elsewhere.
 */
public void reBloom() {
    if (!bloomChanged) {
        return;
    }

    bloomFilter = BloomFilter.create(Funnels.integerFunnel(), Performance.HASH_BUCKET, 0.01);
    recordSet.forEach(record -> {
        bloomFilter.put(record.getId());
    });
}
Example #24
Source File: ScalableBloomFilter.java From nexus-public with Eclipse Public License 1.0 | 5 votes |
/**
 * Determines whether across all filters there is a chance that this element has already been added.
 *
 * <p>Evaluation stops at the first filter that reports a possible match.
 *
 * @param input - the element to check.
 * @return whether the element may exist in the filter.
 */
public boolean mightContain(final T input) {
  return filters.stream().anyMatch(candidate -> candidate.mightContain(input));
}
Example #25
Source File: PremiumListTest.java From nomulus with Apache License 2.0 | 5 votes |
/**
 * Checks that the Bloom filter of a premium list built from {@code TEST_PRICES} reports three
 * expected labels as possibly present and three other labels as absent.
 */
@Test
public void bloomFilter_worksCorrectly() {
  PremiumList premiumList = PremiumList.create("testname", CurrencyUnit.USD, TEST_PRICES);
  BloomFilter<String> bloomFilter = premiumList.getBloomFilter();

  for (String present : ImmutableSet.of("silver", "gold", "palladium")) {
    assertThat(bloomFilter.mightContain(present)).isTrue();
  }
  for (String absent : ImmutableSet.of("dirt", "pyrite", "zirconia")) {
    assertThat(bloomFilter.mightContain(absent)).isFalse();
  }
}
Example #26
Source File: Schema.java From metanome-algorithms with Apache License 2.0 | 5 votes |
/**
 * Creates a schema by storing every argument in the field of the same name; no argument is
 * copied or validated.
 *
 * @param size stored as {@code size}
 * @param attributes stored as {@code attributes}
 * @param primaryKey stored as {@code primaryKey}
 * @param fdKeys stored as {@code fdKeys}
 * @param fds stored as {@code fds}
 * @param referencedSchemata stored as {@code referencedSchemata}
 * @param minValueLengths stored as {@code minValueLengths}
 * @param maxValueLengths stored as {@code maxValueLengths}
 * @param nullValueCounts stored as {@code nullValueCounts}
 * @param bloomFilters stored as {@code bloomFilters}
 */
public Schema(
		int size,
		BitSet attributes,
		FunctionalDependency primaryKey,
		List<FunctionalDependency> fdKeys,
		List<FunctionalDependency> fds,
		List<Schema> referencedSchemata,
		int[] minValueLengths,
		int[] maxValueLengths,
		int[] nullValueCounts,
		List<BloomFilter<CharSequence>> bloomFilters) {
	// Structure
	this.size = size;
	this.attributes = attributes;
	this.referencedSchemata = referencedSchemata;

	// Dependencies
	this.primaryKey = primaryKey;
	this.fdKeys = fdKeys;
	this.fds = fds;

	// Per-attribute statistics
	this.minValueLengths = minValueLengths;
	this.maxValueLengths = maxValueLengths;
	this.nullValueCounts = nullValueCounts;
	this.bloomFilters = bloomFilters;
}
Example #27
Source File: BloomFilterConverter.java From nomulus with Apache License 2.0 | 5 votes |
/**
 * Rebuilds a Bloom filter (US-ASCII string funnel) from the bytes stored in the database column.
 *
 * @param columnValue the serialized filter, may be {@code null}
 * @return the deserialized filter, or {@code null} when {@code columnValue} is {@code null}
 * @throws UncheckedIOException if reading the filter fails
 */
@Override
@Nullable
public BloomFilter<String> convertToEntityAttribute(@Nullable byte[] columnValue) {
  if (columnValue == null) {
    return null;
  }
  final BloomFilter<String> restored;
  try {
    ByteArrayInputStream serialized = new ByteArrayInputStream(columnValue);
    restored = BloomFilter.readFrom(serialized, stringFunnel(US_ASCII));
  } catch (IOException e) {
    throw new UncheckedIOException("Error loading Bloom filter data", e);
  }
  return restored;
}
Example #28
Source File: PeerTable.java From besu with Apache License 2.0 | 5 votes |
/**
 * Builds a new Bloom filter from the ids of all peers returned by {@code streamAllPeers()},
 * stores it in {@code idBloom} and resets {@code evictionCnt} to zero.
 */
private void buildBloomFilter() {
  final BloomFilter<Bytes> rebuilt =
      BloomFilter.create((id, sink) -> sink.putBytes(id.toArray()), maxEntriesCnt, 0.001);
  streamAllPeers().forEach(peer -> rebuilt.put(peer.getId()));

  // The filter is stored only after it has been fully populated.
  this.idBloom = rebuilt;
  this.evictionCnt = 0;
}
Example #29
Source File: BloomFilterConverterTest.java From nomulus with Apache License 2.0 | 5 votes |
/**
 * Persists an entity holding a Bloom filter, loads it back by id and checks that the loaded
 * filter equals the original one.
 */
@Test
public void roundTripConversion_returnsSameBloomFilter() {
  BloomFilter<String> original = BloomFilter.create(stringFunnel(US_ASCII), 3);
  for (String element : ImmutableSet.of("foo", "bar", "baz")) {
    original.put(element);
  }

  TestEntity entity = new TestEntity(original);
  jpaTm().transact(() -> jpaTm().getEntityManager().persist(entity));

  TestEntity persisted =
      jpaTm().transact(() -> jpaTm().getEntityManager().find(TestEntity.class, "id"));
  assertThat(persisted.bloomFilter).isEqualTo(original);
}
Example #30
Source File: PremiumList.java From nomulus with Apache License 2.0 | 5 votes |
/**
 * Creates a premium list and a Bloom filter containing every label of {@code labelsToPrices}.
 *
 * @param name stored as the list name
 * @param currency stored as the list currency
 * @param labelsToPrices stored as-is; its key set populates the Bloom filter
 */
private PremiumList(String name, CurrencyUnit currency, Map<String, BigDecimal> labelsToPrices) {
  this.name = name;
  this.currency = currency;
  this.labelsToPrices = labelsToPrices;

  // ASCII is used for the charset because all premium list domain labels are stored punycoded.
  this.bloomFilter = BloomFilter.create(stringFunnel(US_ASCII), labelsToPrices.size());
  for (String label : labelsToPrices.keySet()) {
    this.bloomFilter.put(label);
  }
}