Java Code Examples for com.google.common.hash.BloomFilter#create()

The following examples show how to use com.google.common.hash.BloomFilter#create(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: ProbableIntersectionCursorState.java    From fdb-record-layer with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the cursor state for one child from its continuation.
 *
 * <p>The Bloom filter is deserialized from the continuation's bloom bytes when they are
 * present; otherwise a new filter is created from {@code expectedInsertions} and
 * {@code falsePositiveRate}. If the continuation reports that the child has ended, the
 * state wraps an empty cursor; otherwise the child cursor is obtained by applying
 * {@code cursorFunction} to the child continuation's bytes.
 *
 * @param cursorFunction produces the child cursor from continuation bytes
 * @param continuation the continuation to resume from
 * @param comparisonKeyFunction extracts the comparison key from a record
 * @param expectedInsertions expected insertion count used when a new filter is created
 * @param falsePositiveRate false-positive probability used when a new filter is created
 * @param <T> the record type produced by the cursor
 * @return the state for this child
 * @throws RecordCoreException if the serialized Bloom filter cannot be read
 */
@Nonnull
static <T> ProbableIntersectionCursorState<T> from(
        @Nonnull Function<byte[], RecordCursor<T>> cursorFunction,
        @Nonnull BloomFilterCursorContinuation continuation,
        @Nonnull Function<? super T, ? extends List<Object>> comparisonKeyFunction,
        long expectedInsertions, double falsePositiveRate) {
    BloomFilter<List<Object>> filter;
    if (continuation.getBloomBytes() != null) {
        // A previous execution already serialized a filter: restore it.
        try {
            filter = BloomFilter.readFrom(continuation.getBloomBytes().newInput(), KeyFunnel.VERSION_0);
        } catch (IOException err) {
            throw new RecordCoreException("unable to deserialize bloom filter", err);
        }
    } else {
        filter = BloomFilter.create(KeyFunnel.VERSION_0, expectedInsertions, falsePositiveRate);
    }

    if (!continuation.isChildEnd()) {
        RecordCursor<T> childCursor = cursorFunction.apply(continuation.getChild().toBytes());
        // The final flag is true only when no serialized filter was supplied.
        return new ProbableIntersectionCursorState<>(
                childCursor,
                continuation,
                comparisonKeyFunction,
                filter,
                new HashSet<>(),
                continuation.getBloomBytes() == null);
    }

    // The child is exhausted: nothing left to read.
    return new ProbableIntersectionCursorState<>(
            RecordCursor.empty(),
            continuation,
            comparisonKeyFunction,
            filter,
            Collections.emptySet(),
            false);
}
 
Example 2
Source File: BloomFilterUnitTest.java    From tutorials with MIT License 6 votes vote down vote up
/**
 * Inserts three integers into a filter created for 500 expected insertions and checks that
 * each inserted value is reported as possibly present while an absent value is not.
 */
@Test
public void givenBloomFilter_whenAddNStringsToIt_thenShouldNotReturnAnyFalsePositive() {
    // given
    final int expectedInsertions = 500;
    final double falsePositiveProbability = 0.01;
    BloomFilter<Integer> filter =
            BloomFilter.create(Funnels.integerFunnel(), expectedInsertions, falsePositiveProbability);

    // when
    for (int value = 1; value <= 3; value++) {
        filter.put(value);
    }

    // then
    // inserted values must always be reported; a "true" for an absent value has ~1% probability
    for (int value = 1; value <= 3; value++) {
        assertThat(filter.mightContain(value)).isTrue();
    }

    assertThat(filter.mightContain(100)).isFalse();
}
 
Example 3
Source File: SpillMap.java    From phoenix with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a page with the given index backed by {@code spillFile}.
 *
 * @param id index of this page
 * @param thresholdBytes size threshold of a page, in bytes
 * @param pageInserts expected number of insertions used to size the page's Bloom filter
 * @param spillFile the spill file backing this page
 */
public FileMap(int id, int thresholdBytes, int pageInserts, SpillFile spillFile) {
    // Identity and limits of the page.
    this.pageIndex = id;
    this.thresholdBytes = thresholdBytes;
    this.spillFile = spillFile;

    // Start from an empty page with a fresh Bloom filter over its keys.
    pageMap.clear();
    bFilter = BloomFilter.create(Funnels.byteArrayFunnel(), pageInserts);

    // Initial bookkeeping state.
    totalResultSize = 0;
    localDepth = 1;
    pagedIn = true;
    dirtyPage = true;
}
 
Example 4
Source File: BloomFilterTest.java    From jdk-source-analysis with Apache License 2.0 5 votes vote down vote up
/**
 * Prints the membership answers for the keys 1 and 2 before and after they are inserted
 * into a filter created for 1500 expected insertions.
 */
@Test
public void test() {
    BloomFilter<Integer> filter = BloomFilter.create(Funnels.integerFunnel(), 1500, 0.01);

    // Before insertion.
    for (int key = 1; key <= 2; key++) {
        System.out.println(filter.mightContain(key));
    }

    for (int key = 1; key <= 2; key++) {
        filter.put(key);
    }

    // After insertion.
    for (int key = 1; key <= 2; key++) {
        System.out.println(filter.mightContain(key));
    }
}
 
Example 5
Source File: ListsTest.java    From spring-boot-cookbook with Apache License 2.0 5 votes vote down vote up
/**
     * Times the population of a Guava Bloom filter with the integers {@code 0..num-1} and
     * three subsequent membership lookups, then prints the stopwatch report.
     *
     * <p>Reference article: https://mp.weixin.qq.com/s/VGtH_DXI6paogOekrodixA
     */
    @Test
    public void testBloomFilterWithGuava() {
        StopWatch stopWatch = new StopWatch("guavaBloomFilterDemo");
        // The "init" task covers both creating the filter and inserting every value.
        stopWatch.start("init");
        int num = 10000000;
        BloomFilter<Integer> bloomFilter = BloomFilter.create(Funnels.integerFunnel(), num, 0.01);
        for (int i = 0; i < num; i++) {
            bloomFilter.put(i);
        }
        stopWatch.stop();
        // Each lookup below targets a value that was inserted above, so it must report true.
        stopWatch.start("check-1");
        assertThat(bloomFilter.mightContain(1)).isTrue();
        stopWatch.stop();

        stopWatch.start("check-9999");
        assertThat(bloomFilter.mightContain(9999)).isTrue();
        stopWatch.stop();

        stopWatch.start("check-1234567");
        assertThat(bloomFilter.mightContain(1234567)).isTrue();
        stopWatch.stop();

        System.out.println(stopWatch.prettyPrint());

        // Presumably the report printed by one earlier run (timings will vary):
//        -----------------------------------------
//        ms     %     Task name
//        -----------------------------------------
//        04443  099%  init
//        00037  001%  check-1
//        00000  000%  check-9999
//        00000  000%  check-1234567

    }
 
Example 6
Source File: BloomFilterConverterTest.java    From nomulus with Apache License 2.0 5 votes vote down vote up
/**
 * Persists an entity holding a populated Bloom filter, loads it back by id, and checks that
 * the loaded filter equals the one that was stored.
 */
@Test
public void roundTripConversion_returnsSameBloomFilter() {
  BloomFilter<String> original = BloomFilter.create(stringFunnel(US_ASCII), 3);
  for (String member : ImmutableSet.of("foo", "bar", "baz")) {
    original.put(member);
  }

  TestEntity toPersist = new TestEntity(original);
  jpaTm().transact(() -> jpaTm().getEntityManager().persist(toPersist));

  TestEntity reloaded =
      jpaTm().transact(() -> jpaTm().getEntityManager().find(TestEntity.class, "id"));

  assertThat(reloaded.bloomFilter).isEqualTo(original);
}
 
Example 7
Source File: Bloom.java    From XRTB with Apache License 2.0 5 votes vote down vote up
/**
 * Reads a file or S3 object line by line and loads the filter.
 *
 * <p>The first field of every line (with double quotes removed) is added to the filter, and
 * {@code this.size} ends up holding the number of lines read. The expected number of
 * insertions is estimated as {@code size / (firstLineLength - 5)}; both the divisor and the
 * estimate are clamped to at least 1 so that very short first lines cannot cause a division
 * by zero or a negative capacity.
 *
 * @param br BufferedReader. The line-by-line reader. It is always closed before this method
 *           returns, including when an error occurs.
 * @param size value divided by the approximate line length to estimate the number of
 *             entries (presumably the total size of the input in bytes; confirm with callers).
 * @throws IllegalArgumentException if the input contains no lines.
 * @throws Exception on I/O errors.
 */
void makeFilter(BufferedReader br, long size) throws Exception {
	final double fpp = 0.03; // desired false positive probability

	// try-with-resources guarantees the reader is closed even if reading or parsing fails.
	try (BufferedReader reader = br) {
		String line = reader.readLine();
		if (line == null) {
			throw new IllegalArgumentException("Cannot build a bloom filter from empty input");
		}

		// The first line serves as the sample for the per-line length. The 5 subtracted
		// characters are presumably per-line overhead (quotes, separators); confirm.
		long approxLineLength = Math.max(1L, line.length() - 5L);
		long expectedInsertions = Math.max(1L, size / approxLineLength);

		bloomFilter = BloomFilter.create(Funnels.stringFunnel(Charset.forName("UTF-8")), expectedInsertions, fpp);
		this.size = 0;

		do {
			String[] parts = eatquotedStrings(line);
			// Only the first field is stored, so only that field needs its quotes removed.
			bloomFilter.put(parts[0].replace("\"", ""));
			this.size++;
		} while ((line = reader.readLine()) != null);
	}
}
 
Example 8
Source File: CompromisedPasswordClassifierTest.java    From quarantyne with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the classifier labels a request as a compromised password only when the
 * request is on a configured path and its password is contained in the Bloom filter.
 */
@Test
public void testClassifier() {
  BloomFilter<String> compromised =
      BloomFilter.create(Funnels.stringFunnel(Charsets.UTF_8), 3);
  for (String password : new String[] {"alpha", "bravo", "charlie"}) {
    compromised.put(password);
  }

  Supplier<Config> configs =
      () ->
          Config.builder()
              .loginAction(new QIdentityAction("/login", "email", "password"))
              .registerAction(new QIdentityAction("/register", "email", "password"))
              .build();
  CompromisedPasswordClassifier classifier =
      new CompromisedPasswordClassifier(compromised, configs);
  HttpRequest anyRequest = TestHttpRequest.REQ();

  // null or empty body
  assertThat(classifier.classify(anyRequest, null)).isEqualTo(Label.NONE);
  assertThat(classifier.classify(anyRequest, TestHttpRequestBody.EMPTY)).isEqualTo(Label.NONE);

  // body has no password key
  assertThat(
          classifier.classify(
              anyRequest, TestHttpRequestBody.make(new JsonObject().put("name", "john"))))
      .isEqualTo(Label.NONE);

  // body has a password key, but the password is not in the Bloom filter
  assertThat(
          classifier.classify(
              anyRequest, TestHttpRequestBody.make(new JsonObject().put("password", "delta"))))
      .isEqualTo(Label.NONE);

  // request on the login path with a password that is in the Bloom filter
  HttpRequest loginRequest = new TestHttpRequest.Builder().setPath("/login").build();
  assertThat(
          classifier.classify(
              loginRequest, TestHttpRequestBody.make(new JsonObject().put("password", "bravo"))))
      .isEqualTo(Label.COMPROMISED_PASSWORD);
}
 
Example 9
Source File: DisposableEmailClassifierTest.java    From quarantyne with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the classifier labels a request as using a disposable email only when the
 * request is on the configured register path and its email is flagged via the Bloom filter
 * of disposable domains.
 */
@Test
public void testClassifier() {
  BloomFilter<String> disposableDomains =
      BloomFilter.create(Funnels.stringFunnel(Charsets.UTF_8), 2);
  for (String domain : new String[] {"disposable.com", "junk.com"}) {
    disposableDomains.put(domain);
  }

  Supplier<Config> configs =
      () ->
          Config.builder()
              .emailParamKeys(Sets.newHashSet("email"))
              .registerAction(new QIdentityAction("/register", "email", "password"))
              .build();
  DisposableEmailClassifier classifier =
      new DisposableEmailClassifier(disposableDomains, configs);
  HttpRequest anyRequest = TestHttpRequest.REQ();

  // null or empty body
  assertThat(classifier.classify(anyRequest, null)).isEqualTo(Label.NONE);
  assertThat(classifier.classify(anyRequest, TestHttpRequestBody.EMPTY)).isEqualTo(Label.NONE);

  // body has no email key
  assertThat(
          classifier.classify(
              anyRequest, TestHttpRequestBody.make(new JsonObject().put("name", "john"))))
      .isEqualTo(Label.NONE);

  // body has an email key, but its domain is not in the Bloom filter
  assertThat(
          classifier.classify(
              anyRequest,
              TestHttpRequestBody.make(new JsonObject().put("email", "john@gmail.com"))))
      .isEqualTo(Label.NONE);

  // request on the register path with an email on a disposable domain
  HttpRequest registerRequest = new TestHttpRequest.Builder().setPath("/register").build();
  assertThat(
          classifier.classify(
              registerRequest,
              TestHttpRequestBody.make(new JsonObject().put("email", "spammy@disposable.com"))))
      .isEqualTo(Label.DISPOSABLE_EMAIL);
}
 
Example 10
Source File: TestBloomFiltersSpeed.java    From count-db with MIT License 5 votes vote down vote up
/**
 * Builds three Bloom filter implementations with the same capacity and false-positive
 * probability, then logs the result of the write pass followed by the read pass for each.
 *
 * @param args unused
 */
public static void main(String[] args) {
    final double targetFpp = 0.001;

    LongBloomFilter longFilter = new LongBloomFilter(NUM_OF_VALUES, targetFpp);
    BloomFilter<Long> guavaFilter =
            BloomFilter.create((value, sink) -> sink.putLong(value), NUM_OF_VALUES, targetFpp);
    LongCountsBloomFilter countsFilter = new LongCountsBloomFilter(NUM_OF_VALUES, targetFpp);

    // Write pass, in the same filter order as construction.
    Log.i("Writing values for filter 1 took " + putValues(longFilter));
    Log.i("Writing values for filter 2 took " + putValues(guavaFilter));
    Log.i("Writing values for filter 3 took " + putValues(countsFilter));

    // Read pass.
    Log.i("Reading values for filter 1 took " + readValues(longFilter));
    Log.i("Reading values for filter 2 took " + readValues(guavaFilter));
    Log.i("Reading values for filter 3 took " + readValues(countsFilter));
}
 
Example 11
Source File: PeerTable.java    From besu with Apache License 2.0 5 votes vote down vote up
/**
 * Rebuilds the id Bloom filter from the ids of all current peers and resets the eviction
 * counter.
 */
private void buildBloomFilter() {
  final BloomFilter<Bytes> rebuilt =
      BloomFilter.create((id, sink) -> sink.putBytes(id.toArray()), maxEntriesCnt, 0.001);
  streamAllPeers().forEach(peer -> rebuilt.put(peer.getId()));
  // Publish the new filter only after it is fully populated.
  this.evictionCnt = 0;
  this.idBloom = rebuilt;
}
 
Example 12
Source File: PremiumList.java    From nomulus with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a premium list and a Bloom filter containing every label in it.
 *
 * @param name name of the list
 * @param currency currency of the prices
 * @param labelsToPrices mapping from label to price; its key set populates the Bloom filter
 */
private PremiumList(String name, CurrencyUnit currency, Map<String, BigDecimal> labelsToPrices) {
  this.name = name;
  this.currency = currency;
  this.labelsToPrices = labelsToPrices;

  // All premium list domain labels are stored punycoded, so ASCII is a sufficient charset.
  this.bloomFilter = BloomFilter.create(stringFunnel(US_ASCII), labelsToPrices.size());
  for (String label : labelsToPrices.keySet()) {
    this.bloomFilter.put(label);
  }
}
 
Example 13
Source File: SpillMap.java    From phoenix with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates a page with the given index backed by {@code spillFile}.
 *
 * @param id index of this page
 * @param thresholdBytes size threshold of a page, in bytes
 * @param pageInserts expected number of insertions used to size the page's Bloom filter
 * @param spillFile the spill file backing this page
 */
public MappedByteBufferMap(int id, int thresholdBytes, int pageInserts, SpillFile spillFile) {
    // Identity and limits of the page.
    this.pageIndex = id;
    this.thresholdBytes = thresholdBytes;
    this.spillFile = spillFile;

    // Start from an empty page with a fresh Bloom filter over its keys.
    pageMap.clear();
    bFilter = BloomFilter.create(Funnels.byteArrayFunnel(), pageInserts);

    // Initial bookkeeping state.
    totalResultSize = 0;
    localDepth = 1;
    pagedIn = true;
    dirtyPage = true;
}
 
Example 14
Source File: CorporaBloomFilter.java    From modernmt with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an empty filter backed by a Guava Bloom filter.
 *
 * @param expectedEntries expected number of insertions
 * @param fpp desired false-positive probability
 */
public CorporaBloomFilter(long expectedEntries, double fpp) {
    StringFunnel funnel = new StringFunnel();
    this.bloomFilter = BloomFilter.create(funnel, expectedEntries, fpp);
}
 
Example 15
Source File: Source.java    From meghanada-server with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Creates a source for the given file path with an empty method-call Bloom filter created
 * for 10000 expected insertions and a 1% false-positive probability.
 *
 * @param filePath path of the source file
 */
public Source(String filePath) {
  this.methodCallsBF =
      BloomFilter.create(
          Funnels.stringFunnel(StandardCharsets.UTF_8),
          10000,
          0.01);
  this.filePath = filePath;
}
 
Example 16
Source File: EntryPointDeduplicator.java    From burp-molly-scanner with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Creates a deduplicator with an empty Bloom filter (created for 1000 expected insertions)
 * and an empty BK-tree using Hamming distance.
 *
 * @param callbacks Burp extender callbacks; also the source of the extension helpers
 */
public EntryPointDeduplicator(IBurpExtenderCallbacks callbacks) {
    this.callbacks = callbacks;
    this.helpers = callbacks.getHelpers();
    // Hash strings as UTF-8 rather than with the platform default charset: on a non-Unicode
    // default charset, unmappable characters are all encoded as the same replacement byte,
    // so distinct strings could hash identically and be wrongly treated as duplicates.
    this.dubBloomFilter =
            BloomFilter.create(Funnels.stringFunnel(java.nio.charset.StandardCharsets.UTF_8), 1000);
    this.dubTree = new BKTree<>(new HammingDistance());
}
 
Example 17
Source File: MemberShipTest.java    From datawave with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a Bloom filter that uses a {@code TermFilter} as its funnel.
 *
 * @param expectedInsertions expected number of insertions
 * @param <T> element type of the filter
 * @return a new, empty Bloom filter
 */
public static <T> BloomFilter<T> create(int expectedInsertions) {
    return (BloomFilter<T>) BloomFilter.create(new TermFilter<T>(), expectedInsertions);
}
 
Example 18
Source File: GuavaBloomFilter.java    From caffeine with Apache License 2.0 4 votes vote down vote up
/**
 * Replaces the current Bloom filter with a new, empty one created from
 * {@code expectedInsertions} and {@code fpp}.
 */
private void reset() {
  bloomFilter = BloomFilter.create(Funnels.longFunnel(), expectedInsertions, fpp);
}
 
Example 19
Source File: Bloom.java    From bidder with Apache License 2.0 4 votes vote down vote up
public Bloom(long size, double error) {
	bloomFilter = BloomFilter.create(Funnels.stringFunnel(Charset.forName("UTF-8")), size,error);
}
 
Example 20
Source File: BloomFilterBuilder.java    From bboxdb with Apache License 2.0 2 votes vote down vote up
/**
 * Creates an empty Bloom filter for a given number of keys, using a
 * {@code TupleKeyFunnel} to hash the keys.
 *
 * @param entries the expected number of keys that will be inserted
 * @return a new, empty Bloom filter
 */
public static BloomFilter<String> buildBloomFilter(final long entries) {
	final TupleKeyFunnel keyFunnel = new TupleKeyFunnel();
	return BloomFilter.create(keyFunnel, entries);
}