org.apache.hadoop.util.bloom.Key Java Examples
The following examples show how to use
org.apache.hadoop.util.bloom.Key.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: InternalDynamicBloomFilter.java From hudi with Apache License 2.0 | 6 votes |
/**
 * Adds {@code key} to the filter returned by {@code getActiveStandardBF()}.
 *
 * <p>When no active filter is returned, a new row is appended with {@code addRow()},
 * the last element of {@code matrix} becomes the target and the record counter is
 * reset to zero. The record counter is incremented after the key has been added.
 *
 * @param key the key to add; must not be {@code null}
 * @throws NullPointerException if {@code key} is {@code null}
 */
@Override
public void add(Key key) {
    if (key == null) {
        throw new NullPointerException("Key can not be null");
    }

    org.apache.hadoop.util.bloom.BloomFilter target = getActiveStandardBF();
    if (target == null) {
        // No usable filter: grow the matrix and start counting from scratch.
        addRow();
        final int lastRow = matrix.length - 1;
        target = matrix[lastRow];
        currentNbRecord = 0;
    }

    target.add(key);
    currentNbRecord += 1;
}
Example #2
Source File: BloomFilter.java From hadoop-map-reduce-patterns with Apache License 2.0 | 6 votes |
/**
 * Emits the input record once if any whitespace-delimited token of its
 * {@code "Text"} attribute tests positive against {@code filter}.
 *
 * <p>Records whose {@code "Text"} attribute is null or empty (per
 * {@code isNullOrEmpty}) are skipped. Scanning stops at the first matching token.
 *
 * @param key     ignored
 * @param value   the raw record, parsed with {@code transformXmlToMap}
 * @param context receives {@code (value, NullWritable)} for matching records
 */
@Override
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    final String text = transformXmlToMap(value.toString()).get("Text");
    if (isNullOrEmpty(text)) {
        return;
    }

    for (StringTokenizer tokens = new StringTokenizer(text); tokens.hasMoreTokens(); ) {
        final Key candidate = new Key(tokens.nextToken().getBytes());
        if (!filter.membershipTest(candidate)) {
            continue;
        }
        // First hit is enough: write the record and stop scanning.
        context.write(value, NullWritable.get());
        return;
    }
}
Example #3
Source File: BuildBloom.java From spork with Apache License 2.0 | 6 votes |
@Override public Tuple exec(Tuple input) throws IOException { if (input == null || input.size() == 0) return null; // Strip off the initial level of bag DataBag values = (DataBag)input.get(0); Iterator<Tuple> it = values.iterator(); Tuple t = it.next(); // If the input tuple has only one field, then we'll extract // that field and serialize it into a key. If it has multiple // fields, we'll serialize the whole tuple. byte[] b; if (t.size() == 1) b = DataType.toBytes(t.get(0)); else b = DataType.toBytes(t, DataType.TUPLE); Key k = new Key(b); filter = new BloomFilter(vSize, numHash, hType); filter.add(k); return TupleFactory.getInstance().newTuple(bloomOut()); }
Example #4
Source File: DistinctAggregator.java From compiler with Apache License 2.0 | 6 votes |
/** {@inheritDoc} */ @Override public void aggregate(final String data, final String metadata) throws IOException, InterruptedException { // instantiate a bloom filter input key initialized by the data Key key = new Key(data.getBytes()); // if the key is already in the filter, forget it if (this.filter.membershipTest(key)) return; // add the key to the bloom filter this.filter.add(key); // and collect it this.collect(data); }
Example #5
Source File: UniqueAggregator.java From compiler with Apache License 2.0 | 6 votes |
/** {@inheritDoc} */ @Override public void aggregate(final String data, final String metadata) throws IOException, InterruptedException { // instantiate a bloom filter input key initialized by the data final Key key = new Key(data.getBytes()); // if the key is already in the filter, forget about it if (this.filter.membershipTest(key)) return; // add the key to the bloom filter this.filter.add(key); if (this.isCombining()) this.collect(data); else this.total++; }
Example #6
Source File: BloomContainsUDFTest.java From incubator-hivemall with Apache License 2.0 | 6 votes |
/**
 * Builds a dynamic Bloom filter populated with {@code size} pseudo-random keys.
 *
 * <p>Each key is the hex-string form of a Gaussian sample drawn from a
 * {@link Random} seeded with {@code seed}, converted to bytes through a
 * {@code Text} instance. The same seed therefore yields the same key sequence.
 *
 * @param seed seed for the random generator
 * @param size number of keys to insert
 * @return the populated filter, created via {@code BloomFilterUtils.newDynamicBloomFilter(30)}
 */
@Nonnull
private static DynamicBloomFilter createBloomFilter(long seed, int size) {
    final DynamicBloomFilter result = BloomFilterUtils.newDynamicBloomFilter(30);
    final Key reusableKey = new Key();
    final Random gaussianSource = new Random(seed);

    int inserted = 0;
    while (inserted < size) {
        final String hex = Double.toHexString(gaussianSource.nextGaussian());
        final Text wrapped = new Text(hex);
        reusableKey.set(wrapped.copyBytes(), 1.0);
        result.add(reusableKey);
        inserted++;
    }
    return result;
}
Example #7
Source File: BloomNotUDFTest.java From incubator-hivemall with Apache License 2.0 | 6 votes |
/**
 * Builds a dynamic Bloom filter populated with {@code size} pseudo-random keys.
 *
 * <p>Each key is the byte form of the hex string of a Gaussian sample drawn from a
 * {@link Random} seeded with {@code seed}, so the same seed yields the same keys.
 *
 * @param seed seed for the random generator
 * @param size number of keys to insert
 * @return the populated filter, created via {@code BloomFilterUtils.newDynamicBloomFilter(3000)}
 */
@Nonnull
private static DynamicBloomFilter createBloomFilter(long seed, int size) {
    final DynamicBloomFilter result = BloomFilterUtils.newDynamicBloomFilter(3000);
    final Key reusableKey = new Key();
    final Random gaussianSource = new Random(seed);

    int inserted = 0;
    while (inserted < size) {
        final String hex = Double.toHexString(gaussianSource.nextGaussian());
        reusableKey.set(hex.getBytes(), 1.0);
        result.add(reusableKey);
        inserted++;
    }
    return result;
}
Example #8
Source File: BloomOrUDFTest.java From incubator-hivemall with Apache License 2.0 | 6 votes |
/**
 * Builds a dynamic Bloom filter populated with {@code size} pseudo-random keys.
 *
 * <p>Each key is the byte form of the hex string of a Gaussian sample drawn from a
 * {@link Random} seeded with {@code seed}, so the same seed yields the same keys.
 *
 * @param seed seed for the random generator
 * @param size number of keys to insert
 * @return the populated filter, created via {@code BloomFilterUtils.newDynamicBloomFilter(3000)}
 */
@Nonnull
private static DynamicBloomFilter createBloomFilter(long seed, int size) {
    final DynamicBloomFilter result = BloomFilterUtils.newDynamicBloomFilter(3000);
    final Key reusableKey = new Key();
    final Random gaussianSource = new Random(seed);

    int inserted = 0;
    while (inserted < size) {
        final String hex = Double.toHexString(gaussianSource.nextGaussian());
        reusableKey.set(hex.getBytes(), 1.0);
        result.add(reusableKey);
        inserted++;
    }
    return result;
}
Example #9
Source File: BloomAndUDFTest.java From incubator-hivemall with Apache License 2.0 | 6 votes |
/**
 * Builds a dynamic Bloom filter populated with {@code size} pseudo-random keys.
 *
 * <p>Each key is the byte form of the hex string of a Gaussian sample drawn from a
 * {@link Random} seeded with {@code seed}, so the same seed yields the same keys.
 *
 * @param seed seed for the random generator
 * @param size number of keys to insert
 * @return the populated filter, created via {@code BloomFilterUtils.newDynamicBloomFilter(3000)}
 */
@Nonnull
private static DynamicBloomFilter createBloomFilter(long seed, int size) {
    final DynamicBloomFilter result = BloomFilterUtils.newDynamicBloomFilter(3000);
    final Key reusableKey = new Key();
    final Random gaussianSource = new Random(seed);

    int inserted = 0;
    while (inserted < size) {
        final String hex = Double.toHexString(gaussianSource.nextGaussian());
        reusableKey.set(hex.getBytes(), 1.0);
        result.add(reusableKey);
        inserted++;
    }
    return result;
}
Example #10
Source File: SimpleBloomFilter.java From hudi with Apache License 2.0 | 5 votes |
/**
 * Adds the UTF-8 encoding of {@code key} to the underlying {@code filter}.
 *
 * @param key the key to add; must not be {@code null}
 * @throws NullPointerException if {@code key} is {@code null}
 */
@Override
public void add(String key) {
    if (key == null) {
        // Message corrected from "cannot by null".
        throw new NullPointerException("Key cannot be null");
    }
    filter.add(new Key(key.getBytes(StandardCharsets.UTF_8)));
}
Example #11
Source File: ReduceSideJoinBloomFilter.java From hadoop-map-reduce-patterns with Apache License 2.0 | 5 votes |
/**
 * Emits a {@code "B"}-tagged copy of the record, keyed by its {@code "UserId"}
 * attribute, when that user id tests positive against {@code bfilter}.
 *
 * <p>Records without a {@code "UserId"} attribute, or whose id is rejected by the
 * filter, produce no output.
 *
 * @param key     ignored
 * @param value   the raw record, parsed with {@code MRDPUtils.transformXmlToMap}
 * @param context receives {@code (outkey, outvalue)} for matching records
 */
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    final Map<String, String> attributes = MRDPUtils.transformXmlToMap(value.toString());
    final String userId = attributes.get("UserId");
    if (userId == null) {
        return;
    }

    final Key probe = new Key(userId.getBytes());
    if (!bfilter.membershipTest(probe)) {
        return;
    }

    outkey.set(userId);
    outvalue.set("B" + value.toString());
    context.write(outkey, outvalue);
}
Example #12
Source File: Bloom.java From spork with Apache License 2.0 | 5 votes |
/**
 * Tests whether {@code input} is reported as a member by {@code filter},
 * calling {@code init()} first if the filter has not been set yet.
 *
 * <p>A single-field tuple is keyed by the serialized form of that field; any
 * other tuple is keyed by the serialized form of the whole tuple.
 *
 * @param input the tuple to test
 * @return the result of the filter's membership test
 * @throws IOException propagated from initialization or serialization
 */
@Override
public Boolean exec(Tuple input) throws IOException {
    if (filter == null) {
        init();
    }

    final byte[] serialized = (input.size() == 1)
            ? DataType.toBytes(input.get(0))
            : DataType.toBytes(input, DataType.TUPLE);

    return filter.membershipTest(new Key(serialized));
}
Example #13
Source File: BloomFilterCreator.java From hiped2 with Apache License 2.0 | 5 votes |
/**
 * Adds the name of every user whose state equals {@code "CA"} to {@code filter}.
 *
 * @param key     ignored
 * @param value   the record, decoded with {@code User.fromText}
 * @param context unused by this method
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    final User parsedUser = User.fromText(value);
    if (!"CA".equals(parsedUser.getState())) {
        return;
    }
    final byte[] nameBytes = parsedUser.getName().getBytes();
    filter.add(new Key(nameBytes));
}
Example #14
Source File: BloomJoin.java From hiped2 with Apache License 2.0 | 5 votes |
/**
 * Emits the record, keyed by its user name, when that name tests positive
 * against {@code filter}.
 *
 * <p>The output value is a {@code Tuple} carrying the result of {@code getDataset()}
 * in the {@code DATASET} field and the original record text in the {@code DATA} field.
 *
 * @param offset  ignored
 * @param value   the raw record; the user name is extracted with {@code getUsername}
 * @param context receives {@code (user, tuple)} for matching records
 */
@Override
protected void map(LongWritable offset, Text value, Context context) throws IOException, InterruptedException {
    final String username = getUsername(value);
    if (!filter.membershipTest(new Key(username.getBytes()))) {
        return;
    }

    final Tuple joined = new Tuple();
    joined.setInt(ValueFields.DATASET, getDataset());
    joined.setString(ValueFields.DATA, value.toString());
    context.write(new Text(username), joined);
}
Example #15
Source File: BloomFilterCreator.java From hiped2 with Apache License 2.0 | 5 votes |
/**
 * Adds {@code key} to {@code filter} when the integer parsed from {@code value}
 * is greater than 30, and remembers {@code output} in the {@code collector} field.
 *
 * <p>The key is also printed to standard output.
 *
 * @param key      the record key
 * @param value    text holding an integer
 * @param output   collector stored in the {@code collector} field
 * @param reporter unused by this method
 * @throws NumberFormatException if {@code value} does not hold a parsable integer
 */
@Override
public void map(Text key, Text value, OutputCollector<NullWritable, BloomFilter> output, Reporter reporter)
        throws IOException {
    System.out.println("K[" + key + "]");

    final int parsedAge = Integer.parseInt(value.toString());
    if (parsedAge > 30) {
        final byte[] keyBytes = key.toString().getBytes();
        filter.add(new Key(keyBytes));
    }

    collector = output;
}
Example #16
Source File: BloomJoin.java From hiped2 with Apache License 2.0 | 5 votes |
/**
 * Passes the {@code (key, value)} pair through unchanged when {@code key} tests
 * positive against {@code filter}; otherwise emits nothing.
 *
 * <p>The key is also printed to standard output.
 *
 * @param key     the record key, tested against the filter
 * @param value   the record value
 * @param context receives matching pairs
 */
@Override
protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    System.out.println("K[" + key + "]");

    final Key probe = new Key(key.toString().getBytes());
    if (!filter.membershipTest(probe)) {
        return;
    }
    context.write(key, value);
}
Example #17
Source File: BloomOrUDFTest.java From incubator-hivemall with Apache License 2.0 | 5 votes |
/**
 * Asserts that two filters give the same membership answer for each of
 * {@code size} pseudo-random keys.
 *
 * <p>Keys are the byte form of the hex string of Gaussian samples drawn from a
 * {@link Random} seeded with {@code seed}.
 *
 * @param expected the reference filter
 * @param actual   the filter under test
 * @param seed     seed for the random generator
 * @param size     number of keys to probe
 */
private static void assertEquals(@Nonnull Filter expected, @Nonnull Filter actual, long seed, int size) {
    final Key probe = new Key();
    final Random gaussianSource = new Random(seed);

    int checked = 0;
    while (checked < size) {
        final String hex = Double.toHexString(gaussianSource.nextGaussian());
        probe.set(hex.getBytes(), 1.0);
        Assert.assertEquals(expected.membershipTest(probe), actual.membershipTest(probe));
        checked++;
    }
}
Example #18
Source File: SimpleBloomFilter.java From hudi with Apache License 2.0 | 5 votes |
/**
 * Tests whether the UTF-8 encoding of {@code key} is reported as a member by the
 * underlying {@code filter}.
 *
 * @param key the key to test; must not be {@code null}
 * @return the result of the filter's membership test
 * @throws NullPointerException if {@code key} is {@code null}
 */
@Override
public boolean mightContain(String key) {
    if (key == null) {
        // Message corrected from "cannot by null".
        throw new NullPointerException("Key cannot be null");
    }
    return filter.membershipTest(new Key(key.getBytes(StandardCharsets.UTF_8)));
}
Example #19
Source File: InternalDynamicBloomFilter.java From hudi with Apache License 2.0 | 5 votes |
/**
 * Reports whether any filter in {@code matrix} accepts {@code key}.
 *
 * <p>A {@code null} key is reported as a member without consulting any filter.
 *
 * @param key the key to test
 * @return {@code true} if {@code key} is {@code null} or at least one filter in
 *         {@code matrix} reports membership; {@code false} otherwise
 */
@Override
public boolean membershipTest(Key key) {
    if (key == null) {
        return true;
    }

    boolean found = false;
    for (BloomFilter row : matrix) {
        if (row.membershipTest(key)) {
            found = true;
            break;
        }
    }
    return found;
}
Example #20
Source File: InternalFilter.java From hudi with Apache License 2.0 | 5 votes |
/**
 * Adds every element of an array of keys to <i>this</i> filter, in array order,
 * by delegating to {@code add(Key)}.
 *
 * @param keys The array of keys.
 * @throws IllegalArgumentException if {@code keys} is {@code null}
 */
public void add(Key[] keys) {
    if (keys == null) {
        throw new IllegalArgumentException("Key[] may not be null");
    }

    for (int index = 0; index < keys.length; index++) {
        add(keys[index]);
    }
}
Example #21
Source File: InternalFilter.java From hudi with Apache License 2.0 | 5 votes |
/**
 * Adds every element of a collection of keys to <i>this</i> filter, in the
 * collection's iteration order, by delegating to {@code add(Key)}.
 *
 * @param keys The collection of keys.
 * @throws IllegalArgumentException if {@code keys} is {@code null}
 */
public void add(Collection<Key> keys) {
    if (keys == null) {
        throw new IllegalArgumentException("Collection<Key> may not be null");
    }

    // Explicit iterator form of the enhanced for loop.
    java.util.Iterator<Key> cursor = keys.iterator();
    while (cursor.hasNext()) {
        add(cursor.next());
    }
}
Example #22
Source File: InternalFilter.java From hudi with Apache License 2.0 | 5 votes |
/**
 * Adds every element of a list of keys to <i>this</i> filter, in list order,
 * by delegating to {@code add(Key)}.
 *
 * @param keys The list of keys.
 * @throws IllegalArgumentException if {@code keys} is {@code null}
 */
public void add(List<Key> keys) {
    if (keys == null) {
        // The parameter is a List, not specifically an ArrayList; the message
        // now names the declared type.
        throw new IllegalArgumentException("List<Key> may not be null");
    }

    for (Key key : keys) {
        add(key);
    }
}
Example #23
Source File: BloomAndUDFTest.java From incubator-hivemall with Apache License 2.0 | 5 votes |
/**
 * Asserts that two filters give the same membership answer for each of
 * {@code size} pseudo-random keys.
 *
 * <p>Keys are the byte form of the hex string of Gaussian samples drawn from a
 * {@link Random} seeded with {@code seed}.
 *
 * <p>NOTE(review): the method name suggests a "does not contain" check, but the
 * body asserts equality of the two membership answers - confirm the intent.
 *
 * @param expected the reference filter
 * @param actual   the filter under test
 * @param seed     seed for the random generator
 * @param size     number of keys to probe
 */
private static void assertNotContains(@Nonnull Filter expected, @Nonnull Filter actual, long seed, int size) {
    final Key probe = new Key();
    final Random gaussianSource = new Random(seed);

    int checked = 0;
    while (checked < size) {
        final String hex = Double.toHexString(gaussianSource.nextGaussian());
        probe.set(hex.getBytes(), 1.0);
        Assert.assertEquals(expected.membershipTest(probe), actual.membershipTest(probe));
        checked++;
    }
}
Example #24
Source File: BloomFilterUDAF.java From incubator-hivemall with Apache License 2.0 | 4 votes |
/**
 * Resets this instance's state: assigns a new dynamic Bloom filter obtained from
 * {@code BloomFilterUtils.newDynamicBloomFilter()} to {@code filter} and a new
 * empty {@code Key} to {@code key}.
 */
@Override
public void init() {
    filter = BloomFilterUtils.newDynamicBloomFilter();
    key = new Key();
}
Example #25
Source File: HoodieDynamicBoundedBloomFilter.java From hudi with Apache License 2.0 | 4 votes |
/**
 * Tests whether the UTF-8 encoding of {@code key} is reported as a member by
 * {@code internalDynamicBloomFilter}.
 *
 * @param key the key to test
 * @return the result of the underlying membership test
 */
@Override
public boolean mightContain(String key) {
    final byte[] encoded = key.getBytes(StandardCharsets.UTF_8);
    final Key probe = new Key(encoded);
    return internalDynamicBloomFilter.membershipTest(probe);
}
Example #26
Source File: HoodieDynamicBoundedBloomFilter.java From hudi with Apache License 2.0 | 4 votes |
/**
 * Adds the UTF-8 encoding of {@code key} to {@code internalDynamicBloomFilter}.
 *
 * @param key the key to add
 */
@Override
public void add(String key) {
    final byte[] encoded = key.getBytes(StandardCharsets.UTF_8);
    final Key entry = new Key(encoded);
    internalDynamicBloomFilter.add(entry);
}
Example #27
Source File: InternalFilter.java From hudi with Apache License 2.0 | 2 votes |
/**
 * Determines whether a specified key belongs to <i>this</i> filter.
 *
 * @param key The key to test.
 * @return boolean True if the specified key belongs to <i>this</i> filter. False otherwise.
 */
public abstract boolean membershipTest(Key key);
Example #28
Source File: InternalFilter.java From hudi with Apache License 2.0 | 2 votes |
/**
 * Adds a key to <i>this</i> filter.
 *
 * @param key The key to add.
 */
public abstract void add(Key key);