Java Code Examples for org.apache.avro.mapred.AvroValue#datum()
The following examples show how to use
org.apache.avro.mapred.AvroValue#datum() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AvroKeyDedupReducer.java From incubator-gobblin with Apache License 2.0 | 5 votes |
@Override public int compare(AvroValue<GenericRecord> o1, AvroValue<GenericRecord> o2) { GenericRecord record1 = o1.datum(); GenericRecord record2 = o2.datum(); for (String deltaFieldName : this.deltaSchemaProvider.getDeltaFieldNames(record1)) { if (record1.get(deltaFieldName).equals(record2.get(deltaFieldName))) { continue; } return ((Comparable) record1.get(deltaFieldName)).compareTo(record2.get(deltaFieldName)); } return 0; }
Example 2
Source File: TransformPhaseJob.java From incubator-pinot with Apache License 2.0 | 5 votes |
@Override public void reduce(IntWritable key, Iterable<AvroValue<GenericRecord>> values, Context context) throws IOException, InterruptedException { for (AvroValue<GenericRecord> value : values) { GenericRecord record = value.datum(); context.write(new AvroKey<GenericRecord>(record), NullWritable.get()); } }
Example 3
Source File: KeyDedupReducerTest.java From incubator-gobblin with Apache License 2.0 | 4 votes |
@Test public void testAvroReduce() throws IOException, InterruptedException { Schema keySchema = new Schema.Parser().parse(AVRO_KEY_SCHEMA); GenericRecordBuilder keyRecordBuilder = new GenericRecordBuilder(keySchema.getField("key").schema()); keyRecordBuilder.set("partitionKey", 1); keyRecordBuilder.set("environment", "test"); keyRecordBuilder.set("subKey", "2"); GenericRecord record = keyRecordBuilder.build(); keyRecordBuilder = new GenericRecordBuilder(keySchema); keyRecordBuilder.set("key", record); GenericRecord keyRecord = keyRecordBuilder.build(); // Test reducer with delta field "scn" Schema fullSchema = new Schema.Parser().parse(AVRO_FULL_SCHEMA); AvroValue<GenericRecord> fullRecord1 = new AvroValue<>(); AvroValue<GenericRecord> fullRecord2 = new AvroValue<>(); AvroValue<GenericRecord> fullRecord3 = new AvroValue<>(); AvroValue<GenericRecord> fullRecord4 = new AvroValue<>(); GenericRecordBuilder fullRecordBuilder1 = new GenericRecordBuilder(fullSchema); fullRecordBuilder1.set("key", record); fullRecordBuilder1.set("scn", 123); fullRecordBuilder1.set("scn2", 100); fullRecord1.datum(fullRecordBuilder1.build()); fullRecordBuilder1.set("scn", 125); fullRecordBuilder1.set("scn2", 1); fullRecord2.datum(fullRecordBuilder1.build()); fullRecordBuilder1.set("scn", 124); fullRecordBuilder1.set("scn2", 10); fullRecord3.datum(fullRecordBuilder1.build()); fullRecordBuilder1.set("scn", 122); fullRecordBuilder1.set("scn2", 1000); fullRecord4.datum(fullRecordBuilder1.build()); Configuration conf = mock(Configuration.class); when(conf.get(AvroKeyDedupReducer.DELTA_SCHEMA_PROVIDER)) .thenReturn(FieldAttributeBasedDeltaFieldsProvider.class.getName()); when(conf.get(FieldAttributeBasedDeltaFieldsProvider.ATTRIBUTE_FIELD)).thenReturn("attributes_json"); when(conf.get(FieldAttributeBasedDeltaFieldsProvider.DELTA_PROP_NAME, FieldAttributeBasedDeltaFieldsProvider.DEFAULT_DELTA_PROP_NAME)) .thenReturn(FieldAttributeBasedDeltaFieldsProvider.DEFAULT_DELTA_PROP_NAME); RecordKeyDedupReducerBase<AvroKey<GenericRecord>, AvroValue<GenericRecord>, AvroKey<GenericRecord>, NullWritable> reducer = new AvroKeyDedupReducer(); WrappedReducer.Context reducerContext = mock(WrappedReducer.Context.class); when(reducerContext.getConfiguration()).thenReturn(conf); Counter moreThan1Counter = new GenericCounter(); when(reducerContext.getCounter(RecordKeyDedupReducerBase.EVENT_COUNTER.MORE_THAN_1)).thenReturn(moreThan1Counter); Counter dedupedCounter = new GenericCounter(); when(reducerContext.getCounter(RecordKeyDedupReducerBase.EVENT_COUNTER.DEDUPED)).thenReturn(dedupedCounter); Counter recordCounter = new GenericCounter(); when(reducerContext.getCounter(RecordKeyDedupReducerBase.EVENT_COUNTER.RECORD_COUNT)).thenReturn(recordCounter); reducer.setup(reducerContext); doNothing().when(reducerContext).write(any(AvroKey.class), any(NullWritable.class)); List<AvroValue<GenericRecord>> valueIterable = Lists.newArrayList(fullRecord1, fullRecord2, fullRecord3, fullRecord4); AvroKey<GenericRecord> key = new AvroKey<>(); key.datum(keyRecord); reducer.reduce(key, valueIterable, reducerContext); Assert.assertEquals(reducer.getOutKey().datum(), fullRecord2.datum()); // Test reducer without delta field Configuration conf2 = mock(Configuration.class); when(conf2.get(AvroKeyDedupReducer.DELTA_SCHEMA_PROVIDER)).thenReturn(null); when(reducerContext.getConfiguration()).thenReturn(conf2); RecordKeyDedupReducerBase<AvroKey<GenericRecord>, AvroValue<GenericRecord>, AvroKey<GenericRecord>, NullWritable> reducer2 = new AvroKeyDedupReducer(); reducer2.setup(reducerContext); reducer2.reduce(key, valueIterable, reducerContext); Assert.assertEquals(reducer2.getOutKey().datum(), fullRecord1.datum()); // Test reducer with compound delta key. Schema fullSchema2 = new Schema.Parser().parse(AVRO_FULL_SCHEMA_WITH_TWO_DELTA_FIELDS); GenericRecordBuilder fullRecordBuilder2 = new GenericRecordBuilder(fullSchema2); fullRecordBuilder2.set("key", record); fullRecordBuilder2.set("scn", 123); fullRecordBuilder2.set("scn2", 100); fullRecord1.datum(fullRecordBuilder2.build()); fullRecordBuilder2.set("scn", 125); fullRecordBuilder2.set("scn2", 1000); fullRecord2.datum(fullRecordBuilder2.build()); fullRecordBuilder2.set("scn", 126); fullRecordBuilder2.set("scn2", 1000); fullRecord3.datum(fullRecordBuilder2.build()); fullRecordBuilder2.set("scn", 130); fullRecordBuilder2.set("scn2", 100); fullRecord4.datum(fullRecordBuilder2.build()); List<AvroValue<GenericRecord>> valueIterable2 = Lists.newArrayList(fullRecord1, fullRecord2, fullRecord3, fullRecord4); reducer.reduce(key, valueIterable2, reducerContext); Assert.assertEquals(reducer.getOutKey().datum(), fullRecord3.datum()); }
Example 4
Source File: GenericPartitioner.java From incubator-pinot with Apache License 2.0 | 4 votes |
@Override public int getPartition(T genericRecordAvroKey, AvroValue<GenericRecord> genericRecordAvroValue, int numPartitions) { final GenericRecord inputRecord = genericRecordAvroValue.datum(); final Object partitionColumnValue = inputRecord.get(_partitionColumn); return _partitionFunction.getPartition(partitionColumnValue); }