org.apache.parquet.io.api.RecordMaterializer Java Examples
The following examples show how to use org.apache.parquet.io.api.RecordMaterializer. Each example is taken from an open-source project; the source file and license are noted above each snippet.
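All of the snippets below rely on the same small contract: a RecordMaterializer exposes a root GroupConverter that receives primitive values while a record is being assembled, and getCurrentRecord() returns the finished record once assembly ends. For orientation, here is a minimal sketch of a custom materializer (the class name MapRecordMaterializer is hypothetical, and the sketch assumes a flat schema of primitive columns only) that materializes each record as a Map from field name to value:

import java.util.HashMap;
import java.util.Map;

import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.Converter;
import org.apache.parquet.io.api.GroupConverter;
import org.apache.parquet.io.api.PrimitiveConverter;
import org.apache.parquet.io.api.RecordMaterializer;
import org.apache.parquet.schema.MessageType;

// Hypothetical sketch, not taken from any of the projects below: materializes
// each record of a flat, primitive-only schema as a Map<String, Object>.
public class MapRecordMaterializer extends RecordMaterializer<Map<String, Object>> {

  private final Map<String, Object> current = new HashMap<>();
  private final GroupConverter root;

  public MapRecordMaterializer(MessageType schema) {
    final Converter[] fields = new Converter[schema.getFieldCount()];
    for (int i = 0; i < fields.length; i++) {
      final String name = schema.getFieldName(i);
      // One PrimitiveConverter per column; each add* callback stores the value.
      fields[i] = new PrimitiveConverter() {
        @Override public void addBinary(Binary value) { current.put(name, value.toStringUsingUTF8()); }
        @Override public void addBoolean(boolean value) { current.put(name, value); }
        @Override public void addDouble(double value) { current.put(name, value); }
        @Override public void addInt(int value) { current.put(name, value); }
        @Override public void addLong(long value) { current.put(name, value); }
      };
    }
    this.root = new GroupConverter() {
      @Override public Converter getConverter(int fieldIndex) { return fields[fieldIndex]; }
      @Override public void start() { current.clear(); } // a new record begins
      @Override public void end() { }                    // the record is complete
    };
  }

  @Override
  public Map<String, Object> getCurrentRecord() {
    // Called by the record reader after end(); returns the assembled record.
    return new HashMap<>(current);
  }

  @Override
  public GroupConverter getRootConverter() {
    return root;
  }
}

A RecordReader obtained from MessageColumnIO.getRecordReader(...) drives exactly this kind of converter tree, calling getCurrentRecord() after each record, which is the pattern most of the examples below exercise.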
Example #1
Source File: TestFiltered.java from parquet-mr with Apache License 2.0
@Test
public void testPaged() {
  MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
  MemPageStore memPageStore = writeTestRecords(columnIO, 6);

  RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
  RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(page(4, 4)));

  List<Group> all = readAll(recordReader);
  assertEquals("expecting records " + all, 4, all.size());
  for (int i = 0; i < all.size(); i++) {
    assertEquals("expecting record", (i % 2 == 0 ? r2 : r1).toString(), all.get(i).toString());
  }
}
Example #2
Source File: TupleConsumerPerfTest.java from parquet-mr with Apache License 2.0
private static void read(PageReadStore columns, String pigSchemaString, String message) throws ParserException {
  System.out.println(message);
  MessageColumnIO columnIO = newColumnFactory(pigSchemaString);
  TupleReadSupport tupleReadSupport = new TupleReadSupport();
  Map<String, String> pigMetaData = pigMetaData(pigSchemaString);
  MessageType schema = new PigSchemaConverter().convert(Utils.getSchemaFromString(pigSchemaString));
  ReadContext init = tupleReadSupport.init(null, pigMetaData, schema);
  RecordMaterializer<Tuple> recordConsumer = tupleReadSupport.prepareForRead(null, pigMetaData, schema, init);
  RecordReader<Tuple> recordReader = columnIO.getRecordReader(columns, recordConsumer);
  // TODO: put this back
  // if (DEBUG) {
  //   recordConsumer = new RecordConsumerLoggingWrapper(recordConsumer);
  // }
  read(recordReader, 10000, pigSchemaString);
  read(recordReader, 10000, pigSchemaString);
  read(recordReader, 10000, pigSchemaString);
  read(recordReader, 10000, pigSchemaString);
  read(recordReader, 10000, pigSchemaString);
  read(recordReader, 100000, pigSchemaString);
  read(recordReader, 1000000, pigSchemaString);
  System.out.println();
}
Example #3
Source File: TestFiltered.java from parquet-mr with Apache License 2.0
@Test
public void testFilteredAndPaged() {
  MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
  MemPageStore memPageStore = writeTestRecords(columnIO, 8);

  RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
  RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(and(column("DocId", equalTo(10L)), page(2, 4))));

  List<Group> all = readAll(recordReader);
  assertEquals("expecting 4 records " + all, 4, all.size());
  for (int i = 0; i < all.size(); i++) {
    assertEquals("expecting record1", r1.toString(), all.get(i).toString());
  }
}
Example #4
Source File: TestFiltered.java from parquet-mr with Apache License 2.0
@Test
public void testFilteredOrPaged() {
  MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
  MemPageStore memPageStore = writeTestRecords(columnIO, 8);

  RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
  RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(or(column("DocId", equalTo(10L)),
              column("DocId", equalTo(20L)))));

  List<Group> all = readAll(recordReader);
  assertEquals("expecting 16 records " + all, 16, all.size());
  for (int i = 0; i < all.size() / 2; i++) {
    assertEquals("expecting record1", r1.toString(), all.get(2 * i).toString());
    assertEquals("expecting record2", r2.toString(), all.get(2 * i + 1).toString());
  }
}
Example #5
Source File: TestFiltered.java from parquet-mr with Apache License 2.0
@Test
public void testApplyFunctionFilterOnLong() {
  MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
  MemPageStore memPageStore = writeTestRecords(columnIO, 1);

  // Get first record
  RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
  RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(column("DocId", equalTo(10L))));

  readOne(recordReader, "r2 filtered out", r1);

  // Get second record
  recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(column("DocId", applyFunctionToLong(new LongGreaterThan15Predicate()))));

  readOne(recordReader, "r1 filtered out", r2);
}
Example #6
Source File: TestTupleRecordConsumer.java from parquet-mr with Apache License 2.0
private void testFromTuple(String pigSchemaString, List<Tuple> input) throws Exception {
  List<Tuple> tuples = new ArrayList<Tuple>();
  RecordMaterializer<Tuple> recordConsumer = newPigRecordConsumer(pigSchemaString);
  TupleWriteSupport tupleWriter = newTupleWriter(pigSchemaString, recordConsumer);
  for (Tuple tuple : input) {
    LOG.debug("{}", tuple);
    tupleWriter.write(tuple);
    tuples.add(recordConsumer.getCurrentRecord());
  }

  assertEquals(input.size(), tuples.size());
  for (int i = 0; i < input.size(); i++) {
    Tuple in = input.get(i);
    Tuple out = tuples.get(i);
    assertEquals(in.toString(), out.toString());
  }
}
Example #7
Source File: FilteringRecordMaterializer.java from parquet-mr with Apache License 2.0
public FilteringRecordMaterializer(
    RecordMaterializer<T> delegate,
    List<PrimitiveColumnIO> columnIOs,
    Map<ColumnPath, List<ValueInspector>> valueInspectorsByColumn,
    IncrementallyUpdatedFilterPredicate filterPredicate) {

  Objects.requireNonNull(columnIOs, "columnIOs cannot be null");
  Objects.requireNonNull(valueInspectorsByColumn, "valueInspectorsByColumn cannot be null");
  this.filterPredicate = Objects.requireNonNull(filterPredicate, "filterPredicate cannot be null");
  this.delegate = Objects.requireNonNull(delegate, "delegate cannot be null");

  // keep track of which path of indices leads to which primitive column
  Map<List<Integer>, PrimitiveColumnIO> columnIOsByIndexFieldPath = new HashMap<>();

  for (PrimitiveColumnIO c : columnIOs) {
    List<Integer> indexFieldPath = Arrays.stream(c.getIndexFieldPath())
        .boxed().collect(Collectors.toList());
    columnIOsByIndexFieldPath.put(indexFieldPath, c);
  }

  // create a proxy for the delegate's root converter
  this.rootConverter = new FilteringGroupConverter(
      delegate.getRootConverter(), Collections.emptyList(),
      valueInspectorsByColumn, columnIOsByIndexFieldPath);
}
Example #8
Source File: TupleReadSupport.java from parquet-mr with Apache License 2.0
@Override
public RecordMaterializer<Tuple> prepareForRead(
    Configuration configuration,
    Map<String, String> keyValueMetaData,
    MessageType fileSchema,
    ReadContext readContext) {
  MessageType requestedSchema = readContext.getRequestedSchema();
  Schema requestedPigSchema = getPigSchema(configuration);

  if (requestedPigSchema == null) {
    throw new ParquetDecodingException("Missing Pig schema: ParquetLoader sets the schema in the job conf");
  }
  boolean elephantBirdCompatible = configuration.getBoolean(PARQUET_PIG_ELEPHANT_BIRD_COMPATIBLE, false);
  boolean columnIndexAccess = configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false);
  if (elephantBirdCompatible) {
    LOG.info("Numbers will default to 0 instead of NULL; Boolean will be converted to Int");
  }
  return new TupleRecordMaterializer(requestedSchema, requestedPigSchema, elephantBirdCompatible, columnIndexAccess);
}
Example #9
Source File: ProtoReadSupport.java from parquet-mr with Apache License 2.0
@Override
public RecordMaterializer<T> prepareForRead(Configuration configuration,
    Map<String, String> keyValueMetaData, MessageType fileSchema, ReadContext readContext) {
  String headerProtoClass = keyValueMetaData.get(PB_CLASS);
  String configuredProtoClass = configuration.get(PB_CLASS);

  if (configuredProtoClass != null) {
    LOG.debug("Replacing class " + headerProtoClass + " by " + configuredProtoClass);
    headerProtoClass = configuredProtoClass;
  }

  if (headerProtoClass == null) {
    throw new RuntimeException("I Need parameter " + PB_CLASS + " with Protocol Buffer class");
  }

  LOG.debug("Reading data with Protocol Buffer class {}", headerProtoClass);

  MessageType requestedSchema = readContext.getRequestedSchema();
  Class<? extends Message> protobufClass = Protobufs.getProtobufClass(headerProtoClass);
  return new ProtoRecordMaterializer(requestedSchema, protobufClass);
}
Example #10
Source File: TestFiltered.java from parquet-mr with Apache License 2.0
@Test
public void testFilterOnInteger() {
  MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
  MemPageStore memPageStore = writeTestRecords(columnIO, 1);

  // Get first record
  RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
  RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(column("DocId", equalTo(10L))));

  readOne(recordReader, "r2 filtered out", r1);

  // Get second record
  recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(column("DocId", equalTo(20L))));

  readOne(recordReader, "r1 filtered out", r2);
}
Example #11
Source File: TestFiltered.java from parquet-mr with Apache License 2.0
@Test
public void testFilteredNotPaged() {
  MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
  MemPageStore memPageStore = writeTestRecords(columnIO, 8);

  RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
  RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(not(column("DocId", equalTo(10L)))));

  List<Group> all = readAll(recordReader);
  assertEquals("expecting 8 records " + all, 8, all.size());
  for (int i = 0; i < all.size(); i++) {
    assertEquals("expecting record2", r2.toString(), all.get(i).toString());
  }
}
Example #12
Source File: ThriftReadSupport.java from parquet-mr with Apache License 2.0
@Override
public RecordMaterializer<T> prepareForRead(Configuration configuration,
    Map<String, String> keyValueMetaData, MessageType fileSchema,
    org.apache.parquet.hadoop.api.ReadSupport.ReadContext readContext) {
  ThriftMetaData thriftMetaData = ThriftMetaData.fromExtraMetaData(keyValueMetaData);
  try {
    initThriftClass(thriftMetaData, configuration);
  } catch (ClassNotFoundException e) {
    throw new RuntimeException("Cannot find Thrift object class for metadata: " + thriftMetaData, e);
  }

  // if there was no metadata in the file, get it from the requested class
  if (thriftMetaData == null) {
    thriftMetaData = ThriftMetaData.fromThriftClass(thriftClass);
  }

  String converterClassName = configuration.get(RECORD_CONVERTER_CLASS_KEY, RECORD_CONVERTER_DEFAULT);
  return getRecordConverterInstance(converterClassName, thriftClass,
      readContext.getRequestedSchema(), thriftMetaData.getDescriptor(), configuration);
}
Example #13
Source File: PerfTest.java from parquet-mr with Apache License 2.0
private static void read(MemPageStore memPageStore, MessageType myschema, String message) {
  MessageColumnIO columnIO = newColumnFactory(myschema);
  System.out.println(message);
  RecordMaterializer<Object> recordConsumer = new DummyRecordConverter(myschema);
  RecordReader<Object> recordReader = columnIO.getRecordReader(memPageStore, recordConsumer);
  read(recordReader, 2, myschema);
  read(recordReader, 10000, myschema);
  read(recordReader, 10000, myschema);
  read(recordReader, 10000, myschema);
  read(recordReader, 10000, myschema);
  read(recordReader, 10000, myschema);
  read(recordReader, 100000, myschema);
  read(recordReader, 1000000, myschema);
  System.out.println();
}
Example #14
Source File: DataWritableReadSupport.java from parquet-mr with Apache License 2.0
/**
 * Creates the Hive read support to interpret data from Parquet to Hive.
 *
 * @param configuration unused
 * @param keyValueMetaData string map of metadata
 * @param fileSchema unused
 * @param readContext contains the requested schema and the schema of the Hive table
 * @return the record materializer for Hive
 */
@Override
public RecordMaterializer<ArrayWritable> prepareForRead(final Configuration configuration,
    final Map<String, String> keyValueMetaData, final MessageType fileSchema,
    final org.apache.parquet.hadoop.api.ReadSupport.ReadContext readContext) {
  final Map<String, String> metadata = readContext.getReadSupportMetadata();
  if (metadata == null) {
    throw new IllegalStateException("ReadContext not initialized properly. " +
        "Don't know the Hive Schema.");
  }
  final MessageType tableSchema = resolveSchemaAccess(
      MessageTypeParser.parseMessageType(metadata.get(HIVE_SCHEMA_KEY)), fileSchema, configuration);
  return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema);
}
Example #15
Source File: DelegatingReadSupport.java from parquet-mr with Apache License 2.0
@Override
public RecordMaterializer<T> prepareForRead(
    Configuration configuration,
    Map<String, String> keyValueMetaData,
    MessageType fileSchema,
    ReadSupport.ReadContext readContext) {
  return delegate.prepareForRead(configuration, keyValueMetaData, fileSchema, readContext);
}
Example #16
Source File: TupleReadSupport.java from parquet-mr with Apache License 2.0
@Override
public RecordMaterializer<Tuple> prepareForRead(
    Configuration configuration,
    Map<String, String> keyValueMetaData,
    MessageType fileSchema,
    ReadContext readContext) {
  MessageType requestedSchema = readContext.getRequestedSchema();
  return new TupleRecordMaterializer(requestedSchema);
}
Example #17
Source File: ParquetReadSupport.java from iceberg with Apache License 2.0
@Override
public RecordMaterializer<T> prepareForRead(Configuration configuration,
                                            Map<String, String> fileMetadata,
                                            MessageType fileMessageType,
                                            ReadContext readContext) {
  // This is the type created in init that was based on the file's schema. The schema that this
  // will pass to the wrapped ReadSupport needs to match the expected schema's names. Rather than
  // renaming the file's schema, convert the expected schema to Parquet. This relies on writing
  // files with the correct schema.
  // TODO: this breaks when columns are reordered.
  MessageType readSchema = ParquetSchemaUtil.convert(expectedSchema, fileMessageType.getName());
  return wrapped.prepareForRead(configuration, fileMetadata, readSchema, readContext);
}
Example #18
Source File: TestTupleRecordConsumer.java from parquet-mr with Apache License 2.0
private void testFromGroups(String pigSchemaString, List<Group> input) throws ParserException {
  List<Tuple> tuples = new ArrayList<Tuple>();
  MessageType schema = getMessageType(pigSchemaString);
  RecordMaterializer<Tuple> pigRecordConsumer = newPigRecordConsumer(pigSchemaString);
  GroupWriter groupWriter = new GroupWriter(
      new RecordConsumerLoggingWrapper(
          new ConverterConsumer(pigRecordConsumer.getRootConverter(), schema)),
      schema);

  for (Group group : input) {
    groupWriter.write(group);
    final Tuple tuple = pigRecordConsumer.getCurrentRecord();
    tuples.add(tuple);
    LOG.debug("in: {}\nout:{}", group, tuple);
  }

  List<Group> groups = new ArrayList<Group>();
  GroupRecordConverter recordConsumer = new GroupRecordConverter(schema);
  TupleWriteSupport tupleWriter = newTupleWriter(pigSchemaString, recordConsumer);
  for (Tuple t : tuples) {
    LOG.debug("{}", t);
    tupleWriter.write(t);
    groups.add(recordConsumer.getCurrentRecord());
  }

  assertEquals(input.size(), groups.size());
  for (int i = 0; i < input.size(); i++) {
    Group in = input.get(i);
    LOG.debug("{}", in);
    Group out = groups.get(i);
    assertEquals(in.toString(), out.toString());
  }
}
Example #19
Source File: TestTupleRecordConsumer.java from parquet-mr with Apache License 2.0
private <T> TupleWriteSupport newTupleWriter(String pigSchemaString, RecordMaterializer<T> recordConsumer) throws ParserException {
  TupleWriteSupport tupleWriter = TupleWriteSupport.fromPigSchema(pigSchemaString);
  tupleWriter.init(null);
  tupleWriter.prepareForWrite(
      new ConverterConsumer(recordConsumer.getRootConverter(), tupleWriter.getParquetSchema()));
  return tupleWriter;
}
Example #20
Source File: TestTupleRecordConsumer.java from parquet-mr with Apache License 2.0
private RecordMaterializer<Tuple> newPigRecordConsumer(String pigSchemaString) throws ParserException {
  TupleReadSupport tupleReadSupport = new TupleReadSupport();
  final Configuration configuration = new Configuration(false);
  MessageType parquetSchema = getMessageType(pigSchemaString);
  final Map<String, String> pigMetaData = pigMetaData(pigSchemaString);
  Map<String, Set<String>> globalMetaData = new HashMap<String, Set<String>>();
  for (Entry<String, String> entry : pigMetaData.entrySet()) {
    globalMetaData.put(entry.getKey(), new HashSet<String>(Arrays.asList(entry.getValue())));
  }
  configuration.set(PARQUET_PIG_SCHEMA, pigSchemaString);
  final ReadContext init = tupleReadSupport.init(new InitContext(configuration, globalMetaData, parquetSchema));
  return tupleReadSupport.prepareForRead(configuration, pigMetaData, parquetSchema, init);
}
Example #21
Source File: AvroReadSupport.java from parquet-mr with Apache License 2.0
@Override
public RecordMaterializer<T> prepareForRead(
    Configuration configuration, Map<String, String> keyValueMetaData,
    MessageType fileSchema, ReadContext readContext) {
  Map<String, String> metadata = readContext.getReadSupportMetadata();
  MessageType parquetSchema = readContext.getRequestedSchema();
  Schema avroSchema;

  if (metadata.get(AVRO_READ_SCHEMA_METADATA_KEY) != null) {
    // use the Avro read schema provided by the user
    avroSchema = new Schema.Parser().parse(metadata.get(AVRO_READ_SCHEMA_METADATA_KEY));
  } else if (keyValueMetaData.get(AVRO_SCHEMA_METADATA_KEY) != null) {
    // use the Avro schema from the file metadata if present
    avroSchema = new Schema.Parser().parse(keyValueMetaData.get(AVRO_SCHEMA_METADATA_KEY));
  } else if (keyValueMetaData.get(OLD_AVRO_SCHEMA_METADATA_KEY) != null) {
    // use the Avro schema from the file metadata if present
    avroSchema = new Schema.Parser().parse(keyValueMetaData.get(OLD_AVRO_SCHEMA_METADATA_KEY));
  } else {
    // default to converting the Parquet schema into an Avro schema
    avroSchema = new AvroSchemaConverter(configuration).convert(parquetSchema);
  }

  GenericData model = getDataModel(configuration);
  String compatEnabled = metadata.get(AvroReadSupport.AVRO_COMPATIBILITY);
  if (compatEnabled != null && Boolean.valueOf(compatEnabled)) {
    return newCompatMaterializer(parquetSchema, avroSchema, model);
  }
  return new AvroRecordMaterializer<T>(parquetSchema, avroSchema, model);
}
Example #22
Source File: TestFiltered.java from parquet-mr with Apache License 2.0
@Test
public void testFilterOnString() {
  MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
  MemPageStore memPageStore = writeTestRecords(columnIO, 1);

  // First try matching against the A url in record 1
  RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
  RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(column("Name.Url", equalTo("http://A"))));

  readOne(recordReader, "r2 filtered out", r1);

  // Second try matching against the B url in record 1 - it should fail as we only match
  // against the first instance of a
  recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(column("Name.Url", equalTo("http://B"))));

  List<Group> all = readAll(recordReader);
  assertEquals("There should be no matching records: " + all, 0, all.size());

  // Finally try matching against the C url in record 2
  recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(column("Name.Url", equalTo("http://C"))));

  readOne(recordReader, "r1 filtered out", r2);
}
Example #23
Source File: TestFiltered.java from parquet-mr with Apache License 2.0
@Test
public void testApplyFunctionFilterOnString() {
  MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
  MemPageStore memPageStore = writeTestRecords(columnIO, 1);

  // First try matching against the A url in record 1
  RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
  RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(column("Name.Url", applyFunctionToString(new StringEndsWithAPredicate()))));

  readOne(recordReader, "r2 filtered out", r1);

  // Second try matching against the B url in record 1 - it should fail as we only match
  // against the first instance of a
  recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(column("Name.Url", equalTo("http://B"))));

  List<Group> all = readAll(recordReader);
  assertEquals("There should be no matching records: " + all, 0, all.size());

  // Finally try matching against the C url in record 2
  recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(column("Name.Url", equalTo("http://C"))));

  readOne(recordReader, "r1 filtered out", r2);
}
Example #24
Source File: FilteredRecordReader.java from parquet-mr with Apache License 2.0
/**
 * @param root the root of the schema
 * @param validating
 * @param columnStore
 * @param unboundFilter Filter records, pass in NULL_FILTER to leave unfiltered.
 */
public FilteredRecordReader(MessageColumnIO root, RecordMaterializer<T> recordMaterializer, boolean validating,
                            ColumnReadStoreImpl columnStore, UnboundRecordFilter unboundFilter, long recordCount) {
  super(root, recordMaterializer, validating, columnStore);
  this.recordCount = recordCount;
  if (unboundFilter != null) {
    recordFilter = unboundFilter.bind(getColumnReaders());
  } else {
    recordFilter = null;
  }
}
Example #25
Source File: TajoReadSupport.java from tajo with Apache License 2.0
/**
 * Prepares for read.
 *
 * @param configuration The job configuration.
 * @param keyValueMetaData App-specific metadata from the file.
 * @param fileSchema The schema of the Parquet file.
 * @param readContext Returned by the init method.
 */
@Override
public RecordMaterializer<Tuple> prepareForRead(
    Configuration configuration,
    Map<String, String> keyValueMetaData,
    MessageType fileSchema,
    ReadContext readContext) {
  MessageType parquetRequestedSchema = readContext.getRequestedSchema();
  return new TajoRecordMaterializer(parquetRequestedSchema, requestedSchema, readSchema);
}
Example #26
Source File: PentahoParquetReadSupport.java from pentaho-hadoop-shims with Apache License 2.0
@Override
public RecordMaterializer<RowMetaAndData> prepareForRead(Configuration configuration,
    Map<String, String> keyValueMetaData, MessageType fileSchema, ReadContext readContext) {
  return new ParquetConverter.MyRecordMaterializer(converter);
}
Example #27
Source File: GroupReadSupport.java from iow-hadoop-streaming with Apache License 2.0
@Override
public RecordMaterializer<Group> prepareForRead(Configuration configuration,
    Map<String, String> keyValueMetaData, MessageType fileSchema, ReadContext readContext) {
  return new GroupRecordConverter(readContext.getRequestedSchema());
}
Example #28
Source File: AvroReadSupport.java from parquet-mr with Apache License 2.0
@SuppressWarnings("unchecked") private static <T> RecordMaterializer<T> newCompatMaterializer( MessageType parquetSchema, Schema avroSchema, GenericData model) { return (RecordMaterializer<T>) new AvroCompatRecordMaterializer( parquetSchema, avroSchema, model); }
Example #29
Source File: RowReadSupport.java from flink with Apache License 2.0
@Override
public RecordMaterializer<Row> prepareForRead(
    Configuration configuration,
    Map<String, String> keyValueMetaData,
    MessageType fileSchema,
    ReadContext readContext) {
  return new RowMaterializer(readContext.getRequestedSchema(), returnTypeInfo);
}
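Most of the prepareForRead implementations above are never called directly: a ParquetReader (or the Hadoop input format) calls the ReadSupport's init and prepareForRead, then reads records through the returned RecordMaterializer. A minimal sketch of that call path, using the GroupReadSupport example class from parquet-hadoop (the file path here is hypothetical):

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.example.GroupReadSupport;

public class ReadWithGroupReadSupport {
  public static void main(String[] args) throws IOException {
    // The builder wires the ReadSupport into the reader; the reader invokes
    // init(...) and prepareForRead(...) before materializing any records.
    try (ParquetReader<Group> reader =
        ParquetReader.builder(new GroupReadSupport(), new Path("/tmp/example.parquet")).build()) {
      Group record;
      while ((record = reader.read()) != null) { // each read() materializes one record
        System.out.println(record);
      }
    }
  }
}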