org.apache.parquet.hadoop.api.ReadSupport Java Examples
The following examples show how to use
org.apache.parquet.hadoop.api.ReadSupport.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PentahoApacheInputFormat.java From pentaho-hadoop-shims with Apache License 2.0 | 6 votes |
/**
 * Creates a Pentaho record reader for the given split.
 *
 * <p>All work runs inside {@code inClassloader(...)}: the split is unwrapped to the native
 * Hadoop split, a {@link ParquetRecordReader} is created with the filter taken from the job
 * configuration, initialized against a fresh task attempt context, and finally wrapped.
 *
 * @param split the Pentaho split; it is cast to {@link PentahoInputSplitImpl}
 * @return a reader wrapping the initialized native Parquet record reader
 * @throws Exception whatever the wrapped reader set-up throws
 */
@Override
public IPentahoRecordReader createRecordReader( IPentahoInputSplit split ) throws Exception {
  return inClassloader( () -> {
    InputSplit hadoopSplit = ( (PentahoInputSplitImpl) split ).getInputSplit();

    ReadSupport<RowMetaAndData> rowReadSupport = new PentahoParquetReadSupport();
    ParquetRecordReader<RowMetaAndData> parquetReader =
      new ParquetRecordReader<>( rowReadSupport, ParquetInputFormat.getFilter( job.getConfiguration() ) );

    // The native reader needs a task attempt context; a new attempt id is created for it.
    TaskAttemptContextImpl attemptContext =
      new TaskAttemptContextImpl( job.getConfiguration(), new TaskAttemptID() );
    parquetReader.initialize( hadoopSplit, attemptContext );

    return new PentahoParquetRecordReader( parquetReader );
  } );
}
Example #2
Source File: ParquetReader.java From tajo with Apache License 2.0 | 6 votes |
/**
 * Builds a reader over the non-hidden files found at {@code file}.
 *
 * <p>The footers of all listed files are read up front; their row-group row counts are added
 * to {@code totalRowCount}, and an iterator over the footers is kept for later use.
 *
 * @param conf the configuration used to resolve the file system and read footers
 * @param file the path to list
 * @param readSupport the read support stored on this reader
 * @param filter the record filter; must not be null
 * @throws IOException if listing the path or reading the footers fails
 */
private ParquetReader(Configuration conf, Path file, ReadSupport<T> readSupport, Filter filter)
    throws IOException {
  this.readSupport = readSupport;
  this.filter = checkNotNull(filter, "filter");
  this.conf = conf;

  FileSystem fileSystem = file.getFileSystem(conf);
  FileStatus[] visibleFiles = fileSystem.listStatus(file, HiddenFileFilter.INSTANCE);
  List<Footer> allFooters = ParquetFileReader.readAllFootersInParallelUsingSummaryFiles(
      conf, Arrays.asList(visibleFiles), false);
  this.footersIterator = allFooters.iterator();

  // Sum the row counts of every row group in every footer.
  for (Footer current : allFooters) {
    List<BlockMetaData> rowGroups = current.getParquetMetadata().getBlocks();
    for (BlockMetaData rowGroup : rowGroups) {
      totalRowCount += rowGroup.getRowCount();
    }
  }
}
Example #3
Source File: InternalParquetRecordReader.java From tajo with Apache License 2.0 | 6 votes |
public void initialize(FileMetaData parquetFileMetadata, Path file, List<BlockMetaData> blocks, Configuration configuration) throws IOException { // initialize a ReadContext for this file Map<String, String> fileMetadata = parquetFileMetadata.getKeyValueMetaData(); ReadSupport.ReadContext readContext = readSupport.init(new InitContext( configuration, toSetMultiMap(fileMetadata), fileSchema)); this.columnIOFactory = new ColumnIOFactory(parquetFileMetadata.getCreatedBy()); this.requestedSchema = readContext.getRequestedSchema(); this.fileSchema = parquetFileMetadata.getSchema(); this.file = file; this.columnCount = requestedSchema.getPaths().size(); this.recordConverter = readSupport.prepareForRead( configuration, fileMetadata, fileSchema, readContext); this.strictTypeChecking = configuration.getBoolean(STRICT_TYPE_CHECKING, true); List<ColumnDescriptor> columns = requestedSchema.getColumns(); reader = new ParquetFileReader(configuration, parquetFileMetadata, file, blocks, columns); for (BlockMetaData block : blocks) { total += block.getRowCount(); } this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(configuration, total); LOG.info("RecordReader initialized will read a total of " + total + " records."); }
Example #4
Source File: ThriftReadSupport.java From parquet-mr with Apache License 2.0 | 6 votes |
@Override public RecordMaterializer<T> prepareForRead(Configuration configuration, Map<String, String> keyValueMetaData, MessageType fileSchema, org.apache.parquet.hadoop.api.ReadSupport.ReadContext readContext) { ThriftMetaData thriftMetaData = ThriftMetaData.fromExtraMetaData(keyValueMetaData); try { initThriftClass(thriftMetaData, configuration); } catch (ClassNotFoundException e) { throw new RuntimeException("Cannot find Thrift object class for metadata: " + thriftMetaData, e); } // if there was not metadata in the file, get it from requested class if (thriftMetaData == null) { thriftMetaData = ThriftMetaData.fromThriftClass(thriftClass); } String converterClassName = configuration.get(RECORD_CONVERTER_CLASS_KEY, RECORD_CONVERTER_DEFAULT); return getRecordConverterInstance(converterClassName, thriftClass, readContext.getRequestedSchema(), thriftMetaData.getDescriptor(), configuration); }
Example #5
Source File: InternalParquetRecordReader.java From parquet-mr with Apache License 2.0 | 6 votes |
public void initialize(ParquetFileReader reader, Configuration configuration) throws IOException { // initialize a ReadContext for this file this.reader = reader; FileMetaData parquetFileMetadata = reader.getFooter().getFileMetaData(); this.fileSchema = parquetFileMetadata.getSchema(); Map<String, String> fileMetadata = parquetFileMetadata.getKeyValueMetaData(); ReadSupport.ReadContext readContext = readSupport.init(new InitContext( configuration, toSetMultiMap(fileMetadata), fileSchema)); this.columnIOFactory = new ColumnIOFactory(parquetFileMetadata.getCreatedBy()); this.requestedSchema = readContext.getRequestedSchema(); this.columnCount = requestedSchema.getPaths().size(); // Setting the projection schema before running any filtering (e.g. getting filtered record count) // because projection impacts filtering reader.setRequestedSchema(requestedSchema); this.recordConverter = readSupport.prepareForRead( configuration, fileMetadata, fileSchema, readContext); this.strictTypeChecking = configuration.getBoolean(STRICT_TYPE_CHECKING, true); this.total = reader.getFilteredRecordCount(); this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(configuration, total); this.filterRecords = configuration.getBoolean(RECORD_FILTERING_ENABLED, true); LOG.info("RecordReader initialized will read a total of {} records.", total); }
Example #6
Source File: GroupReadSupport.java From iow-hadoop-streaming with Apache License 2.0 | 6 votes |
/**
 * Builds the read context, taking the requested (partial) schema either from a local file or
 * from the configuration.
 *
 * <p>If {@code PARQUET_READ_SCHEMA_FILE} is set to a non-empty value, the schema text is read
 * from that file. Otherwise the value of {@code ReadSupport.PARQUET_READ_SCHEMA} is used.
 *
 * @param configuration the job configuration
 * @param keyValueMetaData the file's key/value metadata (not read by this method)
 * @param fileSchema the schema of the Parquet file
 * @return a read context for the schema returned by {@code getSchemaForRead}
 * @throws RuntimeException if the schema file cannot be read; the original failure is the cause
 */
@Override
public ReadContext init(
    Configuration configuration,
    Map<String, String> keyValueMetaData,
    MessageType fileSchema) {
  String partialSchemaString;
  String partialSchemaFile = configuration.get(PARQUET_READ_SCHEMA_FILE, "");
  if (!partialSchemaFile.isEmpty()) {
    StringBuilder r = new StringBuilder();
    // try-with-resources: the reader was previously never closed, leaking a file handle.
    try (BufferedReader br = new BufferedReader(new FileReader(new File(partialSchemaFile)))) {
      String line;
      while ((line = br.readLine()) != null) {
        // Keep the line break so that tokens on adjacent lines are not fused together.
        r.append(line).append('\n');
      }
    } catch (Exception e) {
      // Keep the original exception as the cause so the stack trace is not lost.
      throw new RuntimeException(
          "Can't read schema from file " + partialSchemaFile + ": " + e.getMessage(), e);
    }
    partialSchemaString = r.toString();
  } else {
    partialSchemaString = configuration.get(ReadSupport.PARQUET_READ_SCHEMA);
  }
  return new ReadContext(getSchemaForRead(fileSchema, partialSchemaString));
}
Example #7
Source File: GroupReadSupportTest.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Verifies that when a partial schema is set under {@code ReadSupport.PARQUET_READ_SCHEMA},
 * {@code GroupReadSupport.init} returns a context whose requested schema equals that partial
 * schema.
 */
@Test
public void testInitWithPartialSchema() {
  GroupReadSupport s = new GroupReadSupport();
  Configuration configuration = new Configuration();
  Map<String, String> keyValueMetaData = new HashMap<String, String>();
  MessageType fileSchema = MessageTypeParser.parseMessageType(fullSchemaStr);
  MessageType partialSchema = MessageTypeParser.parseMessageType(partialSchemaStr);
  configuration.set(ReadSupport.PARQUET_READ_SCHEMA, partialSchemaStr);

  ReadSupport.ReadContext context = s.init(configuration, keyValueMetaData, fileSchema);

  // assertEquals takes (expected, actual); the arguments were swapped, which would make a
  // failure message report the two schemas the wrong way round.
  assertEquals(partialSchema, context.getRequestedSchema());
}
Example #8
Source File: ParquetAsTextInputFormat.java From iow-hadoop-streaming with Apache License 2.0 | 5 votes |
public TextRecordReaderWrapper(ParquetInputFormat<SimpleGroup> newInputFormat, InputSplit oldSplit, JobConf oldJobConf, Reporter reporter) throws IOException { splitLen = oldSplit.getLength(); try { ReadSupport<SimpleGroup> rs = ParquetInputFormat.getReadSupportInstance(oldJobConf); realReader = new ParquetRecordReader<>(rs); realReader.initialize(((StreamingParquetInputSplitWrapper)oldSplit).realSplit, oldJobConf, reporter); oldJobConf.set("map.input.file",((StreamingParquetInputSplitWrapper)oldSplit).realSplit.getPath().toString()); oldJobConf.set("mapreduce.map.input.file",((StreamingParquetInputSplitWrapper)oldSplit).realSplit.getPath().toString()); // read once to gain access to key and value objects if (realReader.nextKeyValue()) { firstRecord = true; valueContainer = new Container<>(); SimpleGroup v = realReader.getCurrentValue(); valueContainer.set(v); ls = groupToStrings(v); } else { eof = true; } } catch (InterruptedException e) { Thread.interrupted(); throw new IOException(e); } }
Example #9
Source File: ThriftReadSupport.java From parquet-mr with Apache License 2.0 | 5 votes |
@Override public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(InitContext context) { final Configuration configuration = context.getConfiguration(); final MessageType fileMessageType = context.getFileSchema(); MessageType requestedProjection = fileMessageType; String partialSchemaString = configuration.get(ReadSupport.PARQUET_READ_SCHEMA); FieldProjectionFilter projectionFilter = getFieldProjectionFilter(configuration); if (partialSchemaString != null && projectionFilter != null) { throw new ThriftProjectionException( String.format("You cannot provide both a partial schema and field projection filter." + "Only one of (%s, %s, %s) should be set.", PARQUET_READ_SCHEMA, STRICT_THRIFT_COLUMN_FILTER_KEY, THRIFT_COLUMN_FILTER_KEY)); } //set requestedProjections only when it's specified if (partialSchemaString != null) { requestedProjection = getSchemaForRead(fileMessageType, partialSchemaString); } else if (projectionFilter != null) { try { initThriftClassFromMultipleFiles(context.getKeyValueMetadata(), configuration); requestedProjection = getProjectedSchema(projectionFilter); } catch (ClassNotFoundException e) { throw new ThriftProjectionException("can not find thriftClass from configuration", e); } } MessageType schemaForRead = getSchemaForRead(fileMessageType, requestedProjection); return new ReadContext(schemaForRead); }
Example #10
Source File: ThriftParquetReader.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Builds the Parquet reader.
 *
 * <p>A {@link ThriftReadSupport} is created with the explicit Thrift class when one was
 * given, otherwise with its no-argument constructor.
 *
 * @return a reader configured with this builder's file, configuration and filter
 * @throws IOException if the underlying builder fails to create the reader
 */
public ParquetReader<T> build() throws IOException {
  final ReadSupport<T> thriftSupport = (thriftClass == null)
      ? new ThriftReadSupport<T>()
      : new ThriftReadSupport<T>(thriftClass);

  return ParquetReader.builder(thriftSupport, file)
      .withConf(conf)
      .withFilter(filter)
      .build();
}
Example #11
Source File: ParquetRecordReader.java From flink with Apache License 2.0 | 5 votes |
public void initialize(ParquetFileReader reader, Configuration configuration) { this.reader = reader; FileMetaData parquetFileMetadata = reader.getFooter().getFileMetaData(); // real schema of parquet file this.fileSchema = parquetFileMetadata.getSchema(); Map<String, String> fileMetadata = parquetFileMetadata.getKeyValueMetaData(); ReadSupport.ReadContext readContext = readSupport.init(new InitContext( configuration, toSetMultiMap(fileMetadata), readSchema)); this.columnIOFactory = new ColumnIOFactory(parquetFileMetadata.getCreatedBy()); this.recordMaterializer = readSupport.prepareForRead( configuration, fileMetadata, readSchema, readContext); this.numTotalRecords = reader.getRecordCount(); }
Example #12
Source File: TestParquetToThriftReadWriteAndProjection.java From parquet-mr with Apache License 2.0 | 5 votes |
@Test public void testThriftOptionalFieldsWithReadProjectionUsingParquetSchema() throws Exception { // test with projection Configuration conf = new Configuration(); final String readProjectionSchema = "message AddressBook {\n" + " optional group persons {\n" + " repeated group persons_tuple {\n" + " required group name {\n" + " optional binary first_name;\n" + " optional binary last_name;\n" + " }\n" + " optional int32 id;\n" + " }\n" + " }\n" + "}"; conf.set(ReadSupport.PARQUET_READ_SCHEMA, readProjectionSchema); TBase toWrite = new AddressBook( Arrays.asList( new Person( new Name("Bob", "Roberts"), 0, "bob.roberts@example.com", Arrays.asList(new PhoneNumber("1234567890"))))); TBase toRead = new AddressBook( Arrays.asList( new Person( new Name("Bob", "Roberts"), 0, null, null))); shouldDoProjection(conf, toWrite, toRead, AddressBook.class); }
Example #13
Source File: TestColumnIndexFiltering.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Reads users from {@code file} with the given filter and projection schema.
 *
 * @param filter the filter applied to the reader
 * @param schema the projection; its string form is set under
 *     {@code ReadSupport.PARQUET_READ_SCHEMA}
 * @param useOtherFiltering toggles the dictionary, statistics and record filters together
 * @param useColumnIndexFilter toggles the column index filter
 * @return the users returned by {@code PhoneBookWriter.readUsers}
 * @throws IOException if reading fails
 */
private List<User> readUsersWithProjection(
    Filter filter,
    MessageType schema,
    boolean useOtherFiltering,
    boolean useColumnIndexFilter) throws IOException {
  final String projection = schema.toString();
  return PhoneBookWriter.readUsers(
      ParquetReader.builder(new GroupReadSupport(), file)
          .withFilter(filter)
          .useDictionaryFilter(useOtherFiltering)
          .useStatsFilter(useOtherFiltering)
          .useRecordFilter(useOtherFiltering)
          .useColumnIndexFilter(useColumnIndexFilter)
          .set(ReadSupport.PARQUET_READ_SCHEMA, projection));
}
Example #14
Source File: DataWritableReadSupport.java From parquet-mr with Apache License 2.0 | 5 votes |
/** * * It creates the readContext for Parquet side with the requested schema during the init phase. * * @param configuration needed to get the wanted columns * @param keyValueMetaData // unused * @param fileSchema parquet file schema * @return the parquet ReadContext */ @Override public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(final Configuration configuration, final Map<String, String> keyValueMetaData, final MessageType fileSchema) { final String columns = configuration.get(IOConstants.COLUMNS); final Map<String, String> contextMetadata = new HashMap<String, String>(); if (columns != null) { final List<String> listColumns = getColumns(columns); final List<Type> typeListTable = new ArrayList<Type>(); for (final String col : listColumns) { // listColumns contains partition columns which are metadata only if (fileSchema.containsField(col)) { typeListTable.add(fileSchema.getType(col)); } else { // below allows schema evolution typeListTable.add(new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, col)); } } MessageType tableSchema = new MessageType(TABLE_SCHEMA, typeListTable); contextMetadata.put(HIVE_SCHEMA_KEY, tableSchema.toString()); MessageType requestedSchemaByUser = tableSchema; final List<Integer> indexColumnsWanted = ColumnProjectionUtils.getReadColumnIDs(configuration); final List<Type> typeListWanted = new ArrayList<Type>(); for (final Integer idx : indexColumnsWanted) { typeListWanted.add(tableSchema.getType(listColumns.get(idx))); } requestedSchemaByUser = resolveSchemaAccess(new MessageType(fileSchema.getName(), typeListWanted), fileSchema, configuration); return new ReadContext(requestedSchemaByUser, contextMetadata); } else { contextMetadata.put(HIVE_SCHEMA_KEY, fileSchema.toString()); return new ReadContext(fileSchema, contextMetadata); } }
Example #15
Source File: AvroParquetReader.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Creates the Avro read support for this builder's data model.
 *
 * <p>Before returning, {@code AvroReadSupport.AVRO_COMPATIBILITY} is written to the
 * configuration: always {@code false} for reflect-based reading, otherwise the value of
 * {@code enableCompatibility}.
 *
 * @return a new {@link AvroReadSupport} for {@code model}
 */
@Override
protected ReadSupport<T> getReadSupport() {
  final boolean compatibility = !isReflect && enableCompatibility;
  conf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, compatibility);
  return new AvroReadSupport<T>(model);
}
Example #16
Source File: DataWritableReadSupport.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Creates the Hive record materializer that interprets Parquet data as Hive writables.
 *
 * <p>The Hive table schema is read from the read context's metadata under
 * {@code HIVE_SCHEMA_KEY}, parsed, and resolved against the file schema through
 * {@code resolveSchemaAccess}.
 *
 * @param configuration passed to {@code resolveSchemaAccess}
 * @param keyValueMetaData string map of metadata (not read by this method)
 * @param fileSchema the file schema, passed to {@code resolveSchemaAccess}
 * @param readContext containing the requested schema and the schema of the hive table
 * @return Record Materialize for Hive
 * @throws IllegalStateException if the read context carries no read-support metadata
 */
@Override
public RecordMaterializer<ArrayWritable> prepareForRead(
    final Configuration configuration,
    final Map<String, String> keyValueMetaData,
    final MessageType fileSchema,
    final org.apache.parquet.hadoop.api.ReadSupport.ReadContext readContext) {
  final Map<String, String> supportMetadata = readContext.getReadSupportMetadata();
  if (supportMetadata == null) {
    throw new IllegalStateException("ReadContext not initialized properly. "
        + "Don't know the Hive Schema.");
  }

  final String hiveSchemaText = supportMetadata.get(HIVE_SCHEMA_KEY);
  final MessageType tableSchema = resolveSchemaAccess(
      MessageTypeParser.parseMessageType(hiveSchemaText), fileSchema, configuration);

  return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema);
}
Example #17
Source File: DeprecatedInputFormatTest.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Runs the two-stage round trip used by the test: a new-API job that writes Parquet with the
 * given codec, followed by an old-API (mapred) job that reads it back as text.
 *
 * @param codec the compression codec used by the write job
 * @throws IOException if a file system or job operation fails
 * @throws ClassNotFoundException declared by job submission
 * @throws InterruptedException if interrupted while the write job runs
 */
private void runMapReduceJob(CompressionCodecName codec)
    throws IOException, ClassNotFoundException, InterruptedException {
  // Start from a clean slate: remove output left over from earlier runs.
  final FileSystem fileSystem = parquetPath.getFileSystem(conf);
  fileSystem.delete(parquetPath, true);
  fileSystem.delete(outputPath, true);

  // Stage 1: text input -> Parquet output, map-only.
  writeJob = new Job(conf, "write");
  TextInputFormat.addInputPath(writeJob, inputPath);
  writeJob.setInputFormatClass(TextInputFormat.class);
  writeJob.setNumReduceTasks(0);
  ExampleOutputFormat.setCompression(writeJob, codec);
  ExampleOutputFormat.setOutputPath(writeJob, parquetPath);
  writeJob.setOutputFormatClass(ExampleOutputFormat.class);
  writeJob.setMapperClass(ReadMapper.class);
  ExampleOutputFormat.setSchema(writeJob, MessageTypeParser.parseMessageType(writeSchema));
  writeJob.submit();
  waitForJob(writeJob);

  // Stage 2: Parquet input through the deprecated input format -> text output, map-only.
  final String readSupportClassName = GroupReadSupport.class.getCanonicalName();
  jobConf.set(ReadSupport.PARQUET_READ_SCHEMA, readSchema);
  jobConf.set(ParquetInputFormat.READ_SUPPORT_CLASS, readSupportClassName);
  jobConf.setInputFormat(MyDeprecatedInputFormat.class);
  MyDeprecatedInputFormat.setInputPaths(jobConf, parquetPath);
  jobConf.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class);
  org.apache.hadoop.mapred.TextOutputFormat.setOutputPath(jobConf, outputPath);
  jobConf.setMapperClass(DeprecatedWriteMapper.class);
  jobConf.setNumReduceTasks(0);
  mapRedJob = JobClient.runJob(jobConf);
}
Example #18
Source File: TajoReadSupport.java From tajo with Apache License 2.0 | 5 votes |
/**
 * Initializes the ReadSupport by converting the requested Tajo schema into a Parquet schema.
 *
 * @param context The InitContext (not read by this method).
 * @return A ReadContext whose requested schema is the converted Tajo schema.
 * @throws RuntimeException if no requested schema has been set on this instance
 */
@Override
public ReadSupport.ReadContext init(InitContext context) {
  if (requestedSchema == null) {
    throw new RuntimeException("requestedSchema is null.");
  }

  final TajoSchemaConverter converter = new TajoSchemaConverter();
  final MessageType projection = converter.convert(requestedSchema);
  LOG.debug("Reading data with projection:\n" + projection);

  return new ReadContext(projection);
}
Example #19
Source File: GroupReadSupportTest.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Verifies that when no read schema is configured, {@code GroupReadSupport.init} requests the
 * full file schema.
 */
@Test
public void testInitWithoutSpecifyingRequestSchema() throws Exception {
  GroupReadSupport s = new GroupReadSupport();
  Configuration configuration = new Configuration();
  Map<String, String> keyValueMetaData = new HashMap<String, String>();
  MessageType fileSchema = MessageTypeParser.parseMessageType(fullSchemaStr);

  ReadSupport.ReadContext context = s.init(configuration, keyValueMetaData, fileSchema);

  // assertEquals takes (expected, actual); the arguments were swapped, which would make a
  // failure message report the two schemas the wrong way round.
  assertEquals(fileSchema, context.getRequestedSchema());
}
Example #20
Source File: ParquetReader.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Creates a reader over a single Hadoop path by delegating to the list-based constructor.
 *
 * @param conf the Hadoop configuration used to open the file and build the read options
 * @param file the path to read
 * @param readSupport the read support used to materialize records
 * @param filter the record filter; must not be null
 * @throws IOException if the path cannot be turned into an input file
 * @throws NullPointerException if {@code filter} is null
 */
private ParquetReader(Configuration conf,
                      Path file,
                      ReadSupport<T> readSupport,
                      FilterCompat.Filter filter) throws IOException {
  this(
      Collections.<InputFile>singletonList(HadoopInputFile.fromPath(file, conf)),
      HadoopReadOptions
          .builder(conf)
          .withRecordFilter(Objects.requireNonNull(filter, "filter cannot be null"))
          .build(),
      readSupport);
}
Example #21
Source File: ParquetInputFormat.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Instantiates the given read support class through its no-argument constructor.
 *
 * <p>{@code Class.newInstance()} is deprecated because it lets checked exceptions thrown by
 * the constructor escape undeclared. Going through {@code getDeclaredConstructor()} wraps any
 * constructor failure, and a missing no-argument constructor, in a reflective exception,
 * which is reported as a {@code BadConfigurationException} like the other instantiation
 * failures.
 *
 * @param readSupportClass to instantiate
 * @param <T> the Java type of objects created by the ReadSupport
 * @return the configured read support
 * @throws BadConfigurationException if the class cannot be instantiated
 */
static <T> ReadSupport<T> getReadSupportInstance(
    Class<? extends ReadSupport<T>> readSupportClass) {
  try {
    return readSupportClass.getDeclaredConstructor().newInstance();
  } catch (ReflectiveOperationException e) {
    throw new BadConfigurationException("could not instantiate read support class", e);
  }
}
Example #22
Source File: ParquetRecordReader.java From flink with Apache License 2.0 | 5 votes |
public void initialize(ParquetFileReader reader, Configuration configuration) { this.reader = reader; FileMetaData parquetFileMetadata = reader.getFooter().getFileMetaData(); // real schema of parquet file this.fileSchema = parquetFileMetadata.getSchema(); Map<String, String> fileMetadata = parquetFileMetadata.getKeyValueMetaData(); ReadSupport.ReadContext readContext = readSupport.init(new InitContext( configuration, toSetMultiMap(fileMetadata), readSchema)); this.columnIOFactory = new ColumnIOFactory(parquetFileMetadata.getCreatedBy()); this.recordMaterializer = readSupport.prepareForRead( configuration, fileMetadata, readSchema, readContext); this.numTotalRecords = reader.getRecordCount(); }
Example #23
Source File: FilteringBenchmarks.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Creates a reader builder for {@code file} that materializes records as {@code Group}s.
 *
 * <p>The builder is an anonymous subclass supplying a {@link GroupReadSupport}. The example
 * schema is set on it before it is handed to the read configurator.
 *
 * @return the builder as returned by the read configurator
 * @throws IOException if the input file cannot be resolved
 */
public ParquetReader.Builder<Group> createReaderBuilder() throws IOException {
  final ReadConfigurator configurator = getReadConfigurator();

  final ParquetReader.Builder<Group> groupBuilder =
      new ParquetReader.Builder<Group>(HadoopInputFile.fromPath(file, new Configuration())) {
        @Override
        protected ReadSupport<Group> getReadSupport() {
          return new GroupReadSupport();
        }
      };

  return configurator.configureBuilder(
      groupBuilder.set(GroupWriteSupport.PARQUET_EXAMPLE_SCHEMA, SCHEMA.toString()));
}
Example #24
Source File: GroupReadSupport.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Builds the read context from the optional partial schema stored in the configuration under
 * {@code ReadSupport.PARQUET_READ_SCHEMA}.
 *
 * @param configuration the job configuration
 * @param keyValueMetaData the file's key/value metadata (not read by this method)
 * @param fileSchema the schema of the Parquet file
 * @return a read context for the schema returned by {@code getSchemaForRead}
 */
@Override
public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(
    Configuration configuration,
    Map<String, String> keyValueMetaData,
    MessageType fileSchema) {
  final String requestedSchemaText = configuration.get(ReadSupport.PARQUET_READ_SCHEMA);
  return new ReadContext(getSchemaForRead(fileSchema, requestedSchemaText));
}
Example #25
Source File: ParquetInputFormat.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Returns a read support instance, preferring the class set on this input format and falling
 * back to the one named in the configuration.
 *
 * @param configuration to find the configuration for the read support
 * @return the configured read support
 * @deprecated use getReadSupportInstance static methods instead
 */
@Deprecated
@SuppressWarnings("unchecked")
ReadSupport<T> getReadSupport(Configuration configuration){
  Class<? extends ReadSupport<T>> supportClass = readSupportClass;
  if (supportClass == null) {
    // No class set on this instance: look it up from the configuration.
    supportClass = (Class<? extends ReadSupport<T>>) getReadSupportClass(configuration);
  }
  return getReadSupportInstance(supportClass);
}
Example #26
Source File: ParquetInputFormat.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * {@inheritDoc}
 *
 * <p>The reader is created with the read support and the filter resolved from the task's
 * configuration; the split itself is not read here.
 */
@Override
public RecordReader<Void, T> createRecordReader(
    InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
  final Configuration taskConf = ContextUtil.getConfiguration(taskAttemptContext);
  return new ParquetRecordReader<T>(getReadSupport(taskConf), getFilter(taskConf));
}
Example #27
Source File: InternalParquetRecordReader.java From parquet-mr with Apache License 2.0 | 5 votes |
public void initialize(ParquetFileReader reader, ParquetReadOptions options) { // copy custom configuration to the Configuration passed to the ReadSupport Configuration conf = new Configuration(); if (options instanceof HadoopReadOptions) { conf = ((HadoopReadOptions) options).getConf(); } for (String property : options.getPropertyNames()) { conf.set(property, options.getProperty(property)); } // initialize a ReadContext for this file this.reader = reader; FileMetaData parquetFileMetadata = reader.getFooter().getFileMetaData(); this.fileSchema = parquetFileMetadata.getSchema(); Map<String, String> fileMetadata = parquetFileMetadata.getKeyValueMetaData(); ReadSupport.ReadContext readContext = readSupport.init(new InitContext(conf, toSetMultiMap(fileMetadata), fileSchema)); this.columnIOFactory = new ColumnIOFactory(parquetFileMetadata.getCreatedBy()); this.requestedSchema = readContext.getRequestedSchema(); this.columnCount = requestedSchema.getPaths().size(); // Setting the projection schema before running any filtering (e.g. getting filtered record count) // because projection impacts filtering reader.setRequestedSchema(requestedSchema); this.recordConverter = readSupport.prepareForRead(conf, fileMetadata, fileSchema, readContext); this.strictTypeChecking = options.isEnabled(STRICT_TYPE_CHECKING, true); this.total = reader.getFilteredRecordCount(); this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(options, total); this.filterRecords = options.useRecordFilter(); LOG.info("RecordReader initialized will read a total of {} records.", total); }
Example #28
Source File: ParquetReader.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Creates a path-based builder with a new default Hadoop configuration.
 *
 * <p>The {@code file} field is set to null for this variant; only {@code path} is populated.
 *
 * @param readSupport the read support; must not be null
 * @param path the path to read; must not be null
 * @throws NullPointerException if {@code readSupport} or {@code path} is null
 */
@Deprecated
private Builder(ReadSupport<T> readSupport, Path path) {
  // Validate both arguments up front, in the same order as before.
  Objects.requireNonNull(readSupport, "readSupport cannot be null");
  Objects.requireNonNull(path, "path cannot be null");

  this.readSupport = readSupport;
  this.file = null;
  this.path = path;
  this.conf = new Configuration();
  this.optionsBuilder = HadoopReadOptions.builder(this.conf);
}
Example #29
Source File: ParquetReader.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Creates a reader over the given input files.
 *
 * <p>Only an iterator over {@code files} is kept; nothing is opened here.
 *
 * @param files the input files to read
 * @param options the read options
 * @param readSupport the read support used to materialize records
 * @throws IOException declared by the signature
 */
private ParquetReader(List<InputFile> files,
                      ParquetReadOptions options,
                      ReadSupport<T> readSupport) throws IOException {
  this.filesIterator = files.iterator();
  this.options = options;
  this.readSupport = readSupport;
}
Example #30
Source File: ParquetReader.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Creates a reader builder for the given read support and path.
 *
 * @param readSupport the read support used to materialize records
 * @param path the path to read
 * @param <T> the Java type of the records produced
 * @return a new builder
 */
public static <T> Builder<T> builder(ReadSupport<T> readSupport, Path path) {
  final Builder<T> created = new Builder<T>(readSupport, path);
  return created;
}