Java Code Examples for org.apache.parquet.hadoop.ParquetReader#close()
The following examples show how to use org.apache.parquet.hadoop.ParquetReader#close().
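Because ParquetReader implements java.io.Closeable, close() can be called explicitly in a finally block (as most of the examples below do) or implicitly via try-with-resources. Before the examples, here is a minimal sketch of the try-with-resources form, assuming a file readable with the generic GroupReadSupport; the input path is hypothetical:

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.example.GroupReadSupport;

public class CloseExample {
  public static void main(String[] args) throws IOException {
    // Hypothetical input path; replace with a real Parquet file.
    Path path = new Path("/tmp/data.parquet");

    // try-with-resources invokes reader.close() automatically,
    // even if read() throws, so the underlying streams are released.
    try (ParquetReader<Group> reader =
        ParquetReader.builder(new GroupReadSupport(), path).build()) {
      for (Group group = reader.read(); group != null; group = reader.read()) {
        System.out.println(group);
      }
    }
  }
}

The explicit finally-block pattern in the examples below achieves the same guarantee on older codebases that predate try-with-resources conventions.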
Example 1
Source File: ParquetHdfsDataWriterTest.java From incubator-gobblin with Apache License 2.0
private List<TestRecord> readParquetFilesAvro(File outputFile) throws IOException {
  ParquetReader<org.apache.gobblin.test.avro.TestRecord> reader = null;
  List<TestRecord> records = new ArrayList<>();
  try {
    reader = new AvroParquetReader<>(new Path(outputFile.toString()));
    for (org.apache.gobblin.test.avro.TestRecord value = reader.read(); value != null; value = reader.read()) {
      records.add(new TestRecord(value.getPartition(), value.getSequence(), value.getPayload()));
    }
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (Exception ex) {
        System.out.println(ex.getMessage());
      }
    }
  }
  return records;
}
Example 2
Source File: ParquetHdfsDataWriterTest.java From incubator-gobblin with Apache License 2.0
protected List<TestRecord> readParquetFilesProto(File outputFile) throws IOException {
  ParquetReader<TestRecordProtos.TestRecordOrBuilder> reader = null;
  List<TestRecord> records = new ArrayList<>();
  try {
    reader = new ProtoParquetReader<>(new Path(outputFile.toString()));
    for (TestRecordProtos.TestRecordOrBuilder value = reader.read(); value != null; value = reader.read()) {
      records.add(new TestRecord(value.getPartition(), value.getSequence(), value.getPayload()));
    }
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (Exception ex) {
        System.out.println(ex.getMessage());
      }
    }
  }
  return records;
}
Example 3
Source File: ParquetHdfsDataWriterTest.java From incubator-gobblin with Apache License 2.0
protected List<TestRecord> readParquetFilesGroup(File outputFile) throws IOException {
  ParquetReader<Group> reader = null;
  List<Group> records = new ArrayList<>();
  try {
    reader = new ParquetReader<>(new Path(outputFile.toString()), new SimpleReadSupport());
    for (Group value = reader.read(); value != null; value = reader.read()) {
      records.add(value);
    }
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (Exception ex) {
        System.out.println(ex.getMessage());
      }
    }
  }
  return records.stream().map(value -> new TestRecord(
      value.getInteger(TestConstants.PARTITION_FIELD_NAME, 0),
      value.getLong(TestConstants.SEQUENCE_FIELD_NAME, 0),
      value.getString(TestConstants.PAYLOAD_FIELD_NAME, 0)
  )).collect(Collectors.toList());
}
Example 4
Source File: TestPruneColumnsCommand.java From parquet-mr with Apache License 2.0
private void validateColumns(String inputFile, List<String> prunePaths) throws IOException {
  ParquetReader<Group> reader =
      ParquetReader.builder(new GroupReadSupport(), new Path(inputFile)).withConf(conf).build();
  for (int i = 0; i < numRecord; i++) {
    Group group = reader.read();
    if (!prunePaths.contains("DocId")) {
      assertEquals(1L, group.getLong("DocId", 0));
    }
    if (!prunePaths.contains("Name")) {
      assertEquals("foo", group.getBinary("Name", 0).toStringUsingUTF8());
    }
    if (!prunePaths.contains("Gender")) {
      assertEquals("male", group.getBinary("Gender", 0).toStringUsingUTF8());
    }
    if (!prunePaths.contains("Links")) {
      Group subGroup = group.getGroup("Links", 0);
      if (!prunePaths.contains("Links.Backward")) {
        assertEquals(2L, subGroup.getLong("Backward", 0));
      }
      if (!prunePaths.contains("Links.Forward")) {
        assertEquals(3L, subGroup.getLong("Forward", 0));
      }
    }
  }
  reader.close();
}
Example 5
Source File: ReadBenchmarks.java From parquet-mr with Apache License 2.0
private void read(Path parquetFile, int nRows, Blackhole blackhole) throws IOException {
  ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), parquetFile)
      .withConf(configuration)
      .build();
  for (int i = 0; i < nRows; i++) {
    Group group = reader.read();
    blackhole.consume(group.getBinary("binary_field", 0));
    blackhole.consume(group.getInteger("int32_field", 0));
    blackhole.consume(group.getLong("int64_field", 0));
    blackhole.consume(group.getBoolean("boolean_field", 0));
    blackhole.consume(group.getFloat("float_field", 0));
    blackhole.consume(group.getDouble("double_field", 0));
    blackhole.consume(group.getBinary("flba_field", 0));
    blackhole.consume(group.getInt96("int96_field", 0));
  }
  reader.close();
}
Example 6
Source File: TestFiltersWithMissingColumns.java From parquet-mr with Apache License 2.0
public static long countFilteredRecords(Path path, FilterPredicate pred) throws IOException {
  ParquetReader<Group> reader = ParquetReader
      .builder(new GroupReadSupport(), path)
      .withFilter(FilterCompat.get(pred))
      .build();

  long count = 0;
  try {
    while (reader.read() != null) {
      count += 1;
    }
  } finally {
    reader.close();
  }
  return count;
}
Example 7
Source File: ParquetFileReader.java From streamx with Apache License 2.0
@Override
public Schema getSchema(Configuration conf, Path path) throws IOException {
  AvroReadSupport<GenericRecord> readSupport = new AvroReadSupport<>();
  ParquetReader.Builder<GenericRecord> builder = ParquetReader.builder(readSupport, path);
  ParquetReader<GenericRecord> parquetReader = builder.withConf(conf).build();
  GenericRecord record;
  Schema schema = null;
  while ((record = parquetReader.read()) != null) {
    schema = avroData.toConnectSchema(record.getSchema());
  }
  parquetReader.close();
  return schema;
}
Example 8
Source File: ParquetFileReader.java From streamx with Apache License 2.0
@Override
public Collection<Object> readData(Configuration conf, Path path) throws IOException {
  Collection<Object> result = new ArrayList<>();
  AvroReadSupport<GenericRecord> readSupport = new AvroReadSupport<>();
  ParquetReader.Builder<GenericRecord> builder = ParquetReader.builder(readSupport, path);
  ParquetReader<GenericRecord> parquetReader = builder.withConf(conf).build();
  GenericRecord record;
  while ((record = parquetReader.read()) != null) {
    result.add(record);
  }
  parquetReader.close();
  return result;
}
Example 9
Source File: TestParquetInLining.java From hudi with Apache License 2.0
@Test
public void testSimpleInlineFileSystem() throws IOException {
  Path outerInMemFSPath = getRandomOuterInMemPath();
  Path outerPath = new Path(FILE_SCHEME
      + outerInMemFSPath.toString().substring(outerInMemFSPath.toString().indexOf(':')));
  generatedPath = outerPath;
  ParquetWriter inlineWriter = new AvroParquetWriter(outerInMemFSPath, HoodieTestDataGenerator.AVRO_SCHEMA,
      CompressionCodecName.GZIP, 100 * 1024 * 1024, 1024 * 1024, true, inMemoryConf);
  // write a few records
  List<GenericRecord> recordsToWrite = getParquetHoodieRecords();
  for (GenericRecord rec : recordsToWrite) {
    inlineWriter.write(rec);
  }
  inlineWriter.close();
  byte[] inlineBytes = getBytesToInline(outerInMemFSPath);
  long startOffset = generateOuterFile(outerPath, inlineBytes);
  long inlineLength = inlineBytes.length;
  // generate phantom inline file
  Path inlinePath = getPhantomFile(outerPath, startOffset, inlineLength);
  // instantiate Parquet reader
  ParquetReader inLineReader = AvroParquetReader.builder(inlinePath).withConf(inlineConf).build();
  List<GenericRecord> records = readParquetGenericRecords(inLineReader);
  assertArrayEquals(recordsToWrite.toArray(), records.toArray());
  inLineReader.close();
}
Example 10
Source File: HeadCommand.java From parquet-mr with Apache License 2.0
@Override
public void execute(CommandLine options) throws Exception {
  super.execute(options);

  long num = DEFAULT;
  if (options.hasOption('n')) {
    num = Long.parseLong(options.getOptionValue('n'));
  }

  String[] args = options.getArgs();
  String input = args[0];

  ParquetReader<SimpleRecord> reader = null;
  try {
    PrintWriter writer = new PrintWriter(Main.out, true);
    reader = ParquetReader.builder(new SimpleReadSupport(), new Path(input)).build();
    for (SimpleRecord value = reader.read(); value != null && num-- > 0; value = reader.read()) {
      value.prettyPrint(writer);
      writer.println();
    }
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (Exception ex) {
        // failures on close are deliberately ignored
      }
    }
  }
}
Example 11
Source File: CatCommand.java From parquet-mr with Apache License 2.0
@Override
public void execute(CommandLine options) throws Exception {
  super.execute(options);

  String[] args = options.getArgs();
  String input = args[0];

  ParquetReader<SimpleRecord> reader = null;
  try {
    PrintWriter writer = new PrintWriter(Main.out, true);
    reader = ParquetReader.builder(new SimpleReadSupport(), new Path(input)).build();
    ParquetMetadata metadata = ParquetFileReader.readFooter(new Configuration(), new Path(input));
    JsonRecordFormatter.JsonGroupFormatter formatter =
        JsonRecordFormatter.fromSchema(metadata.getFileMetaData().getSchema());

    for (SimpleRecord value = reader.read(); value != null; value = reader.read()) {
      if (options.hasOption('j')) {
        writer.write(formatter.formatRecord(value));
      } else {
        value.prettyPrint(writer);
      }
      writer.println();
    }
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (Exception ex) {
        // failures on close are deliberately ignored
      }
    }
  }
}
Example 12
Source File: CompressionConveterTest.java From parquet-mr with Apache License 2.0
private void validateColumns(String file, int numRecord, TestDocs testDocs) throws IOException {
  ParquetReader<Group> reader =
      ParquetReader.builder(new GroupReadSupport(), new Path(file)).withConf(conf).build();
  for (int i = 0; i < numRecord; i++) {
    Group group = reader.read();
    assertTrue(group.getLong("DocId", 0) == testDocs.docId[i]);
    assertArrayEquals(group.getBinary("Name", 0).getBytes(), testDocs.name[i].getBytes());
    assertArrayEquals(group.getBinary("Gender", 0).getBytes(), testDocs.gender[i].getBytes());
    Group subGroup = group.getGroup("Links", 0);
    assertArrayEquals(subGroup.getBinary("Backward", 0).getBytes(), testDocs.linkBackward[i].getBytes());
    assertArrayEquals(subGroup.getBinary("Forward", 0).getBytes(), testDocs.linkForward[i].getBytes());
  }
  reader.close();
}
Example 13
Source File: ScroogeBinaryTest.java From parquet-mr with Apache License 2.0
@Test
public void testScroogeBinaryEncoding() throws Exception {
  StringAndBinary expected = new StringAndBinary.Immutable("test",
      ByteBuffer.wrap(new byte[] {-123, 20, 33}));

  File temp = tempDir.newFile(UUID.randomUUID().toString());
  temp.deleteOnExit();
  temp.delete();

  Path path = new Path(temp.getPath());

  ParquetWriter<StringAndBinary> writer = new ParquetWriter<StringAndBinary>(
      path, new Configuration(), new ScroogeWriteSupport<StringAndBinary>(StringAndBinary.class));
  writer.write(expected);
  writer.close();

  // read using the parquet-thrift version to isolate the write path
  ParquetReader<org.apache.parquet.thrift.test.binary.StringAndBinary> reader =
      ThriftParquetReader.<org.apache.parquet.thrift.test.binary.StringAndBinary>build(path)
          .withThriftClass(org.apache.parquet.thrift.test.binary.StringAndBinary.class)
          .build();
  org.apache.parquet.thrift.test.binary.StringAndBinary record = reader.read();
  reader.close();

  Assert.assertEquals("String should match after serialization round trip",
      "test", record.s);
  Assert.assertEquals("ByteBuffer should match after serialization round trip",
      ByteBuffer.wrap(new byte[] {-123, 20, 33}), record.b);
}
Example 14
Source File: ScroogeBinaryTest.java From parquet-mr with Apache License 2.0
@Test @SuppressWarnings("unchecked") public void testScroogeBinaryDecoding() throws Exception { StringAndBinary expected = new StringAndBinary.Immutable("test", ByteBuffer.wrap(new byte[] {-123, 20, 33})); File temp = tempDir.newFile(UUID.randomUUID().toString()); temp.deleteOnExit(); temp.delete(); Path path = new Path(temp.getPath()); ParquetWriter<StringAndBinary> writer = new ParquetWriter<StringAndBinary>( path, new Configuration(), new ScroogeWriteSupport<StringAndBinary>(StringAndBinary.class)); writer.write(expected); writer.close(); Configuration conf = new Configuration(); conf.set("parquet.thrift.converter.class", ScroogeRecordConverter.class.getName()); ParquetReader<StringAndBinary> reader = ParquetReader.<StringAndBinary> builder(new ScroogeReadSupport(), path) .withConf(conf) .build(); StringAndBinary record = reader.read(); reader.close(); Assert.assertEquals("String should match after serialization round trip", "test", record.s()); Assert.assertEquals("ByteBuffer should match after serialization round trip", ByteBuffer.wrap(new byte[] {-123, 20, 33}), record.b()); }
Example 15
Source File: TestBinary.java From parquet-mr with Apache License 2.0
@Test
public void testBinary() throws IOException {
  StringAndBinary expected = new StringAndBinary("test",
      ByteBuffer.wrap(new byte[] { -123, 20, 33 }));

  File temp = tempDir.newFile(UUID.randomUUID().toString());
  temp.deleteOnExit();
  temp.delete();

  Path path = new Path(temp.getPath());
  ThriftParquetWriter<StringAndBinary> writer = new ThriftParquetWriter<StringAndBinary>(
      path, StringAndBinary.class, CompressionCodecName.SNAPPY);
  writer.write(expected);
  writer.close();

  ParquetReader<StringAndBinary> reader = ThriftParquetReader.<StringAndBinary>build(path)
      .withThriftClass(StringAndBinary.class)
      .build();
  StringAndBinary record = reader.read();
  reader.close();

  assertSchema(ParquetFileReader.readFooter(new Configuration(), path));
  assertEquals("Should match after serialization round trip", expected, record);
}