org.apache.parquet.avro.AvroParquetWriter Java Examples
The following examples show how to use org.apache.parquet.avro.AvroParquetWriter.
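All of the examples below revolve around the same fluent builder pattern: obtain a builder for a target file, attach an Avro Schema, optionally set a Hadoop Configuration and Parquet tuning options, then call build() to get a ParquetWriter. As an orientation, here is a minimal self-contained sketch; the one-field schema and the /tmp output path are placeholders of ours, not taken from any project below:

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.hadoop.ParquetWriter;

public class MinimalAvroParquetWrite {
    public static void main(String[] args) throws Exception {
        // Hypothetical one-field record schema, just to have something to write.
        Schema schema = new Schema.Parser().parse(
                "{\"type\":\"record\",\"name\":\"User\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"}]}");

        // Build a writer for a local path; withConf supplies Hadoop settings.
        try (ParquetWriter<GenericRecord> writer =
                AvroParquetWriter.<GenericRecord>builder(new Path("/tmp/users.parquet"))
                        .withSchema(schema)
                        .withConf(new Configuration())
                        .build()) {
            GenericRecord record = new GenericData.Record(schema);
            record.put("name", "Alice");
            writer.write(record);
        }
    }
}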
Example #1
Source File: IntegrationTestHelper.java From circus-train with Apache License 2.0
URI createData(
    URI tableUri,
    Schema schema,
    String hour,
    int id,
    String fieldName,
    Object data)
  throws IOException {
  GenericData.Record record = new GenericData.Record(schema);
  record.put("id", id);

  if (fieldName != null) {
    Schema.Field field = schema.getField(fieldName);
    Schema fieldSchema = field.schema();
    if (data instanceof Map) {
      GenericData.Record schemaRecord = new GenericData.Record(fieldSchema);
      ((Map<String, String>) data).forEach(schemaRecord::put);
      record.put(fieldName, schemaRecord);
    } else if (data != null) {
      record.put(fieldName, data);
    }
  }

  URI partition = URI.create(tableUri + "/hour=" + hour);
  String path = partition.getPath();
  File parentFolder = new File(path);
  parentFolder.mkdirs();
  File partitionFile = new File(parentFolder, "parquet0000");
  Path filePath = new Path(partitionFile.toURI());
  ParquetWriter<GenericData.Record> writer = AvroParquetWriter.<GenericData.Record>builder(filePath)
      .withSchema(schema)
      .withConf(new Configuration())
      .build();

  try {
    writer.write(record);
  } finally {
    writer.close();
  }

  return partition;
}
Example #2
Source File: ConvertAvroToParquet.java From nifi with Apache License 2.0
private ParquetWriter createParquetWriter(final ProcessContext context, final FlowFile flowFile,
                                          final OutputStream out, final Schema schema)
        throws IOException {

    NifiParquetOutputFile nifiParquetOutputFile = new NifiParquetOutputFile(out);

    final AvroParquetWriter.Builder<GenericRecord> parquetWriter = AvroParquetWriter
            .<GenericRecord>builder(nifiParquetOutputFile)
            .withSchema(schema);

    final ParquetConfig parquetConfig = createParquetConfig(context, flowFile.getAttributes());
    parquetConfig.setAvroReadCompatibility(true);
    parquetConfig.setAvroAddListElementRecords(false);
    parquetConfig.setAvroWriteOldListStructure(false);

    final Configuration conf = new Configuration();
    applyCommonConfig(parquetWriter, conf, parquetConfig);

    return parquetWriter.build();
}
Example #3
Source File: FetchParquetTest.java From nifi with Apache License 2.0
private void writeParquetUsersWithDecimal(final File parquetFile, int numUsers) throws IOException {
    if (parquetFile.exists()) {
        Assert.assertTrue(parquetFile.delete());
    }

    final BigDecimal initialAmount = new BigDecimal("1234567.0123456789");
    final AvroParquetWriter.Builder<GenericRecord> writerBuilder = createAvroParquetWriter(parquetFile, schemaWithDecimal);

    final List<Schema> amountSchemaUnion = schemaWithDecimal.getField("amount").schema().getTypes();
    final Schema amountSchema = amountSchemaUnion.stream()
            .filter(s -> s.getType() == Schema.Type.FIXED)
            .findFirst()
            .orElse(null);
    Assert.assertNotNull(amountSchema);

    final Conversions.DecimalConversion decimalConversion = new Conversions.DecimalConversion();

    try (final ParquetWriter<GenericRecord> writer = writerBuilder.build()) {
        for (int i = 0; i < numUsers; i++) {
            final BigDecimal incrementedAmount = initialAmount.add(new BigDecimal("1"));
            final GenericRecord user = new GenericData.Record(schemaWithDecimal);
            user.put("name", "Bob" + i);
            user.put("amount", decimalConversion.toFixed(incrementedAmount, amountSchema, amountSchema.getLogicalType()));
            writer.write(user);
        }
    }
}
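Worth noting in this example: Avro's GenericData has no built-in BigDecimal support, so the test converts by hand — Conversions.DecimalConversion.toFixed() packs the unscaled decimal value into the fixed-width byte array that the field's decimal logical type prescribes, and that fixed value is what the writer stores.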
Example #4
Source File: ParquetFileReaderTest.java From kafka-connect-fs with Apache License 2.0
@Override
protected Path createDataFile(ReaderFsTestConfig fsConfig, Object... args) throws IOException {
    FileSystem fs = fsConfig.getFs();
    File parquetFile = File.createTempFile("test-", "." + getFileExtension());

    try (ParquetWriter writer = AvroParquetWriter.<GenericRecord>builder(new Path(parquetFile.toURI()))
            .withConf(fs.getConf())
            .withWriteMode(ParquetFileWriter.Mode.OVERWRITE)
            .withSchema(readerSchema)
            .build()) {
        IntStream.range(0, NUM_RECORDS).forEach(index -> {
            GenericRecord datum = new GenericData.Record(readerSchema);
            datum.put(FIELD_INDEX, index);
            String uuid = UUID.randomUUID().toString();
            datum.put(FIELD_NAME, String.format("%d_name_%s", index, uuid));
            datum.put(FIELD_SURNAME, String.format("%d_surname_%s", index, uuid));
            try {
                fsConfig.offsetsByIndex().put(index, (long) index);
                writer.write(datum);
            } catch (IOException ioe) {
                throw new RuntimeException(ioe);
            }
        });
    }

    Path path = new Path(new Path(fsConfig.getFsUri()), parquetFile.getName());
    fs.moveFromLocalFile(new Path(parquetFile.getAbsolutePath()), path);
    return path;
}
Example #5
Source File: InputFormatTestUtil.java From hudi with Apache License 2.0
public static void simulateParquetUpdates(File directory, Schema schema, String originalCommit,
    int totalNumberOfRecords, int numberOfRecordsToUpdate, String newCommit) throws IOException {
  File fileToUpdate = Objects.requireNonNull(directory.listFiles((dir, name) -> name.endsWith("parquet")))[0];
  String fileId = FSUtils.getFileId(fileToUpdate.getName());
  File dataFile = new File(directory, FSUtils.makeDataFileName(newCommit, TEST_WRITE_TOKEN, fileId));
  try (AvroParquetWriter parquetWriter = new AvroParquetWriter(new Path(dataFile.getAbsolutePath()), schema)) {
    for (GenericRecord record : generateAvroRecords(schema, totalNumberOfRecords, originalCommit, fileId)) {
      if (numberOfRecordsToUpdate > 0) {
        // update this record
        record.put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, newCommit);
        String oldSeqNo = (String) record.get(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD);
        record.put(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, oldSeqNo.replace(originalCommit, newCommit));
        numberOfRecordsToUpdate--;
      }
      parquetWriter.write(record);
    }
  }
}
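A note on this example: the two-argument new AvroParquetWriter(path, schema) constructor is the older, pre-builder API, deprecated in recent parquet-avro releases in favor of the builder used elsewhere on this page. A builder-based equivalent of the write above might look like the following sketch; writeWithBuilder is our name, and the imports are the same ones the surrounding Hudi examples use:

// Sketch: the same write through the fluent builder API instead of the deprecated constructor.
static void writeWithBuilder(File dataFile, Schema schema, List<GenericRecord> records) throws IOException {
  try (ParquetWriter<GenericRecord> writer =
      AvroParquetWriter.<GenericRecord>builder(new Path(dataFile.getAbsolutePath()))
          .withSchema(schema)
          .build()) {
    for (GenericRecord record : records) {
      writer.write(record);
    }
  }
}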
Example #6
Source File: TestHDFSParquetImporter.java From hudi with Apache License 2.0
public List<GenericRecord> createInsertRecords(Path srcFolder) throws ParseException, IOException {
  Path srcFile = new Path(srcFolder.toString(), "file1.parquet");
  long startTime = HoodieActiveTimeline.COMMIT_FORMATTER.parse("20170203000000").getTime() / 1000;
  List<GenericRecord> records = new ArrayList<GenericRecord>();
  for (long recordNum = 0; recordNum < 96; recordNum++) {
    records.add(HoodieTestDataGenerator.generateGenericRecord(Long.toString(recordNum), "rider-" + recordNum,
        "driver-" + recordNum, startTime + TimeUnit.HOURS.toSeconds(recordNum)));
  }
  try (ParquetWriter<GenericRecord> writer = AvroParquetWriter.<GenericRecord>builder(srcFile)
      .withSchema(HoodieTestDataGenerator.AVRO_SCHEMA)
      .withConf(HoodieTestUtils.getDefaultHadoopConf())
      .build()) {
    for (GenericRecord record : records) {
      writer.write(record);
    }
  }
  return records;
}
Example #7
Source File: TestHDFSParquetImporter.java From hudi with Apache License 2.0
public List<GenericRecord> createUpsertRecords(Path srcFolder) throws ParseException, IOException {
  Path srcFile = new Path(srcFolder.toString(), "file1.parquet");
  long startTime = HoodieActiveTimeline.COMMIT_FORMATTER.parse("20170203000000").getTime() / 1000;
  List<GenericRecord> records = new ArrayList<GenericRecord>();
  // 10 for update
  for (long recordNum = 0; recordNum < 11; recordNum++) {
    records.add(HoodieTestDataGenerator.generateGenericRecord(Long.toString(recordNum), "rider-upsert-" + recordNum,
        "driver-upsert" + recordNum, startTime + TimeUnit.HOURS.toSeconds(recordNum)));
  }
  // 4 for insert
  for (long recordNum = 96; recordNum < 100; recordNum++) {
    records.add(HoodieTestDataGenerator.generateGenericRecord(Long.toString(recordNum), "rider-upsert-" + recordNum,
        "driver-upsert" + recordNum, startTime + TimeUnit.HOURS.toSeconds(recordNum)));
  }
  try (ParquetWriter<GenericRecord> writer = AvroParquetWriter.<GenericRecord>builder(srcFile)
      .withSchema(HoodieTestDataGenerator.AVRO_SCHEMA)
      .withConf(HoodieTestUtils.getDefaultHadoopConf())
      .build()) {
    for (GenericRecord record : records) {
      writer.write(record);
    }
  }
  return records;
}
Example #8
Source File: ConvertCsvToParquetFileExpressionProcessor.java From vividus with Apache License 2.0
private void write(File file, String avroSchemaPath, List<Map<String, String>> data) throws IOException {
    Schema schema = new Parser().parse(ResourceUtils.loadResource(avroSchemaPath));
    try (ParquetWriter<GenericRecord> writer = AvroParquetWriter
            .<GenericRecord>builder(new Path(file.toURI()))
            .withWriteMode(ParquetFileWriter.Mode.OVERWRITE)
            .withDataModel(GenericData.get())
            .withSchema(schema)
            .build()) {
        for (Map<String, String> map : data) {
            GenericRecord record = new GenericData.Record(schema);
            map.forEach(record::put);
            writer.write(record);
        }
    }
}
Example #9
Source File: FetchParquetTest.java From nifi with Apache License 2.0
private void writeParquetUsersWithArray(final File parquetFile, int numUsers) throws IOException {
    if (parquetFile.exists()) {
        Assert.assertTrue(parquetFile.delete());
    }

    final AvroParquetWriter.Builder<GenericRecord> writerBuilder = createAvroParquetWriter(parquetFile, schemaWithArray);

    final Schema favoriteColorsSchema = schemaWithArray.getField("favorite_colors").schema();

    try (final ParquetWriter<GenericRecord> writer = writerBuilder.build()) {
        for (int i = 0; i < numUsers; i++) {
            final GenericRecord user = new GenericData.Record(schemaWithArray);
            user.put("name", "Bob" + i);
            user.put("favorite_number", i);

            final GenericData.Array<String> colors = new GenericData.Array<>(1, favoriteColorsSchema);
            colors.add("blue" + i);
            user.put("favorite_colors", colors);

            writer.write(user);
        }
    }
}
Example #10
Source File: ParquetIO.java From beam with Apache License 2.0
@Override
public void open(WritableByteChannel channel) throws IOException {
  checkNotNull(getJsonSchema(), "Schema cannot be null");

  Schema schema = new Schema.Parser().parse(getJsonSchema());

  BeamParquetOutputFile beamParquetOutputFile =
      new BeamParquetOutputFile(Channels.newOutputStream(channel));

  AvroParquetWriter.Builder<GenericRecord> builder =
      AvroParquetWriter.<GenericRecord>builder(beamParquetOutputFile)
          .withSchema(schema)
          .withCompressionCodec(getCompressionCodec())
          .withWriteMode(OVERWRITE);

  if (getConfiguration() != null) {
    builder = builder.withConf(getConfiguration().get());
  }

  this.writer = builder.build();
}
Example #11
Source File: ParquetPartition.java From entrada with GNU General Public License v3.0
public ParquetPartition(String partition, Schema schema) {
  Configuration conf = new Configuration();
  Path file = new Path(partition + System.getProperty("file.separator") + UUID.randomUUID() + ".parquet");
  filename = file.toString();
  log.info("Create new parquet file: {}", filename);
  try {
    Files.createDirectories(Paths.get(partition));
    writer = AvroParquetWriter
        .<T>builder(file)
        .enableDictionaryEncoding()
        .withCompressionCodec(CompressionCodecName.SNAPPY)
        .withConf(conf)
        .withWriterVersion(WriterVersion.PARQUET_1_0)
        .withSchema(schema)
        .withRowGroupSize(ROWGROUP_SIZE)
        .build();
  } catch (IOException e) {
    throw new ApplicationException("Cannot create a Parquet parition", e);
  }
}
Example #12
Source File: AvroParquetConvertCreator.java From datacollector with Apache License 2.0
@Override
protected void addNecessaryJarsToJob(Configuration conf) {
  MapreduceUtils.addJarsToJob(conf,
      SemanticVersion.class,
      ParquetWriter.class,
      AvroParquetWriter.class,
      AvroParquetWriterBuilder190Int96.class,
      AvroSchemaConverter190Int96Avro18.class,
      FsInput.class,
      CompressionCodec.class,
      ParquetProperties.class,
      BytesInput.class,
      AvroToParquetConverterUtil.class,
      AvroLogicalTypeSupport.class
  );
}
Example #13
Source File: FetchParquetTest.java From nifi with Apache License 2.0
private void writeParquetUsers(final File parquetFile, int numUsers) throws IOException {
    if (parquetFile.exists()) {
        Assert.assertTrue(parquetFile.delete());
    }

    final AvroParquetWriter.Builder<GenericRecord> writerBuilder = createAvroParquetWriter(parquetFile, schema);

    try (final ParquetWriter<GenericRecord> writer = writerBuilder.build()) {
        for (int i = 0; i < numUsers; i++) {
            final GenericRecord user = new GenericData.Record(schema);
            user.put("name", "Bob" + i);
            user.put("favorite_number", i);
            user.put("favorite_color", "blue" + i);
            writer.write(user);
        }
    }
}
Example #14
Source File: FetchParquetTest.java From nifi with Apache License 2.0
private void writeParquetUsersWithNullableArray(final File parquetFile, int numUsers) throws IOException {
    if (parquetFile.exists()) {
        Assert.assertTrue(parquetFile.delete());
    }

    final AvroParquetWriter.Builder<GenericRecord> writerBuilder = createAvroParquetWriter(parquetFile, schemaWithNullableArray);

    // use the schemaWithArray here just to get the schema for the array part of the favorite_colors fields, the overall
    // schemaWithNullableArray has a union of the array schema and null
    final Schema favoriteColorsSchema = schemaWithArray.getField("favorite_colors").schema();

    try (final ParquetWriter<GenericRecord> writer = writerBuilder.build()) {
        for (int i = 0; i < numUsers; i++) {
            final GenericRecord user = new GenericData.Record(schemaWithNullableArray);
            user.put("name", "Bob" + i);
            user.put("favorite_number", i);

            final GenericData.Array<String> colors = new GenericData.Array<>(1, favoriteColorsSchema);
            colors.add("blue" + i);
            user.put("favorite_colors", colors);

            writer.write(user);
        }
    }
}
Example #15
Source File: PutParquet.java From nifi with Apache License 2.0
@Override
public HDFSRecordWriter createHDFSRecordWriter(final ProcessContext context, final FlowFile flowFile,
        final Configuration conf, final Path path, final RecordSchema schema)
        throws IOException, SchemaNotFoundException {

    final Schema avroSchema = AvroTypeUtil.extractAvroSchema(schema);

    final AvroParquetWriter.Builder<GenericRecord> parquetWriter = AvroParquetWriter
            .<GenericRecord>builder(path)
            .withSchema(avroSchema);

    final ParquetConfig parquetConfig = createParquetConfig(context, flowFile.getAttributes());
    applyCommonConfig(parquetWriter, conf, parquetConfig);

    return new AvroParquetHDFSRecordWriter(parquetWriter.build(), avroSchema);
}
Example #16
Source File: WriteParquetResult.java From nifi with Apache License 2.0
public WriteParquetResult(final Schema schema, final OutputStream out, final ParquetConfig parquetConfig,
                          final ComponentLog componentLogger) throws IOException {
    super(out);
    this.schema = schema;
    this.componentLogger = componentLogger;

    final Configuration conf = new Configuration();
    final OutputFile outputFile = new NifiParquetOutputFile(out);

    final AvroParquetWriter.Builder<GenericRecord> writerBuilder =
            AvroParquetWriter.<GenericRecord>builder(outputFile).withSchema(schema);
    applyCommonConfig(writerBuilder, conf, parquetConfig);
    parquetWriter = writerBuilder.build();
}
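NifiParquetOutputFile is NiFi's adapter from a plain java.io.OutputStream to Parquet's org.apache.parquet.io.OutputFile abstraction, which is what lets AvroParquetWriter target something other than a Hadoop Path (Examples #2, #10, and #23 rely on the same trick). For readers outside NiFi, a minimal stand-in could look like the sketch below; the class name and the position-tracking wrapper are our own illustration, not NiFi code:

import java.io.IOException;
import java.io.OutputStream;
import org.apache.parquet.io.OutputFile;
import org.apache.parquet.io.PositionOutputStream;

// Hypothetical minimal adapter: exposes an OutputStream as a Parquet OutputFile.
public class StreamOutputFile implements OutputFile {
    private final OutputStream out;

    public StreamOutputFile(OutputStream out) {
        this.out = out;
    }

    @Override
    public PositionOutputStream create(long blockSizeHint) {
        return new PositionOutputStream() {
            private long pos = 0;

            @Override
            public long getPos() {
                return pos; // Parquet records these byte offsets in the file footer.
            }

            @Override
            public void write(int b) throws IOException {
                out.write(b);
                pos++;
            }

            @Override
            public void write(byte[] b, int off, int len) throws IOException {
                out.write(b, off, len);
                pos += len;
            }
        };
    }

    @Override
    public PositionOutputStream createOrOverwrite(long blockSizeHint) {
        return create(blockSizeHint);
    }

    @Override
    public boolean supportsBlockSize() {
        return false; // a raw stream has no HDFS-style block size
    }

    @Override
    public long defaultBlockSize() {
        return 0;
    }
}

Parquet only needs two things from the stream: the bytes themselves, and getPos(), which it uses to compute column-chunk offsets for the footer.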
Example #17
Source File: FetchParquetTest.java From nifi with Apache License 2.0
private AvroParquetWriter.Builder<GenericRecord> createAvroParquetWriter(final File parquetFile, final Schema schema) {
    final Path parquetPath = new Path(parquetFile.getPath());

    return AvroParquetWriter
            .<GenericRecord>builder(parquetPath)
            .withSchema(schema)
            .withConf(testConf);
}
Example #18
Source File: TestUtil.java From flink with Apache License 2.0
public static Path createTempParquetFile(File folder, Schema schema, List<IndexedRecord> records) throws IOException {
    Path path = new Path(folder.getPath(), UUID.randomUUID().toString());
    ParquetWriter<IndexedRecord> writer = AvroParquetWriter.<IndexedRecord>builder(
            new org.apache.hadoop.fs.Path(path.toUri()))
            .withSchema(schema)
            .withRowGroupSize(10)
            .build();
    for (IndexedRecord record : records) {
        writer.write(record);
    }
    writer.close();
    return path;
}
Example #19
Source File: TestParquetReader.java From nifi with Apache License 2.0
private ParquetWriter<GenericRecord> createParquetWriter(final Schema schema, final File parquetFile) throws IOException {
    final Configuration conf = new Configuration();
    final Path parquetPath = new Path(parquetFile.getPath());

    final ParquetWriter<GenericRecord> writer =
            AvroParquetWriter.<GenericRecord>builder(parquetPath)
                    .withSchema(schema)
                    .withConf(conf)
                    .build();

    return writer;
}
Example #20
Source File: AvroParquetMorphlineTest.java From kite with Apache License 2.0
@Test
public void testMapWithUtf8Key() throws Exception {
    Schema schema = new Schema.Parser().parse(new File("src/test/resources/test-avro-schemas/map.avsc"));

    File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
    tmp.deleteOnExit();
    tmp.delete();
    Path file = new Path(tmp.getPath());

    AvroParquetWriter<GenericRecord> writer = new AvroParquetWriter<GenericRecord>(file, schema);

    // Write a record with a map with Utf8 keys.
    GenericData.Record record = new GenericRecordBuilder(schema)
        .set("mymap", new HashMap(ImmutableMap.of(utf8("a"), 1, utf8("b"), 2)))
        .build();
    writer.write(record);
    writer.close();

    for (String configFile : Arrays.asList(
        "readAvroParquetFile",
        "readAvroParquetFileWithProjectionSchema",
        "readAvroParquetFileWithReaderSchema1",
        "readAvroParquetFileWithReaderSchemaExternal"
    )) {
        morphline = createMorphline("test-morphlines/" + configFile);

        Record morphlineRecord = new Record();
        morphlineRecord.put(ReadAvroParquetFileBuilder.FILE_UPLOAD_URL, file.toString());
        collector.reset();

        assertTrue(morphline.process(morphlineRecord));
        assertEquals(1, collector.getRecords().size());
        GenericData.Record actualRecord = (GenericData.Record) collector.getFirstRecord().getFirstValue(Fields.ATTACHMENT_BODY);
        assertEquals(record, actualRecord);
    }
}
Example #21
Source File: ParquetAppender.java From kite with Apache License 2.0
@Override
public void open() throws IOException {
  CompressionCodecName codecName = CompressionCodecName.UNCOMPRESSED;
  if (enableCompression) {
    codecName = getCompressionCodecName();
  }
  avroParquetWriter = new AvroParquetWriter<E>(
      fileSystem.makeQualified(path), schema, codecName,
      DEFAULT_ROW_GROUP_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE,
      ParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED, conf);
}
Example #22
Source File: AvroParquetFileReaderWriterFactory.java From secor with Apache License 2.0
public AvroParquetFileWriter(LogFilePath logFilePath, CompressionCodec codec) throws IOException {
    Path path = new Path(logFilePath.getLogFilePath());
    LOG.debug("Creating Brand new Writer for path {}", path);
    CompressionCodecName codecName = CompressionCodecName
            .fromCompressionCodec(codec != null ? codec.getClass() : null);
    topic = logFilePath.getTopic();
    // Not setting blockSize, pageSize, enableDictionary, and validating
    writer = AvroParquetWriter.builder(path)
            .withSchema(schemaRegistry.getSchema(topic))
            .withCompressionCodec(codecName)
            .build();
}
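The comment in this example points out which knobs are left at their defaults. When they matter, the builder exposes each one directly — the same methods Example #27 below uses. A sketch of the equivalent call with every defaulted option spelled out (the ParquetWriter.DEFAULT_* constants are the library's own defaults, so the behavior is unchanged):

// Sketch: the defaulted options, set explicitly.
writer = AvroParquetWriter.builder(path)
        .withSchema(schemaRegistry.getSchema(topic))
        .withCompressionCodec(codecName)
        .withRowGroupSize(ParquetWriter.DEFAULT_BLOCK_SIZE)          // "blockSize": target row-group size in bytes
        .withPageSize(ParquetWriter.DEFAULT_PAGE_SIZE)               // page size within each column chunk
        .withDictionaryEncoding(ParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED)
        .withValidation(ParquetWriter.DEFAULT_IS_VALIDATING_ENABLED) // validate records against the schema
        .build();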
Example #23
Source File: ParquetAvroWriters.java From flink with Apache License 2.0
private static <T> ParquetWriter<T> createAvroParquetWriter(
        String schemaString, GenericData dataModel, OutputFile out) throws IOException {
    final Schema schema = new Schema.Parser().parse(schemaString);
    return AvroParquetWriter.<T>builder(out)
            .withSchema(schema)
            .withDataModel(dataModel)
            .build();
}
Example #24
Source File: AvroToParquetConverterUtil.java From datacollector with Apache License 2.0
private static ParquetWriter.Builder getParquetWriterBuilder(Path tempFile, Schema avroSchema, Configuration conf) {
  // Parquet Avro pre-1.9 doesn't work with logical types, so in that case we use custom Builder that injects our own
  // avro schema -> parquet schema generator class (which is a copy of the one that was provided in PARQUET-358).
  // Additionally, Parquet Avro 1.9.x does not support converting from Avro timestamps (logical types TIMESTAMP_MILLIS
  // and TIMESTAMP_MICROS) and so we have to extend Parquet Avro classes to support timestamps conversion.
  ParquetWriter.Builder builder = null;
  try {
    SemanticVersion parquetVersion = SemanticVersion.parse(Version.VERSION_NUMBER);
    if (parquetVersion.major > 1 || (parquetVersion.major == 1 && parquetVersion.minor >= 9)) {
      if (parquetVersion.major == 1 && parquetVersion.minor >= 9) {
        LOG.debug("Creating AvroParquetWriterBuilder190Int96");
        if (propertyDefined(conf, AvroParquetConstants.TIMEZONE)) {
          String timeZoneId = conf.get(AvroParquetConstants.TIMEZONE);
          builder = new AvroParquetWriterBuilder190Int96(tempFile, timeZoneId).withSchema(avroSchema);
        } else {
          builder = new AvroParquetWriterBuilder190Int96(tempFile).withSchema(avroSchema);
        }
      } else {
        LOG.debug("Creating AvroParquetWriter.builder");
        builder = AvroParquetWriter.builder(tempFile).withSchema(avroSchema);
      }
    } else {
      LOG.debug("Creating AvroParquetWriterBuilder");
      builder = new AvroParquetWriterBuilder(tempFile).withSchema(avroSchema);
    }
  } catch (SemanticVersion.SemanticVersionParseException e) {
    LOG.warn("Can't parse parquet version string: " + Version.VERSION_NUMBER, e);
    builder = new AvroParquetWriterBuilder(tempFile).withSchema(avroSchema);
  }
  return builder;
}
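Two design points here: the builder is chosen by parsing the Parquet library's own version string at runtime (Version.VERSION_NUMBER), so one compiled artifact adapts to whichever parquet-avro release is on the classpath; and an unparseable version string falls back to the custom pre-1.9 builder with a warning rather than failing the conversion outright.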
Example #25
Source File: UtilitiesTestBase.java From hudi with Apache License 2.0
public static void saveParquetToDFS(List<GenericRecord> records, Path targetFile) throws IOException {
  try (ParquetWriter<GenericRecord> writer = AvroParquetWriter.<GenericRecord>builder(targetFile)
      .withSchema(HoodieTestDataGenerator.AVRO_SCHEMA)
      .withConf(HoodieTestUtils.getDefaultHadoopConf())
      .withWriteMode(Mode.OVERWRITE)
      .build()) {
    for (GenericRecord record : records) {
      writer.write(record);
    }
  }
}
Example #26
Source File: TestParquetInLining.java From hudi with Apache License 2.0
@Test
public void testSimpleInlineFileSystem() throws IOException {
  Path outerInMemFSPath = getRandomOuterInMemPath();
  Path outerPath = new Path(FILE_SCHEME + outerInMemFSPath.toString().substring(outerInMemFSPath.toString().indexOf(':')));
  generatedPath = outerPath;
  ParquetWriter inlineWriter = new AvroParquetWriter(outerInMemFSPath, HoodieTestDataGenerator.AVRO_SCHEMA,
      CompressionCodecName.GZIP, 100 * 1024 * 1024, 1024 * 1024, true, inMemoryConf);
  // write few records
  List<GenericRecord> recordsToWrite = getParquetHoodieRecords();
  for (GenericRecord rec : recordsToWrite) {
    inlineWriter.write(rec);
  }
  inlineWriter.close();
  byte[] inlineBytes = getBytesToInline(outerInMemFSPath);
  long startOffset = generateOuterFile(outerPath, inlineBytes);
  long inlineLength = inlineBytes.length;

  // Generate phantom inline file
  Path inlinePath = getPhantomFile(outerPath, startOffset, inlineLength);

  // instantiate Parquet reader
  ParquetReader inLineReader = AvroParquetReader.builder(inlinePath).withConf(inlineConf).build();
  List<GenericRecord> records = readParquetGenericRecords(inLineReader);
  assertArrayEquals(recordsToWrite.toArray(), records.toArray());
  inLineReader.close();
}
Example #27
Source File: DataLoad.java From arvo2parquet with MIT License
private static ParquetWriter<GenericData.Record> createParquetWriterInstance(@Nonnull final Schema schema,
                                                                             @Nonnull final Path fileToWrite)
        throws IOException {
  return AvroParquetWriter
          .<GenericData.Record>builder(nioPathToOutputFile(fileToWrite))
          .withRowGroupSize(256 * 1024 * 1024)
          .withPageSize(128 * 1024)
          .withSchema(schema)
          .withConf(new Configuration())
          .withCompressionCodec(CompressionCodecName.GZIP)
          .withValidation(false)
          .withDictionaryEncoding(false)
          .build();
}
Example #28
Source File: ParquetAvroWriters.java From Flink-CEPplus with Apache License 2.0
private static <T> ParquetWriter<T> createAvroParquetWriter(
        String schemaString, GenericData dataModel, OutputFile out) throws IOException {
    final Schema schema = new Schema.Parser().parse(schemaString);
    return AvroParquetWriter.<T>builder(out)
            .withSchema(schema)
            .withDataModel(dataModel)
            .build();
}