Java Code Examples for org.apache.parquet.column.ParquetProperties#WriterVersion
The following examples show how to use org.apache.parquet.column.ParquetProperties#WriterVersion.
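Before diving into the examples, it helps to know what the type itself looks like: WriterVersion is a small enum nested in ParquetProperties, whose constants PARQUET_1_0 and PARQUET_2_0 select the v1 or v2 Parquet file format. Below is a minimal sketch of picking a version and feeding it into writer properties; the fromString parsing and the ParquetProperties.builder() call assume a reasonably recent parquet-column release.

import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.column.ParquetProperties.WriterVersion;

public class WriterVersionDemo {
  public static void main(String[] args) {
    // Pick a version directly, or parse the "v1"/"v2" shorthand.
    WriterVersion direct = WriterVersion.PARQUET_2_0;
    WriterVersion parsed = WriterVersion.fromString("v2");

    // The version is usually carried into the writer via ParquetProperties.
    ParquetProperties props = ParquetProperties.builder()
        .withWriterVersion(parsed)
        .build();
    System.out.println(direct == props.getWriterVersion()); // true
  }
}

Most writers on this page accept the enum either directly through a ParquetWriter constructor (Examples 1, 2, and 7) or via a builder's withWriterVersion call (Example 4).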
Example 1
Source File: SqlInterpreterTest.java From zeppelin with Apache License 2.0
public File createParquetFile(int[] values, ParquetProperties.WriterVersion version) throws IOException {
  File file = File.createTempFile("zeppelin-flink-input", ".par");
  file.delete(); // the writer must create the file itself, so drop the empty temp file

  Path path = new Path(file.getAbsolutePath());
  Configuration conf = new Configuration();
  MessageType schema = MessageTypeParser.parseMessageType(
      "message test { "
          + "required int32 int32_field; "
          + "} ");
  GroupWriteSupport.setSchema(schema, conf);

  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(
      path,
      new GroupWriteSupport(),
      CompressionCodecName.UNCOMPRESSED,
      1024,    // block size
      1024,    // page size
      512,     // dictionary page size
      true,    // enable dictionary
      false,   // disable validation
      version,
      conf);
  for (int i = 0; i < values.length; i++) {
    writer.write(f.newGroup().append("int32_field", values[i]));
  }
  writer.close();
  return file;
}
Example 2
Source File: TestStatistics.java From parquet-mr with Apache License 2.0
public static void writeAndTest(WriteContext context) throws IOException {
  // Create the configuration, and then apply the schema to our configuration.
  Configuration configuration = new Configuration();
  GroupWriteSupport.setSchema(context.schema, configuration);
  GroupWriteSupport groupWriteSupport = new GroupWriteSupport();

  // Create the writer properties
  final int blockSize = context.blockSize;
  final int pageSize = context.pageSize;
  final int dictionaryPageSize = pageSize;
  final boolean enableDictionary = context.enableDictionary;
  final boolean enableValidation = context.enableValidation;
  ParquetProperties.WriterVersion writerVersion = context.version;
  CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;

  ParquetWriter<Group> writer = new ParquetWriter<Group>(context.fsPath,
      groupWriteSupport, codec, blockSize, pageSize, dictionaryPageSize,
      enableDictionary, enableValidation, writerVersion, configuration);

  context.write(writer);
  writer.close();

  context.test();

  context.path.delete();
}
Example 3
Source File: CompressionConveterTest.java From parquet-mr with Apache License 2.0
private void testInternal(String srcCodec, String destCodec,
    ParquetProperties.WriterVersion writerVersion, int pageSize) throws Exception {
  int numRecord = 1000;
  TestDocs testDocs = new TestDocs(numRecord);
  String inputFile = createParquetFile(conf, extraMeta, numRecord, "input",
      srcCodec, writerVersion, pageSize, testDocs);
  String outputFile = createTempFile("output_trans");

  convertCompression(conf, inputFile, outputFile, destCodec);

  validateColumns(outputFile, numRecord, testDocs);
  validMeta(inputFile, outputFile);
  validColumnIndex(inputFile, outputFile);
}
Example 4
Source File: CompressionConveterTest.java From parquet-mr with Apache License 2.0
private String createParquetFile(Configuration conf, Map<String, String> extraMeta,
    int numRecord, String prefix, String codec,
    ParquetProperties.WriterVersion writerVersion, int pageSize,
    TestDocs testDocs) throws IOException {
  MessageType schema = new MessageType("schema",
      new PrimitiveType(REQUIRED, INT64, "DocId"),
      new PrimitiveType(REQUIRED, BINARY, "Name"),
      new PrimitiveType(REQUIRED, BINARY, "Gender"),
      new GroupType(OPTIONAL, "Links",
          new PrimitiveType(REPEATED, BINARY, "Backward"),
          new PrimitiveType(REPEATED, BINARY, "Forward")));

  conf.set(GroupWriteSupport.PARQUET_EXAMPLE_SCHEMA, schema.toString());

  String file = createTempFile(prefix);
  ExampleParquetWriter.Builder builder = ExampleParquetWriter.builder(new Path(file))
      .withConf(conf)
      .withWriterVersion(writerVersion)
      .withExtraMetaData(extraMeta)
      .withDictionaryEncoding("DocId", true)
      .withValidation(true)
      .enablePageWriteChecksum()
      .withPageSize(pageSize)
      .withCompressionCodec(CompressionCodecName.valueOf(codec));

  try (ParquetWriter writer = builder.build()) {
    for (int i = 0; i < numRecord; i++) {
      SimpleGroup g = new SimpleGroup(schema);
      g.add("DocId", testDocs.docId[i]);
      g.add("Name", testDocs.name[i]);
      g.add("Gender", testDocs.gender[i]);
      Group links = g.addGroup("Links");
      links.add(0, testDocs.linkBackward[i]);
      links.add(1, testDocs.linkForward[i]);
      writer.write(g);
    }
  }
  return file;
}
Example 5
Source File: TestStatistics.java From parquet-mr with Apache License 2.0
public WriteContext(File path, MessageType schema, int blockSize, int pageSize,
    boolean enableDictionary, boolean enableValidation,
    ParquetProperties.WriterVersion version) throws IOException {
  this.path = path;
  this.fsPath = new Path(path.toString());
  this.schema = schema;
  this.blockSize = blockSize;
  this.pageSize = pageSize;
  this.enableDictionary = enableDictionary;
  this.enableValidation = enableValidation;
  this.version = version;
}
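WriteContext is just a value holder for the settings consumed by writeAndTest in Example 2, so sweeping both format versions is a short loop. A minimal sketch, assuming WriteContext and writeAndTest are the nested members of TestStatistics shown on this page, and that schema and tempFile come from the surrounding test harness:

// Hypothetical harness: run the same write-and-verify cycle under both writer versions.
for (ParquetProperties.WriterVersion version : ParquetProperties.WriterVersion.values()) {
  TestStatistics.WriteContext context = new TestStatistics.WriteContext(
      tempFile,       // java.io.File supplied by the test
      schema,         // MessageType supplied by the test
      1024 * 1024,    // block size
      8 * 1024,       // page size
      true,           // enable dictionary
      true,           // enable validation
      version);
  TestStatistics.writeAndTest(context);
}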
Example 6
Source File: ParquetFactory.java From sylph with Apache License 2.0
public ParquetFactory(
    final String writeTableDir,
    final String table,
    ParquetProperties.WriterVersion parquetVersion,
    MessageType schema) {
  requireNonNull(writeTableDir, "writeTableDir is null");
  this.writeTableDir = writeTableDir.endsWith("/") ? writeTableDir : writeTableDir + "/";

  this.table = requireNonNull(table, "table is null");
  this.schema = requireNonNull(schema, "schema is null");
  this.parquetVersion = requireNonNull(parquetVersion, "parquetVersion is null");

  /**
   * Consumer
   */
  final Callable<Void> consumer = () -> {
    Thread.currentThread().setName("Parquet_Factory_Consumer");
    try {
      while (!closed) {
        Runnable value = streamData.poll();
        // Event 1: a data line arrived; run it to write the row
        if (value != null) {
          value.run();
        }
        // Event 2: poll the monitoring/instruction queue
        Runnable event = monitorEvent.poll();
        if (event != null) {
          event.run();
        }
        // Event 3: nothing to do, back off briefly
        if (value == null && event == null) {
          TimeUnit.MILLISECONDS.sleep(1);
        }
      }
    } catch (Exception e) {
      logger.error("Parquet_Factory_Consumer error", e);
      System.exit(-1);
    }
    return null;
  };

  // register consumer
  executorPool.submit(consumer);
  // register monitor
  executorPool.submit(monitor);
  Runtime.getRuntime().addShutdownHook(new Thread(shutdownHook));
}
Example 7
Source File: DataGenerator.java From parquet-mr with Apache License 2.0
public void generateData(Path outFile, Configuration configuration,
    ParquetProperties.WriterVersion version, int blockSize, int pageSize,
    int fixedLenByteArraySize, CompressionCodecName codec, int nRows) throws IOException {
  if (exists(configuration, outFile)) {
    System.out.println("File already exists " + outFile);
    return;
  }

  System.out.println("Generating data @ " + outFile);

  MessageType schema = parseMessageType(
      "message test { "
          + "required binary binary_field; "
          + "required int32 int32_field; "
          + "required int64 int64_field; "
          + "required boolean boolean_field; "
          + "required float float_field; "
          + "required double double_field; "
          + "required fixed_len_byte_array(" + fixedLenByteArraySize + ") flba_field; "
          + "required int96 int96_field; "
          + "} ");

  GroupWriteSupport.setSchema(schema, configuration);
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(outFile, new GroupWriteSupport(),
      codec, blockSize, pageSize, DICT_PAGE_SIZE, true, false, version, configuration);

  // generate some data for the fixed len byte array field
  char[] chars = new char[fixedLenByteArraySize];
  Arrays.fill(chars, '*');

  for (int i = 0; i < nRows; i++) {
    writer.write(
        f.newGroup()
            .append("binary_field", randomUUID().toString())
            .append("int32_field", i)
            .append("int64_field", 64L)
            .append("boolean_field", true)
            .append("float_field", 1.0f)
            .append("double_field", 2.0d)
            .append("flba_field", new String(chars))
            .append("int96_field", Binary.fromConstantByteArray(new byte[12])));
  }
  writer.close();
}
Example 8
Source File: TestStatistics.java From parquet-mr with Apache License 2.0
public DataContext(long seed, File path, int blockSize, int pageSize,
    boolean enableDictionary, ParquetProperties.WriterVersion version) throws IOException {
  super(path, buildSchema(seed), blockSize, pageSize, enableDictionary, true, version);

  this.random = new Random(seed);
  this.recordCount = random.nextInt(MAX_TOTAL_ROWS);

  int fixedLength = schema.getType("fixed-binary").asPrimitiveType().getTypeLength();

  randomGenerators = Arrays.<RandomValueGenerator<?>>asList(
      new RandomValues.IntGenerator(random.nextLong()),
      new RandomValues.LongGenerator(random.nextLong()),
      new RandomValues.Int96Generator(random.nextLong()),
      new RandomValues.FloatGenerator(random.nextLong()),
      new RandomValues.DoubleGenerator(random.nextLong()),
      new RandomValues.StringGenerator(random.nextLong()),
      new RandomValues.BinaryGenerator(random.nextLong()),
      new RandomValues.FixedGenerator(random.nextLong(), fixedLength),
      new RandomValues.UnconstrainedIntGenerator(random.nextLong()),
      new RandomValues.UnconstrainedLongGenerator(random.nextLong()),
      new RandomValues.UnconstrainedFloatGenerator(random.nextLong()),
      new RandomValues.UnconstrainedDoubleGenerator(random.nextLong()),
      new RandomValues.IntGenerator(random.nextLong(), Byte.MIN_VALUE, Byte.MAX_VALUE),
      new RandomValues.UIntGenerator(random.nextLong(), Byte.MIN_VALUE, Byte.MAX_VALUE),
      new RandomValues.IntGenerator(random.nextLong(), Short.MIN_VALUE, Short.MAX_VALUE),
      new RandomValues.UIntGenerator(random.nextLong(), Short.MIN_VALUE, Short.MAX_VALUE),
      new RandomValues.UnconstrainedIntGenerator(random.nextLong()),
      new RandomValues.UnconstrainedIntGenerator(random.nextLong()),
      new RandomValues.UnconstrainedLongGenerator(random.nextLong()),
      new RandomValues.UnconstrainedLongGenerator(random.nextLong()),
      new RandomValues.UnconstrainedIntGenerator(random.nextLong()),
      new RandomValues.UnconstrainedLongGenerator(random.nextLong()),
      new RandomValues.FixedGenerator(random.nextLong(), fixedLength),
      new RandomValues.BinaryGenerator(random.nextLong()),
      new RandomValues.StringGenerator(random.nextLong()),
      new RandomValues.StringGenerator(random.nextLong()),
      new RandomValues.StringGenerator(random.nextLong()),
      new RandomValues.BinaryGenerator(random.nextLong()),
      new RandomValues.IntGenerator(random.nextLong()),
      new RandomValues.IntGenerator(random.nextLong()),
      new RandomValues.LongGenerator(random.nextLong()),
      new RandomValues.LongGenerator(random.nextLong()),
      new RandomValues.LongGenerator(random.nextLong()),
      new RandomValues.FixedGenerator(random.nextLong(), 12));
}
Example 9
Source File: ParquetConfig.java From nifi with Apache License 2.0
public ParquetProperties.WriterVersion getWriterVersion() {
  return writerVersion;
}
Example 10
Source File: ParquetConfig.java From nifi with Apache License 2.0
public void setWriterVersion(ParquetProperties.WriterVersion writerVersion) {
  this.writerVersion = writerVersion;
}
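Examples 9 and 10 are the accessor pair on NiFi's ParquetConfig holder; the processor code that consumes them is not shown here. A hedged sketch of the typical wiring, reusing the withWriterVersion builder call from Example 4 (the connection between ParquetConfig and the builder below is illustrative, not NiFi's exact code path):

// Illustrative only: copy the configured version onto a writer builder.
ParquetConfig parquetConfig = new ParquetConfig();
parquetConfig.setWriterVersion(ParquetProperties.WriterVersion.PARQUET_2_0);

ExampleParquetWriter.Builder builder = ExampleParquetWriter.builder(new Path("/tmp/out.parquet"));
if (parquetConfig.getWriterVersion() != null) {
  builder.withWriterVersion(parquetConfig.getWriterVersion());
}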
Example 11
Source File: ExampleParquetWriter.java From parquet-mr with Apache License 2.0
/**
 * Create a new {@link ExampleParquetWriter}.
 *
 * @param file The file name to write to.
 * @param writeSupport The schema to write with.
 * @param compressionCodecName Compression code to use, or CompressionCodecName.UNCOMPRESSED
 * @param blockSize the block size threshold.
 * @param pageSize See parquet write up. Blocks are subdivided into pages for alignment and other purposes.
 * @param enableDictionary Whether to use a dictionary to compress columns.
 * @param conf The Configuration to use.
 * @throws IOException
 */
ExampleParquetWriter(Path file, WriteSupport<Group> writeSupport,
    CompressionCodecName compressionCodecName, int blockSize, int pageSize,
    boolean enableDictionary, boolean enableValidation,
    ParquetProperties.WriterVersion writerVersion, Configuration conf) throws IOException {
  super(file, writeSupport, compressionCodecName, blockSize, pageSize,
      pageSize, enableDictionary, enableValidation, writerVersion, conf);
}
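The constructor above is package-private; application code reaches it through the builder, as Example 4 does. A minimal end-to-end sketch, writing a single row with an explicit writer version (the schema, output path, and value below are placeholders):

// Minimal sketch: builder-based use of ExampleParquetWriter with a writer version.
MessageType schema = MessageTypeParser.parseMessageType(
    "message test { required int32 int32_field; }");
Configuration conf = new Configuration();
GroupWriteSupport.setSchema(schema, conf); // the example write support reads the schema from conf

try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(new Path("/tmp/example.parquet"))
    .withConf(conf)
    .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_1_0)
    .withCompressionCodec(CompressionCodecName.UNCOMPRESSED)
    .build()) {
  writer.write(new SimpleGroupFactory(schema).newGroup().append("int32_field", 42));
}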