parquet.hadoop.ParquetOutputFormat Java Examples
The following examples show how to use parquet.hadoop.ParquetOutputFormat. Each example is taken from an open-source project; the source file, project, and license are noted above it.
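Before the individual examples, here is a consolidated sketch of the static configuration surface they exercise. It is an illustration only: the ParquetJobSetup class name is hypothetical, and in a real job the output format is one of the concrete ParquetOutputFormat subclasses shown below (Avro, Thrift, or Protobuf), which supplies the WriteSupport. Every configuration call in the sketch appears in at least one example on this page.

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import parquet.hadoop.ParquetOutputFormat;
import parquet.hadoop.metadata.CompressionCodecName;

// Hypothetical helper class; the configuration calls, not the class, are the point.
public class ParquetJobSetup {
    public static Job configure(String outputPath) throws IOException {
        Job job = Job.getInstance();
        // Directory the Parquet files are written to.
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        // Column-chunk compression codec (SNAPPY in most examples below).
        ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
        // Enable or disable dictionary encoding.
        ParquetOutputFormat.setEnableDictionary(job, true);
        // Row-group ("block") size and data-page size, in bytes.
        ParquetOutputFormat.setBlockSize(job, 128 * 1024 * 1024);
        ParquetOutputFormat.setPageSize(job, 1024 * 1024);
        return job;
    }
}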
Example #1
Source File: ParquetAvroExample.java From parquet-flinktacular with Apache License 2.0
public static void writeAvro(DataSet<Tuple2<Void, Person>> data, String outputPath) throws IOException {
    // Set up the Hadoop job
    Job job = Job.getInstance();

    // Set up the Hadoop output format
    HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat(new AvroParquetOutputFormat(), job);

    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    AvroParquetOutputFormat.setSchema(job, Person.getClassSchema());
    ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
    ParquetOutputFormat.setEnableDictionary(job, true);

    // Output & Execute
    data.output(hadoopOutputFormat);
}
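In this example (and in Examples #2 and #6), Flink's HadoopOutputFormat wrapper bridges a Hadoop OutputFormat into a DataSet sink. The Void key in Tuple2<Void, Person> exists only to satisfy the Hadoop key/value contract: the Parquet output formats extend FileOutputFormat<Void, T> and ignore the key.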
Example #2
Source File: ParquetThriftExample.java From parquet-flinktacular with Apache License 2.0
public static void writeThrift(DataSet<Tuple2<Void, Person>> data, String outputPath) throws IOException {
    // Set up the Hadoop job
    Job job = Job.getInstance();

    // Set up the Hadoop output format
    HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat(new ParquetThriftOutputFormat(), job);

    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
    ParquetOutputFormat.setEnableDictionary(job, true);
    ParquetThriftOutputFormat.setThriftClass(job, Person.class);

    // Output & Execute
    data.output(hadoopOutputFormat);
}
Example #3
Source File: PentahoTwitterOutputFormat.java From pentaho-hadoop-shims with Apache License 2.0
@Override
public void setOutputFile( String file, boolean override ) throws Exception {
  inClassloader( () -> {
    S3NCredentialUtils.applyS3CredentialsToHadoopConfigurationIfNecessary( file, job.getConfiguration() );
    outputFile = new Path( S3NCredentialUtils.scrubFilePathIfNecessary( file ) );
    FileSystem fs = FileSystem.get( outputFile.toUri(), job.getConfiguration() );
    if ( fs.exists( outputFile ) ) {
      if ( override ) {
        fs.delete( outputFile, true );
      } else {
        throw new FileAlreadyExistsException( file );
      }
    }
    ParquetOutputFormat.setOutputPath( job, outputFile.getParent() );
  } );
}
Example #4
Source File: PentahoTwitterOutputFormat.java From pentaho-hadoop-shims with Apache License 2.0
@Override
public void setVersion( VERSION version ) throws Exception {
  inClassloader( () -> {
    ParquetProperties.WriterVersion writerVersion;
    switch ( version ) {
      case VERSION_1_0:
        writerVersion = ParquetProperties.WriterVersion.PARQUET_1_0;
        break;
      case VERSION_2_0:
        writerVersion = ParquetProperties.WriterVersion.PARQUET_2_0;
        break;
      default:
        writerVersion = ParquetProperties.WriterVersion.PARQUET_2_0;
        break;
    }
    job.getConfiguration().set( ParquetOutputFormat.WRITER_VERSION, writerVersion.toString() );
  } );
}
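Note the default branch: unrecognized versions fall back to the 2.0 writer. In parquet-mr, WriterVersion.PARQUET_2_0 selects the format-v2 writer, which enables the newer data page format and the encodings introduced with it.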
Example #5
Source File: PentahoTwitterOutputFormat.java From pentaho-hadoop-shims with Apache License 2.0
@Override
public void setCompression( COMPRESSION comp ) throws Exception {
  inClassloader( () -> {
    CompressionCodecName codec;
    switch ( comp ) {
      case SNAPPY:
        codec = CompressionCodecName.SNAPPY;
        break;
      case GZIP:
        codec = CompressionCodecName.GZIP;
        break;
      case LZO:
        codec = CompressionCodecName.LZO;
        break;
      default:
        codec = CompressionCodecName.UNCOMPRESSED;
        break;
    }
    ParquetOutputFormat.setCompression( job, codec );
  } );
}
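Any codec other than UNCOMPRESSED must be available on the cluster at write time; LZO in particular usually requires a separately installed native library, since its GPL license keeps it out of standard Hadoop distributions.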
Example #6
Source File: ParquetProtobufExample.java From parquet-flinktacular with Apache License 2.0
public static void writeProtobuf(DataSet<Tuple2<Void, Person>> data, String outputPath) throws IOException {
    Job job = Job.getInstance();

    // Set up the Hadoop output format
    HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat(new ProtoParquetOutputFormat(), job);

    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    ProtoParquetOutputFormat.setProtobufClass(job, Person.class);
    ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
    ParquetOutputFormat.setEnableDictionary(job, true);

    // Output & Execute
    data.output(hadoopOutputFormat);
}
Example #7
Source File: PentahoTwitterOutputFormat.java From pentaho-hadoop-shims with Apache License 2.0
public PentahoTwitterOutputFormat() {
  logger.info( "We are initializing parquet output format" );
  inClassloader( () -> {
    ConfigurationProxy conf = new ConfigurationProxy();
    job = Job.getInstance( conf );
    job.getConfiguration().set( ParquetOutputFormat.ENABLE_JOB_SUMMARY, "false" );
    ParquetOutputFormat.setEnableDictionary( job, false );
  } );
}
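In parquet-mr, ParquetOutputFormat.ENABLE_JOB_SUMMARY is the parquet.enable.summary-metadata configuration key; setting it to false stops the output committer from writing _metadata summary files when the job completes.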
Example #8
Source File: PentahoTwitterOutputFormat.java From pentaho-hadoop-shims with Apache License 2.0
@Override
public void enableDictionary( boolean useDictionary ) throws Exception {
  inClassloader( () -> ParquetOutputFormat.setEnableDictionary( job, useDictionary ) );
}
Example #9
Source File: PentahoTwitterOutputFormat.java From pentaho-hadoop-shims with Apache License 2.0
@Override
public void setRowGroupSize( int size ) throws Exception {
  inClassloader( () -> ParquetOutputFormat.setBlockSize( job, size ) );
}
Example #10
Source File: PentahoTwitterOutputFormat.java From pentaho-hadoop-shims with Apache License 2.0
@Override
public void setDataPageSize( int size ) throws Exception {
  inClassloader( () -> ParquetOutputFormat.setPageSize( job, size ) );
}
Example #11
Source File: PentahoTwitterOutputFormat.java From pentaho-hadoop-shims with Apache License 2.0
@Override
public void setDictionaryPageSize( int size ) throws Exception {
  inClassloader( () -> ParquetOutputFormat.setDictionaryPageSize( job, size ) );
}
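Examples #9 through #11 map Pentaho's size settings onto Parquet's three size tunables: the block (row group) size, the data page size, and the dictionary page size. The dictionary page size caps how large a column's dictionary may grow before the writer falls back to plain encoding for that column.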