Java Code Examples for org.apache.avro.file.CodecFactory#deflateCodec()
The following examples show how to use org.apache.avro.file.CodecFactory#deflateCodec().
Each example is taken from an open source project; the source file, project, and license are noted above it.
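Before the project examples, a minimal self-contained sketch of the pattern they all share may help: build a CodecFactory with deflateCodec() and attach it to a DataFileWriter before the container file is created. The Line schema, record contents, and output file name here are illustrative only, not taken from any project below.

import java.io.File;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class DeflateCodecDemo {
    public static void main(String[] args) throws IOException {
        // Trivial record schema, for illustration only.
        Schema schema = new Schema.Parser().parse(
                "{\"type\":\"record\",\"name\":\"Line\","
                + "\"fields\":[{\"name\":\"text\",\"type\":\"string\"}]}");

        GenericRecord record = new GenericData.Record(schema);
        record.put("text", "hello");

        try (DataFileWriter<GenericRecord> writer =
                new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
            // deflateCodec takes a zlib level: 1 (fastest) through 9 (smallest output),
            // or CodecFactory.DEFAULT_DEFLATE_LEVEL to let zlib pick its default.
            writer.setCodec(CodecFactory.deflateCodec(CodecFactory.DEFAULT_DEFLATE_LEVEL));
            writer.create(schema, new File("lines.avro"));
            writer.append(record);
        }
    }
}

Note that setCodec(...) must be called before create(...), since the codec name is recorded in the container file header and cannot be changed once the file is open.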
Example 1
Source File: AvroAppender.java From kite with Apache License 2.0
private CodecFactory getCodecFactory() {
    switch (compressionType) {
        case Snappy:
            return CodecFactory.snappyCodec();
        case Deflate:
            // level 9 is the maximum (slowest, smallest) deflate setting
            return CodecFactory.deflateCodec(9);
        case Bzip2:
            return CodecFactory.bzip2Codec();
        default:
            throw new IllegalArgumentException(String.format(
                    "Unsupported compression format %s. Supported formats: %s",
                    compressionType.getName(),
                    Arrays.toString(Formats.AVRO.getSupportedCompressionTypes().toArray())));
    }
}
Example 2
Source File: AvroRecordSetWriter.java From nifi with Apache License 2.0
private CodecFactory getCodecFactory(String property) {
    CodecType type = CodecType.valueOf(property);
    switch (type) {
        case BZIP2:
            return CodecFactory.bzip2Codec();
        case DEFLATE:
            return CodecFactory.deflateCodec(CodecFactory.DEFAULT_DEFLATE_LEVEL);
        case LZO:
            return CodecFactory.xzCodec(CodecFactory.DEFAULT_XZ_LEVEL);
        case SNAPPY:
            return CodecFactory.snappyCodec();
        case NONE:
        default:
            return CodecFactory.nullCodec();
    }
}
Example 3
Source File: AvroAsJsonOutputFormat.java From iow-hadoop-streaming with Apache License 2.0
static <K> void configureDataFileWriter(DataFileWriter<K> writer, JobConf job)
        throws UnsupportedEncodingException {
    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
                org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC)
                ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    writer.setSyncInterval(job.getInt(org.apache.avro.mapred.AvroOutputFormat.SYNC_INTERVAL_KEY,
            DEFAULT_SYNC_INTERVAL));

    // copy metadata from job
    for (Map.Entry<String, String> e : job) {
        if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
        if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                    URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }
}
Example 4
Source File: AvroUtil.java From nifi with Apache License 2.0
public static CodecFactory getCodecFactory(String property) {
    CodecType type = CodecType.valueOf(property);
    switch (type) {
        case BZIP2:
            return CodecFactory.bzip2Codec();
        case DEFLATE:
            return CodecFactory.deflateCodec(CodecFactory.DEFAULT_DEFLATE_LEVEL);
        case LZO:
            return CodecFactory.xzCodec(CodecFactory.DEFAULT_XZ_LEVEL);
        case SNAPPY:
            return CodecFactory.snappyCodec();
        case NONE:
        default:
            return CodecFactory.nullCodec();
    }
}
Example 5
Source File: AvroKeyValueSinkWriter.java From flink with Apache License 2.0
private CodecFactory getCompressionCodec(Map<String, String> conf) {
    if (getBoolean(conf, CONF_COMPRESS, false)) {
        int deflateLevel = getInt(conf, CONF_DEFLATE_LEVEL, CodecFactory.DEFAULT_DEFLATE_LEVEL);
        int xzLevel = getInt(conf, CONF_XZ_LEVEL, CodecFactory.DEFAULT_XZ_LEVEL);

        String outputCodec = conf.get(CONF_COMPRESS_CODEC);

        if (DataFileConstants.DEFLATE_CODEC.equals(outputCodec)) {
            return CodecFactory.deflateCodec(deflateLevel);
        } else if (DataFileConstants.XZ_CODEC.equals(outputCodec)) {
            return CodecFactory.xzCodec(xzLevel);
        } else {
            return CodecFactory.fromString(outputCodec);
        }
    }
    return CodecFactory.nullCodec();
}
Example 6
Source File: SerializableAvroCodecFactory.java From beam with Apache License 2.0
@Override
public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
    final String codecStr = in.readUTF();

    switch (codecStr) {
        case NULL_CODEC:
        case SNAPPY_CODEC:
        case BZIP2_CODEC:
            codecFactory = CodecFactory.fromString(codecStr);
            return;
    }

    Matcher deflateMatcher = deflatePattern.matcher(codecStr);
    if (deflateMatcher.find()) {
        codecFactory = CodecFactory.deflateCodec(Integer.parseInt(deflateMatcher.group("level")));
        return;
    }

    Matcher xzMatcher = xzPattern.matcher(codecStr);
    if (xzMatcher.find()) {
        codecFactory = CodecFactory.xzCodec(Integer.parseInt(xzMatcher.group("level")));
        return;
    }

    throw new IllegalStateException(codecStr + " is not supported");
}
Example 7
Source File: AbstractKiteConvertProcessor.java From localization_nifi with Apache License 2.0
protected CodecFactory getCodecFactory(String property) {
    CodecType type = CodecType.valueOf(property);
    switch (type) {
        case BZIP2:
            return CodecFactory.bzip2Codec();
        case DEFLATE:
            return CodecFactory.deflateCodec(CodecFactory.DEFAULT_DEFLATE_LEVEL);
        case NONE:
            return CodecFactory.nullCodec();
        case LZO:
            return CodecFactory.xzCodec(CodecFactory.DEFAULT_XZ_LEVEL);
        case SNAPPY:
        default:
            return CodecFactory.snappyCodec();
    }
}
Example 8
Source File: AvroKeyValueSinkWriter.java From Flink-CEPplus with Apache License 2.0
private CodecFactory getCompressionCodec(Map<String, String> conf) {
    if (getBoolean(conf, CONF_COMPRESS, false)) {
        int deflateLevel = getInt(conf, CONF_DEFLATE_LEVEL, CodecFactory.DEFAULT_DEFLATE_LEVEL);
        int xzLevel = getInt(conf, CONF_XZ_LEVEL, CodecFactory.DEFAULT_XZ_LEVEL);

        String outputCodec = conf.get(CONF_COMPRESS_CODEC);

        if (DataFileConstants.DEFLATE_CODEC.equals(outputCodec)) {
            return CodecFactory.deflateCodec(deflateLevel);
        } else if (DataFileConstants.XZ_CODEC.equals(outputCodec)) {
            return CodecFactory.xzCodec(xzLevel);
        } else {
            return CodecFactory.fromString(outputCodec);
        }
    }
    return CodecFactory.nullCodec();
}
Example 9
Source File: AvroRecordWriter.java From presto with Apache License 2.0
public AvroRecordWriter(Path path, JobConf jobConf, boolean isCompressed, Properties properties)
        throws IOException {
    Schema schema;
    try {
        schema = AvroSerdeUtils.determineSchemaOrThrowException(jobConf, properties);
    } catch (AvroSerdeException e) {
        throw new IOException(e);
    }
    GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(genericDatumWriter);

    if (isCompressed) {
        int level = jobConf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC)
                ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        dataFileWriter.setCodec(factory);
    }

    outputStream = path.getFileSystem(jobConf).create(path);
    dataFileWriter.create(schema, outputStream);
    delegate = new AvroGenericRecordWriter(dataFileWriter);
}
Example 10
Source File: AvroOutputFormat.java From aliyun-maxcompute-data-collectors with Apache License 2.0
static <T> void configureDataFileWriter(DataFileWriter<T> writer, TaskAttemptContext context)
        throws UnsupportedEncodingException {
    if (FileOutputFormat.getCompressOutput(context)) {
        int level = context.getConfiguration()
                .getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = context.getConfiguration()
                .get(org.apache.avro.mapred.AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC)
                ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    writer.setSyncInterval(context.getConfiguration()
            .getInt(SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL));

    // copy metadata from job
    for (Map.Entry<String, String> e : context.getConfiguration()) {
        if (e.getKey().startsWith(org.apache.avro.mapred.AvroJob.TEXT_PREFIX)) {
            writer.setMeta(e.getKey()
                    .substring(org.apache.avro.mapred.AvroJob.TEXT_PREFIX.length()),
                    e.getValue());
        }
        if (e.getKey().startsWith(org.apache.avro.mapred.AvroJob.BINARY_PREFIX)) {
            writer.setMeta(e.getKey()
                    .substring(org.apache.avro.mapred.AvroJob.BINARY_PREFIX.length()),
                    URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
        }
    }
}
Example 11
Source File: JdbcAvroArgs.java From dbeam with Apache License 2.0
public CodecFactory getCodecFactory() {
    // codec strings carry the level as a numeric suffix, e.g. "deflate9" or "zstandard3"
    if (avroCodec().equals("snappy")) {
        return CodecFactory.snappyCodec();
    } else if (avroCodec().startsWith("deflate")) {
        return CodecFactory.deflateCodec(Integer.valueOf(avroCodec().replace("deflate", "")));
    } else if (avroCodec().startsWith("zstandard")) {
        return CodecFactory.zstandardCodec(Integer.valueOf(avroCodec().replace("zstandard", "")));
    }
    throw new IllegalArgumentException("Invalid avroCodec " + avroCodec());
}
Example 12
Source File: SerializableAvroCodecFactoryTest.java From beam with Apache License 2.0
@Test
public void testDeflateCodecSerDeWithLevels() throws Exception {
    for (int i = 0; i < 10; ++i) {
        SerializableAvroCodecFactory codecFactory =
                new SerializableAvroCodecFactory(CodecFactory.deflateCodec(i));
        SerializableAvroCodecFactory serdeC = SerializableUtils.clone(codecFactory);

        assertEquals(CodecFactory.deflateCodec(i).toString(), serdeC.getCodec().toString());
    }
}
Example 13
Source File: Purge.java From Cubert with Apache License 2.0
private DataFileWriter<GenericRecord> createDataFileWriter(DataFileReader<GenericRecord> dataFileReader)
        throws IllegalArgumentException, IOException {
    Schema schema = dataFileReader.getSchema();
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(datumWriter);

    // Get the codec of the reader
    String codecStr = dataFileReader.getMetaString(DataFileConstants.CODEC);
    int level = conf.getInt("avro.mapred.deflate.level", 1);
    String codecName = conf.get("avro.output.codec", codecStr);
    CodecFactory factory = codecName.equals("deflate")
            ? CodecFactory.deflateCodec(level)
            : CodecFactory.fromString(codecName);

    // Set the codec of the writer
    writer.setCodec(factory);

    writer.setSyncInterval(conf.getInt("avro.mapred.sync.interval",
            Math.max(conf.getInt("io.file.buffer.size", 16000), 16000)));

    writer.create(schema,
            new Path(tempFileName).getFileSystem(conf).create(new Path(tempFileName)));
    return writer;
}
Example 14
Source File: AvroConfiguration.java From data-highway with Apache License 2.0
@Bean
CodecFactory codecFactory(
        @Value("${avroCodec.name:deflate}") String codecName,
        @Value("${avroCodec.level:3}") String compressionLevel) {
    switch (codecName) {
        case DEFLATE_CODEC:
            return CodecFactory.deflateCodec(level(compressionLevel, DEFAULT_DEFLATE_LEVEL));
        case XZ_CODEC:
            return CodecFactory.xzCodec(level(compressionLevel, DEFAULT_XZ_LEVEL));
        default:
            return CodecFactory.fromString(codecName);
    }
}
Example 15
Source File: AvroTeeWriter.java From Cubert with Apache License 2.0
@Override
public void open(Configuration conf, JsonNode json, BlockSchema schema, Path root, String filename)
        throws IOException {
    Path teePath = new Path(root, filename + ".avro");
    FileSystem fs = FileSystem.get(conf);

    Schema avroSchema = AvroUtils.convertFromBlockSchema("record", schema);
    GenericDatumWriter<Object> datumWriter = new PigAvroDatumWriter(avroSchema);
    dataFileWriter = new DataFileWriter<Object>(datumWriter);

    // if compression is requested, set the proper compression codec
    if (PhaseContext.getConf().getBoolean("mapred.output.compress", false)) {
        int level = conf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = conf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC)
                ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        dataFileWriter.setCodec(factory);
    }

    dataFileWriter.create(avroSchema, fs.create(teePath));
}
Example 16
Source File: WriterUtils.java From incubator-gobblin with Apache License 2.0
/**
 * Creates a {@link CodecFactory} based on the specified codec name and deflate level. If codecName is absent, then
 * a {@link CodecFactory#deflateCodec(int)} is returned. Otherwise the codecName is converted into a
 * {@link CodecFactory} via the {@link CodecFactory#fromString(String)} method.
 *
 * @param codecName the name of the codec to use (e.g. deflate, snappy, xz, etc.).
 * @param deflateLevel must be an integer from [0-9], and is only applicable if the codecName is "deflate".
 * @return a {@link CodecFactory}.
 */
public static CodecFactory getCodecFactory(Optional<String> codecName, Optional<String> deflateLevel) {
    if (!codecName.isPresent()) {
        return CodecFactory.deflateCodec(ConfigurationKeys.DEFAULT_DEFLATE_LEVEL);
    } else if (codecName.get().equalsIgnoreCase(DataFileConstants.DEFLATE_CODEC)) {
        if (!deflateLevel.isPresent()) {
            return CodecFactory.deflateCodec(ConfigurationKeys.DEFAULT_DEFLATE_LEVEL);
        }
        return CodecFactory.deflateCodec(Integer.parseInt(deflateLevel.get()));
    } else {
        return CodecFactory.fromString(codecName.get().toLowerCase());
    }
}
Example 17
Source File: AvroRecordWriter.java From spork with Apache License 2.0
static void configureDataFileWriter(DataFileWriter<GenericData.Record> writer, JobConf job)
        throws UnsupportedEncodingException {
    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC)
                ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    // Do max as core-default.xml has io.file.buffer.size as 4K
    writer.setSyncInterval(job.getInt(SYNC_INTERVAL_KEY,
            Math.max(job.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL),
                    DEFAULT_SYNC_INTERVAL)));

    // copy metadata from job
    for (Map.Entry<String, String> e : job) {
        if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
        if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                    URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }
}
Example 18
Source File: PigAvroOutputFormat.java From spork with Apache License 2.0
@Override
public RecordWriter<NullWritable, Object> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    if (schema == null)
        throw new IOException("Must provide a schema");

    Configuration conf = context.getConfiguration();

    DataFileWriter<Object> writer = new DataFileWriter<Object>(new PigAvroDatumWriter(schema));

    if (FileOutputFormat.getCompressOutput(context)) {
        int level = conf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = conf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC)
                ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    // Do max as core-default.xml has io.file.buffer.size as 4K
    writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY,
            Math.max(conf.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL),
                    DEFAULT_SYNC_INTERVAL)));

    Path path = getDefaultWorkFile(context, EXT);
    writer.create(schema, path.getFileSystem(conf).create(path));
    return new PigAvroRecordWriter(writer);
}
Example 19
Source File: PentahoAvroOutputFormat.java From pentaho-hadoop-shims with Apache License 2.0
@Override
public void setCompression( COMPRESSION compression ) {
    switch ( compression ) {
        case SNAPPY:
            codecFactory = CodecFactory.snappyCodec();
            break;
        case DEFLATE:
            codecFactory = CodecFactory.deflateCodec( Deflater.DEFAULT_COMPRESSION );
            break;
        default:
            codecFactory = CodecFactory.nullCodec();
            break;
    }
}