Java Code Examples for org.apache.parquet.io.api.Binary#fromReusedByteArray()
The following examples show how to use
org.apache.parquet.io.api.Binary#fromReusedByteArray().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestStatistics.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Feeds the values 10, 20 and 15 into the statistics through one reused
 * {@link Binary} whose backing array is overwritten between updates, then
 * checks that the recorded max is 20 and the recorded min is 10 even though
 * the backing array ends up holding 15.
 */
@Test
public void testBinaryMinMaxForReusedBackingByteArray() {
  final byte[] backing = {10};
  final Binary reused = Binary.fromReusedByteArray(backing);
  final BinaryStatistics statistics = new BinaryStatistics();

  // Same Binary instance every time; only the content of its backing array changes.
  for (byte next : new byte[] {10, 20, 15}) {
    backing[0] = next;
    statistics.updateStats(reused);
  }

  assertArrayEquals(new byte[] {20}, statistics.getMaxBytes());
  assertArrayEquals(new byte[] {10}, statistics.getMinBytes());
}
Example 2
Source File: BinaryDictionary.java From presto with Apache License 2.0 | 5 votes |
/**
 * Populates the dictionary with {@link Binary} entries that are views over a
 * single byte array obtained from the dictionary page's slice.
 *
 * @param dictionaryPage page providing the encoding, the number of entries and the raw slice
 * @param length byte length shared by every entry, or {@code null} when each entry is
 *     preceded by a 4-byte little-endian length read via {@code readIntLittleEndian}
 * @throws IOException declared by this constructor's signature
 */
public BinaryDictionary(DictionaryPage dictionaryPage, Integer length) throws IOException {
  super(dictionaryPage.getEncoding());
  content = new Binary[dictionaryPage.getDictionarySize()];

  // Use the slice's own array when it has one; otherwise take the bytes from getBytes().
  Slice slice = dictionaryPage.getSlice();
  boolean arrayBacked = slice.hasByteArray();
  byte[] bytes = arrayBacked ? slice.byteArray() : slice.getBytes();
  int position = arrayBacked ? slice.byteArrayOffset() : 0;

  if (length != null) {
    // Fixed-width entries laid out back to back.
    checkArgument(length > 0, "Invalid byte array length: %s", length);
    for (int index = 0; index < content.length; index++) {
      content[index] = Binary.fromReusedByteArray(bytes, position, length);
      position += length;
    }
    return;
  }

  // Variable-width entries: a 4-byte length prefix followed by the payload.
  for (int index = 0; index < content.length; index++) {
    int entryLength = readIntLittleEndian(bytes, position);
    position += 4;
    content[index] = Binary.fromReusedByteArray(bytes, position, entryLength);
    position += entryLength;
  }
}
Example 3
Source File: AvroWriteSupportInt96Avro18.java From datacollector with Apache License 2.0 | 5 votes |
/**
 * Converts an Avro string value into a Parquet {@link Binary}.
 *
 * <ul>
 *   <li>{@code Utf8}: wraps the first {@code getByteLength()} bytes of its byte array
 *       without copying.</li>
 *   <li>Any other {@code CharSequence} (or {@code null}, which is passed through exactly
 *       as before): delegates to {@code Binary.fromCharSequence}.</li>
 *   <li>Anything else: encodes {@code value.toString()} instead of failing with a
 *       {@code ClassCastException} on the cast to {@code CharSequence}.</li>
 * </ul>
 *
 * @param value the Avro string representation to convert
 * @return a Binary holding the string's bytes
 */
private Binary fromAvroString(Object value) {
  if (value instanceof Utf8) {
    Utf8 utf8 = (Utf8) value;
    return Binary.fromReusedByteArray(utf8.getBytes(), 0, utf8.getByteLength());
  }
  // null keeps its original path so callers observe the same behavior as before.
  if (value == null || value instanceof CharSequence) {
    return Binary.fromCharSequence((CharSequence) value);
  }
  // Non-CharSequence values used to blow up on the cast; fall back to their string form.
  return Binary.fromCharSequence(value.toString());
}
Example 4
Source File: AvroWriteSupportInt96Avro17.java From datacollector with Apache License 2.0 | 5 votes |
/**
 * Converts an Avro string value into a Parquet {@link Binary}.
 *
 * <p>A {@code Utf8} is wrapped without copying, using only the first
 * {@code getByteLength()} bytes of its array; every other value is encoded
 * from its {@code toString()} form.
 *
 * @param value the Avro string representation to convert
 * @return a Binary holding the string's bytes
 */
private Binary fromAvroString(Object value) {
  if (!(value instanceof Utf8)) {
    return Binary.fromString(value.toString());
  }
  Utf8 utf8 = (Utf8) value;
  byte[] bytes = utf8.getBytes();
  int byteLength = utf8.getByteLength();
  return Binary.fromReusedByteArray(bytes, 0, byteLength);
}
Example 5
Source File: BinaryStatistics.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Installs new min and max values that wrap the supplied arrays directly
 * (no copy is made) and marks these statistics as not empty.
 *
 * <p>Because the arrays are re-used, later modifications to them are
 * reflected in the min and max values as well.
 *
 * @param minBytes byte array to use as the min value
 * @param maxBytes byte array to use as the max value
 */
@Override
public void setMinMaxFromBytes(byte[] minBytes, byte[] maxBytes) {
  final Binary wrappedMax = Binary.fromReusedByteArray(maxBytes);
  final Binary wrappedMin = Binary.fromReusedByteArray(minBytes);
  this.max = wrappedMax;
  this.min = wrappedMin;
  markAsNotEmpty();
}
Example 6
Source File: TestDictionary.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Writes {@code COUNT} values to the writer through a single reused
 * {@link Binary}. Before each write, the bytes of {@code prefix + (i % 10)}
 * are copied over the array returned by the reused Binary's
 * {@code getBytesUnsafe()}.
 *
 * @param COUNT number of values to write
 * @param cw writer receiving the values
 * @param prefix text placed in front of the cycling digit 0-9
 */
private void writeRepeatedWithReuse(int COUNT, ValuesWriter cw, String prefix) {
  byte[] seed = (prefix + "0").getBytes(StandardCharsets.UTF_8);
  Binary reused = Binary.fromReusedByteArray(seed);

  int i = 0;
  while (i < COUNT) {
    Binary fresh = Binary.fromString(prefix + (i % 10));
    byte[] source = fresh.getBytesUnsafe();
    byte[] target = reused.getBytesUnsafe();
    // Every value has the same length as the seed, so the overwrite is complete.
    System.arraycopy(source, 0, target, 0, reused.length());
    cw.writeBytes(reused);
    i++;
  }
}
Example 7
Source File: RandomValues.java From parquet-mr with Apache License 2.0 | 5 votes |
@Override public Binary nextValue() { // use a random length, but ensure it is at least a few bytes int length = 5 + randomPositiveInt(buffer.length - 5); for (int index = 0; index < length; index++) { buffer[index] = (byte) randomInt(); } return Binary.fromReusedByteArray(buffer, 0, length); }
Example 8
Source File: RandomValues.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Overwrites every byte of the shared buffer with a random value and returns
 * a {@link Binary} that wraps the whole buffer without copying.
 *
 * @return a Binary over the entire buffer
 */
@Override
public Binary nextValue() {
  int position = 0;
  while (position < buffer.length) {
    buffer[position] = (byte) randomInt();
    position++;
  }
  return Binary.fromReusedByteArray(buffer);
}
Example 9
Source File: AvroWriteSupport.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Converts an Avro string value into a Parquet {@link Binary}.
 *
 * <p>A {@code Utf8} is wrapped without copying, using only the first
 * {@code getByteLength()} bytes of its array. Any other {@code CharSequence}
 * is passed to {@code Binary.fromCharSequence} as is, and every remaining
 * value is converted through its {@code toString()} form.
 *
 * @param value the Avro string representation to convert
 * @return a Binary holding the string's bytes
 */
private Binary fromAvroString(Object value) {
  if (!(value instanceof Utf8)) {
    CharSequence chars =
        (value instanceof CharSequence) ? (CharSequence) value : value.toString();
    return Binary.fromCharSequence(chars);
  }
  Utf8 utf8 = (Utf8) value;
  return Binary.fromReusedByteArray(utf8.getBytes(), 0, utf8.getByteLength());
}
Example 10
Source File: AvroWriteSupportInt96Avro18.java From datacollector with Apache License 2.0 | 4 votes |
/**
 * Calls an appropriate write method based on the value.
 * Value must not be null and the schema must not be nullable.
 *
 * <p>A LONG whose Parquet primitive type is INT96 is treated as epoch
 * milliseconds and written as a 12-byte little-endian value: 8 bytes of
 * nanoseconds within the day followed by 4 bytes of Julian day. The calendar
 * fields are computed in {@code timeZoneId} when it is set and non-empty,
 * otherwise in UTC. Avro types not listed in the switch are silently ignored.
 *
 * @param type a Parquet type
 * @param avroSchema a non-nullable Avro schema
 * @param value a non-null value to write
 */
@SuppressWarnings("unchecked")
private void writeValueWithoutConversion(Type type, Schema avroSchema, Object value) {
  switch (avroSchema.getType()) {
    case BOOLEAN:
      recordConsumer.addBoolean((Boolean) value);
      break;
    case INT:
      if (value instanceof Character) {
        recordConsumer.addInteger((Character) value);
      } else {
        recordConsumer.addInteger(((Number) value).intValue());
      }
      break;
    case LONG:
      if (type.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.INT96)) {
        final long NANOS_PER_HOUR = TimeUnit.HOURS.toNanos(1);
        final long NANOS_PER_MINUTE = TimeUnit.MINUTES.toNanos(1);
        final long NANOS_PER_SECOND = TimeUnit.SECONDS.toNanos(1);
        final long NANOS_PER_MILLISECOND = TimeUnit.MILLISECONDS.toNanos(1);

        long timestamp = ((Number) value).longValue();
        Calendar calendar;
        if (timeZoneId != null && !timeZoneId.isEmpty()) {
          calendar = Calendar.getInstance(TimeZone.getTimeZone(timeZoneId));
        } else {
          calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
        }
        calendar.setTime(new Date(timestamp));

        // Calculate Julian days and nanoseconds in the day.
        // Calendar.MONTH is zero-based while LocalDate expects 1-12.
        LocalDate dt = LocalDate.of(
            calendar.get(Calendar.YEAR),
            calendar.get(Calendar.MONTH) + 1,
            calendar.get(Calendar.DAY_OF_MONTH));
        int julianDays = (int) JulianFields.JULIAN_DAY.getFrom(dt);
        // The millisecond field must be included; otherwise the sub-second part of the
        // input timestamp is dropped and every value is truncated to whole seconds.
        long nanos = (calendar.get(Calendar.HOUR_OF_DAY) * NANOS_PER_HOUR)
            + (calendar.get(Calendar.MINUTE) * NANOS_PER_MINUTE)
            + (calendar.get(Calendar.SECOND) * NANOS_PER_SECOND)
            + (calendar.get(Calendar.MILLISECOND) * NANOS_PER_MILLISECOND);

        // Write INT96 timestamp
        byte[] timestampBuffer = new byte[12];
        ByteBuffer buf = ByteBuffer.wrap(timestampBuffer);
        buf.order(ByteOrder.LITTLE_ENDIAN).putLong(nanos).putInt(julianDays);

        // This is the properly encoded INT96 timestamp
        Binary timestampBinary = Binary.fromReusedByteArray(timestampBuffer);
        recordConsumer.addBinary(timestampBinary);
      } else {
        recordConsumer.addLong(((Number) value).longValue());
      }
      break;
    case FLOAT:
      recordConsumer.addFloat(((Number) value).floatValue());
      break;
    case DOUBLE:
      recordConsumer.addDouble(((Number) value).doubleValue());
      break;
    case FIXED:
      recordConsumer.addBinary(Binary.fromReusedByteArray(((GenericFixed) value).bytes()));
      break;
    case BYTES:
      if (value instanceof byte[]) {
        recordConsumer.addBinary(Binary.fromReusedByteArray((byte[]) value));
      } else {
        recordConsumer.addBinary(Binary.fromReusedByteBuffer((ByteBuffer) value));
      }
      break;
    case STRING:
      recordConsumer.addBinary(fromAvroString(value));
      break;
    case RECORD:
      writeRecord(type.asGroupType(), avroSchema, value);
      break;
    case ENUM:
      recordConsumer.addBinary(Binary.fromString(value.toString()));
      break;
    case ARRAY:
      listWriter.writeList(type.asGroupType(), avroSchema, value);
      break;
    case MAP:
      writeMap(type.asGroupType(), avroSchema, (Map<CharSequence, ?>) value);
      break;
    case UNION:
      writeUnion(type.asGroupType(), avroSchema, value);
      break;
    default:
      break;
  }
}
Example 11
Source File: AvroWriteSupportInt96Avro17.java From datacollector with Apache License 2.0 | 4 votes |
@SuppressWarnings("unchecked") private void writeValue(Type type, Schema avroSchema, Object value) { Schema nonNullAvroSchema = AvroSchemaConverter.getNonNull(avroSchema); Schema.Type avroType = nonNullAvroSchema.getType(); if (avroType.equals(Schema.Type.BOOLEAN)) { recordConsumer.addBoolean((Boolean) value); } else if (avroType.equals(Schema.Type.INT)) { if (value instanceof Character) { recordConsumer.addInteger((Character) value); } else { recordConsumer.addInteger(((Number) value).intValue()); } } else if (avroType.equals(Schema.Type.LONG)) { if (type.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.INT96)) { final long NANOS_PER_HOUR = TimeUnit.HOURS.toNanos(1); final long NANOS_PER_MINUTE = TimeUnit.MINUTES.toNanos(1); final long NANOS_PER_SECOND = TimeUnit.SECONDS.toNanos(1); long timestamp = ((Number) value).longValue(); Calendar calendar; if (timeZoneId != null && ! timeZoneId.isEmpty()) { calendar = Calendar.getInstance(TimeZone.getTimeZone(timeZoneId)); } else { calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC")); } calendar.setTime(new Date(timestamp)); // Calculate Julian days and nanoseconds in the day LocalDate dt = LocalDate.of(calendar.get(Calendar.YEAR), calendar.get(Calendar.MONTH)+1, calendar.get(Calendar.DAY_OF_MONTH)); int julianDays = (int) JulianFields.JULIAN_DAY.getFrom(dt); long nanos = (calendar.get(Calendar.HOUR_OF_DAY) * NANOS_PER_HOUR) + (calendar.get(Calendar.MINUTE) * NANOS_PER_MINUTE) + (calendar.get(Calendar.SECOND) * NANOS_PER_SECOND); // Write INT96 timestamp byte[] timestampBuffer = new byte[12]; ByteBuffer buf = ByteBuffer.wrap(timestampBuffer); buf.order(ByteOrder.LITTLE_ENDIAN).putLong(nanos).putInt(julianDays); // This is the properly encoded INT96 timestamp Binary timestampBinary = Binary.fromReusedByteArray(timestampBuffer); recordConsumer.addBinary(timestampBinary); } else { recordConsumer.addLong(((Number) value).longValue()); } } else if (avroType.equals(Schema.Type.FLOAT)) { 
recordConsumer.addFloat(((Number) value).floatValue()); } else if (avroType.equals(Schema.Type.DOUBLE)) { recordConsumer.addDouble(((Number) value).doubleValue()); } else if (avroType.equals(Schema.Type.BYTES)) { if (value instanceof byte[]) { recordConsumer.addBinary(Binary.fromReusedByteArray((byte[]) value)); } else { recordConsumer.addBinary(Binary.fromReusedByteBuffer((ByteBuffer) value)); } } else if (avroType.equals(Schema.Type.STRING)) { recordConsumer.addBinary(fromAvroString(value)); } else if (avroType.equals(Schema.Type.RECORD)) { writeRecord(type.asGroupType(), nonNullAvroSchema, value); } else if (avroType.equals(Schema.Type.ENUM)) { recordConsumer.addBinary(Binary.fromString(value.toString())); } else if (avroType.equals(Schema.Type.ARRAY)) { listWriter.writeList(type.asGroupType(), nonNullAvroSchema, value); } else if (avroType.equals(Schema.Type.MAP)) { writeMap(type.asGroupType(), nonNullAvroSchema, (Map<CharSequence, ?>) value); } else if (avroType.equals(Schema.Type.UNION)) { writeUnion(type.asGroupType(), nonNullAvroSchema, value); } else if (avroType.equals(Schema.Type.FIXED)) { recordConsumer.addBinary(Binary.fromReusedByteArray(((GenericFixed) value).bytes())); } }
Example 12
Source File: RandomValues.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Copies up to {@code bufferLength} bytes of {@code data} into the shared
 * buffer and returns a {@link Binary} that wraps that buffer without copying.
 *
 * <p>The returned Binary is a view over the shared buffer, so its content
 * changes on the next call; callers that need a stable value must copy it.
 * Previously the copy was made but the Binary wrapped {@code data} instead,
 * leaving the copy without effect.
 *
 * @param data source bytes; only the first {@code min(data.length, bufferLength)} are used
 * @return a Binary over the copied prefix of the shared buffer
 */
public Binary asReusedBinary(byte[] data) {
  int length = Math.min(data.length, bufferLength);
  System.arraycopy(data, 0, buffer, 0, length);
  // Wrap the shared buffer (not the caller's array) so the value is genuinely "reused".
  return Binary.fromReusedByteArray(buffer, 0, length);
}
Example 13
Source File: ParquetStringInspector.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Creates a new {@code BinaryWritable} for the given text.
 *
 * <p>Only the first {@code text.getLength()} bytes of the array returned by
 * {@code text.getBytes()} are wrapped: that array is the Text's backing
 * storage and may be longer than the valid content, so wrapping it whole
 * could append stale bytes to the value.
 *
 * @param o the object to set; not used by this implementation
 * @param text the new value, or {@code null} for a writable holding {@code null}
 * @return a new BinaryWritable wrapping the text's valid bytes without copying
 */
@Override
public Object set(final Object o, final Text text) {
  return new BinaryWritable(
      text == null ? null : Binary.fromReusedByteArray(text.getBytes(), 0, text.getLength()));
}