org.apache.parquet.io.ParquetEncodingException Java Examples
The following examples show how to use org.apache.parquet.io.ParquetEncodingException.
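ParquetEncodingException extends ParquetRuntimeException, so it is unchecked: callers are not forced to catch it, but they can. As a minimal, self-contained sketch of the pattern the examples below follow (the class name and the size check here are illustrative, not parquet-mr API):

import org.apache.parquet.io.ParquetEncodingException;

public class EncodingCheckSketch {
  // Validate before encoding; surface failures as the unchecked
  // ParquetEncodingException, as the project examples below do.
  static void checkPageSize(long size) {
    if (size > Integer.MAX_VALUE) {
      throw new ParquetEncodingException("Cannot write page larger than " + Integer.MAX_VALUE + " bytes: " + size);
    }
  }

  public static void main(String[] args) {
    try {
      checkPageSize(3_000_000_000L);
    } catch (ParquetEncodingException e) {
      System.err.println("encoding failed: " + e.getMessage());
    }
  }
}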
Example #1
Source File: ParquetWriteProtocol.java, from parquet-mr (Apache License 2.0)

@Override
public void writeFieldBegin(TField field) throws TException {
  if (field.type == TType.STOP) {
    return;
  }
  try {
    currentType = thriftFieldIdToParquetField[field.id];
    if (currentType == null) {
      throw new ParquetEncodingException("field " + field.id + " was not found in " + thriftType + " and " + schema.getType());
    }
    final int index = currentType.getIndex();
    recordConsumer.startField(currentType.getName(), index);
    currentProtocol = children[index];
  } catch (ArrayIndexOutOfBoundsException e) {
    throw new ParquetEncodingException("field " + field.id + " was not found in " + thriftType + " and " + schema.getType());
  }
}
Example #2
Source File: ColumnWriterBase.java, from parquet-mr (Apache License 2.0)

/**
 * Finalizes the Column chunk. Possibly adding extra pages if needed (dictionary, ...)
 * Is called right after writePage
 */
void finalizeColumnChunk() {
  final DictionaryPage dictionaryPage = dataColumn.toDictPageAndClose();
  if (dictionaryPage != null) {
    if (DEBUG) LOG.debug("write dictionary");
    try {
      pageWriter.writeDictionaryPage(dictionaryPage);
    } catch (IOException e) {
      throw new ParquetEncodingException("could not write dictionary page for " + path, e);
    }
    dataColumn.resetDictionary();
  }
  if (bloomFilterWriter != null && bloomFilter != null) {
    bloomFilterWriter.writeBloomFilter(bloomFilter);
  }
}
Example #3
Source File: ColumnWriterBase.java, from parquet-mr (Apache License 2.0)

/**
 * Writes the current data to a new page in the page store
 */
void writePage() {
  if (valueCount == 0) {
    throw new ParquetEncodingException("writing empty page");
  }
  this.rowsWrittenSoFar += pageRowCount;
  if (DEBUG) LOG.debug("write page");
  try {
    writePage(pageRowCount, valueCount, statistics, repetitionLevelColumn, definitionLevelColumn, dataColumn);
  } catch (IOException e) {
    throw new ParquetEncodingException("could not write page for " + path, e);
  }
  repetitionLevelColumn.reset();
  definitionLevelColumn.reset();
  dataColumn.reset();
  valueCount = 0;
  resetStatistics();
  pageRowCount = 0;
}
Example #4
Source File: ParquetFileWriter.java, from parquet-mr (Apache License 2.0)

static ParquetMetadata mergeFooters(Path root, List<Footer> footers) {
  String rootPath = root.toUri().getPath();
  GlobalMetaData fileMetaData = null;
  List<BlockMetaData> blocks = new ArrayList<BlockMetaData>();
  for (Footer footer : footers) {
    String footerPath = footer.getFile().toUri().getPath();
    if (!footerPath.startsWith(rootPath)) {
      throw new ParquetEncodingException(footerPath + " invalid: all the files must be contained in the root " + root);
    }
    footerPath = footerPath.substring(rootPath.length());
    while (footerPath.startsWith("/")) {
      footerPath = footerPath.substring(1);
    }
    fileMetaData = mergeInto(footer.getParquetMetadata().getFileMetaData(), fileMetaData);
    for (BlockMetaData block : footer.getParquetMetadata().getBlocks()) {
      block.setPath(footerPath);
      blocks.add(block);
    }
  }
  return new ParquetMetadata(fileMetaData.merge(), blocks);
}
Example #5
Source File: ByteStreamSplitValuesWriter.java, from parquet-mr (Apache License 2.0)

public ByteStreamSplitValuesWriter(int elementSizeInBytes, int initialCapacity, int pageSize, ByteBufferAllocator allocator) {
  if (elementSizeInBytes <= 0) {
    throw new ParquetEncodingException(String.format("Element byte size is invalid: %d", elementSizeInBytes));
  }
  this.numStreams = elementSizeInBytes;
  this.elementSizeInBytes = elementSizeInBytes;
  this.byteStreams = new CapacityByteArrayOutputStream[elementSizeInBytes];

  // Round-up the capacity hint.
  final int capacityPerStream = (pageSize + this.numStreams - 1) / this.numStreams;
  final int initialCapacityPerStream = (initialCapacity + this.numStreams - 1) / this.numStreams;
  for (int i = 0; i < this.numStreams; ++i) {
    this.byteStreams[i] = new CapacityByteArrayOutputStream(initialCapacityPerStream, capacityPerStream, allocator);
  }
}
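The capacity math above is integer ceiling division: (x + n - 1) / n rounds up without floating point, so the per-stream buffers jointly cover the whole page. A quick stand-alone check (the values are illustrative):

int numStreams = 8;
int pageSize = 1_048_577; // one byte past 1 MiB
int capacityPerStream = (pageSize + numStreams - 1) / numStreams; // 131073; plain division would give 131072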
Example #6
Source File: ParquetColumnChunkPageWriteStore.java, from Bats (Apache License 2.0)

@Override
public void writePage(BytesInput bytes, int valueCount, Statistics statistics, Encoding rlEncoding,
    Encoding dlEncoding, Encoding valuesEncoding) throws IOException {
  long uncompressedSize = bytes.size();
  // Parquet library creates bad metadata if the uncompressed or compressed size of a page exceeds Integer.MAX_VALUE
  if (uncompressedSize > Integer.MAX_VALUE) {
    throw new ParquetEncodingException("Cannot write page larger than Integer.MAX_VALUE bytes: " + uncompressedSize);
  }
  BytesInput compressedBytes = compressor.compress(bytes);
  long compressedSize = compressedBytes.size();
  if (compressedSize > Integer.MAX_VALUE) {
    throw new ParquetEncodingException("Cannot write compressed page larger than Integer.MAX_VALUE bytes: " + compressedSize);
  }
  parquetMetadataConverter.writeDataPageHeader(
      (int) uncompressedSize,
      (int) compressedSize,
      valueCount,
      statistics,
      rlEncoding,
      dlEncoding,
      valuesEncoding,
      buf);
  this.uncompressedLength += uncompressedSize;
  this.compressedLength += compressedSize;
  this.totalValueCount += valueCount;
  this.pageCount += 1;
  this.totalStatistics.mergeStatistics(statistics);
  compressedBytes.writeAllTo(buf);
  rlEncodings.add(rlEncoding);
  dlEncodings.add(dlEncoding);
  dataEncodings.add(valuesEncoding);
}
Example #7
Source File: PlainValuesWriter.java, from parquet-mr (Apache License 2.0)

@Override
public BytesInput getBytes() {
  try {
    out.flush();
  } catch (IOException e) {
    throw new ParquetEncodingException("could not write page", e);
  }
  if (LOG.isDebugEnabled()) LOG.debug("writing a buffer of size {}", arrayOut.size());
  return BytesInput.from(arrayOut);
}
Example #8
Source File: FixedLenByteArrayPlainValuesWriter.java, from parquet-mr (Apache License 2.0)

@Override
public final void writeBytes(Binary v) {
  if (v.length() != length) {
    throw new IllegalArgumentException("Fixed Binary size " + v.length() + " does not match field type length " + length);
  }
  try {
    v.writeTo(out);
  } catch (IOException e) {
    throw new ParquetEncodingException("could not write fixed bytes", e);
  }
}
Example #9
Source File: FixedLenByteArrayPlainValuesWriter.java, from parquet-mr (Apache License 2.0)

@Override
public BytesInput getBytes() {
  try {
    out.flush();
  } catch (IOException e) {
    throw new ParquetEncodingException("could not write page", e);
  }
  LOG.debug("writing a buffer of size {}", arrayOut.size());
  return BytesInput.from(arrayOut);
}
Example #10
Source File: ColumnWriterV2.java, from parquet-mr (Apache License 2.0)

@Override
public BytesInput getBytes() {
  try {
    return encoder.toBytes();
  } catch (IOException e) {
    throw new ParquetEncodingException(e);
  }
}
Example #11
Source File: Binary.java, from parquet-mr (Apache License 2.0)

private static ByteBuffer encodeUTF8(CharSequence value) {
  try {
    return ENCODER.get().encode(CharBuffer.wrap(value));
  } catch (CharacterCodingException e) {
    throw new ParquetEncodingException("UTF-8 not supported.", e);
  }
}
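ENCODER here is the class's thread-local CharsetEncoder. A self-contained sketch of the same pattern built directly on the standard library (the class and method names are illustrative, not parquet-mr source):

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.StandardCharsets;

import org.apache.parquet.io.ParquetEncodingException;

public class Utf8EncodeSketch {
  // Unlike String.getBytes(UTF_8), a CharsetEncoder reports malformed input
  // (such as an unpaired surrogate) instead of silently replacing it; the
  // checked failure is rethrown as the unchecked ParquetEncodingException.
  static ByteBuffer encodeUtf8(CharSequence value) {
    try {
      return StandardCharsets.UTF_8.newEncoder().encode(CharBuffer.wrap(value));
    } catch (CharacterCodingException e) {
      throw new ParquetEncodingException("UTF-8 not supported.", e);
    }
  }

  public static void main(String[] args) {
    System.out.println(encodeUtf8("parquet").remaining()); // 7 bytes
    encodeUtf8("\uD800"); // unpaired surrogate: throws ParquetEncodingException
  }
}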
Example #12
Source File: MemPageWriter.java, from parquet-mr (Apache License 2.0)

@Override
public void writePage(BytesInput bytesInput, int valueCount, Statistics statistics, Encoding rlEncoding,
    Encoding dlEncoding, Encoding valuesEncoding) throws IOException {
  if (valueCount == 0) {
    throw new ParquetEncodingException("illegal page of 0 values");
  }
  memSize += bytesInput.size();
  pages.add(new DataPageV1(BytesInput.copy(bytesInput), valueCount, (int) bytesInput.size(), statistics, rlEncoding, dlEncoding, valuesEncoding));
  totalValueCount += valueCount;
  LOG.debug("page written for {} bytes and {} records", bytesInput.size(), valueCount);
}
Example #13
Source File: MemPageWriter.java, from parquet-mr (Apache License 2.0)

@Override
public void writePageV2(int rowCount, int nullCount, int valueCount, BytesInput repetitionLevels,
    BytesInput definitionLevels, Encoding dataEncoding, BytesInput data, Statistics<?> statistics) throws IOException {
  if (valueCount == 0) {
    throw new ParquetEncodingException("illegal page of 0 values");
  }
  long size = repetitionLevels.size() + definitionLevels.size() + data.size();
  memSize += size;
  pages.add(DataPageV2.uncompressed(rowCount, nullCount, valueCount, copy(repetitionLevels), copy(definitionLevels), dataEncoding, copy(data), statistics));
  totalValueCount += valueCount;
  LOG.debug("page written for {} bytes and {} records", size, valueCount);
}
Example #14
Source File: MemPageWriter.java, from parquet-mr (Apache License 2.0)

@Override
public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException {
  if (this.dictionaryPage != null) {
    throw new ParquetEncodingException("Only one dictionary page per block");
  }
  this.memSize += dictionaryPage.getBytes().size();
  this.dictionaryPage = dictionaryPage.copy();
  LOG.debug("dictionary page written for {} bytes and {} records", dictionaryPage.getBytes().size(), dictionaryPage.getDictionarySize());
}
Example #15
Source File: ColumnChunkPageWriteStore.java, from parquet-mr (Apache License 2.0)

private int toIntWithCheck(long size) {
  if (size > Integer.MAX_VALUE) {
    throw new ParquetEncodingException("Cannot write page larger than " + Integer.MAX_VALUE + " bytes: " + size);
  }
  return (int) size;
}
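The guard exists because a plain (int) cast truncates silently rather than failing. A two-line demonstration:

long size = Integer.MAX_VALUE + 1L; // 2147483648
int truncated = (int) size;         // -2147483648: the cast wraps around, hence the explicit check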
Example #16
Source File: ColumnChunkPageWriteStore.java, from parquet-mr (Apache License 2.0)

@Override
public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException {
  if (this.dictionaryPage != null) {
    throw new ParquetEncodingException("Only one dictionary page is allowed");
  }
  BytesInput dictionaryBytes = dictionaryPage.getBytes();
  int uncompressedSize = (int) dictionaryBytes.size();
  BytesInput compressedBytes = compressor.compress(dictionaryBytes);
  this.dictionaryPage = new DictionaryPage(BytesInput.copy(compressedBytes), uncompressedSize,
      dictionaryPage.getDictionarySize(), dictionaryPage.getEncoding());
}
Example #17
Source File: ScroogeWriteSupport.java, from parquet-mr (Apache License 2.0)

@Override
public void write(T record) {
  try {
    record.write(parquetWriteProtocol);
  } catch (TException e) {
    throw new ParquetEncodingException(e);
  }
}
Example #18
Source File: ThriftBytesWriteSupport.java, from parquet-mr (Apache License 2.0)

@Override
public void write(BytesWritable record) {
  try {
    readToWrite.readOne(protocol(record), parquetWriteProtocol);
  } catch (TException e) {
    throw new ParquetEncodingException(e);
  }
}
Example #19
Source File: TBaseWriteSupport.java, from parquet-mr (Apache License 2.0)

@Override
public void write(T record) {
  try {
    record.write(parquetWriteProtocol);
  } catch (TException e) {
    throw new ParquetEncodingException(e);
  }
}
Example #20
Source File: ParquetThriftStorer.java, from parquet-mr (Apache License 2.0)

/**
 * {@inheritDoc}
 */
@Override
public void putNext(Tuple tuple) throws IOException {
  try {
    this.recordWriter.write(null, tuple);
  } catch (InterruptedException e) {
    throw new ParquetEncodingException("Interrupted while writing", e);
  }
}
Example #21
Source File: ParquetWriteProtocol.java, from parquet-mr (Apache License 2.0)

@Override
public void writeI32(int i32) throws TException {
  start();
  EnumValue value = type.getEnumValueById(i32);
  if (value == null) {
    throw new ParquetEncodingException("Can not find enum value of index " + i32 + " for field:" + columnIO.toString());
  }
  recordConsumer.addBinary(Binary.fromString(value.getName()));
  end();
}
Example #22
Source File: DataWritableWriter.java, from parquet-mr (Apache License 2.0)

private void writeData(final ArrayWritable arr, final GroupType type) {
  if (arr == null) {
    return;
  }
  final int fieldCount = type.getFieldCount();
  Writable[] values = arr.get();
  for (int field = 0; field < fieldCount; ++field) {
    final Type fieldType = type.getType(field);
    final String fieldName = fieldType.getName();
    final Writable value = values[field];
    if (value == null) {
      continue;
    }
    recordConsumer.startField(fieldName, field);
    if (fieldType.isPrimitive()) {
      writePrimitive(value);
    } else {
      recordConsumer.startGroup();
      if (value instanceof ArrayWritable) {
        if (fieldType.asGroupType().getRepetition().equals(Type.Repetition.REPEATED)) {
          writeArray((ArrayWritable) value, fieldType.asGroupType());
        } else {
          writeData((ArrayWritable) value, fieldType.asGroupType());
        }
      } else if (value != null) {
        throw new ParquetEncodingException("This should be an ArrayWritable or MapWritable: " + value);
      }
      recordConsumer.endGroup();
    }
    recordConsumer.endField(fieldName, field);
  }
}
Example #23
Source File: ParquetStorer.java, from parquet-mr (Apache License 2.0)

private Schema getSchema() {
  try {
    final String schemaString = getProperties().getProperty(SCHEMA);
    if (schemaString == null) {
      throw new ParquetEncodingException("Can not store relation in Parquet as the schema is unknown");
    }
    return Utils.getSchemaFromString(schemaString);
  } catch (ParserException e) {
    throw new ParquetEncodingException("can not get schema from context", e);
  }
}
Example #24
Source File: ParquetStorer.java, from parquet-mr (Apache License 2.0)

/**
 * {@inheritDoc}
 */
@Override
public void putNext(Tuple tuple) throws IOException {
  try {
    this.recordWriter.write(null, tuple);
  } catch (InterruptedException e) {
    Thread.interrupted();
    throw new ParquetEncodingException("Interrupted while writing", e);
  }
}
Example #25
Source File: PlainValuesWriter.java, from parquet-mr (Apache License 2.0)

@Override
public final void writeDouble(double v) {
  try {
    out.writeDouble(v);
  } catch (IOException e) {
    throw new ParquetEncodingException("could not write double", e);
  }
}
Example #26
Source File: ParquetColumnChunkPageWriteStore.java, from Bats (Apache License 2.0)

private int toIntWithCheck(long size) {
  if (size > Integer.MAX_VALUE) {
    throw new ParquetEncodingException("Cannot write page larger than " + Integer.MAX_VALUE + " bytes: " + size);
  }
  return (int) size;
}
Example #27
Source File: ParquetColumnChunkPageWriteStore.java, from Bats (Apache License 2.0)

@Override
public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException {
  if (this.dictionaryPage != null) {
    throw new ParquetEncodingException("Only one dictionary page is allowed");
  }
  BytesInput dictionaryBytes = dictionaryPage.getBytes();
  int uncompressedSize = (int) dictionaryBytes.size();
  BytesInput compressedBytes = compressor.compress(dictionaryBytes);
  this.dictionaryPage = new DictionaryPage(BytesInput.copy(compressedBytes), uncompressedSize,
      dictionaryPage.getDictionarySize(), dictionaryPage.getEncoding());
}
Example #28
Source File: ParquetFilePOJOReaderTest.java, from attic-apex-malhar (Apache License 2.0)

@Override
public void write(Object record) {
  recordConsumer.startMessage();
  for (int i = 0; i < cols.size(); ++i) {
    String val = keyMethodMap.get(i).get(record).toString();
    recordConsumer.startField(cols.get(i).getPath()[0], i);
    switch (cols.get(i).getType()) {
      case BOOLEAN:
        recordConsumer.addBoolean(Boolean.parseBoolean(val));
        break;
      case FLOAT:
        recordConsumer.addFloat(Float.parseFloat(val));
        break;
      case DOUBLE:
        recordConsumer.addDouble(Double.parseDouble(val));
        break;
      case INT32:
        recordConsumer.addInteger(Integer.parseInt(val));
        break;
      case INT64:
        recordConsumer.addLong(Long.parseLong(val));
        break;
      case BINARY:
        recordConsumer.addBinary(stringToBinary(val));
        break;
      default:
        throw new ParquetEncodingException("Unsupported column type: " + cols.get(i).getType());
    }
    recordConsumer.endField(cols.get(i).getPath()[0], i);
  }
  recordConsumer.endMessage();
}
Example #29
Source File: DictionaryValuesWriter.java, from parquet-mr (Apache License 2.0)

@Override
public BytesInput getBytes() {
  int maxDicId = getDictionarySize() - 1;
  LOG.debug("max dic id {}", maxDicId);
  int bitWidth = BytesUtils.getWidthFromMaxInt(maxDicId);
  int initialSlabSize = CapacityByteArrayOutputStream.initialSlabSizeHeuristic(MIN_INITIAL_SLAB_SIZE, maxDictionaryByteSize, 10);
  RunLengthBitPackingHybridEncoder encoder = new RunLengthBitPackingHybridEncoder(
      bitWidth, initialSlabSize, maxDictionaryByteSize, this.allocator);
  encoders.add(encoder);
  IntIterator iterator = encodedValues.iterator();
  try {
    while (iterator.hasNext()) {
      encoder.writeInt(iterator.next());
    }
    // encodes the bit width
    byte[] bytesHeader = new byte[] { (byte) bitWidth };
    BytesInput rleEncodedBytes = encoder.toBytes();
    LOG.debug("rle encoded bytes {}", rleEncodedBytes.size());
    BytesInput bytes = concat(BytesInput.from(bytesHeader), rleEncodedBytes);
    // remember size of dictionary when we last wrote a page
    lastUsedDictionarySize = getDictionarySize();
    lastUsedDictionaryByteSize = dictionaryByteSize;
    return bytes;
  } catch (IOException e) {
    throw new ParquetEncodingException("could not encode the values", e);
  }
}
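The bit width is the number of bits needed to represent the largest dictionary id. A sketch of that computation, assuming BytesUtils.getWidthFromMaxInt follows the usual leading-zeros formulation (the helper below is illustrative, not the parquet-mr source):

// Bits needed to represent ids in the range 0..maxDicId.
static int widthFromMaxInt(int maxDicId) {
  return 32 - Integer.numberOfLeadingZeros(maxDicId);
}
// widthFromMaxInt(0)   -> 0  (a one-entry dictionary needs no id bits)
// widthFromMaxInt(4)   -> 3  (ids 0..4 fit in 3 bits)
// widthFromMaxInt(255) -> 8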
Example #30
Source File: DeltaLengthByteArrayValuesWriter.java, from parquet-mr (Apache License 2.0)

@Override
public void writeBytes(Binary v) {
  try {
    lengthWriter.writeInteger(v.length());
    v.writeTo(out);
  } catch (IOException e) {
    throw new ParquetEncodingException("could not write bytes", e);
  }
}