Java Code Examples for io.airlift.slice.Slice#slice()
The following examples show how to use
io.airlift.slice.Slice#slice().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: RE2.java From hive-third-functions with Apache License 2.0 | 6 votes |
/** * Returns an array of {@link Slice}s holding the text of the leftmost match * of the regular expression in {@code s} and the matches, if any, of * its subexpressions, as defined by the <a * href='#submatch'>Submatch</a> description above. * * <p>A return value of null indicates no match. */ // This is visible for testing. Slice[] findSubmatch(Slice s) { int[] a = doExecute(com.github.aaronshan.functions.regexp.re2j.MachineInput.fromUTF8(s), 0, Anchor.UNANCHORED, prog.numCap); if (a == null) { return null; } Slice[] ret = new Slice[1 + numSubexp]; for (int i = 0; i < ret.length; i++) { if (2 * i < a.length && a[2 * i] >= 0) { int begin = a[2 * i]; int end = a[2 * i + 1]; ret[i] = s.slice(begin, end - begin); } } return ret; }
Example 2
Source File: PartitionTransforms.java From presto with Apache License 2.0 | 6 votes |
/**
 * Builds a new VARBINARY block from {@code block} in which every non-null
 * value longer than {@code max} bytes is cut down to its first {@code max}
 * bytes. Null positions are copied as nulls.
 */
private static Block truncateVarbinary(Block block, int max) {
    int positionCount = block.getPositionCount();
    BlockBuilder output = VARBINARY.createBlockBuilder(null, positionCount);
    for (int i = 0; i < positionCount; i++) {
        if (block.isNull(i)) {
            output.appendNull();
        } else {
            Slice original = VARBINARY.getSlice(block, i);
            Slice truncated = original.length() > max ? original.slice(0, max) : original;
            VARBINARY.writeSlice(output, truncated);
        }
    }
    return output.build();
}
Example 3
Source File: JoniRegexpFunctions.java From presto with Apache License 2.0 | 6 votes |
@SqlNullable @Description("Returns regex group of extracted string with a pattern") @ScalarFunction @LiteralParameters("x") @SqlType("varchar(x)") public static Slice regexpExtract(@SqlType("varchar(x)") Slice source, @SqlType(JoniRegexpType.NAME) JoniRegexp pattern, @SqlType(StandardTypes.BIGINT) long groupIndex) { Matcher matcher = pattern.matcher(source.getBytes()); validateGroup(groupIndex, matcher.getEagerRegion()); int group = toIntExact(groupIndex); int offset = matcher.search(0, source.length(), Option.DEFAULT); if (offset == -1) { return null; } Region region = matcher.getEagerRegion(); int beg = region.beg[group]; int end = region.end[group]; if (beg == -1) { // end == -1 must be true return null; } Slice slice = source.slice(beg, end - beg); return slice; }
Example 4
Source File: JoniRegexpFunctions.java From presto with Apache License 2.0 | 6 votes |
/**
 * Splits {@code source} around every match of {@code pattern} and returns the
 * pieces as a VARCHAR block. The text after the last match (possibly empty) is
 * always written as the final piece.
 */
@ScalarFunction
@LiteralParameters("x")
@Description("Returns array of strings split by pattern")
@SqlType("array(varchar(x))")
public static Block regexpSplit(
        @SqlType("varchar(x)") Slice source,
        @SqlType(JoniRegexpType.NAME) JoniRegexp pattern) {
    Matcher matcher = pattern.matcher(source.getBytes());
    BlockBuilder pieces = VARCHAR.createBlockBuilder(null, 32);

    int pieceStart = 0;
    int searchFrom = 0;
    while (matcher.search(searchFrom, source.length(), Option.DEFAULT) != -1) {
        searchFrom = getNextStart(source, matcher);
        // the piece runs from the end of the previous match to the start of this one
        VARCHAR.writeSlice(pieces, source.slice(pieceStart, matcher.getBegin() - pieceStart));
        pieceStart = matcher.getEnd();
    }

    VARCHAR.writeSlice(pieces, source.slice(pieceStart, source.length() - pieceStart));
    return pieces.build();
}
Example 5
Source File: Re2JRegexp.java From presto with Apache License 2.0 | 5 votes |
/**
 * Splits {@code source} around every match of this pattern and returns the
 * pieces as a VARCHAR block. The remainder after the last match is always
 * written as the final piece.
 */
public Block split(Slice source) {
    Matcher matcher = re2jPattern.matcher(source);
    BlockBuilder pieces = VARCHAR.createBlockBuilder(null, 32);

    int pieceStart = 0;
    while (matcher.find()) {
        int pieceLength = matcher.start() - pieceStart;
        VARCHAR.writeSlice(pieces, source.slice(pieceStart, pieceLength));
        pieceStart = matcher.end();
    }

    int tailLength = source.length() - pieceStart;
    VARCHAR.writeSlice(pieces, source.slice(pieceStart, tailLength));
    return pieces.build();
}
Example 6
Source File: Re2JRegexp.java From hive-third-functions with Apache License 2.0 | 5 votes |
/**
 * Splits {@code source} around every match of this pattern and returns the
 * pieces as Java strings. The remainder after the last match (possibly empty)
 * is always added as the final element.
 *
 * <p>Each piece is decoded with {@link Slice#toStringUtf8()}. The no-argument
 * {@code Slice.toString()} returns a debug description of the slice (base,
 * address, length) rather than its text, so it must not be used here.
 *
 * @param source the UTF-8 encoded text to split
 * @return the decoded pieces, in order of appearance
 */
public List<Object> split(Slice source) {
    Matcher matcher = re2jPattern.matcher(source);
    List<Object> list = Lists.newArrayList();

    int lastEnd = 0;
    while (matcher.find()) {
        Slice slice = source.slice(lastEnd, matcher.start() - lastEnd);
        lastEnd = matcher.end();
        list.add(slice.toStringUtf8());
    }

    list.add(source.slice(lastEnd, source.length() - lastEnd).toStringUtf8());
    return list;
}
Example 7
Source File: PartitionTransforms.java From presto with Apache License 2.0 | 5 votes |
/**
 * Cuts {@code value} at the byte offset of code point number {@code max}.
 * The input is returned unchanged when its byte length is already at most
 * {@code max}, or when {@code offsetOfCodePoint} reports a negative offset.
 */
private static Slice truncateVarchar(Slice value, int max) {
    if (value.length() > max) {
        int end = offsetOfCodePoint(value, 0, max);
        if (end >= 0) {
            return value.slice(0, end);
        }
    }
    return value;
}
Example 8
Source File: Varchars.java From presto with Apache License 2.0 | 5 votes |
/**
 * Returns the prefix of {@code slice} whose byte length is computed by
 * {@code byteCount} for the limit {@code maxLength}.
 *
 * @return {@code Slices.EMPTY_SLICE} when {@code maxLength} is zero, otherwise the prefix slice
 * @throws NullPointerException if {@code slice} is null
 * @throws IllegalArgumentException if {@code maxLength} is negative
 */
public static Slice truncateToLength(Slice slice, int maxLength) {
    requireNonNull(slice, "slice is null");
    if (maxLength > 0) {
        int keptBytes = byteCount(slice, 0, slice.length(), maxLength);
        return slice.slice(0, keptBytes);
    }
    if (maxLength == 0) {
        return Slices.EMPTY_SLICE;
    }
    throw new IllegalArgumentException("Max length must be greater or equal than zero");
}
Example 9
Source File: ShardStats.java From presto with Apache License 2.0 | 5 votes |
/**
 * Limits {@code slice} to its first {@code MAX_BINARY_INDEX_SIZE} bytes;
 * a slice that is not longer than that is returned as is.
 */
public static Slice truncateIndexValue(Slice slice) {
    return slice.length() > MAX_BINARY_INDEX_SIZE
            ? slice.slice(0, MAX_BINARY_INDEX_SIZE)
            : slice;
}
Example 10
Source File: TestLikeFunctions.java From presto with Apache License 2.0 | 5 votes |
/**
 * Copies the UTF-8 bytes of {@code value} into a buffer that is five bytes
 * larger, starting at index 2, and returns the view covering exactly those
 * bytes, so the returned slice starts at a non-zero offset of its buffer.
 */
private static Slice offsetHeapSlice(String value) {
    Slice utf8 = Slices.utf8Slice(value);
    int length = utf8.length();

    Slice padded = Slices.allocate(length + 5);
    padded.setBytes(2, utf8);
    return padded.slice(2, length);
}
Example 11
Source File: JoniRegexpFunctions.java From presto with Apache License 2.0 | 5 votes |
/**
 * Collects, for every match of {@code pattern} in {@code source}, the bytes
 * captured by group {@code groupIndex}. A match in which the group has a start
 * or end offset of -1 contributes a null entry.
 */
@Description("Group(s) extracted using the given pattern")
@ScalarFunction
@LiteralParameters("x")
@SqlType("array(varchar(x))")
public static Block regexpExtractAll(
        @SqlType("varchar(x)") Slice source,
        @SqlType(JoniRegexpType.NAME) JoniRegexp pattern,
        @SqlType(StandardTypes.BIGINT) long groupIndex) {
    Matcher matcher = pattern.matcher(source.getBytes());
    // the group index is validated against the matcher's region before searching
    validateGroup(groupIndex, matcher.getEagerRegion());
    BlockBuilder captures = VARCHAR.createBlockBuilder(null, 32);
    int group = toIntExact(groupIndex);

    int searchFrom = 0;
    while (matcher.search(searchFrom, source.length(), Option.DEFAULT) != -1) {
        searchFrom = getNextStart(source, matcher);

        Region region = matcher.getEagerRegion();
        int begin = region.beg[group];
        int end = region.end[group];
        if (begin != -1 && end != -1) {
            VARCHAR.writeSlice(captures, source.slice(begin, end - begin));
        } else {
            captures.appendNull();
        }
    }
    return captures.build();
}
Example 12
Source File: OrcDataSourceUtils.java From presto with Apache License 2.0 | 5 votes |
/**
 * Returns the part of a buffered slice that corresponds to {@code diskRange}.
 * A buffer's range does not need to equal {@code diskRange}, but one of the
 * buffer ranges must contain it completely.
 *
 * @throws IllegalStateException if no buffer range contains {@code diskRange}
 */
public static Slice getDiskRangeSlice(DiskRange diskRange, Map<DiskRange, Slice> buffers) {
    for (Entry<DiskRange, Slice> candidate : buffers.entrySet()) {
        DiskRange bufferRange = candidate.getKey();
        if (!bufferRange.contains(diskRange)) {
            continue;
        }
        // position of the requested range relative to the start of this buffer
        int relativeOffset = toIntExact(diskRange.getOffset() - bufferRange.getOffset());
        return candidate.getValue().slice(relativeOffset, diskRange.getLength());
    }
    throw new IllegalStateException("No matching buffer for disk range");
}
Example 13
Source File: CompressedOrcChunkLoader.java From presto with Apache License 2.0 | 5 votes |
@Override public Slice nextChunk() throws IOException { // 3 byte header // NOTE: this must match BLOCK_HEADER_SIZE ensureCompressedBytesAvailable(3); lastCheckpoint = createInputStreamCheckpoint(getCurrentCompressedOffset(), nextUncompressedOffset); int b0 = compressedBufferStream.readUnsignedByte(); int b1 = compressedBufferStream.readUnsignedByte(); int b2 = compressedBufferStream.readUnsignedByte(); boolean isUncompressed = (b0 & 0x01) == 1; int chunkLength = (b2 << 15) | (b1 << 7) | (b0 >>> 1); ensureCompressedBytesAvailable(chunkLength); Slice chunk = compressedBufferStream.readSlice(chunkLength); if (!isUncompressed) { int uncompressedSize = decompressor.decompress( chunk.byteArray(), chunk.byteArrayOffset(), chunk.length(), createOutputBuffer()); chunk = Slices.wrappedBuffer(decompressorOutputBuffer, 0, uncompressedSize); } if (nextUncompressedOffset != 0) { chunk = chunk.slice(nextUncompressedOffset, chunk.length() - nextUncompressedOffset); nextUncompressedOffset = 0; // if we positioned to the end of the chunk, read the next one if (chunk.length() == 0) { chunk = nextChunk(); } } return chunk; }
Example 14
Source File: BlackHolePageSourceProvider.java From presto with Apache License 2.0 | 4 votes |
private Block createZeroBlock(Type type, int rowsCount, Slice constantSlice) { checkArgument(isSupportedType(type), "Unsupported type [%s]", type); Slice slice; // do not exceed varchar limit if (isVarcharType(type) && !((VarcharType) type).isUnbounded()) { slice = constantSlice.slice(0, Math.min(((VarcharType) type).getBoundedLength(), constantSlice.length())); } else if (isLongDecimal(type)) { slice = encodeScaledValue(ZERO); } else { slice = constantSlice; } BlockBuilder builder; if (type instanceof FixedWidthType) { builder = type.createBlockBuilder(null, rowsCount); } else { builder = type.createBlockBuilder(null, rowsCount, slice.length()); } for (int i = 0; i < rowsCount; i++) { Class<?> javaType = type.getJavaType(); if (javaType == boolean.class) { type.writeBoolean(builder, false); } else if (javaType == long.class) { type.writeLong(builder, 0); } else if (javaType == double.class) { type.writeDouble(builder, 0.0); } else if (javaType == Slice.class) { requireNonNull(slice, "slice is null"); type.writeSlice(builder, slice, 0, slice.length()); } else { throw new UnsupportedOperationException("Unknown javaType: " + javaType.getName()); } } return builder.build(); }
Example 15
Source File: VarbinaryFunctions.java From presto with Apache License 2.0 | 4 votes |
private static Slice pad(Slice inputSlice, long targetLength, Slice padSlice, int paddingOffset) { checkCondition( 0 <= targetLength && targetLength <= Integer.MAX_VALUE, INVALID_FUNCTION_ARGUMENT, "Target length must be in the range [0.." + Integer.MAX_VALUE + "]"); checkCondition(padSlice.length() > 0, INVALID_FUNCTION_ARGUMENT, "Padding bytes must not be empty"); int inputLength = inputSlice.length(); int resultLength = (int) targetLength; // if our target length is the same as our string then return our string if (inputLength == resultLength) { return inputSlice; } // if our string is bigger than requested then truncate if (inputLength > resultLength) { return inputSlice.slice(0, resultLength); } // preallocate the result Slice buffer = Slices.allocate(resultLength); // fill in the existing string int fillLength = resultLength - inputLength; int startPointOfExistingText = (paddingOffset + fillLength) % resultLength; buffer.setBytes(startPointOfExistingText, inputSlice); // assign the pad string while there's enough space for it int byteIndex = paddingOffset; for (int i = 0; i < fillLength / padSlice.length(); i++) { buffer.setBytes(byteIndex, padSlice); byteIndex += padSlice.length(); } // handle the tail: at most we assign padStringLength - 1 code points buffer.setBytes(byteIndex, padSlice.getBytes(0, paddingOffset + fillLength - byteIndex)); return buffer; }
Example 16
Source File: SplitToMapFunction.java From presto with Apache License 2.0 | 4 votes |
@SqlType("map(varchar,varchar)") public Block splitToMap(@TypeParameter("map(varchar,varchar)") Type mapType, @SqlType(StandardTypes.VARCHAR) Slice string, @SqlType(StandardTypes.VARCHAR) Slice entryDelimiter, @SqlType(StandardTypes.VARCHAR) Slice keyValueDelimiter) { checkCondition(entryDelimiter.length() > 0, INVALID_FUNCTION_ARGUMENT, "entryDelimiter is empty"); checkCondition(keyValueDelimiter.length() > 0, INVALID_FUNCTION_ARGUMENT, "keyValueDelimiter is empty"); checkCondition(!entryDelimiter.equals(keyValueDelimiter), INVALID_FUNCTION_ARGUMENT, "entryDelimiter and keyValueDelimiter must not be the same"); Map<Slice, Slice> map = new HashMap<>(); int entryStart = 0; while (entryStart < string.length()) { // Extract key-value pair based on current index // then add the pair if it can be split by keyValueDelimiter Slice keyValuePair; int entryEnd = string.indexOf(entryDelimiter, entryStart); if (entryEnd >= 0) { keyValuePair = string.slice(entryStart, entryEnd - entryStart); } else { // The rest of the string is the last possible pair. keyValuePair = string.slice(entryStart, string.length() - entryStart); } int keyEnd = keyValuePair.indexOf(keyValueDelimiter); if (keyEnd >= 0) { int valueStart = keyEnd + keyValueDelimiter.length(); Slice key = keyValuePair.slice(0, keyEnd); Slice value = keyValuePair.slice(valueStart, keyValuePair.length() - valueStart); if (value.indexOf(keyValueDelimiter) >= 0) { throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Key-value delimiter must appear exactly once in each entry. Bad input: '" + keyValuePair.toStringUtf8() + "'"); } if (map.containsKey(key)) { throw new PrestoException(INVALID_FUNCTION_ARGUMENT, format("Duplicate keys (%s) are not allowed", key.toStringUtf8())); } map.put(key, value); } else { throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Key-value delimiter must appear exactly once in each entry. 
Bad input: '" + keyValuePair.toStringUtf8() + "'"); } if (entryEnd < 0) { // No more pairs to add break; } // Next possible pair is placed next to the current entryDelimiter entryStart = entryEnd + entryDelimiter.length(); } if (pageBuilder.isFull()) { pageBuilder.reset(); } BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(0); BlockBuilder singleMapBlockBuilder = blockBuilder.beginBlockEntry(); for (Map.Entry<Slice, Slice> entry : map.entrySet()) { VARCHAR.writeSlice(singleMapBlockBuilder, entry.getKey()); VARCHAR.writeSlice(singleMapBlockBuilder, entry.getValue()); } blockBuilder.closeEntry(); pageBuilder.declarePosition(); return (Block) mapType.getObject(blockBuilder, blockBuilder.getPositionCount() - 1); }
Example 17
Source File: Chars.java From presto with Apache License 2.0 | 4 votes |
/**
 * Returns the prefix of {@code slice} whose length is given by
 * {@code byteCountWithoutTrailingSpace} over the whole slice.
 *
 * @throws NullPointerException if {@code slice} is null
 */
public static Slice trimTrailingSpaces(Slice slice) {
    requireNonNull(slice, "slice is null");
    int trimmedLength = byteCountWithoutTrailingSpace(slice, 0, slice.length());
    return slice.slice(0, trimmedLength);
}
Example 18
Source File: OrcReader.java From spliceengine with GNU Affero General Public License v3.0 | 4 votes |
public OrcReader(OrcDataSource orcDataSource, MetadataReader metadataReader, DataSize maxMergeDistance, DataSize maxReadSize) throws IOException { orcDataSource = wrapWithCacheIfTiny(requireNonNull(orcDataSource, "orcDataSource is null"), maxMergeDistance); this.orcDataSource = orcDataSource; this.metadataReader = requireNonNull(metadataReader, "metadataReader is null"); this.maxMergeDistance = requireNonNull(maxMergeDistance, "maxMergeDistance is null"); this.maxReadSize = requireNonNull(maxReadSize, "maxReadSize is null"); // // Read the file tail: // // variable: Footer // variable: Metadata // variable: PostScript - contains length of footer and metadata // 3 bytes: file magic "ORC" // 1 byte: postScriptSize = PostScript + Magic // figure out the size of the file using the option or filesystem long size = orcDataSource.getSize(); if (size <= 0) { throw new OrcCorruptionException("Malformed ORC file %s. Invalid file size %s", orcDataSource, size); } // Read the tail of the file byte[] buffer = new byte[toIntExact(min(size, EXPECTED_FOOTER_SIZE))]; orcDataSource.readFully(size - buffer.length, buffer); // get length of PostScript - last byte of the file int postScriptSize = buffer[buffer.length - SIZE_OF_BYTE] & 0xff; // make sure this is an ORC file and not an RCFile or something else verifyOrcFooter(orcDataSource, postScriptSize, buffer); // decode the post script int postScriptOffset = buffer.length - SIZE_OF_BYTE - postScriptSize; PostScript postScript = metadataReader.readPostScript(buffer, postScriptOffset, postScriptSize); // verify this is a supported version checkOrcVersion(orcDataSource, postScript.getVersion()); // check compression codec is supported this.compressionKind = postScript.getCompression(); this.hiveWriterVersion = postScript.getHiveWriterVersion(); this.bufferSize = toIntExact(postScript.getCompressionBlockSize()); int footerSize = toIntExact(postScript.getFooterLength()); int metadataSize = toIntExact(postScript.getMetadataLength()); // 
check if extra bytes need to be read Slice completeFooterSlice; int completeFooterSize = footerSize + metadataSize + postScriptSize + SIZE_OF_BYTE; if (completeFooterSize > buffer.length) { // allocate a new buffer large enough for the complete footer byte[] newBuffer = new byte[completeFooterSize]; completeFooterSlice = Slices.wrappedBuffer(newBuffer); // initial read was not large enough, so read missing section orcDataSource.readFully(size - completeFooterSize, newBuffer, 0, completeFooterSize - buffer.length); // copy already read bytes into the new buffer completeFooterSlice.setBytes(completeFooterSize - buffer.length, buffer); } else { // footer is already in the bytes in buffer, just adjust position, length completeFooterSlice = Slices.wrappedBuffer(buffer, buffer.length - completeFooterSize, completeFooterSize); } // read metadata Slice metadataSlice = completeFooterSlice.slice(0, metadataSize); try (InputStream metadataInputStream = new OrcInputStream(orcDataSource.toString(), metadataSlice.getInput(), compressionKind, bufferSize, new AggregatedMemoryContext())) { this.metadata = metadataReader.readMetadata(hiveWriterVersion, metadataInputStream); } // read footer Slice footerSlice = completeFooterSlice.slice(metadataSize, footerSize); try (InputStream footerInputStream = new OrcInputStream(orcDataSource.toString(), footerSlice.getInput(), compressionKind, bufferSize, new AggregatedMemoryContext())) { this.footer = metadataReader.readFooter(hiveWriterVersion, footerInputStream); } }
Example 19
Source File: JoniRegexpReplaceLambdaFunction.java From presto with Apache License 2.0 | 4 votes |
@LiteralParameters("x") @SqlType("varchar") @SqlNullable public Slice regexpReplace( @SqlType("varchar") Slice source, @SqlType(JoniRegexpType.NAME) JoniRegexp pattern, @SqlType("function(array(varchar), varchar(x))") UnaryFunctionInterface replaceFunction) { // If there is no match we can simply return the original source without doing copy. Matcher matcher = pattern.matcher(source.getBytes()); if (matcher.search(0, source.length(), Option.DEFAULT) == -1) { return source; } SliceOutput output = new DynamicSliceOutput(source.length()); // Prepare a BlockBuilder that will be used to create the target block // that will be passed to the lambda function. if (pageBuilder.isFull()) { pageBuilder.reset(); } BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(0); int groupCount = pattern.regex().numberOfCaptures(); int appendPosition = 0; int nextStart; do { // nextStart is the same as the last appendPosition, unless the last match was zero-width. if (matcher.getEnd() == matcher.getBegin()) { if (matcher.getBegin() < source.length()) { nextStart = matcher.getEnd() + lengthOfCodePointFromStartByte(source.getByte(matcher.getBegin())); } else { // last match is empty and we matched end of source, move past the source length to terminate the loop nextStart = matcher.getEnd() + 1; } } else { nextStart = matcher.getEnd(); } // Append the un-matched part Slice unmatched = source.slice(appendPosition, matcher.getBegin() - appendPosition); appendPosition = matcher.getEnd(); output.appendBytes(unmatched); // Append the capturing groups to the target block that will be passed to lambda Region matchedRegion = matcher.getEagerRegion(); for (int i = 1; i <= groupCount; i++) { // Add to the block builder if the matched region is not null. 
In Joni null is represented as [-1, -1] if (matchedRegion.beg[i] >= 0 && matchedRegion.end[i] >= 0) { VARCHAR.writeSlice(blockBuilder, source, matchedRegion.beg[i], matchedRegion.end[i] - matchedRegion.beg[i]); } else { blockBuilder.appendNull(); } } pageBuilder.declarePositions(groupCount); Block target = blockBuilder.getRegion(blockBuilder.getPositionCount() - groupCount, groupCount); // Call the lambda function to replace the block, and append the result to output Slice replaced = (Slice) replaceFunction.apply(target); if (replaced == null) { // replacing a substring with null (unknown) makes the entire string null return null; } output.appendBytes(replaced); } while (matcher.search(nextStart, source.length(), Option.DEFAULT) != -1); // Append the last un-matched part output.writeBytes(source, appendPosition, source.length() - appendPosition); return output.slice(); }
Example 20
Source File: RE2.java From hive-third-functions with Apache License 2.0 | 3 votes |
/** * Returns a {@link Slice} holding the text of the leftmost match in * {@code s} of this regular expression. * * <p>If there is no match, the return value is an empty {@link Slice}, but it * will also be empty if the regular expression successfully matches * an empty {@link Slice}. Use {@link #findIndex} or * {@link #findSubmatch} if it is necessary to distinguish these * cases. */ // This is visible for testing. Slice find(Slice s) { int[] a = doExecute(com.github.aaronshan.functions.regexp.re2j.MachineInput.fromUTF8(s), 0, Anchor.UNANCHORED, 2); if (a == null) { return Slices.EMPTY_SLICE; } return s.slice(a[0], a[1] - a[0]); }