io.airlift.slice.Slice#toStringUtf8

Source File: AccumuloRowSerializer.java From presto with Apache License 2.0

6 votes

/**
 * Decodes the value stored at one position of a Block into a plain Java object.
 * Recursive helper for {@link AccumuloRowSerializer#getArrayFromBlock} and
 * {@link AccumuloRowSerializer#getMapFromBlock}.
 * <p>
 * Array and map types are delegated to those two methods. Types whose Java
 * representation is {@code Slice} become a {@code String} when the type equals
 * {@code VarcharType.VARCHAR} and a {@code byte[]} otherwise. Every other type
 * is returned as its native value.
 *
 * @param type Presto type of the value
 * @param block Block holding the value
 * @param position Position within the block to decode
 * @return Java object read from the Block
 */
static Object readObject(Type type, Block block, int position)
{
    if (Types.isArrayType(type)) {
        return getArrayFromBlock(Types.getElementType(type), block.getObject(position, Block.class));
    }

    if (Types.isMapType(type)) {
        return getMapFromBlock(type, block.getObject(position, Block.class));
    }

    boolean sliceBacked = type.getJavaType() == Slice.class;
    Object nativeValue = TypeUtils.readNativeValue(type, block, position);
    if (!sliceBacked) {
        return nativeValue;
    }

    // NOTE(review): only the exact VARCHAR instance is decoded to a String here;
    // any other Slice-backed type yields raw bytes — confirm that is intended.
    Slice slice = (Slice) nativeValue;
    if (type.equals(VarcharType.VARCHAR)) {
        return slice.toStringUtf8();
    }
    return slice.getBytes();
}

Source File: Re2JRegexp.java From hive-third-functions with Apache License 2.0

6 votes

/**
 * Compiles the given pattern twice: once as supplied, and once with its leading
 * {@code .*?} prefix stripped (when it has one).
 *
 * @param dfaStatesLimit maximum number of DFA states passed to the RE2J options
 * @param dfaRetries number of DFA retries passed to the RE2J options
 * @param pattern UTF-8 encoded regular expression
 */
public Re2JRegexp(int dfaStatesLimit, int dfaRetries, Slice pattern) {
    this.dfaStatesLimit = dfaStatesLimit;
    this.dfaRetries = dfaRetries;

    Options compileOptions = Options.builder()
            .setAlgorithm(DFA_FALLBACK_TO_NFA)
            .setMaximumNumberOfDFAStates(dfaStatesLimit)
            .setNumberOfDFARetries(dfaRetries)
            .setEventsListener(new RE2JEventsListener())
            .build();

    String fullPattern = pattern.toStringUtf8();
    re2jPattern = Pattern.compile(fullPattern, compileOptions);

    // The DFA can jump straight to the first byte of a potential match, but that
    // optimization is defeated in Pattern.find() when the pattern starts with .*?
    // so a second pattern without that prefix is kept.
    java.util.regex.Matcher prefixMatcher = DOT_STAR_PREFIX_PATTERN.matcher(fullPattern);
    checkState(prefixMatcher.matches());
    String corePattern = prefixMatcher.group(CORE_PATTERN_INDEX);

    // Reuse the already compiled pattern when there was no prefix to strip.
    re2jPatternWithoutDotStartPrefix = corePattern.equals(fullPattern)
            ? re2jPattern
            : Pattern.compile(corePattern, compileOptions);
}

Source File: UuidOperators.java From presto with Apache License 2.0

6 votes

/**
 * Casts a varchar value to the UUID type.
 * <p>
 * The text is parsed with {@link java.util.UUID#fromString}; in addition the
 * input Slice must be exactly 36 long, otherwise the cast is rejected even if
 * parsing succeeded.
 *
 * @param slice varchar value to cast
 * @return the UUID as a Slice wrapping its most and least significant bits
 * @throws PrestoException with {@code INVALID_CAST_ARGUMENT} when the text cannot be parsed
 *         or has the wrong length
 */
@LiteralParameters("x")
@ScalarOperator(CAST)
@SqlType(StandardTypes.UUID)
public static Slice castFromVarcharToUuid(@SqlType("varchar(x)") Slice slice)
{
    try {
        java.util.UUID uuid = java.util.UUID.fromString(slice.toStringUtf8());
        if (slice.length() == 36) {
            return wrappedLongArray(uuid.getMostSignificantBits(), uuid.getLeastSignificantBits());
        }
        throw new PrestoException(INVALID_CAST_ARGUMENT, "Invalid UUID string length: " + slice.length());
    }
    catch (IllegalArgumentException e) {
        // Keep the parser's exception as the cause so the original failure is not lost.
        throw new PrestoException(INVALID_CAST_ARGUMENT, "Cannot cast value to UUID: " + slice.toStringUtf8(), e);
    }
}

Source File: VarcharToTimestampWithTimeZoneCast.java From presto with Apache License 2.0

6 votes

/**
 * Casts a varchar value to a short {@code timestamp(p) with time zone}.
 * <p>
 * The value is trimmed before parsing. When the parsed text carries no time zone
 * (the resolver receives {@code null}), the session's time zone is used.
 *
 * @param precision timestamp precision {@code p}
 * @param session session supplying the default time zone
 * @param value varchar value to cast
 * @return the result of {@code toShort} for the trimmed text
 * @throws PrestoException with {@code INVALID_CAST_ARGUMENT} when parsing raises
 *         an {@link IllegalArgumentException}
 */
@LiteralParameters({"x", "p"})
@SqlType("timestamp(p) with time zone")
public static long castToShort(@LiteralParameter("p") long precision, ConnectorSession session, @SqlType("varchar(x)") Slice value)
{
    try {
        String text = trim(value).toStringUtf8();
        return toShort(
                (int) precision,
                text,
                timezone -> (timezone == null) ? session.getTimeZoneKey().getZoneId() : ZoneId.of(timezone));
    }
    catch (IllegalArgumentException e) {
        throw new PrestoException(INVALID_CAST_ARGUMENT, "Value cannot be cast to timestamp: " + value.toStringUtf8(), e);
    }
}

Source File: MatchQueryFunction.java From presto-connectors with Apache License 2.0

5 votes

/**
 * Builds a match query on {@code MATCH_COLUMN_SEP} for the given filter text and
 * returns the query's string form.
 *
 * @param filter text to match; may be {@code null}
 * @return the query's string form as a UTF-8 Slice, or {@code null} when {@code filter} is {@code null}
 */
@ScalarFunction("match_query")
@Description("es match_query")
@SqlType(StandardTypes.VARCHAR)
@SqlNullable
public static Slice matchQuery(
        @SqlType(StandardTypes.VARCHAR) Slice filter)
{
    if (filter == null) {
        return null;
    }

    QueryBuilder query = QueryBuilders.matchQuery(MATCH_COLUMN_SEP, filter.toStringUtf8());
    String rendered = query.toString();
    return Slices.utf8Slice(rendered);
}

Source File: TestVarBinaryMinAggregation.java From presto with Apache License 2.0

5 votes

/**
 * Computes the expected minimum over the big-endian byte encodings of the
 * integers in {@code [start, start + length)}.
 *
 * @param start first integer of the range
 * @param length number of integers in the range
 * @return the smallest encoded value converted with {@code toStringUtf8()}, or
 *         {@code null} when {@code length} is zero
 */
@Override
protected Object getExpectedValue(int start, int length)
{
    if (length == 0) {
        return null;
    }

    Slice smallest = null;
    int end = start + length;
    for (int value = start; value < end; value++) {
        Slice candidate = Slices.wrappedBuffer(Ints.toByteArray(value));
        if (smallest == null) {
            smallest = candidate;
        }
        else {
            smallest = Ordering.natural().min(smallest, candidate);
        }
    }
    return smallest.toStringUtf8();
}

Source File: TimeOperators.java From presto with Apache License 2.0

5 votes

/**
 * Casts a varchar value to the TIME type.
 * <p>
 * Sessions with legacy timestamp semantics parse the text relative to the
 * session time zone; all other sessions parse it without a time zone.
 *
 * @param session session providing the legacy flag and time zone key
 * @param value varchar value to cast
 * @return the parsed time value
 * @throws PrestoException with {@code INVALID_CAST_ARGUMENT} when parsing raises
 *         an {@link IllegalArgumentException}
 */
@ScalarOperator(CAST)
@LiteralParameters("x")
@SqlType(StandardTypes.TIME)
public static long castFromSlice(ConnectorSession session, @SqlType("varchar(x)") Slice value)
{
    try {
        if (!session.isLegacyTimestamp()) {
            return parseTimeWithoutTimeZone(value.toStringUtf8());
        }
        return DateTimeUtils.parseLegacyTime(session.getTimeZoneKey(), value.toStringUtf8());
    }
    catch (IllegalArgumentException e) {
        throw new PrestoException(INVALID_CAST_ARGUMENT, "Value cannot be cast to time: " + value.toStringUtf8(), e);
    }
}

Source File: IpAddressOperators.java From presto with Apache License 2.0

5 votes

/**
 * Casts a varchar value to the IPADDRESS type, which is always returned as 16 bytes.
 * <p>
 * A 4-byte (IPv4) address is placed in the last four bytes of a 16-byte array
 * whose bytes 10 and 11 are set to {@code 0xff}; a 16-byte address is used as is.
 *
 * @param slice textual IP address
 * @return Slice wrapping the 16-byte address
 * @throws PrestoException with {@code INVALID_CAST_ARGUMENT} when the text is not a valid
 *         address, or {@code GENERIC_INTERNAL_ERROR} when the parsed address is neither 4 nor 16 bytes
 */
@LiteralParameters("x")
@ScalarOperator(CAST)
@SqlType(StandardTypes.IPADDRESS)
public static Slice castFromVarcharToIpAddress(@SqlType("varchar(x)") Slice slice)
{
    byte[] address;
    try {
        address = InetAddresses.forString(slice.toStringUtf8()).getAddress();
    }
    catch (IllegalArgumentException e) {
        // Keep the parser's exception as the cause so the original failure is not lost.
        throw new PrestoException(INVALID_CAST_ARGUMENT, "Cannot cast value to IPADDRESS: " + slice.toStringUtf8(), e);
    }

    byte[] bytes;
    if (address.length == 4) {
        // Embed the IPv4 address in a 16-byte array: 0xff 0xff marker followed by the 4 address bytes.
        bytes = new byte[16];
        bytes[10] = (byte) 0xff;
        bytes[11] = (byte) 0xff;
        arraycopy(address, 0, bytes, 12, 4);
    }
    else if (address.length == 16) {
        bytes = address;
    }
    else {
        throw new PrestoException(GENERIC_INTERNAL_ERROR, "Invalid InetAddress length: " + address.length);
    }

    return wrappedBuffer(bytes);
}

Source File: JsonUtil.java From presto with Apache License 2.0

5 votes

/**
 * Converts JSON to a String for use in an error message, cutting it off at
 * {@code MAX_JSON_LENGTH_IN_ERROR_MESSAGE} and appending {@code "...(truncated)"}
 * when it is longer than that.
 *
 * @param json JSON text as a Slice
 * @return the full text, or its truncated prefix followed by the truncation marker
 */
public static String truncateIfNecessaryForErrorMessage(Slice json)
{
    if (json.length() > MAX_JSON_LENGTH_IN_ERROR_MESSAGE) {
        // NOTE(review): the cut position is a Slice offset; if that is a byte offset the
        // cut may fall inside a multi-byte UTF-8 character — confirm this is acceptable.
        Slice head = json.slice(0, MAX_JSON_LENGTH_IN_ERROR_MESSAGE);
        return head.toStringUtf8() + "...(truncated)";
    }
    return json.toStringUtf8();
}

Source File: Re2JRegexp.java From presto with Apache License 2.0

5 votes

/**
 * Compiles the given pattern twice: once as supplied, and once with its leading
 * {@code .*?} prefix stripped (when it has one).
 *
 * @param dfaStatesLimit maximum number of DFA states passed to the RE2J options
 * @param dfaRetries number of DFA retries passed to the RE2J options
 * @param pattern UTF-8 encoded regular expression
 */
public Re2JRegexp(int dfaStatesLimit, int dfaRetries, Slice pattern)
{
    this.dfaStatesLimit = dfaStatesLimit;
    this.dfaRetries = dfaRetries;

    Options compileOptions = Options.builder()
            .setAlgorithm(DFA_FALLBACK_TO_NFA)
            .setMaximumNumberOfDFAStates(dfaStatesLimit)
            .setNumberOfDFARetries(dfaRetries)
            .setEventsListener(new RE2JEventsListener())
            .build();

    String fullPattern = pattern.toStringUtf8();
    re2jPattern = Pattern.compile(fullPattern, compileOptions);

    // The DFA can jump straight to the first byte of a potential match, but that
    // optimization is defeated in Pattern.find() when the pattern starts with .*?
    // so a second pattern without that prefix is kept.
    java.util.regex.Matcher prefixMatcher = DOT_STAR_PREFIX_PATTERN.matcher(fullPattern);
    checkState(prefixMatcher.matches());
    String corePattern = prefixMatcher.group(CORE_PATTERN_INDEX);

    // Reuse the already compiled pattern when there was no prefix to strip.
    re2jPatternWithoutDotStartPrefix = corePattern.equals(fullPattern)
            ? re2jPattern
            : Pattern.compile(corePattern, compileOptions);
}

Source File: WordStemFunction.java From presto with Apache License 2.0

5 votes

/**
 * Stems a word using the stemmer registered for the given language.
 *
 * @param slice word to stem
 * @param language language code used to look up the stemmer in {@code STEMMERS}
 * @return the stemmed word
 * @throws PrestoException with {@code INVALID_FUNCTION_ARGUMENT} when no stemmer is
 *         registered for {@code language}
 */
@Description("Returns the stem of a word in the given language")
@ScalarFunction
@LiteralParameters("x")
@SqlType("varchar(x)")
public static Slice wordStem(@SqlType("varchar(x)") Slice slice, @SqlType("varchar(2)") Slice language)
{
    Supplier<SnowballProgram> stemmerFactory = STEMMERS.get(language);
    if (stemmerFactory != null) {
        return wordStem(slice, stemmerFactory.get());
    }
    throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Unknown stemmer language: " + language.toStringUtf8());
}

Source File: TestVarBinaryMaxAggregation.java From presto with Apache License 2.0

5 votes

/**
 * Computes the expected maximum over the big-endian byte encodings of the
 * integers in {@code [start, start + length)}.
 *
 * @param start first integer of the range
 * @param length number of integers in the range
 * @return the largest encoded value converted with {@code toStringUtf8()}, or
 *         {@code null} when {@code length} is zero
 */
@Override
protected Object getExpectedValue(int start, int length)
{
    if (length == 0) {
        return null;
    }

    Slice largest = null;
    int end = start + length;
    for (int value = start; value < end; value++) {
        Slice candidate = Slices.wrappedBuffer(Ints.toByteArray(value));
        if (largest == null) {
            largest = candidate;
        }
        else {
            largest = Ordering.natural().max(largest, candidate);
        }
    }
    return largest.toStringUtf8();
}

Source File: StringFunctions.java From presto with Apache License 2.0

5 votes

/**
 * Counts the code points in a Slice, failing on the first position where
 * {@code tryGetCodePointAt} reports a negative result.
 *
 * @param slice text to count
 * @return number of code points in {@code slice}
 * @throws PrestoException with {@code INVALID_FUNCTION_ARGUMENT} when an invalid
 *         sequence is encountered
 */
private static int safeCountCodePoints(Slice slice)
{
    int count = 0;
    int offset = 0;
    while (offset < slice.length()) {
        int codePoint = tryGetCodePointAt(slice, offset);
        if (codePoint < 0) {
            throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Invalid UTF-8 encoding in characters: " + slice.toStringUtf8());
        }
        // Advance by the encoded width of the code point just read.
        offset += lengthOfCodePoint(codePoint);
        count++;
    }
    return count;
}

Source File: MatchQueryFunction.java From presto-connectors with Apache License 2.0

5 votes

/**
 * Builds a match query on {@code MATCH_COLUMN_SEP} for the given filter text and
 * returns the query's string form.
 *
 * @param filter text to match; may be {@code null}
 * @return the query's string form as a UTF-8 Slice, or {@code null} when {@code filter} is {@code null}
 */
@ScalarFunction("match_query")
@Description("es match_query")
@SqlType(StandardTypes.VARCHAR)
@SqlNullable
public static Slice matchQuery(
        @SqlType(StandardTypes.VARCHAR) Slice filter)
{
    if (filter == null) {
        return null;
    }

    QueryBuilder query = QueryBuilders.matchQuery(MATCH_COLUMN_SEP, filter.toStringUtf8());
    String rendered = query.toString();
    return Slices.utf8Slice(rendered);
}

Source File: GeoFunctions.java From presto with Apache License 2.0

5 votes

/**
 * Parses WKT text into a geometry and clears its spatial reference.
 *
 * @param input WKT text as a Slice
 * @return the parsed geometry with its spatial reference set to {@code null}
 * @throws PrestoException with {@code INVALID_FUNCTION_ARGUMENT} when parsing raises
 *         an {@link IllegalArgumentException}
 */
private static OGCGeometry geometryFromText(Slice input)
{
    OGCGeometry parsed;
    try {
        String wkt = input.toStringUtf8();
        parsed = OGCGeometry.fromText(wkt);
    }
    catch (IllegalArgumentException parseFailure) {
        throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Invalid WKT: " + input.toStringUtf8(), parseFailure);
    }

    parsed.setSpatialReference(null);
    return parsed;
}

Source File: MatchQueryFunction.java From presto-connectors with Apache License 2.0

5 votes

/**
 * Builds a match-phrase query on {@code MATCH_COLUMN_SEP} for the given filter
 * text and returns the query's string form.
 *
 * @param filter phrase to match; may be {@code null}
 * @return the query's string form as a UTF-8 Slice, or {@code null} when {@code filter} is {@code null}
 */
@ScalarFunction("match_phrase")
@Description("es match_phrase")
@SqlType(StandardTypes.VARCHAR)
@SqlNullable
public static Slice matchPhrase(
        @SqlType(StandardTypes.VARCHAR) Slice filter)
{
    if (filter == null) {
        return null;
    }

    QueryBuilder query = QueryBuilders.matchPhraseQuery(MATCH_COLUMN_SEP, filter.toStringUtf8());
    String rendered = query.toString();
    return Slices.utf8Slice(rendered);
}

Source File: SplitToMapFunction.java From presto with Apache License 2.0

4 votes

/**
 * Splits a string into entries on {@code entryDelimiter}, splits each entry into a
 * key and a value on {@code keyValueDelimiter}, and returns the result as a map block.
 * <p>
 * Both delimiters must be non-empty and different from each other. Every entry must
 * contain the key-value delimiter exactly once, and keys must be unique.
 *
 * @param mapType the {@code map(varchar,varchar)} type used to read back the built entry
 * @param string input text
 * @param entryDelimiter separator between entries
 * @param keyValueDelimiter separator between a key and its value
 * @return block holding the parsed map
 * @throws PrestoException with {@code INVALID_FUNCTION_ARGUMENT} for invalid delimiters,
 *         malformed entries or duplicate keys
 */
@SqlType("map(varchar,varchar)")
public Block splitToMap(@TypeParameter("map(varchar,varchar)") Type mapType, @SqlType(StandardTypes.VARCHAR) Slice string, @SqlType(StandardTypes.VARCHAR) Slice entryDelimiter, @SqlType(StandardTypes.VARCHAR) Slice keyValueDelimiter)
{
    checkCondition(entryDelimiter.length() > 0, INVALID_FUNCTION_ARGUMENT, "entryDelimiter is empty");
    checkCondition(keyValueDelimiter.length() > 0, INVALID_FUNCTION_ARGUMENT, "keyValueDelimiter is empty");
    checkCondition(!entryDelimiter.equals(keyValueDelimiter), INVALID_FUNCTION_ARGUMENT, "entryDelimiter and keyValueDelimiter must not be the same");

    Map<Slice, Slice> entries = new HashMap<>();
    int cursor = 0;
    while (cursor < string.length()) {
        // The current entry runs up to the next entry delimiter, or to the end of
        // the input when no further delimiter exists.
        int delimiterAt = string.indexOf(entryDelimiter, cursor);
        boolean lastEntry = delimiterAt < 0;
        int pairLength = (lastEntry ? string.length() : delimiterAt) - cursor;
        Slice pair = string.slice(cursor, pairLength);

        int separatorAt = pair.indexOf(keyValueDelimiter);
        if (separatorAt < 0) {
            throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Key-value delimiter must appear exactly once in each entry. Bad input: '" + pair.toStringUtf8() + "'");
        }

        int valueOffset = separatorAt + keyValueDelimiter.length();
        Slice key = pair.slice(0, separatorAt);
        Slice value = pair.slice(valueOffset, pair.length() - valueOffset);

        // A second occurrence of the key-value delimiter makes the entry ambiguous.
        if (value.indexOf(keyValueDelimiter) >= 0) {
            throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Key-value delimiter must appear exactly once in each entry. Bad input: '" + pair.toStringUtf8() + "'");
        }
        if (entries.containsKey(key)) {
            throw new PrestoException(INVALID_FUNCTION_ARGUMENT, format("Duplicate keys (%s) are not allowed", key.toStringUtf8()));
        }
        entries.put(key, value);

        if (lastEntry) {
            break;
        }
        // The next entry starts right after the delimiter that ended this one.
        cursor = delimiterAt + entryDelimiter.length();
    }

    if (pageBuilder.isFull()) {
        pageBuilder.reset();
    }
    BlockBuilder mapColumn = pageBuilder.getBlockBuilder(0);
    BlockBuilder entryWriter = mapColumn.beginBlockEntry();
    entries.forEach((key, value) -> {
        VARCHAR.writeSlice(entryWriter, key);
        VARCHAR.writeSlice(entryWriter, value);
    });
    mapColumn.closeEntry();
    pageBuilder.declarePosition();

    // The entry just closed is the last position of the block builder.
    return (Block) mapType.getObject(mapColumn, mapColumn.getPositionCount() - 1);
}

Source File: JoniRegexpFunctions.java From presto with Apache License 2.0

4 votes

/**
 * Expands a replacement template for one regex match and appends the result to {@code result}.
 * <p>
 * The template may reference named groups as <code>${name}</code>, numbered groups as
 * {@code $n}, and may escape any byte with a backslash; everything else is copied verbatim.
 *
 * @param result output receiving the expanded replacement
 * @param source text the match was found in; group contents are copied from it
 * @param pattern compiled pattern, used to resolve group names to numbers
 * @param region group boundaries of the current match
 * @param replacement replacement template
 * @throws PrestoException with {@code INVALID_FUNCTION_ARGUMENT} when the template ends after a
 *         {@code $} or backslash, references an unknown group, or has a non-digit after {@code $}
 */
private static void appendReplacement(SliceOutput result, Slice source, Regex pattern, Region region, Slice replacement)
{
    // Handle the following items:
    // 1. ${name};
    // 2. $0, $1, $123 (group 123, if exists; or group 12, if exists; or group 1);
    // 3. \\, \$, \t (literal 't').
    // 4. Anything that doesn't starts with \ or $ is considered regular bytes

    int idx = 0;

    while (idx < replacement.length()) {
        byte nextByte = replacement.getByte(idx);
        if (nextByte == '$') {
            idx++;
            if (idx == replacement.length()) { // not using checkArgument because `.toStringUtf8` is expensive
                throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Illegal replacement sequence: " + replacement.toStringUtf8());
            }
            nextByte = replacement.getByte(idx);
            int backref;
            if (nextByte == '{') { // case 1 in the above comment
                idx++;
                int startCursor = idx;
                // NOTE(review): when no closing '}' is found the scan stops at the end of the
                // template and the remaining bytes are used as the group name — confirm that
                // accepting an unterminated reference is intended.
                while (idx < replacement.length()) {
                    nextByte = replacement.getByte(idx);
                    if (nextByte == '}') {
                        break;
                    }
                    idx++;
                }
                byte[] groupName = replacement.getBytes(startCursor, idx - startCursor);
                try {
                    backref = pattern.nameToBackrefNumber(groupName, 0, groupName.length, region);
                }
                catch (ValueException e) {
                    // Keep the lookup failure as the cause so the original error is not lost.
                    throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Illegal replacement sequence: unknown group { " + new String(groupName, StandardCharsets.UTF_8) + " }", e);
                }
                idx++; // step past the closing '}'
            }
            else { // case 2 in the above comment
                backref = nextByte - '0';
                if (backref < 0 || backref > 9) { // not using checkArgument because `.toStringUtf8` is expensive
                    throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Illegal replacement sequence: " + replacement.toStringUtf8());
                }
                if (region.numRegs <= backref) {
                    throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Illegal replacement sequence: unknown group " + backref);
                }
                idx++;
                while (idx < replacement.length()) { // Adaptive group number: find largest group num that is not greater than actual number of groups
                    int nextDigit = replacement.getByte(idx) - '0';
                    if (nextDigit < 0 || nextDigit > 9) {
                        break;
                    }
                    int newBackref = (backref * 10) + nextDigit;
                    if (region.numRegs <= newBackref) {
                        break;
                    }
                    backref = newBackref;
                    idx++;
                }
            }
            int beg = region.beg[backref];
            int end = region.end[backref];
            if (beg != -1 && end != -1) { // copy the group only if it took part in the current match; otherwise append nothing
                result.appendBytes(source.slice(beg, end - beg));
            }
        }
        else { // case 3 and 4 in the above comment
            if (nextByte == '\\') {
                idx++;
                if (idx == replacement.length()) { // not using checkArgument because `.toStringUtf8` is expensive
                    throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Illegal replacement sequence: " + replacement.toStringUtf8());
                }
                nextByte = replacement.getByte(idx);
            }
            result.appendByte(nextByte);
            idx++;
        }
    }
}

Source File: TestJsonExtract.java From presto with Apache License 2.0

4 votes

/**
 * Runs a JSON-path extraction over the given JSON text and returns the result as a String.
 *
 * @param inputJson JSON document
 * @param jsonPath path expression handed to {@code generateExtractor}
 * @return the extracted value as a String, or {@code null} when extraction yields {@code null}
 */
private static String doJsonExtract(String inputJson, String jsonPath)
{
    Slice json = Slices.utf8Slice(inputJson);
    Slice extracted = JsonExtract.extract(json, generateExtractor(jsonPath, new JsonValueJsonExtractor()));
    if (extracted == null) {
        return null;
    }
    return extracted.toStringUtf8();
}

Source File: SliceUtils.java From hive-third-functions with Apache License 2.0

4 votes

/**
 * Expands a replacement template for the matcher's current match and writes the result to {@code so}.
 * <p>
 * The template may reference named groups as <code>${name}</code>, numbered groups as
 * {@code $n}, and may escape any byte with a backslash; everything else is copied verbatim.
 *
 * @param so output receiving the expanded replacement
 * @param replacement replacement template
 * @param matcher matcher supplying the groups of the current match
 * @throws IllegalArgumentException when the template ends right after a {@code $} or backslash,
 *         or a {@code $} is followed by something other than a digit or '{'
 * @throws IndexOutOfBoundsException when the template references an unknown group
 */
static void appendReplacement(SliceOutput so, Slice replacement, Matcher matcher) {
    int idx = 0;

    // Handle the following items:
    // 1. ${name};
    // 2. $0, $1, $123 (group 123, if exists; or group 12, if exists; or group 1);
    // 3. \\, \$, \t (literal 't').
    // 4. Anything that doesn't starts with \ or $ is considered regular bytes
    while (idx < replacement.length()) {
        byte nextByte = replacement.getByte(idx);
        if (nextByte == '$') {
            idx++;
            // A trailing '$' has nothing to refer to.
            if (idx == replacement.length()) {
                throw new IllegalArgumentException("Illegal replacement sequence: " + replacement.toStringUtf8());
            }
            nextByte = replacement.getByte(idx);
            int backref;
            if (nextByte == '{') { // case 1 in the above comment
                idx++;
                int startCursor = idx;
                // Scan forward to the closing '}' (or to the end of the template).
                // NOTE(review): when no '}' is found the rest of the template is used as the
                // group name — confirm that accepting an unterminated reference is intended.
                while (idx < replacement.length()) {
                    nextByte = replacement.getByte(idx);
                    if (nextByte == '}') {
                        break;
                    }
                    idx++;
                }
                String groupName = replacement.slice(startCursor, idx - startCursor).toStringUtf8();
                Integer namedGroupIndex = matcher.pattern().re2().namedGroupIndexes.get(groupName);
                if (namedGroupIndex == null) {
                    throw new IndexOutOfBoundsException("Illegal replacement sequence: unknown group " + groupName);
                }
                backref = namedGroupIndex;
                idx++; // step past the closing '}'
            } else { // case 2 in the above comment
                // The first byte after '$' must be a decimal digit.
                backref = nextByte - '0';
                if (backref < 0 || backref > 9) {
                    throw new IllegalArgumentException("Illegal replacement sequence: " + replacement.toStringUtf8());
                }
                if (matcher.groupCount() < backref) {
                    throw new IndexOutOfBoundsException("Illegal replacement sequence: unknown group " + backref);
                }
                idx++;
                while (idx < replacement.length()) { // Adaptive group number: find largest group num that is not greater than actual number of groups
                    int nextDigit = replacement.getByte(idx) - '0';
                    if (nextDigit < 0 || nextDigit > 9) {
                        break;
                    }
                    int newBackref = (backref * 10) + nextDigit;
                    if (matcher.groupCount() < newBackref) {
                        break;
                    }
                    backref = newBackref;
                    idx++;
                }
            }
            // A null group is skipped, so nothing is written for it.
            Slice group = matcher.group(backref);
            if (group != null) {
                so.writeBytes(group);
            }
        } else { // case 3 and 4 in the above comment
            if (nextByte == '\\') {
                // A backslash escapes the following byte; a trailing backslash is an error.
                idx++;
                if (idx == replacement.length()) {
                    throw new IllegalArgumentException("Illegal replacement sequence: " + replacement.toStringUtf8());
                }
                nextByte = replacement.getByte(idx);
            }
            so.appendByte(nextByte);
            idx++;
        }
    }
}

Java Code Examples for io.airlift.slice.Slice#toStringUtf8()