com.google.common.base.Utf8 Java Examples

The following examples show how to use com.google.common.base.Utf8. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Utf8UtilsTest.java    From hivemq-community-edition with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that three-byte sequences starting with {@code 0xED} whose second byte lies in
 * {@code 0xA0..0xBF} (the encoded UTF-16 surrogate range) are reported as not well formed,
 * whatever the third byte is. Both the Guava byte-array check and the ByteBuf-based check
 * are exercised with the same data.
 */
@Test
public void test_is_well_formed_utf_8() {

    for (int b = 0xA0; b <= 0xBF; b++) {
        // Inclusive upper bound so that 0xFF is covered as a third byte as well.
        for (int b2 = 0; b2 <= 0xFF; b2++) {

            final byte[] bytes = new byte[]{'a', 'b', 'c', (byte) 0xED, (byte) b, (byte) b2, 'd', 'e', 'f'};

            final ByteBuf buf = Unpooled.buffer();
            try {
                buf.writeShort(bytes.length);
                buf.writeBytes(bytes);

                //checking both original guava method for byte-arrays and the ByteBuf implementation
                assertFalse(Utf8Utils.isWellFormed(buf, bytes.length));
                assertFalse(Utf8.isWellFormed(bytes));
            } finally {
                // Release even when an assertion fails so the buffer is not leaked.
                buf.release();
            }
        }
    }
}
 
Example #2
Source File: FDBRecordContext.java    From fdb-record-layer with Apache License 2.0 6 votes vote down vote up
/**
 * Returns an ID whose UTF-8 encoding fits within {@code MAX_TR_ID_SIZE} bytes, or {@code null}
 * when no such ID can be produced safely.
 *
 * @param id the candidate ID
 * @return {@code id} itself if it already fits; an ASCII ID truncated and suffixed with
 *     {@code "..."} if it is too long; {@code null} if it is too long and not pure ASCII, or if
 *     computing its encoded length throws an {@link IllegalArgumentException}
 */
@Nullable
private static String getSanitizedId(@Nonnull String id) {
    try {
        if (Utf8.encodedLength(id) <= MAX_TR_ID_SIZE) {
            return id;
        }
        if (!CharMatcher.ascii().matchesAllOf(id)) {
            // Splitting a non-ASCII UTF-16 string so that it fits is risky: the cut could land
            // in the middle of a surrogate pair. Give up rather than produce a broken string.
            return null;
        }
        // Pure ASCII means one byte per char, so truncating by char count is safe here.
        return id.substring(0, MAX_TR_ID_SIZE - 3) + "...";
    } catch (IllegalArgumentException e) {
        return null;
    }
}
 
Example #3
Source File: MoreStringUtil.java    From j360-dubbo-app-all with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the number of bytes the given character sequence occupies once encoded as UTF-8,
 * delegating to Guava. An empty sequence (as decided by {@code StringUtils.isEmpty}) yields 0.
 *
 * @see Utf8#encodedLength(CharSequence)
 */
public static int utf8EncodedLength(@Nullable CharSequence sequence) {
	return StringUtils.isEmpty(sequence) ? 0 : Utf8.encodedLength(sequence);
}
 
Example #4
Source File: SqlUtil.java    From calcite with Apache License 2.0 5 votes vote down vote up
/**
 * Validates that a value can be decoded by the given charset.
 *
 * <p>Only {@link StandardCharsets#UTF_8} is actually checked; for any other
 * charset this method returns without inspecting the value.
 *
 * @param value nls string in byte array
 * @param charset charset
 * @throws RuntimeException If the given value cannot be represented in the
 *     given charset
 */
public static void validateCharset(ByteString value, Charset charset) {
  if (charset != StandardCharsets.UTF_8) {
    return;
  }
  final byte[] raw = value.getBytes();
  if (Utf8.isWellFormed(raw)) {
    return;
  }
  // Decode the malformed bytes anyway so they can be shown in the error.
  //CHECKSTYLE: IGNORE 1
  final String decoded = new String(raw, charset);
  throw RESOURCE.charsetEncoding(decoded, charset.name()).ex();
}
 
Example #5
Source File: UltimateFancy.java    From RedProtect with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds the {@code show_item} hover payload for an item.
 *
 * <p>If the item's JSON form exceeds 32767 UTF-8 bytes, the JSON of a fresh
 * {@link ItemStack} of the same type is used instead. Previously the fallback value was
 * written and then immediately overwritten by the oversized JSON, so the size guard had
 * no effect.
 *
 * @param item the item to describe
 * @return a JSON object with {@code "action"} and {@code "value"} entries
 */
private JSONObject parseHoverItem(ItemStack item) {
    JSONObject obj = new JSONObject();
    obj.put("action", "show_item");
    String jItem = convertItemStackToJson(item);
    if (Utf8.encodedLength(jItem) > 32767) {
        // Too large: fall back to a plain stack of the same type.
        jItem = convertItemStackToJson(new ItemStack(item.getType()));
    }
    obj.put("value", jItem);
    return obj;
}
 
Example #6
Source File: UltimateFancy.java    From RedProtect with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Item to show on chat message under this text.
 *
 * <p>If the hover payload for the item exceeds 32767 UTF-8 bytes, a payload built from a
 * fresh {@link ItemStack} of the same type is used instead. Exactly one {@code hoverEvent}
 * element is queued; previously the oversized payload was queued in addition to the fallback.
 *
 * @param item {@link ItemStack}
 * @return instance of same {@link UltimateFancy}.
 */
public UltimateFancy hoverShowItem(ItemStack item) {
    JSONObject jItem = parseHoverItem(item);
    if (Utf8.encodedLength(jItem.toJSONString()) > 32767) {
        // Too large: replace with the payload of a plain stack of the same type.
        jItem = parseHoverItem(new ItemStack(item.getType()));
    }
    pendentElements.add(new ExtraElement("hoverEvent", jItem));
    return this;
}
 
Example #7
Source File: DocSizeProcessorTest.java    From sawmill with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the doc-size processor on a document holding non-ASCII text and expects the
 * {@code docSize} field to equal the UTF-8 encoded length of the document's JSON form
 * as captured before processing.
 */
@Test
public void differentLangTest(){
    Doc doc = createDoc("testField", "こんにちは世界!");
    // Snapshot the source first so the expectation is not affected by the processor.
    Map<String, Object> sourceBefore = new HashMap<>(doc.getSource());

    DocSizeProcessor processor = createProcessor(DocSizeProcessor.class);
    ProcessResult outcome = processor.process(doc);

    assertThat(outcome.isSucceeded()).isTrue();
    assertThat(doc.hasField("docSize")).isTrue();

    int actualSize = (int) doc.getField("docSize");
    int expectedSize = Utf8.encodedLength(JsonUtils.toJsonString(sourceBefore));
    assertThat(actualSize).isEqualTo(expectedSize);
}
 
Example #8
Source File: DocSizeProcessorTest.java    From sawmill with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the doc-size processor on a document holding plain ASCII text and expects the
 * {@code docSize} field to equal the UTF-8 encoded length of the document's JSON form
 * as captured before processing.
 */
@Test
public void sanity(){
    Doc doc = createDoc("testField", "Hello, World!");
    // Snapshot the source first so the expectation is not affected by the processor.
    Map<String, Object> sourceBefore = new HashMap<>(doc.getSource());

    DocSizeProcessor processor = createProcessor(DocSizeProcessor.class);
    ProcessResult outcome = processor.process(doc);

    assertThat(outcome.isSucceeded()).isTrue();
    assertThat(doc.hasField("docSize")).isTrue();

    int actualSize = (int) doc.getField("docSize");
    int expectedSize = Utf8.encodedLength(JsonUtils.toJsonString(sourceBefore));
    assertThat(actualSize).isEqualTo(expectedSize);
}
 
Example #9
Source File: UltimateFancy.java    From UltimateChat with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds the {@code show_item} hover payload for an item.
 *
 * <p>If the item's JSON form exceeds 32767 UTF-8 bytes, the JSON of a fresh
 * {@link ItemStack} of the same type is used instead. Previously the fallback value was
 * written and then immediately overwritten by the oversized JSON, so the size guard had
 * no effect.
 *
 * @param item the item to describe
 * @return a JSON object with {@code "action"} and {@code "value"} entries
 */
private JSONObject parseHoverItem(ItemStack item) {
    JSONObject obj = new JSONObject();
    obj.put("action", "show_item");
    String jItem = convertItemStackToJson(item);
    if (Utf8.encodedLength(jItem) > 32767) {
        // Too large: fall back to a plain stack of the same type.
        jItem = convertItemStackToJson(new ItemStack(item.getType()));
    }
    obj.put("value", jItem);
    return obj;
}
 
Example #10
Source File: UltimateFancy.java    From UltimateChat with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Item to show on chat message under this text.
 *
 * <p>If the hover payload for the item exceeds 32767 UTF-8 bytes, a payload built from a
 * fresh {@link ItemStack} of the same type is used instead. Exactly one {@code hoverEvent}
 * element is queued; previously the oversized payload was queued in addition to the fallback.
 *
 * @param item {@link ItemStack}
 * @return instance of same {@link UltimateFancy}.
 */
public UltimateFancy hoverShowItem(ItemStack item) {
    JSONObject jItem = parseHoverItem(item);
    if (Utf8.encodedLength(jItem.toJSONString()) > 32767) {
        // Too large: replace with the payload of a plain stack of the same type.
        jItem = parseHoverItem(new ItemStack(item.getType()));
    }
    pendentElements.add(new ExtraElement("hoverEvent", jItem));
    return this;
}
 
Example #11
Source File: Utils.java    From ChatUI with MIT License 5 votes vote down vote up
/**
 * Sends a text to the player, splitting it into lines first when its JSON serialization
 * is larger than 32767 UTF-8 bytes.
 *
 * @param ctx  the player context supplying the player, the line splitter and the width
 * @param text the text to send
 */
public static void sendMessageSplitLarge(PlayerContext ctx, Text text) {
    final boolean oversized = Utf8.encodedLength(TextSerializers.JSON.serialize(text)) > 32767;
    if (!oversized) {
        ctx.getPlayer().sendMessage(text);
        return;
    }
    // Oversized: break the text up by the context's width and send the pieces together.
    final List<Text> pieces = ctx.utils().splitLines(text, ctx.width);
    ctx.getPlayer().sendMessages(pieces);
}
 
Example #12
Source File: SqlUtil.java    From Bats with Apache License 2.0 5 votes vote down vote up
/**
 * Validates that a value can be decoded by the given charset.
 *
 * <p>Only {@link StandardCharsets#UTF_8} is actually checked; for any other
 * charset this method returns without inspecting the value.
 *
 * @param value nls string in byte array
 * @param charset charset
 * @throws RuntimeException If the given value cannot be represented in the
 *     given charset
 */
public static void validateCharset(ByteString value, Charset charset) {
  if (charset != StandardCharsets.UTF_8) {
    return;
  }
  final byte[] raw = value.getBytes();
  if (Utf8.isWellFormed(raw)) {
    return;
  }
  // Decode the malformed bytes anyway so they can be shown in the error.
  //CHECKSTYLE: IGNORE 1
  final String decoded = new String(raw, charset);
  throw RESOURCE.charsetEncoding(decoded, charset.name()).ex();
}
 
Example #13
Source File: MoreStringUtil.java    From vjtools with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the number of bytes the given character sequence occupies once encoded as UTF-8,
 * delegating to Guava. An empty sequence (as decided by {@code StringUtils.isEmpty}) yields 0.
 *
 * @see Utf8#encodedLength(CharSequence)
 */
public static int utf8EncodedLength(@Nullable CharSequence sequence) {
	return StringUtils.isEmpty(sequence) ? 0 : Utf8.encodedLength(sequence);
}
 
Example #14
Source File: MoreStringUtil.java    From vjtools with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the number of bytes the given character sequence occupies once encoded as UTF-8,
 * delegating to Guava. An empty sequence (as decided by {@code StringUtils.isEmpty}) yields 0.
 *
 * @see Utf8#encodedLength(CharSequence)
 */
public static int utf8EncodedLength(@Nullable CharSequence sequence) {
	return StringUtils.isEmpty(sequence) ? 0 : Utf8.encodedLength(sequence);
}
 
Example #15
Source File: Utf8Utils.java    From hivemq-community-edition with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether a UTF-8 encoded byte array contains characters that, according to the
 * MQTT 5 specification, a UTF-8 encoded String must not contain.
 * <p>
 * These characters are the null character U+0000 and UTF-16 surrogates. Input that is not
 * well-formed UTF-8 is reported as containing such characters.
 *
 * @param binary the UTF-8 encoded byte array.
 * @return {@code true} if the data is not well-formed UTF-8 or contains a zero byte,
 *         {@code false} otherwise.
 */
public static boolean containsMustNotCharacters(@NotNull final byte[] binary) {
    if (Utf8.isWellFormed(binary)) {
        // Well-formed so far; the only thing left to rule out is a zero byte.
        for (int i = 0; i < binary.length; i++) {
            if (binary[i] == 0) {
                return true;
            }
        }
        return false;
    }
    return true;
}
 
Example #16
Source File: AbstractMqttPublishDecoder.java    From hivemq-community-edition with Apache License 2.0 5 votes vote down vote up
/**
 * Reads a publish payload from the buffer and, if requested, validates it as UTF-8.
 * <p>
 * Disconnects with {@link Mqtt5DisconnectReasonCode#PAYLOAD_FORMAT_INVALID} when all of the
 * following hold:
 * <p>
 * - payloadFormatIndicator == UTF-8 AND validatePayloadFormat = true AND payload is not UTF-8 well formed.
 * <p>
 * A payload length that is not positive yields the shared empty payload and reads nothing
 * from the buffer.
 *
 * @param channel                the channel of the mqtt client
 * @param buf                    the encoded ByteBuf of the message
 * @param payloadLength          the length of the payload
 * @param payloadFormatIndicator the nullable {@link Mqtt5PayloadFormatIndicator}
 * @param validatePayloadFormat  the configured boolean for payload validation (default false)
 * @return the payload as a byte[] or {@code null} if this method disconnected.
 */
protected @Nullable byte[] decodePayload(final @NotNull Channel channel,
                                         final @NotNull ByteBuf buf,
                                         final int payloadLength,
                                         final @Nullable Mqtt5PayloadFormatIndicator payloadFormatIndicator,
                                         final boolean validatePayloadFormat) {

    if (payloadLength <= 0) {
        return emptyPayload;
    }

    final byte[] payload = new byte[payloadLength];
    buf.readBytes(payload);

    // Validation only applies to payloads declared as UTF-8, and only when enabled.
    final boolean mustValidate =
            validatePayloadFormat && Mqtt5PayloadFormatIndicator.UTF_8 == payloadFormatIndicator;

    if (mustValidate && !Utf8.isWellFormed(payload)) {
        disconnector.disconnect(channel,
                "A client (IP: {}) sent a PUBLISH with an invalid UTF-8 payload. This is not allowed. Disconnecting client.",
                "Sent a PUBLISH with an invalid UTF-8 payload",
                Mqtt5DisconnectReasonCode.PAYLOAD_FORMAT_INVALID,
                ReasonStrings.DISCONNECT_PAYLOAD_FORMAT_INVALID_PUBLISH);
        return null;
    }

    return payload;
}
 
Example #17
Source File: SqlUtil.java    From Quicksql with MIT License 5 votes vote down vote up
/**
 * Validates that a value can be decoded by the given charset.
 *
 * <p>Only {@link StandardCharsets#UTF_8} is actually checked; for any other
 * charset this method returns without inspecting the value.
 *
 * @param value nls string in byte array
 * @param charset charset
 * @throws RuntimeException If the given value cannot be represented in the
 *     given charset
 */
public static void validateCharset(ByteString value, Charset charset) {
  if (charset != StandardCharsets.UTF_8) {
    return;
  }
  final byte[] raw = value.getBytes();
  if (Utf8.isWellFormed(raw)) {
    return;
  }
  // Decode the malformed bytes anyway so they can be shown in the error.
  //CHECKSTYLE: IGNORE 1
  final String decoded = new String(raw, charset);
  throw RESOURCE.charsetEncoding(decoded, charset.name()).ex();
}
 
Example #18
Source File: AsnCharStringOerSerializer.java    From quilt with Apache License 2.0 4 votes vote down vote up
/**
 * Reads a character string from the stream and stores it on the codec instance.
 *
 * <p>The number of bytes to read is the size constraint's maximum when the constraint is
 * fixed-size; otherwise it is read from the stream as a length prefix. After decoding, the
 * UTF-8 byte length of the decoded string must equal that number, otherwise the read is
 * rejected.
 *
 * @param context     the serialization context (must not be null)
 * @param instance    the codec that receives the decoded string (must not be null)
 * @param inputStream the stream to read from (must not be null); it is not closed
 * @throws IOException if reading fails or the decoded byte length does not match the
 *                     expected length
 */
@Override
public void read(
    final AsnObjectSerializationContext context,
    final AsnCharStringBasedObjectCodec instance,
    final InputStream inputStream
) throws IOException {

  Objects.requireNonNull(context);
  Objects.requireNonNull(instance);
  Objects.requireNonNull(inputStream);

  // WARNING: This length can be maliciously specified by the packet creator, so be careful not to use it for unsafe
  // operations, such as creating a new array of initial size `length`. This usage is safe because it merely caps the
  // InputStream to the specified packet-length, whereas the InputStream is authoritative for when it actually ends,
  // and this limit may be well smaller than `length`.
  final int lengthToRead;
  final AsnSizeConstraint sizeConstraint = instance.getSizeConstraint();
  if (sizeConstraint.isFixedSize()) {
    lengthToRead = sizeConstraint.getMax();
  } else {
    // Read the lengthToRead of the encoded OctetString...
    lengthToRead = OerLengthSerializer.readLength(inputStream);
  }

  final String result;
  /* beware the 0-lengthToRead string */
  if (lengthToRead == 0) {
    result = "";
  } else {
    // Use a limited input stream so we don't read too many bytes.
    final InputStream limitedInputStream = ByteStreams.limit(inputStream, lengthToRead);
    // WARNING: Don't close the InputStreamReader so that the underlying inputStream is not closed.
    result = CharStreams.toString(new InputStreamReader(limitedInputStream, instance.getCharacterSet().name()));

    // For UTF-8 characters, result.length() reports the number of chars (e.g., 3), but for certain characters the
    // encoded byte-length is larger (e.g., the String 元元元 is 3 chars, but 9 encoded UTF-8 bytes). The length-prefix
    // is written as 9, so on read we must validate that 9 bytes were decoded, and not 3 (in this example).
    final int decodedByteLength = Utf8.encodedLength(result);
    if (decodedByteLength != lengthToRead) {
      // Report the byte count that was actually compared, not the char count, so the message is accurate.
      throw new IOException(
          format("Unable to properly decode %s bytes (could only read %s bytes)", lengthToRead, decodedByteLength)
      );
    }
  }

  instance.setCharString(result);
}
 
Example #19
Source File: DocSizeProcessor.java    From sawmill with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the UTF-8 encoded length of the document's JSON representation in the
 * configured target field.
 *
 * @param doc the document to measure and annotate
 * @return a successful {@link ProcessResult}
 */
@Override
public ProcessResult process(Doc doc) {
    final int encodedSize = Utf8.encodedLength(JsonUtils.toJsonString(doc.getSource()));
    doc.addField(targetField, encodedSize);
    return ProcessResult.success();
}
 
Example #20
Source File: XodusUtils.java    From hivemq-community-edition with Apache License 2.0 4 votes vote down vote up
/**
 * Returns {@link Short#BYTES} plus the UTF-8 encoded length of the given string.
 * A {@code null} string contributes no bytes beyond {@link Short#BYTES}.
 * (The name suggests the short is a length prefix; confirm against callers.)
 *
 * @param string the string to measure, may be {@code null}
 * @return the combined size in bytes
 */
public static int shortLengthStringSize(@Nullable final String string) {
    if (string == null) {
        return Short.BYTES;
    }
    return Short.BYTES + Utf8.encodedLength(string);
}