java.nio.charset.StandardCharsets#UTF_16LE

Source File: ServerSentEventHttpMessageWriterTests.java From spring-analysis-note with MIT License

6 votes

@Test // SPR-16516, SPR-16539
public void writePojoWithCustomEncoding() {
	// Both Pojo values carry U+1D11E, written as a surrogate pair, and the media type
	// requests UTF-16LE, so the written body must decode correctly with that charset.
	Charset charset = StandardCharsets.UTF_16LE;
	MediaType mediaType = new MediaType("text", "event-stream", charset);
	Flux<Pojo> source = Flux.just(new Pojo("foo\uD834\uDD1E", "bar\uD834\uDD1E"));
	String expectedEvent = "data:{\"foo\":\"foo\uD834\uDD1E\",\"bar\":\"bar\uD834\uDD1E\"}\n\n";

	testWrite(source, mediaType, outputMessage, Pojo.class);

	// The content type header must echo the requested media type, charset included.
	assertEquals(mediaType, outputMessage.getHeaders().getContentType());

	// The whole event is expected in a single buffer.
	StepVerifier.create(outputMessage.getBody())
			.consumeNextWith(buffer -> {
				String decoded = DataBufferTestUtils.dumpString(buffer, charset);
				DataBufferUtils.release(buffer);
				assertEquals(expectedEvent, decoded);
			})
			.expectComplete()
			.verify();
}

Source File: Source.java From openjdk-8 with GNU General Public License v2.0

6 votes

/**
 * Decodes a byte array into characters, using a leading Unicode byte order
 * mark (BOM) to pick the charset. The BOM itself is not part of the result.
 * <p>
 * Recognised BOMs: UTF-16BE (FE FF), UTF-32LE (FF FE 00 00), UTF-16LE (FF FE),
 * UTF-8 (EF BB BF) and UTF-32BE (00 00 FE FF). Without a BOM the bytes are
 * decoded as UTF-8 from offset 0.
 * <p>
 * Input beginning with FF FE 00 00 is treated as UTF-32LE; UTF-16LE text whose
 * first character after the BOM is U+0000 is indistinguishable from it.
 *
 * @param bytes the raw bytes to decode
 * @return the decoded characters, excluding any BOM
 */
private static char[] byteToCharArray(final byte[] bytes) {
    Charset cs = StandardCharsets.UTF_8;
    int start = 0;
    // BOM detection.
    if (bytes.length > 1 && bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) {
        start = 2;
        cs = StandardCharsets.UTF_16BE;
    } else if (bytes.length > 1 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE) {
        // FF FE is the UTF-16LE BOM but also the first half of the UTF-32LE BOM,
        // so the longer mark has to be tested first or it can never match.
        if (bytes.length > 3 && bytes[2] == 0 && bytes[3] == 0) {
            start = 4;
            cs = Charset.forName("UTF-32LE");
        } else {
            start = 2;
            cs = StandardCharsets.UTF_16LE;
        }
    } else if (bytes.length > 2 && bytes[0] == (byte)0xEF && bytes[1] == (byte)0xBB && bytes[2] == (byte)0xBF) {
        start = 3;
        cs = StandardCharsets.UTF_8;
    } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte)0xFE && bytes[3] == (byte)0xFF) {
        start = 4;
        cs = Charset.forName("UTF-32BE");
    }

    return new String(bytes, start, bytes.length - start, cs).toCharArray();
}

Source File: FileRenameInformation2.java From jcifs with GNU Lesser General Public License v2.1

6 votes

/**
 * {@inheritDoc}
 * <p>
 * Reads the replace-if-exists flag from the first byte, then a 4-byte name
 * length located 16 bytes after the start, followed by that many bytes of
 * UTF-16LE encoded file name.
 *
 * @see jcifs.Decodable#decode(byte[], int, int)
 */
@Override
public int decode ( byte[] buffer, int bufferIndex, int len ) throws SMBProtocolDecodingException {
    final int origin = bufferIndex;
    this.replaceIfExists = buffer[ origin ] != 0;

    // The flag byte and the 15 bytes after it are skipped (two 8-byte steps in total);
    // presumably reserved bytes plus a root directory handle -- TODO confirm against the protocol spec.
    final int lengthOffset = origin + 8 + 8;
    final int nameLen = SMBUtil.readInt4(buffer, lengthOffset);
    final int nameOffset = lengthOffset + 4;

    final byte[] rawName = new byte[nameLen];
    System.arraycopy(buffer, nameOffset, rawName, 0, rawName.length);
    this.fileName = new String(rawName, StandardCharsets.UTF_16LE);

    // Number of bytes consumed: fixed header, length field and the name itself.
    return nameOffset + nameLen - origin;
}

Source File: Source.java From openjdk-8-source with GNU General Public License v2.0

6 votes

/**
 * Decodes a byte array into characters, using a leading Unicode byte order
 * mark (BOM) to pick the charset. The BOM itself is not part of the result.
 * <p>
 * Recognised BOMs: UTF-16BE (FE FF), UTF-32LE (FF FE 00 00), UTF-16LE (FF FE),
 * UTF-8 (EF BB BF) and UTF-32BE (00 00 FE FF). Without a BOM the bytes are
 * decoded as UTF-8 from offset 0.
 * <p>
 * Input beginning with FF FE 00 00 is treated as UTF-32LE; UTF-16LE text whose
 * first character after the BOM is U+0000 is indistinguishable from it.
 *
 * @param bytes the raw bytes to decode
 * @return the decoded characters, excluding any BOM
 */
private static char[] byteToCharArray(final byte[] bytes) {
    Charset cs = StandardCharsets.UTF_8;
    int start = 0;
    // BOM detection.
    if (bytes.length > 1 && bytes[0] == (byte)0xFE && bytes[1] == (byte)0xFF) {
        start = 2;
        cs = StandardCharsets.UTF_16BE;
    } else if (bytes.length > 1 && bytes[0] == (byte)0xFF && bytes[1] == (byte)0xFE) {
        // FF FE is the UTF-16LE BOM but also the first half of the UTF-32LE BOM,
        // so the longer mark has to be tested first or it can never match.
        if (bytes.length > 3 && bytes[2] == 0 && bytes[3] == 0) {
            start = 4;
            cs = Charset.forName("UTF-32LE");
        } else {
            start = 2;
            cs = StandardCharsets.UTF_16LE;
        }
    } else if (bytes.length > 2 && bytes[0] == (byte)0xEF && bytes[1] == (byte)0xBB && bytes[2] == (byte)0xBF) {
        start = 3;
        cs = StandardCharsets.UTF_8;
    } else if (bytes.length > 3 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == (byte)0xFE && bytes[3] == (byte)0xFF) {
        start = 4;
        cs = Charset.forName("UTF-32BE");
    }

    return new String(bytes, start, bytes.length - start, cs).toCharArray();
}

Source File: Source.java From hottub with GNU General Public License v2.0

6 votes

/**
 * Converts raw bytes to characters, choosing the charset from a leading
 * byte order mark and dropping that mark from the result. UTF-8 from
 * offset 0 is used when no mark is recognised.
 *
 * @param bytes the raw bytes to decode
 * @return the decoded characters without the byte order mark
 */
private static char[] byteToCharArray(final byte[] bytes) {
    final int len = bytes.length;
    // Defaults that apply when no byte order mark is recognised.
    Charset charset = StandardCharsets.UTF_8;
    int bomLength = 0;

    // Every supported mark is at least two bytes long.
    if (len >= 2) {
        final int b0 = bytes[0] & 0xFF;
        final int b1 = bytes[1] & 0xFF;
        if (b0 == 0xFE && b1 == 0xFF) {
            bomLength = 2;
            charset = StandardCharsets.UTF_16BE;
        } else if (b0 == 0xFF && b1 == 0xFE) {
            // FF FE alone is UTF-16LE; followed by 00 00 it is the UTF-32LE mark.
            final boolean utf32 = len >= 4 && bytes[2] == 0 && bytes[3] == 0;
            bomLength = utf32 ? 4 : 2;
            charset = utf32 ? Charset.forName("UTF-32LE") : StandardCharsets.UTF_16LE;
        } else if (len >= 3 && b0 == 0xEF && b1 == 0xBB && (bytes[2] & 0xFF) == 0xBF) {
            // UTF-8 mark: the charset is already the default, only the offset changes.
            bomLength = 3;
        } else if (len >= 4 && b0 == 0 && b1 == 0
                && (bytes[2] & 0xFF) == 0xFE && (bytes[3] & 0xFF) == 0xFF) {
            bomLength = 4;
            charset = Charset.forName("UTF-32BE");
        }
    }

    return new String(bytes, bomLength, len - bomLength, charset).toCharArray();
}

Source File: Source.java From openjdk-jdk9 with GNU General Public License v2.0

6 votes

/**
 * Converts raw bytes to characters, choosing the charset from a leading
 * byte order mark and dropping that mark from the result. UTF-8 from
 * offset 0 is used when no mark is recognised.
 *
 * @param bytes the raw bytes to decode
 * @return the decoded characters without the byte order mark
 */
private static char[] byteToCharArray(final byte[] bytes) {
    final int len = bytes.length;
    // Defaults that apply when no byte order mark is recognised.
    Charset charset = StandardCharsets.UTF_8;
    int bomLength = 0;

    // Every supported mark is at least two bytes long.
    if (len >= 2) {
        final int b0 = bytes[0] & 0xFF;
        final int b1 = bytes[1] & 0xFF;
        if (b0 == 0xFE && b1 == 0xFF) {
            bomLength = 2;
            charset = StandardCharsets.UTF_16BE;
        } else if (b0 == 0xFF && b1 == 0xFE) {
            // FF FE alone is UTF-16LE; followed by 00 00 it is the UTF-32LE mark.
            final boolean utf32 = len >= 4 && bytes[2] == 0 && bytes[3] == 0;
            bomLength = utf32 ? 4 : 2;
            charset = utf32 ? Charset.forName("UTF-32LE") : StandardCharsets.UTF_16LE;
        } else if (len >= 3 && b0 == 0xEF && b1 == 0xBB && (bytes[2] & 0xFF) == 0xBF) {
            // UTF-8 mark: the charset is already the default, only the offset changes.
            bomLength = 3;
        } else if (len >= 4 && b0 == 0 && b1 == 0
                && (bytes[2] & 0xFF) == 0xFE && (bytes[3] & 0xFF) == 0xFF) {
            bomLength = 4;
            charset = Charset.forName("UTF-32BE");
        }
    }

    return new String(bytes, bomLength, len - bomLength, charset).toCharArray();
}

Source File: MappedCharset.java From editorconfig-netbeans with MIT License

6 votes

/**
 * Resolves the charset, and where applicable the file mark, for the given
 * encoding name. Names other than the ones listed fall back to UTF-8
 * without a mark.
 *
 * @param name the encoding name to map
 */
private void init(String name) {
  // Step 1: pick the charset. "UTF-8", "UTF-8-BOM" and any unknown name share UTF-8.
  switch (name) {
    case "ISO-8859-1":
      charset = StandardCharsets.ISO_8859_1;
      break;
    case "UTF-16BE":
      charset = StandardCharsets.UTF_16BE;
      break;
    case "UTF-16LE":
      charset = StandardCharsets.UTF_16LE;
      break;
    default:
      charset = StandardCharsets.UTF_8;
      break;
  }

  // Step 2: only these three names set the file mark.
  if (name.equals("UTF-8-BOM") || name.equals("UTF-16BE") || name.equals("UTF-16LE")) {
    mark = FILE_MARK;
  }
}

Source File: RpcInputStreamTest.java From p4ic4idea with Apache License 2.0

6 votes

/**
 * Reads a UTF-16LE fixture through {@code RpcInputStream} twice: once with an
 * explicit UTF-16LE charset and once with a {@code null} charset, checking
 * the number of bytes returned by each read.
 */
@Test
public void testReadUtf16LEWithBomAndWinLineEnding() throws IOException, FileEncoderException {
  // Copy the fixture out of the classpath into the temporary directory.
  File fixture = tmpDir.newFile("utf_16LE_win_line_ending.txt");
  P4ExtFileUtils.extractResource(this,
          "com/perforce/p4java/common/io/utf_16LE_win_line_ending.txt", fixture, true);
  mockFileName = fixture.getAbsolutePath();

  file = new RpcPerforceFile(mockFileName, RpcPerforceFileType.FST_UTF16, ClientLineEnding.FST_L_CRLF);
  rpcInputStream = new RpcInputStream(file, StandardCharsets.UTF_16LE);

  final int requested = 1000;
  byte[] sink = new byte[requested + 1];

  // First pass: explicit charset.
  int withCharset = rpcInputStream.read(sink, 0, requested);
  assertThat(withCharset, is(5));

  // Second pass: no charset; the expected count is the on-disk size minus two bytes
  // (the variable name in the original suggests these are the BOM).
  file.setFileType(RpcPerforceFileType.FST_UTF16);
  rpcInputStream = new RpcInputStream(file, null);
  int withoutCharset = rpcInputStream.read(sink, 0, requested);
  int sizeWithoutBom = Files.readAllBytes(file.toPath()).length - 2;
  assertThat(withoutCharset, is(sizeWithoutBom));
}

Source File: Source.java From TencentKona-8 with GNU General Public License v2.0

6 votes

/**
 * Converts raw bytes to characters, choosing the charset from a leading
 * byte order mark and dropping that mark from the result. UTF-8 from
 * offset 0 is used when no mark is recognised.
 *
 * @param bytes the raw bytes to decode
 * @return the decoded characters without the byte order mark
 */
private static char[] byteToCharArray(final byte[] bytes) {
    final int len = bytes.length;
    // Defaults that apply when no byte order mark is recognised.
    Charset charset = StandardCharsets.UTF_8;
    int bomLength = 0;

    // Every supported mark is at least two bytes long.
    if (len >= 2) {
        final int b0 = bytes[0] & 0xFF;
        final int b1 = bytes[1] & 0xFF;
        if (b0 == 0xFE && b1 == 0xFF) {
            bomLength = 2;
            charset = StandardCharsets.UTF_16BE;
        } else if (b0 == 0xFF && b1 == 0xFE) {
            // FF FE alone is UTF-16LE; followed by 00 00 it is the UTF-32LE mark.
            final boolean utf32 = len >= 4 && bytes[2] == 0 && bytes[3] == 0;
            bomLength = utf32 ? 4 : 2;
            charset = utf32 ? Charset.forName("UTF-32LE") : StandardCharsets.UTF_16LE;
        } else if (len >= 3 && b0 == 0xEF && b1 == 0xBB && (bytes[2] & 0xFF) == 0xBF) {
            // UTF-8 mark: the charset is already the default, only the offset changes.
            bomLength = 3;
        } else if (len >= 4 && b0 == 0 && b1 == 0
                && (bytes[2] & 0xFF) == 0xFE && (bytes[3] & 0xFF) == 0xFF) {
            bomLength = 4;
            charset = Charset.forName("UTF-32BE");
        }
    }

    return new String(bytes, bomLength, len - bomLength, charset).toCharArray();
}

Source File: ProgramBuilder.java From ghidra with Apache License 2.0

6 votes

/**
 * Encodes {@code string} with {@code encoding}, writes the bytes at
 * {@code address} and, for some encodings, applies a string data type there.
 * <ul>
 * <li>US-ASCII / UTF-8: optional one-byte terminator; a {@code StringDataType}
 * is always applied.</li>
 * <li>UTF-16BE / UTF-16LE: optional two-byte terminator; a
 * {@code TerminatedUnicodeDataType} is applied only when terminated.</li>
 * <li>Any other encoding: the bytes are written as-is, with no terminator and
 * no data type.</li>
 * </ul>
 *
 * @param address the address to write to
 * @param string the text to encode
 * @param encoding the charset used for encoding
 * @param nullTerminate whether to append a zero terminator (ignored for
 *        encodings outside the two groups above)
 * @throws Exception if writing the bytes or applying the data type fails
 */
public void createEncodedString(String address, String string, Charset encoding,
		boolean nullTerminate) throws Exception {
	byte[] encoded = string.getBytes(encoding);

	boolean singleByteUnit =
		encoding == StandardCharsets.US_ASCII || encoding == StandardCharsets.UTF_8;
	boolean doubleByteUnit = !singleByteUnit &&
		(encoding == StandardCharsets.UTF_16BE || encoding == StandardCharsets.UTF_16LE);

	// Arrays.copyOf pads with zeros, which provides the terminator.
	if (nullTerminate && (singleByteUnit || doubleByteUnit)) {
		int terminatorSize = singleByteUnit ? 1 : 2;
		encoded = Arrays.copyOf(encoded, encoded.length + terminatorSize);
	}

	setBytes(address, encoded);

	if (singleByteUnit) {
		applyDataType(address, new StringDataType(), 1);
	}
	else if (doubleByteUnit && nullTerminate) {
		applyDataType(address, new TerminatedUnicodeDataType(), 1);
	}
}

Source File: FileRenameInformation2.java From jcifs-ng with GNU Lesser General Public License v2.1

6 votes

/**
 * {@inheritDoc}
 * <p>
 * Reads the replace-if-exists flag from the first byte, then a 4-byte name
 * length located 16 bytes after the start, followed by that many bytes of
 * UTF-16LE encoded file name.
 *
 * @see jcifs.Decodable#decode(byte[], int, int)
 */
@Override
public int decode ( byte[] buffer, int bufferIndex, int len ) throws SMBProtocolDecodingException {
    final int origin = bufferIndex;
    this.replaceIfExists = buffer[ origin ] != 0;

    // The flag byte and the 15 bytes after it are skipped (two 8-byte steps in total);
    // presumably reserved bytes plus a root directory handle -- TODO confirm against the protocol spec.
    final int lengthOffset = origin + 8 + 8;
    final int nameLen = SMBUtil.readInt4(buffer, lengthOffset);
    final int nameOffset = lengthOffset + 4;

    final byte[] rawName = new byte[nameLen];
    System.arraycopy(buffer, nameOffset, rawName, 0, rawName.length);
    this.fileName = new String(rawName, StandardCharsets.UTF_16LE);

    // Number of bytes consumed: fixed header, length field and the name itself.
    return nameOffset + nameLen - origin;
}

Source File: ServerSentEventHttpMessageWriterTests.java From java-technology-stack with MIT License

5 votes

@Test // SPR-16516, SPR-16539
public void writePojoWithCustomEncoding() {
	// Both Pojo values carry U+1D11E, written as a surrogate pair, and the media type
	// requests UTF-16LE, so every emitted buffer must decode correctly with that charset.
	Charset charset = StandardCharsets.UTF_16LE;
	MediaType mediaType = new MediaType("text", "event-stream", charset);
	Flux<Pojo> source = Flux.just(new Pojo("foo\uD834\uDD1E", "bar\uD834\uDD1E"));

	testWrite(source, mediaType, outputMessage, Pojo.class);

	assertEquals(mediaType, outputMessage.getHeaders().getContentType());

	// The event is expected as four buffers: field prefix, JSON payload, and two line feeds.
	StepVerifier.create(outputMessage.getBody())
			.consumeNextWith(prefix -> {
				String prefixText = DataBufferTestUtils.dumpString(prefix, charset);
				DataBufferUtils.release(prefix);
				assertEquals("data:", prefixText);
			})
			.consumeNextWith(payload -> {
				String payloadText = DataBufferTestUtils.dumpString(payload, charset);
				DataBufferUtils.release(payload);
				assertEquals("{\"foo\":\"foo\uD834\uDD1E\",\"bar\":\"bar\uD834\uDD1E\"}", payloadText);
			})
			.consumeNextWith(firstBreak -> {
				String firstBreakText = DataBufferTestUtils.dumpString(firstBreak, charset);
				DataBufferUtils.release(firstBreak);
				assertEquals("\n", firstBreakText);
			})
			.consumeNextWith(secondBreak -> {
				String secondBreakText = DataBufferTestUtils.dumpString(secondBreak, charset);
				DataBufferUtils.release(secondBreak);
				assertEquals("\n", secondBreakText);
			})
			.expectComplete()
			.verify();
}

Source File: UnicodeBom.java From Strata with Apache License 2.0

5 votes

/**
 * Decodes a {@code byte[]} into a {@code String}, honouring a leading byte order marker.
 * <p>
 * A UTF-8, UTF-16BE or UTF-16LE marker selects the matching charset and is
 * excluded from the result. Input without one of these markers is decoded
 * as UTF-8 in full.
 * 
 * @param input  the input byte array
 * @return the equivalent string
 */
public static String toString(byte[] input) {
  final int length = input.length;

  // Three-byte UTF-8 marker.
  if (length >= 3 && input[0] == X_EF && input[1] == X_BB && input[2] == X_BF) {
    return new String(input, 3, length - 3, StandardCharsets.UTF_8);
  }

  // Two-byte UTF-16 markers, big endian then little endian.
  if (length >= 2) {
    if (input[0] == X_FE && input[1] == X_FF) {
      return new String(input, 2, length - 2, StandardCharsets.UTF_16BE);
    }
    if (input[0] == X_FF && input[1] == X_FE) {
      return new String(input, 2, length - 2, StandardCharsets.UTF_16LE);
    }
  }

  // No marker recognised.
  return new String(input, StandardCharsets.UTF_8);
}

Source File: NTLM.java From Bytecoder with Apache License 2.0

5 votes

/**
 * Reads the security buffer at {@code offset} and decodes it as text.
 *
 * @param offset the offset passed on to the raw security buffer reader
 * @param unicode {@code true} to decode as UTF-16LE, {@code false} for ISO-8859-1
 * @return the decoded text, or {@code null} when the raw buffer is {@code null}
 * @throws NTLMException declared by this method; presumably raised by the
 *         raw security buffer reader
 */
String readSecurityBuffer(int offset, boolean unicode)
        throws NTLMException {
    final byte[] content = readSecurityBuffer(offset);
    if (content == null) {
        return null;
    }
    if (unicode) {
        return new String(content, StandardCharsets.UTF_16LE);
    }
    return new String(content, StandardCharsets.ISO_8859_1);
}

Source File: RpcInputStreamTest.java From p4ic4idea with Apache License 2.0

5 votes

/**
 * Reads a UTF-16LE fixture from the classpath through {@code RpcInputStream}
 * and checks the number of bytes returned by a single read.
 */
@Test
public void testReadUtf16LEWithBomAndUnixLineEnding() throws IOException, FileEncoderException {
    mockFileName = loadFileFromClassPath(
            "com/perforce/p4java/common/io/utf-16le_with_bom_unix_line_ending_ko.txt")
                    .getPath();
    file = new RpcPerforceFile(mockFileName, RpcPerforceFileType.FST_UTF16);
    rpcInputStream = new RpcInputStream(file, StandardCharsets.UTF_16LE);

    // Expected byte count for this fixture (named "excluding BOM" in the original test).
    final int expectedLength = 343;
    final int requested = 1000;
    final byte[] sink = new byte[requested + 1];

    final int actualLength = rpcInputStream.read(sink, 0, requested);
    assertThat(actualLength, is(expectedLength));
}

Source File: RpcInputStreamTest.java From p4ic4idea with Apache License 2.0

5 votes

/**
 * Extracts a UTF-16LE fixture into the temporary directory, reads it through
 * {@code RpcInputStream} and checks the number of bytes returned by a single read.
 */
@Test
public void testReadUtf16LEWithBomAndUnixLineEnding() throws IOException, FileEncoderException {
  // Copy the fixture out of the classpath into the temporary directory.
  File fixture = tmpDir.newFile("utf-16le_with_bom_unix_line_ending_ko.txt");
  P4ExtFileUtils.extractResource(this,
          "com/perforce/p4java/common/io/utf-16le_with_bom_unix_line_ending_ko.txt", fixture, false);
  mockFileName = fixture.getAbsolutePath();

  file = new RpcPerforceFile(mockFileName, RpcPerforceFileType.FST_UTF16);
  rpcInputStream = new RpcInputStream(file, StandardCharsets.UTF_16LE);

  // Expected byte count for this fixture (named "excluding BOM" in the original test).
  final int expectedLength = 343;
  final int requested = 1000;
  final byte[] sink = new byte[requested + 1];

  final int actualLength = rpcInputStream.read(sink, 0, requested);
  assertThat(actualLength, is(expectedLength));
}

Source File: SimpleStringSchemaTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Checks that a schema configured with a non-default charset keeps that
 * charset after being copied via {@code CommonTestUtils.createCopySerializable}.
 */
@Test
public void testSerializability() throws Exception {
	final SimpleStringSchema original = new SimpleStringSchema(StandardCharsets.UTF_16LE);

	final SimpleStringSchema roundTripped = CommonTestUtils.createCopySerializable(original);

	assertEquals(original.getCharset(), roundTripped.getCharset());
}

Source File: SecureStorageWindowsManager.java From snowflake-jdbc with Apache License 2.0

4 votes

/**
 * Looks up a stored credential for the given host and user via
 * {@code advapi32Lib.CredReadW} and returns its blob decoded as UTF-16LE text.
 *
 * @param host the host used to build the credential target name
 * @param user the user used to build the credential target name
 * @return the decoded credential, or {@code null} when the read fails, the
 *         credential is not of type {@code CRED_TYPE_GENERIC}, or its blob is empty
 */
public String getCredential(String host, String user)
{
  PointerByReference pCredential = new PointerByReference();
  String target = SecureStorageManager.convertTarget(host, user);

  try
  {
    boolean ret = false;
    // Calls into advapi32Lib are made while holding its monitor.
    synchronized (advapi32Lib)
    {
      ret =
          advapi32Lib.CredReadW(target, SecureStorageWindowsCredentialType.CRED_TYPE_GENERIC.getType(), 0, pCredential);
    }

    if (!ret)
    {
      logger.info(String.format("Failed to read target or could not find it in Windows Credential Manager. Error code = %d", Native.getLastError()));
      return null;
    }

    logger.debug("Found the token from Windows Credential Manager and now copying it");

    // Wrap the pointer filled in by CredReadW so its fields can be read.
    SecureStorageWindowsCredential cred = new SecureStorageWindowsCredential(pCredential.getValue());

    if (SecureStorageWindowsCredentialType.typeOf(cred.Type) != SecureStorageWindowsCredentialType.CRED_TYPE_GENERIC)
    {
      logger.info("Wrong type of credential. Expected: CRED_TYPE_GENERIC");
      return null;
    }

    if (cred.CredentialBlobSize == 0)
    {
      logger.info("Returned credential is empty");
      return null;
    }

    // Copy the blob into a Java array before the finally block frees the credential.
    byte[] credBytes = cred.CredentialBlob.getByteArray(0, cred.CredentialBlobSize);
    String res = new String(credBytes, StandardCharsets.UTF_16LE);
    logger.debug("Successfully read the token. Will return it as String now");
    return res;
  }
  finally
  {
    // Runs on every exit path, including the early null returns above:
    // free the credential if CredReadW produced one.
    if (pCredential.getValue() != null)
    {
      synchronized (advapi32Lib)
      {
        advapi32Lib.CredFree(pCredential.getValue());
      }
    }
  }
}

Source File: XMLCharsetDeterminator.java From ph-commons with Apache License 2.0

4 votes

/**
 * Determine the charset of an XML document from its bytes.
 * <p>
 * A byte order mark is looked for first. If it yields no charset, the bytes
 * following any BOM are compared against a fixed list of known patterns.
 * UTF-8 is the fallback. The resulting charset is then used to parse the XML
 * encoding from the bytes.
 *
 * @param aBytes
 *        XML byte representation
 * @return <code>null</code> if no charset was found. In that case you might
 *         want to try UTF-8 as the fallback.
 */
@Nullable
public static Charset determineXMLCharset (@Nonnull final byte [] aBytes)
{
  ValueEnforcer.notNull (aBytes, "Bytes");

  final int nTotalBytes = aBytes.length;
  Charset aDetected = null;
  int nStartOfs = 0;

  if (nTotalBytes > 0)
  {
    // Only the leading bytes are relevant for BOM detection (at most the
    // longest BOM)
    final int nBOMProbeLen = Math.min (EUnicodeBOM.getMaximumByteCount (), nTotalBytes);
    try (NonBlockingByteArrayInputStream aIS = new NonBlockingByteArrayInputStream (aBytes, 0, nBOMProbeLen))
    {
      final InputStreamAndCharset aISC = CharsetHelper.getInputStreamAndCharsetFromBOM (aIS);

      // A BOM does not necessarily identify a unique charset, but its bytes
      // are skipped in any case
      if (aISC.hasBOM ())
      {
        nStartOfs = aISC.getBOM ().getByteCount ();
      }

      // The BOM identified a unique charset
      if (aISC.hasCharset ())
      {
        aDetected = aISC.getCharset ();
      }
    }
  }

  // Still undecided and at least 4 bytes left: try the known byte patterns
  // in order, first match wins
  if (aDetected == null && nTotalBytes - nStartOfs >= 4)
  {
    if (_match (aBytes, nStartOfs, CS_UTF32_BE))
    {
      aDetected = CHARSET_UTF_32BE;
    }
    else if (_match (aBytes, nStartOfs, CS_UTF32_LE))
    {
      aDetected = CHARSET_UTF_32LE;
    }
    else if (_match (aBytes, nStartOfs, CS_UTF16_BE))
    {
      aDetected = StandardCharsets.UTF_16BE;
    }
    else if (_match (aBytes, nStartOfs, CS_UTF16_LE))
    {
      aDetected = StandardCharsets.UTF_16LE;
    }
    else if (_match (aBytes, nStartOfs, CS_UTF8))
    {
      aDetected = StandardCharsets.UTF_8;
    }
    else if (_match (aBytes, nStartOfs, CS_EBCDIC))
    {
      aDetected = CHARSET_EBCDIC;
    }
    else if (_match (aBytes, nStartOfs, CS_IBM290))
    {
      aDetected = CHARSET_IBM290;
    }
  }

  // Nothing matched: use the fallback charset for parsing
  final Charset aParseCharset = aDetected != null ? aDetected : FALLBACK_CHARSET;

  // Now read with a reader
  return _parseXMLEncoding (aBytes, nStartOfs, aParseCharset);
}

Source File: DefaultServlet.java From Tomcat8-Source-Read with MIT License

4 votes

/**
 * Inspects up to the first four bytes of the stream for a Unicode byte
 * order mark and reports the charset it denotes.
 * <p>
 * The stream is marked before reading, so it must support
 * {@code mark}; the {@code skip} helper is then called with the BOM
 * length (or 0 when nothing matched).
 * NOTE(review): presumably {@code skip(is, n)} resets to the mark and then
 * skips {@code n} bytes -- confirm against its definition.
 *
 * @param is the stream to inspect
 * @return UTF-16BE, UTF-16LE, UTF-8, UTF-32BE or UTF-32LE according to the
 *         BOM found, or {@code null} if no BOM was recognised
 * @throws IOException if reading from the stream fails
 */
private static Charset processBom(InputStream is) throws IOException {
    // Java supported character sets do not use BOMs longer than 4 bytes
    byte[] bom = new byte[4];
    is.mark(bom.length);

    // A single read is issued; count may be less than 4 (or -1 at end of stream).
    int count = is.read(bom);

    // BOMs are at least 2 bytes
    if (count < 2) {
        skip(is, 0);
        return null;
    }

    // Look for two byte BOMs
    // Mask to 0..255 so the bytes can be compared with unsigned hex literals.
    int b0 = bom[0] & 0xFF;
    int b1 = bom[1] & 0xFF;
    if (b0 == 0xFE && b1 == 0xFF) {
        skip(is, 2);
        return StandardCharsets.UTF_16BE;
    }
    // Delay the UTF_16LE check if there are more than 2 bytes since it
    // overlaps with UTF-32LE.
    if (count == 2 && b0 == 0xFF && b1 == 0xFE) {
        skip(is, 2);
        return StandardCharsets.UTF_16LE;
    }

    // Remaining BOMs are at least 3 bytes
    if (count < 3) {
        skip(is, 0);
        return null;
    }

    // UTF-8 is only 3-byte BOM
    int b2 = bom[2] & 0xFF;
    if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
        skip(is, 3);
        return StandardCharsets.UTF_8;
    }

    if (count < 4) {
        skip(is, 0);
        return null;
    }

    // Look for 4-byte BOMs
    // NOTE(review): no skip() call on these two branches; presumably because
    // all four bytes read are the BOM, leaving the stream already positioned
    // just past it -- confirm against skip().
    int b3 = bom[3] & 0xFF;
    if (b0 == 0x00 && b1 == 0x00 && b2 == 0xFE && b3 == 0xFF) {
        return Charset.forName("UTF-32BE");
    }
    if (b0 == 0xFF && b1 == 0xFE && b2 == 0x00 && b3 == 0x00) {
        return Charset.forName("UTF-32LE");
    }

    // Now we can check for UTF16-LE. There is an assumption here that we
    // won't see a UTF16-LE file with a BOM where the first real data is
    // 0x00 0x00
    if (b0 == 0xFF && b1 == 0xFE) {
        skip(is, 2);
        return StandardCharsets.UTF_16LE;
    }

    // No BOM recognised.
    skip(is, 0);
    return null;
}

Java Code Examples for java.nio.charset.StandardCharsets#UTF_16LE