java.nio.charset.CharsetEncoder#maxBytesPerChar

Source File: StringUtils.java From tajo with Apache License 2.0

6 votes

/**
 * Encodes a char array into bytes using the given charset.
 *
 * <p>Malformed input and unmappable characters are replaced with the
 * encoder's replacement bytes instead of raising an error.
 *
 * @param src     the characters to encode; an empty array yields an empty result
 * @param charset the charset used to create the encoder
 * @return a byte array containing exactly the bytes produced by the encoder
 */
public static byte[] convertCharsToBytes(char[] src, Charset charset) {
  CharsetEncoder encoder = charset.newEncoder();
  // Size the worst case in double precision and round up: float arithmetic
  // cannot represent every int above 2^24 and could undersize the buffer.
  int capacity = (int) Math.ceil((double) src.length * encoder.maxBytesPerChar());
  byte[] resultArray = new byte[capacity];

  if (src.length == 0) {
    return resultArray;
  }

  encoder.onMalformedInput(CodingErrorAction.REPLACE)
      .onUnmappableCharacter(CodingErrorAction.REPLACE);
  encoder.reset();

  CharBuffer charBuffer = CharBuffer.wrap(src);
  ByteBuffer byteBuffer = ByteBuffer.wrap(resultArray);

  CoderResult coderResult = encoder.encode(charBuffer, byteBuffer, true);
  if (coderResult.isUnderflow()) {
    encoder.flush(byteBuffer);
  }

  // Always trim to what was actually written so the caller never sees
  // unused zero padding, whatever the coder result was.
  int written = byteBuffer.position();
  return written == resultArray.length ? resultArray : Arrays.copyOf(resultArray, written);
}

Source File: ZipCoder.java From openjdk-jdk8u-backup with GNU General Public License v2.0

6 votes

/**
 * Encodes {@code s} with the encoder obtained from {@code encoder()}.
 *
 * @param s the string to encode
 * @return the encoded bytes, sized to exactly what the encoder wrote
 * @throws IllegalArgumentException if encoding or flushing does not end in underflow
 */
byte[] getBytes(String s) {
    final CharsetEncoder enc = encoder().reset();
    final char[] chars = s.toCharArray();
    // Worst-case number of output bytes for this many chars.
    final int capacity = (int)(chars.length * enc.maxBytesPerChar());
    final byte[] out = new byte[capacity];
    if (capacity != 0) {
        final ByteBuffer dst = ByteBuffer.wrap(out);
        CoderResult result = enc.encode(CharBuffer.wrap(chars), dst, true);
        if (result.isUnderflow()) {
            result = enc.flush(dst);
        }
        if (!result.isUnderflow()) {
            throw new IllegalArgumentException(result.toString());
        }
        final int written = dst.position();
        // Hand back the backing array itself when it was filled completely.
        return written == out.length ? out : Arrays.copyOf(out, written);
    }
    return out;
}

Source File: ZipCoder.java From TencentKona-8 with GNU General Public License v2.0

6 votes

/**
 * Encodes {@code s} with the encoder obtained from {@code encoder()}.
 *
 * @param s the string to encode
 * @return the encoded bytes, sized to exactly what the encoder wrote
 * @throws IllegalArgumentException if encoding or flushing does not end in underflow
 */
byte[] getBytes(String s) {
    final CharsetEncoder enc = encoder().reset();
    final char[] chars = s.toCharArray();
    // Worst-case number of output bytes for this many chars.
    final int capacity = (int)(chars.length * enc.maxBytesPerChar());
    final byte[] out = new byte[capacity];
    if (capacity != 0) {
        final ByteBuffer dst = ByteBuffer.wrap(out);
        CoderResult result = enc.encode(CharBuffer.wrap(chars), dst, true);
        if (result.isUnderflow()) {
            result = enc.flush(dst);
        }
        if (!result.isUnderflow()) {
            throw new IllegalArgumentException(result.toString());
        }
        final int written = dst.position();
        // Hand back the backing array itself when it was filled completely.
        return written == out.length ? out : Arrays.copyOf(out, written);
    }
    return out;
}

Source File: ZipCoder.java From jdk8u-dev-jdk with GNU General Public License v2.0

6 votes

/**
 * Encodes {@code s} with the encoder obtained from {@code encoder()}.
 *
 * @param s the string to encode
 * @return the encoded bytes, sized to exactly what the encoder wrote
 * @throws IllegalArgumentException if encoding or flushing does not end in underflow
 */
byte[] getBytes(String s) {
    final CharsetEncoder enc = encoder().reset();
    final char[] chars = s.toCharArray();
    // Worst-case number of output bytes for this many chars.
    final int capacity = (int)(chars.length * enc.maxBytesPerChar());
    final byte[] out = new byte[capacity];
    if (capacity != 0) {
        final ByteBuffer dst = ByteBuffer.wrap(out);
        CoderResult result = enc.encode(CharBuffer.wrap(chars), dst, true);
        if (result.isUnderflow()) {
            result = enc.flush(dst);
        }
        if (!result.isUnderflow()) {
            throw new IllegalArgumentException(result.toString());
        }
        final int written = dst.position();
        // Hand back the backing array itself when it was filled completely.
        return written == out.length ? out : Arrays.copyOf(out, written);
    }
    return out;
}

Source File: ZipCoder.java From jdk8u_jdk with GNU General Public License v2.0

6 votes

/**
 * Encodes {@code s} with the encoder obtained from {@code encoder()}.
 *
 * @param s the string to encode
 * @return the encoded bytes, sized to exactly what the encoder wrote
 * @throws IllegalArgumentException if encoding or flushing does not end in underflow
 */
byte[] getBytes(String s) {
    final CharsetEncoder enc = encoder().reset();
    final char[] chars = s.toCharArray();
    // Worst-case number of output bytes for this many chars.
    final int capacity = (int)(chars.length * enc.maxBytesPerChar());
    final byte[] out = new byte[capacity];
    if (capacity != 0) {
        final ByteBuffer dst = ByteBuffer.wrap(out);
        CoderResult result = enc.encode(CharBuffer.wrap(chars), dst, true);
        if (result.isUnderflow()) {
            result = enc.flush(dst);
        }
        if (!result.isUnderflow()) {
            throw new IllegalArgumentException(result.toString());
        }
        final int written = dst.position();
        // Hand back the backing array itself when it was filled completely.
        return written == out.length ? out : Arrays.copyOf(out, written);
    }
    return out;
}

Source File: ZipCoder.java From jdk8u60 with GNU General Public License v2.0

6 votes

/**
 * Encodes {@code s} with the encoder obtained from {@code encoder()}.
 *
 * @param s the string to encode
 * @return the encoded bytes, sized to exactly what the encoder wrote
 * @throws IllegalArgumentException if encoding or flushing does not end in underflow
 */
byte[] getBytes(String s) {
    final CharsetEncoder enc = encoder().reset();
    final char[] chars = s.toCharArray();
    // Worst-case number of output bytes for this many chars.
    final int capacity = (int)(chars.length * enc.maxBytesPerChar());
    final byte[] out = new byte[capacity];
    if (capacity != 0) {
        final ByteBuffer dst = ByteBuffer.wrap(out);
        CoderResult result = enc.encode(CharBuffer.wrap(chars), dst, true);
        if (result.isUnderflow()) {
            result = enc.flush(dst);
        }
        if (!result.isUnderflow()) {
            throw new IllegalArgumentException(result.toString());
        }
        final int written = dst.position();
        // Hand back the backing array itself when it was filled completely.
        return written == out.length ? out : Arrays.copyOf(out, written);
    }
    return out;
}

Source File: ZipCoder.java From jdk8u-jdk with GNU General Public License v2.0

6 votes

/**
 * Encodes {@code s} with the encoder obtained from {@code encoder()}.
 *
 * @param s the string to encode
 * @return the encoded bytes, sized to exactly what the encoder wrote
 * @throws IllegalArgumentException if encoding or flushing does not end in underflow
 */
byte[] getBytes(String s) {
    final CharsetEncoder enc = encoder().reset();
    final char[] chars = s.toCharArray();
    // Worst-case number of output bytes for this many chars.
    final int capacity = (int)(chars.length * enc.maxBytesPerChar());
    final byte[] out = new byte[capacity];
    if (capacity != 0) {
        final ByteBuffer dst = ByteBuffer.wrap(out);
        CoderResult result = enc.encode(CharBuffer.wrap(chars), dst, true);
        if (result.isUnderflow()) {
            result = enc.flush(dst);
        }
        if (!result.isUnderflow()) {
            throw new IllegalArgumentException(result.toString());
        }
        final int written = dst.position();
        // Hand back the backing array itself when it was filled completely.
        return written == out.length ? out : Arrays.copyOf(out, written);
    }
    return out;
}

Source File: ZipCoder.java From JDKSourceCode1.8 with MIT License

5 votes

/**
 * Encodes {@code s} with the encoder obtained from {@code encoder()}, taking
 * an array-based path when {@code isUTF8} is set and the encoder is an
 * {@code ArrayEncoder}.
 *
 * @param s the string to encode
 * @return the encoded bytes, sized to exactly what the encoder wrote
 * @throws IllegalArgumentException if the array encoder reports {@code -1},
 *         or if buffer encoding or flushing does not end in underflow
 */
byte[] getBytes(String s) {
    final CharsetEncoder enc = encoder().reset();
    final char[] chars = s.toCharArray();
    // Worst-case number of output bytes for this many chars.
    final int capacity = (int)(chars.length * enc.maxBytesPerChar());
    final byte[] out = new byte[capacity];
    if (capacity == 0) {
        return out;
    }
    // Array path is limited to UTF-8 for now; other array coders only
    // handle CodingErrorAction.REPLACE mode.
    if (isUTF8 && enc instanceof ArrayEncoder) {
        final int encoded = ((ArrayEncoder)enc).encode(chars, 0, chars.length, out);
        if (encoded == -1) {   // malformed
            throw new IllegalArgumentException("MALFORMED");
        }
        return Arrays.copyOf(out, encoded);
    }
    final ByteBuffer dst = ByteBuffer.wrap(out);
    CoderResult result = enc.encode(CharBuffer.wrap(chars), dst, true);
    if (result.isUnderflow()) {
        result = enc.flush(dst);
    }
    if (!result.isUnderflow()) {
        throw new IllegalArgumentException(result.toString());
    }
    final int written = dst.position();
    // Hand back the backing array itself when it was filled completely.
    return written == out.length ? out : Arrays.copyOf(out, written);
}

Source File: ByteBufUtil.java From netty4.0.27Learn with Apache License 2.0

5 votes

/**
 * Encodes the remaining characters of {@code src} into a newly allocated buffer.
 *
 * @param alloc       allocator used to obtain the destination buffer
 * @param enforceHeap when {@code true} the buffer comes from {@code heapBuffer},
 *                    otherwise from {@code buffer}
 * @param src         the characters to encode
 * @param charset     the charset whose encoder is used
 * @return the destination buffer with its writer index advanced by the bytes written
 * @throws IllegalStateException if encoding raises a {@link CharacterCodingException}
 */
static ByteBuf encodeString0(ByteBufAllocator alloc, boolean enforceHeap, CharBuffer src, Charset charset) {
    final CharsetEncoder encoder = CharsetUtil.getEncoder(charset);
    // Worst-case size of the encoded output.
    final int capacity = (int) ((double) src.remaining() * encoder.maxBytesPerChar());
    final ByteBuf dst = enforceHeap ? alloc.heapBuffer(capacity) : alloc.buffer(capacity);
    boolean succeeded = false;
    try {
        final ByteBuffer nioDst = dst.internalNioBuffer(0, capacity);
        final int startPos = nioDst.position();
        CoderResult result = encoder.encode(src, nioDst, true);
        if (!result.isUnderflow()) {
            result.throwException();
        }
        result = encoder.flush(nioDst);
        if (!result.isUnderflow()) {
            result.throwException();
        }
        final int produced = nioDst.position() - startPos;
        dst.writerIndex(dst.writerIndex() + produced);
        succeeded = true;
        return dst;
    } catch (CharacterCodingException e) {
        throw new IllegalStateException(e);
    } finally {
        // Give the buffer back on any failure path.
        if (!succeeded) {
            dst.release();
        }
    }
}

Source File: LogHandlerAccessor.java From trufflesqueak with MIT License

5 votes

/**
 * Writes the record's message, UTF-8 encoded and followed by a line-feed
 * byte, into {@code buffer}. Records without a message are ignored. When the
 * worst-case size of the write would reach {@code GIG}, the handler is closed
 * and the mapped buffer is initialized again before writing.
 */
@Override
public void publish(final LogRecord record) {
    final String text = record.getMessage();
    if (text != null) {
        final CharsetEncoder utf8Encoder = ThreadLocalCoders.encoderFor(StandardCharsets.UTF_8);
        // Roll over before the write if the worst case would not fit.
        if (buffer.position() + 1 + text.length() * utf8Encoder.maxBytesPerChar() >= GIG) {
            close();
            initializeMappedBuffer();
        }
        final CharBuffer chars = CharBuffer.wrap(text);
        utf8Encoder.encode(chars, buffer, true);
        utf8Encoder.flush(buffer);
        buffer.put((byte) '\n');
    }
}

Source File: DataBuffer.java From spring-analysis-note with MIT License

5 votes

/**
 * Encode the given {@code CharSequence} with the given {@code Charset} and
 * append the resulting bytes at the current writing position. Malformed
 * input and unmappable characters are replaced rather than reported.
 * @param charSequence the char sequence to write into this buffer
 * @param charset the charset to encode the char sequence with
 * @return this buffer
 * @since 5.1.4
 */
default DataBuffer write(CharSequence charSequence, Charset charset) {
	Assert.notNull(charSequence, "CharSequence must not be null");
	Assert.notNull(charset, "Charset must not be null");
	if (charSequence.length() == 0) {
		return this;
	}
	CharsetEncoder encoder = charset.newEncoder()
			.onMalformedInput(CodingErrorAction.REPLACE)
			.onUnmappableCharacter(CodingErrorAction.REPLACE);
	CharBuffer source = CharBuffer.wrap(charSequence);
	// Start from the average size; grow to the worst case only on overflow.
	int initialGuess = (int) (source.remaining() * encoder.averageBytesPerChar());
	ByteBuffer target = ensureCapacity(initialGuess)
			.asByteBuffer(writePosition(), writableByteCount());
	for (;;) {
		CoderResult result;
		if (source.hasRemaining()) {
			result = encoder.encode(source, target, true);
		}
		else {
			result = CoderResult.UNDERFLOW;
		}
		if (result.isUnderflow()) {
			result = encoder.flush(target);
		}
		if (result.isUnderflow()) {
			break;
		}
		if (result.isOverflow()) {
			// Commit what was written so far, then make room for the rest.
			writePosition(writePosition() + target.position());
			int worstCase = (int) (source.remaining() * encoder.maxBytesPerChar());
			ensureCapacity(worstCase);
			target = asByteBuffer(writePosition(), writableByteCount());
		}
	}
	writePosition(writePosition() + target.position());
	return this;
}

Source File: ZipCoder.java From Java8CN with Apache License 2.0

5 votes

/**
 * Encodes {@code s} with the encoder obtained from {@code encoder()}, taking
 * an array-based path when {@code isUTF8} is set and the encoder is an
 * {@code ArrayEncoder}.
 *
 * @param s the string to encode
 * @return the encoded bytes, sized to exactly what the encoder wrote
 * @throws IllegalArgumentException if the array encoder reports {@code -1},
 *         or if buffer encoding or flushing does not end in underflow
 */
byte[] getBytes(String s) {
    final CharsetEncoder enc = encoder().reset();
    final char[] chars = s.toCharArray();
    // Worst-case number of output bytes for this many chars.
    final int capacity = (int)(chars.length * enc.maxBytesPerChar());
    final byte[] out = new byte[capacity];
    if (capacity == 0) {
        return out;
    }
    // Array path is limited to UTF-8 for now; other array coders only
    // handle CodingErrorAction.REPLACE mode.
    if (isUTF8 && enc instanceof ArrayEncoder) {
        final int encoded = ((ArrayEncoder)enc).encode(chars, 0, chars.length, out);
        if (encoded == -1) {   // malformed
            throw new IllegalArgumentException("MALFORMED");
        }
        return Arrays.copyOf(out, encoded);
    }
    final ByteBuffer dst = ByteBuffer.wrap(out);
    CoderResult result = enc.encode(CharBuffer.wrap(chars), dst, true);
    if (result.isUnderflow()) {
        result = enc.flush(dst);
    }
    if (!result.isUnderflow()) {
        throw new IllegalArgumentException(result.toString());
    }
    final int written = dst.position();
    // Hand back the backing array itself when it was filled completely.
    return written == out.length ? out : Arrays.copyOf(out, written);
}

Source File: DataBuffer.java From java-technology-stack with MIT License

5 votes

/**
 * Write the given {@code CharSequence} using the given {@code Charset},
 * starting at the current writing position. Malformed input and unmappable
 * characters are replaced rather than reported.
 * @param charSequence the char sequence to write into this buffer
 * @param charset the charset to encode the char sequence with
 * @return this buffer
 * @since 5.1.4
 */
default DataBuffer write(CharSequence charSequence, Charset charset) {
	Assert.notNull(charSequence, "CharSequence must not be null");
	Assert.notNull(charset, "Charset must not be null");
	if (charSequence.length() != 0) {
		CharsetEncoder charsetEncoder = charset.newEncoder()
				.onMalformedInput(CodingErrorAction.REPLACE)
				.onUnmappableCharacter(CodingErrorAction.REPLACE);
		CharBuffer inBuffer = CharBuffer.wrap(charSequence);
		// Start from the average size; grow to the worst case only on overflow.
		int estimatedSize = (int) (inBuffer.remaining() * charsetEncoder.averageBytesPerChar());
		ByteBuffer outBuffer = ensureCapacity(estimatedSize)
				.asByteBuffer(writePosition(), writableByteCount());
		while (true) {
			CoderResult cr = (inBuffer.hasRemaining() ?
					charsetEncoder.encode(inBuffer, outBuffer, true) : CoderResult.UNDERFLOW);
			if (cr.isUnderflow()) {
				cr = charsetEncoder.flush(outBuffer);
			}
			if (cr.isUnderflow()) {
				break;
			}
			if (cr.isOverflow()) {
				// outBuffer was obtained starting at the write position, so its
				// position counts bytes written since then: advance the write
				// position by that amount instead of overwriting it.
				writePosition(writePosition() + outBuffer.position());
				int maximumSize = (int) (inBuffer.remaining() * charsetEncoder.maxBytesPerChar());
				ensureCapacity(maximumSize);
				outBuffer = asByteBuffer(writePosition(), writableByteCount());
			}
		}
		// Same reasoning as above: the position is relative to the slice start.
		writePosition(writePosition() + outBuffer.position());
	}
	return this;
}

Source File: ZipCoder.java From jdk8u_jdk with GNU General Public License v2.0

5 votes

/**
 * Encodes {@code s} with the encoder obtained from {@code encoder()}, taking
 * an array-based path when {@code isUTF8} is set and the encoder is an
 * {@code ArrayEncoder}.
 *
 * @param s the string to encode
 * @return the encoded bytes, sized to exactly what the encoder wrote
 * @throws IllegalArgumentException if the array encoder reports {@code -1},
 *         or if buffer encoding or flushing does not end in underflow
 */
byte[] getBytes(String s) {
    final CharsetEncoder enc = encoder().reset();
    final char[] chars = s.toCharArray();
    // Worst-case number of output bytes for this many chars.
    final int capacity = (int)(chars.length * enc.maxBytesPerChar());
    final byte[] out = new byte[capacity];
    if (capacity == 0) {
        return out;
    }
    // Array path is limited to UTF-8 for now; other array coders only
    // handle CodingErrorAction.REPLACE mode.
    if (isUTF8 && enc instanceof ArrayEncoder) {
        final int encoded = ((ArrayEncoder)enc).encode(chars, 0, chars.length, out);
        if (encoded == -1) {   // malformed
            throw new IllegalArgumentException("MALFORMED");
        }
        return Arrays.copyOf(out, encoded);
    }
    final ByteBuffer dst = ByteBuffer.wrap(out);
    CoderResult result = enc.encode(CharBuffer.wrap(chars), dst, true);
    if (result.isUnderflow()) {
        result = enc.flush(dst);
    }
    if (!result.isUnderflow()) {
        throw new IllegalArgumentException(result.toString());
    }
    final int written = dst.position();
    // Hand back the backing array itself when it was filled completely.
    return written == out.length ? out : Arrays.copyOf(out, written);
}

Source File: ZipCoder.java From jdk8u-dev-jdk with GNU General Public License v2.0

5 votes

/**
 * Encodes {@code s} with the encoder obtained from {@code encoder()}, taking
 * an array-based path when {@code isUTF8} is set and the encoder is an
 * {@code ArrayEncoder}.
 *
 * @param s the string to encode
 * @return the encoded bytes, sized to exactly what the encoder wrote
 * @throws IllegalArgumentException if the array encoder reports {@code -1},
 *         or if buffer encoding or flushing does not end in underflow
 */
byte[] getBytes(String s) {
    final CharsetEncoder enc = encoder().reset();
    final char[] chars = s.toCharArray();
    // Worst-case number of output bytes for this many chars.
    final int capacity = (int)(chars.length * enc.maxBytesPerChar());
    final byte[] out = new byte[capacity];
    if (capacity == 0) {
        return out;
    }
    // Array path is limited to UTF-8 for now; other array coders only
    // handle CodingErrorAction.REPLACE mode.
    if (isUTF8 && enc instanceof ArrayEncoder) {
        final int encoded = ((ArrayEncoder)enc).encode(chars, 0, chars.length, out);
        if (encoded == -1) {   // malformed
            throw new IllegalArgumentException("MALFORMED");
        }
        return Arrays.copyOf(out, encoded);
    }
    final ByteBuffer dst = ByteBuffer.wrap(out);
    CoderResult result = enc.encode(CharBuffer.wrap(chars), dst, true);
    if (result.isUnderflow()) {
        result = enc.flush(dst);
    }
    if (!result.isUnderflow()) {
        throw new IllegalArgumentException(result.toString());
    }
    final int written = dst.position();
    // Hand back the backing array itself when it was filled completely.
    return written == out.length ? out : Arrays.copyOf(out, written);
}

Source File: ReversedLinesFileReader.java From aion-germany with GNU General Public License v3.0

4 votes

/**
 * Creates a ReversedLinesFileReader with the given block size and encoding.
 * <p>
 * The encoding is validated before the file is opened, so an unsupported
 * encoding does not leave an open file handle behind.
 *
 * @param file
 *            the file to be read
 * @param blockSize
 *            size of the internal buffer (for ideal performance this should
 *            match with the block size of the underlying file system).
 * @param encoding
 *            the encoding of the file
 * @throws IOException  if an I/O error occurs
 * @throws UnsupportedEncodingException  if the encoding is UTF-16 without an
 *            explicit byte order, or is otherwise not handled here
 * @since 2.3
 */
public ReversedLinesFileReader(final File file, final int blockSize, final Charset encoding) throws IOException {
    this.blockSize = blockSize;
    this.encoding = encoding;

    // --- check & prepare encoding ---
    // Must run before the RandomAccessFile is created: the throws below would
    // otherwise leak the already opened file.
    Charset charset = Charsets.toCharset(encoding);
    CharsetEncoder charsetEncoder = charset.newEncoder();
    float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
    if(maxBytesPerChar==1f) {
        // all one byte encodings are no problem
        byteDecrement = 1;
    } else if(charset == Charset.forName("UTF-8")) {
        // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte
        // http://en.wikipedia.org/wiki/UTF-8
        byteDecrement = 1;
    } else if(charset == Charset.forName("Shift_JIS")) {
        // Same as for UTF-8
        // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
        byteDecrement = 1;
    } else if(charset == Charset.forName("UTF-16BE") || charset == Charset.forName("UTF-16LE")) {
        // UTF-16 new line sequences are not allowed as second tuple of four byte sequences,
        // however byte order has to be specified
        byteDecrement = 2;
    } else if(charset == Charset.forName("UTF-16")) {
        throw new UnsupportedEncodingException(
                "For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)");
    } else {
        throw new UnsupportedEncodingException(
                "Encoding "+encoding+" is not supported yet (feel free to submit a patch)");
    }
    // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n
    newLineSequences = new byte[][] { "\r\n".getBytes(encoding), "\n".getBytes(encoding), "\r".getBytes(encoding) };

    avoidNewlineSplitBufferSize = newLineSequences[0].length;

    // --- open file and locate the last block ---
    randomAccessFile = new RandomAccessFile(file, "r");
    totalByteLength = randomAccessFile.length();
    int lastBlockLength = (int) (totalByteLength % blockSize);
    if (lastBlockLength > 0) {
        // a partial trailing block exists
        totalBlockCount = totalByteLength / blockSize + 1;
    } else {
        totalBlockCount = totalByteLength / blockSize;
        if (totalByteLength > 0) {
            // file length is an exact multiple: the last block is a full one
            lastBlockLength = blockSize;
        }
    }
    currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
}

Source File: GridReversedLinesFileReader.java From ignite with Apache License 2.0

4 votes

/**
 * Creates a ReverseLineReader with the given block size and encoding.
 * <p>
 * The encoding is validated before the file is opened, so an unsupported
 * encoding does not leave an open file handle behind.
 *
 * @param file
 *            the file to be read
 * @param blockSize
 *            size of the internal buffer (for ideal performance this should
 *            match with the block size of the underlying file system).
 * @param charset
 *            the encoding of the file
 * @throws IOException  if an I/O error occurs
 * @throws UnsupportedEncodingException  if the encoding is UTF-16 without an
 *            explicit byte order, or is otherwise not handled here
 * @since 2.3
 */
public GridReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
    this.blockSize = blockSize;
    this.encoding = charset;

    // --- check & prepare encoding ---
    // Must run before the RandomAccessFile is created: the throws below would
    // otherwise leak the already opened file.
    CharsetEncoder charsetEncoder = charset.newEncoder();
    float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
    if (maxBytesPerChar == 1f) {
        // all one byte encodings are no problem
        byteDecrement = 1;
    } else if (charset == Charset.forName("UTF-8")) {
        // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte
        // http://en.wikipedia.org/wiki/UTF-8
        byteDecrement = 1;
    } else if (charset == Charset.forName("Shift_JIS")) {
        // Same as for UTF-8
        // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
        byteDecrement = 1;
    } else if (charset == Charset.forName("UTF-16BE") || charset == Charset.forName("UTF-16LE")) {
        // UTF-16 new line sequences are not allowed as second tuple of four byte sequences,
        // however byte order has to be specified
        byteDecrement = 2;
    } else if (charset == Charset.forName("UTF-16")) {
        throw new UnsupportedEncodingException(
            "For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)");
    } else {
        throw new UnsupportedEncodingException(
            "Encoding " + charset + " is not supported yet (feel free to submit a patch)");
    }
    // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n
    newLineSequences = new byte[][] {"\r\n".getBytes(charset), "\n".getBytes(charset), "\r".getBytes(charset)};

    avoidNewlineSplitBufferSize = newLineSequences[0].length;

    // --- open file and locate the last block ---
    randomAccessFile = new RandomAccessFile(file, "r");
    totalByteLength = randomAccessFile.length();
    int lastBlockLength = (int) (totalByteLength % blockSize);
    if (lastBlockLength > 0) {
        // a partial trailing block exists
        totalBlockCount = totalByteLength / blockSize + 1;
    } else {
        totalBlockCount = totalByteLength / blockSize;
        if (totalByteLength > 0) {
            // file length is an exact multiple: the last block is a full one
            lastBlockLength = blockSize;
        }
    }
    currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
}

Source File: IsValidUtf8TestUtil.java From travelguide with Apache License 2.0

4 votes

/**
 * Variant of {@link #testBytes} that drives the UTF-8 decoder and encoder
 * directly on reusable buffers to cut down on allocation. Kept because it
 * helps when debugging faster byte processing; since it bypasses the actual
 * String class, incompatibilities could (though unlikely) develop.
 *
 * @param numBytes the number of bytes in the byte array
 * @param expectedCount the expected number of roundtrippable permutations
 * @param start the starting bytes encoded as a long as big-endian
 * @param lim the limit of bytes to process encoded as a long as big-endian,
 *     or -1 to mean the max limit for numBytes
 */
void testBytesUsingByteBuffers(
    int numBytes, long expectedCount, long start, long lim)
    throws UnsupportedEncodingException {
  CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
      .onMalformedInput(CodingErrorAction.REPLACE)
      .onUnmappableCharacter(CodingErrorAction.REPLACE);
  CharsetEncoder encoder = Charset.forName("UTF-8").newEncoder()
      .onMalformedInput(CodingErrorAction.REPLACE)
      .onUnmappableCharacter(CodingErrorAction.REPLACE);

  // Work areas, each sized for the worst case plus one spare slot.
  byte[] bytes = new byte[numBytes];
  int maxChars = (int) (decoder.maxCharsPerByte() * numBytes) + 1;
  char[] charsDecoded = new char[maxChars];
  byte[] bytesReencoded = new byte[(int) (encoder.maxBytesPerChar() * maxChars) + 1];

  ByteBuffer bb = ByteBuffer.wrap(bytes);
  CharBuffer cb = CharBuffer.wrap(charsDecoded);
  ByteBuffer bbReencoded = ByteBuffer.wrap(bytesReencoded);

  long upperBound = (lim == -1) ? 1L << (numBytes * 8) : lim;
  long count = 0;
  long countRoundTripped = 0;

  for (long byteChar = start; byteChar < upperBound; byteChar++) {
    // Reset all buffers to position 0 with their full capacity available.
    bb.clear();
    cb.clear();
    bbReencoded.clear();
    encoder.reset();
    decoder.reset();

    // Spread the counter over the array, most significant byte first.
    long remaining = byteChar;
    for (int idx = bytes.length - 1; idx >= 0; idx--) {
      bytes[idx] = (byte) remaining;
      remaining >>= 8;
    }

    boolean isRoundTrippable = ByteString.copyFrom(bytes).isValidUtf8();

    CoderResult result = decoder.decode(bb, cb, true);
    assertFalse(result.isError());
    result = decoder.flush(cb);
    assertFalse(result.isError());

    // Expose exactly the decoded chars to the encoder.
    cb.flip();
    result = encoder.encode(cb, bbReencoded, true);
    assertFalse(result.isError());
    result = encoder.flush(bbReencoded);
    assertFalse(result.isError());

    int bytesLen = bbReencoded.position();
    boolean bytesEqual = (bytesLen == numBytes);
    for (int i = 0; bytesEqual && i < numBytes; i++) {
      bytesEqual = (bytes[i] == bytesReencoded[i]);
    }
    if (bytesEqual != isRoundTrippable) {
      outputFailure(byteChar, bytes, bytesReencoded, bytesLen);
    }

    count++;
    if (isRoundTrippable) {
      countRoundTripped++;
    }
    if (byteChar != 0 && byteChar % 1000000 == 0) {
      logger.info("Processed " + (byteChar / 1000000) +
          " million characters");
    }
  }
  logger.info("Round tripped " + countRoundTripped + " of " + count);
  assertEquals(expectedCount, countRoundTripped);
}

Source File: ReversedLinesFileReader.java From lams with GNU General Public License v2.0

4 votes

/**
 * Creates a ReversedLinesFileReader with the given block size and encoding.
 * The encoding is checked first; the file is opened only afterwards.
 *
 * @param file
 *            the file to be read
 * @param blockSize
 *            size of the internal buffer (for ideal performance this should
 *            match with the block size of the underlying file system).
 * @param encoding
 *            the encoding of the file
 * @throws IOException  if an I/O error occurs
 * @since 2.3
 */
@SuppressWarnings("deprecation") // unavoidable until Java 7
public ReversedLinesFileReader(final File file, final int blockSize, final Charset encoding) throws IOException {
    this.blockSize = blockSize;
    this.encoding = encoding;

    // --- check & prepare encoding ---
    final Charset cs = Charsets.toCharset(encoding);
    final boolean singleByte = cs.newEncoder().maxBytesPerChar() == 1f;
    if (singleByte // all one byte encodings are no problem
            // UTF-8: for multibyte sequences a second UTF-8 byte can never be a newline byte
            // http://en.wikipedia.org/wiki/UTF-8
            || cs == Charsets.UTF_8
            // Same as for UTF-8
            // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
            || cs == Charset.forName("Shift_JIS")
            || cs == Charset.forName("windows-31j") // Windows code page 932 (Japanese)
            || cs == Charset.forName("x-windows-949") // Windows code page 949 (Korean)
            || cs == Charset.forName("gbk") // Windows code page 936 (Simplified Chinese)
            || cs == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese)
        byteDecrement = 1;
    } else if (cs == Charsets.UTF_16BE || cs == Charsets.UTF_16LE) {
        // UTF-16 new line sequences are not allowed as second tuple of four byte sequences,
        // however byte order has to be specified
        byteDecrement = 2;
    } else if (cs == Charsets.UTF_16) {
        throw new UnsupportedEncodingException(
                "For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)");
    } else {
        throw new UnsupportedEncodingException(
                "Encoding " + encoding + " is not supported yet (feel free to submit a patch)");
    }

    // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n
    final byte[] crLf = "\r\n".getBytes(encoding);
    final byte[] lf = "\n".getBytes(encoding);
    final byte[] cr = "\r".getBytes(encoding);
    newLineSequences = new byte[][] { crLf, lf, cr };

    avoidNewlineSplitBufferSize = newLineSequences[0].length;

    // Open file
    randomAccessFile = new RandomAccessFile(file, "r");
    totalByteLength = randomAccessFile.length();
    int tailLength = (int) (totalByteLength % blockSize);
    if (tailLength > 0) {
        // a partial trailing block exists
        totalBlockCount = totalByteLength / blockSize + 1;
    } else {
        totalBlockCount = totalByteLength / blockSize;
        if (totalByteLength > 0) {
            // exact multiple of the block size: the last block is a full one
            tailLength = blockSize;
        }
    }
    currentFilePart = new FilePart(totalBlockCount, tailLength, null);

}

Source File: PerforceShiftJISCharset.java From p4ic4idea with Apache License 2.0

4 votes

/**
 * Creates an encoder that wraps the given {@code encoder}.
 * <p>
 * The superclass constructor is called with the Charset object and with the
 * average and maximum bytes-per-char values reported by the wrapped encoder;
 * the wrapped encoder itself is then stored in the {@code encoder} field.
 *
 * @param cs the charset passed to the superclass constructor
 * @param encoder the encoder supplying the size values and kept for later use
 */
Encoder(Charset cs, CharsetEncoder encoder) {
	super(cs, encoder.averageBytesPerChar(), encoder.maxBytesPerChar());
	this.encoder = encoder;
}

Java Code Examples for java.nio.charset.CharsetEncoder#maxBytesPerChar()