Java Code Examples for java.nio.charset.CharsetEncoder#maxBytesPerChar()
The following examples show how to use
java.nio.charset.CharsetEncoder#maxBytesPerChar().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: StringUtils.java From tajo with Apache License 2.0 | 6 votes |
/**
 * Encodes a char array into bytes using the given charset.
 *
 * <p>Malformed input and unmappable characters are replaced rather than reported.
 * The output is sized for the worst case ({@code src.length * maxBytesPerChar()})
 * and trimmed to the number of bytes actually written once encoding and flushing
 * both finish with an underflow result.
 *
 * @param src     the characters to encode
 * @param charset the charset supplying the encoder
 * @return the encoded bytes; an empty array when {@code src} is empty
 */
public static byte[] convertCharsToBytes(char[] src, Charset charset) {
    final CharsetEncoder enc = charset.newEncoder();
    final byte[] worstCase = new byte[(int) (src.length * enc.maxBytesPerChar())];
    if (src.length == 0) {
        return worstCase;
    }

    final ByteBuffer sink = ByteBuffer.wrap(worstCase);
    enc.onMalformedInput(CodingErrorAction.REPLACE).onUnmappableCharacter(CodingErrorAction.REPLACE);
    enc.reset();

    CoderResult status = enc.encode(CharBuffer.wrap(src), sink, true);
    if (status.isUnderflow()) {
        status = enc.flush(sink);
    }
    if (!status.isUnderflow()) {
        // Encoding did not complete cleanly: hand back the untrimmed array as-is.
        return worstCase;
    }

    final int written = sink.position();
    return written == worstCase.length ? worstCase : Arrays.copyOf(worstCase, written);
}
Example 2
Source File: ZipCoder.java From openjdk-jdk8u-backup with GNU General Public License v2.0 | 6 votes |
byte[] getBytes(String s) { CharsetEncoder ce = encoder().reset(); char[] ca = s.toCharArray(); int len = (int)(ca.length * ce.maxBytesPerChar()); byte[] ba = new byte[len]; if (len == 0) return ba; ByteBuffer bb = ByteBuffer.wrap(ba); CharBuffer cb = CharBuffer.wrap(ca); CoderResult cr = ce.encode(cb, bb, true); if (!cr.isUnderflow()) throw new IllegalArgumentException(cr.toString()); cr = ce.flush(bb); if (!cr.isUnderflow()) throw new IllegalArgumentException(cr.toString()); if (bb.position() == ba.length) // defensive copy? return ba; else return Arrays.copyOf(ba, bb.position()); }
Example 3
Source File: ZipCoder.java From TencentKona-8 with GNU General Public License v2.0 | 6 votes |
byte[] getBytes(String s) { CharsetEncoder ce = encoder().reset(); char[] ca = s.toCharArray(); int len = (int)(ca.length * ce.maxBytesPerChar()); byte[] ba = new byte[len]; if (len == 0) return ba; ByteBuffer bb = ByteBuffer.wrap(ba); CharBuffer cb = CharBuffer.wrap(ca); CoderResult cr = ce.encode(cb, bb, true); if (!cr.isUnderflow()) throw new IllegalArgumentException(cr.toString()); cr = ce.flush(bb); if (!cr.isUnderflow()) throw new IllegalArgumentException(cr.toString()); if (bb.position() == ba.length) // defensive copy? return ba; else return Arrays.copyOf(ba, bb.position()); }
Example 4
Source File: ZipCoder.java From jdk8u-dev-jdk with GNU General Public License v2.0 | 6 votes |
byte[] getBytes(String s) { CharsetEncoder ce = encoder().reset(); char[] ca = s.toCharArray(); int len = (int)(ca.length * ce.maxBytesPerChar()); byte[] ba = new byte[len]; if (len == 0) return ba; ByteBuffer bb = ByteBuffer.wrap(ba); CharBuffer cb = CharBuffer.wrap(ca); CoderResult cr = ce.encode(cb, bb, true); if (!cr.isUnderflow()) throw new IllegalArgumentException(cr.toString()); cr = ce.flush(bb); if (!cr.isUnderflow()) throw new IllegalArgumentException(cr.toString()); if (bb.position() == ba.length) // defensive copy? return ba; else return Arrays.copyOf(ba, bb.position()); }
Example 5
Source File: ZipCoder.java From jdk8u_jdk with GNU General Public License v2.0 | 6 votes |
byte[] getBytes(String s) { CharsetEncoder ce = encoder().reset(); char[] ca = s.toCharArray(); int len = (int)(ca.length * ce.maxBytesPerChar()); byte[] ba = new byte[len]; if (len == 0) return ba; ByteBuffer bb = ByteBuffer.wrap(ba); CharBuffer cb = CharBuffer.wrap(ca); CoderResult cr = ce.encode(cb, bb, true); if (!cr.isUnderflow()) throw new IllegalArgumentException(cr.toString()); cr = ce.flush(bb); if (!cr.isUnderflow()) throw new IllegalArgumentException(cr.toString()); if (bb.position() == ba.length) // defensive copy? return ba; else return Arrays.copyOf(ba, bb.position()); }
Example 6
Source File: ZipCoder.java From jdk8u60 with GNU General Public License v2.0 | 6 votes |
byte[] getBytes(String s) { CharsetEncoder ce = encoder().reset(); char[] ca = s.toCharArray(); int len = (int)(ca.length * ce.maxBytesPerChar()); byte[] ba = new byte[len]; if (len == 0) return ba; ByteBuffer bb = ByteBuffer.wrap(ba); CharBuffer cb = CharBuffer.wrap(ca); CoderResult cr = ce.encode(cb, bb, true); if (!cr.isUnderflow()) throw new IllegalArgumentException(cr.toString()); cr = ce.flush(bb); if (!cr.isUnderflow()) throw new IllegalArgumentException(cr.toString()); if (bb.position() == ba.length) // defensive copy? return ba; else return Arrays.copyOf(ba, bb.position()); }
Example 7
Source File: ZipCoder.java From jdk8u-jdk with GNU General Public License v2.0 | 6 votes |
byte[] getBytes(String s) { CharsetEncoder ce = encoder().reset(); char[] ca = s.toCharArray(); int len = (int)(ca.length * ce.maxBytesPerChar()); byte[] ba = new byte[len]; if (len == 0) return ba; ByteBuffer bb = ByteBuffer.wrap(ba); CharBuffer cb = CharBuffer.wrap(ca); CoderResult cr = ce.encode(cb, bb, true); if (!cr.isUnderflow()) throw new IllegalArgumentException(cr.toString()); cr = ce.flush(bb); if (!cr.isUnderflow()) throw new IllegalArgumentException(cr.toString()); if (bb.position() == ba.length) // defensive copy? return ba; else return Arrays.copyOf(ba, bb.position()); }
Example 8
Source File: ZipCoder.java From JDKSourceCode1.8 with MIT License | 5 votes |
byte[] getBytes(String s) { CharsetEncoder ce = encoder().reset(); char[] ca = s.toCharArray(); int len = (int)(ca.length * ce.maxBytesPerChar()); byte[] ba = new byte[len]; if (len == 0) return ba; // UTF-8 only for now. Other ArrayDeocder only handles // CodingErrorAction.REPLACE mode. if (isUTF8 && ce instanceof ArrayEncoder) { int blen = ((ArrayEncoder)ce).encode(ca, 0, ca.length, ba); if (blen == -1) // malformed throw new IllegalArgumentException("MALFORMED"); return Arrays.copyOf(ba, blen); } ByteBuffer bb = ByteBuffer.wrap(ba); CharBuffer cb = CharBuffer.wrap(ca); CoderResult cr = ce.encode(cb, bb, true); if (!cr.isUnderflow()) throw new IllegalArgumentException(cr.toString()); cr = ce.flush(bb); if (!cr.isUnderflow()) throw new IllegalArgumentException(cr.toString()); if (bb.position() == ba.length) // defensive copy? return ba; else return Arrays.copyOf(ba, bb.position()); }
Example 9
Source File: ByteBufUtil.java From netty4.0.27Learn with Apache License 2.0 | 5 votes |
/**
 * Encodes the remaining characters of {@code src} into a newly allocated buffer.
 *
 * <p>The buffer is sized for the worst case ({@code src.remaining() * maxBytesPerChar()}).
 * If anything goes wrong before the buffer is returned, it is released.
 *
 * @param alloc       allocator used to obtain the destination buffer
 * @param enforceHeap when {@code true}, a heap buffer is requested explicitly
 * @param src         the characters to encode
 * @param charset     the charset to encode with
 * @return the buffer containing the encoded bytes
 * @throws IllegalStateException if a {@link CharacterCodingException} is raised while encoding
 */
static ByteBuf encodeString0(ByteBufAllocator alloc, boolean enforceHeap, CharBuffer src, Charset charset) {
    final CharsetEncoder enc = CharsetUtil.getEncoder(charset);
    final int capacity = (int) ((double) src.remaining() * enc.maxBytesPerChar());
    final ByteBuf dst = enforceHeap ? alloc.heapBuffer(capacity) : alloc.buffer(capacity);

    boolean handedOff = false;
    try {
        final ByteBuffer nio = dst.internalNioBuffer(0, capacity);
        final int startPos = nio.position();

        CoderResult status = enc.encode(src, nio, true);
        if (status.isUnderflow()) {
            status = enc.flush(nio);
        }
        if (!status.isUnderflow()) {
            status.throwException();
        }

        // Advance the writer index by the number of bytes the encoder produced.
        dst.writerIndex(dst.writerIndex() + nio.position() - startPos);
        handedOff = true;
        return dst;
    } catch (CharacterCodingException x) {
        throw new IllegalStateException(x);
    } finally {
        if (!handedOff) {
            dst.release();
        }
    }
}
Example 10
Source File: LogHandlerAccessor.java From trufflesqueak with MIT License | 5 votes |
/**
 * Appends the record's message, UTF-8 encoded and followed by a line feed, to
 * {@code buffer}. Records without a message are ignored.
 *
 * <p>Before writing, the worst-case encoded size is checked against {@code GIG};
 * when the limit would be reached, the handler is closed and the mapped buffer
 * is re-initialized.
 *
 * @param record the log record to publish
 */
@Override
public void publish(final LogRecord record) {
    final String text = record.getMessage();
    if (text == null) {
        return;
    }

    final CharsetEncoder utf8 = ThreadLocalCoders.encoderFor(StandardCharsets.UTF_8);
    final float worstCaseBytes = text.length() * utf8.maxBytesPerChar();
    // +1 accounts for the trailing line feed written below.
    if (buffer.position() + 1 + worstCaseBytes >= GIG) {
        close();
        initializeMappedBuffer();
    }

    utf8.encode(CharBuffer.wrap(text), buffer, true);
    utf8.flush(buffer);
    buffer.put((byte) '\n');
}
Example 11
Source File: DataBuffer.java From spring-analysis-note with MIT License | 5 votes |
/**
 * Write the given {@code CharSequence} using the given {@code Charset},
 * starting at the current writing position.
 *
 * <p>Capacity is first grown by an estimate based on the encoder's average
 * bytes-per-char; whenever the encoder overflows, the bytes written so far are
 * committed and capacity is grown by the worst case for the remaining input.
 * Malformed and unmappable input is replaced.
 * @param charSequence the char sequence to write into this buffer
 * @param charset the charset to encode the char sequence with
 * @return this buffer
 * @since 5.1.4
 */
default DataBuffer write(CharSequence charSequence, Charset charset) {
    Assert.notNull(charSequence, "CharSequence must not be null");
    Assert.notNull(charset, "Charset must not be null");
    if (charSequence.length() == 0) {
        return this;
    }

    CharsetEncoder encoder = charset.newEncoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    CharBuffer chars = CharBuffer.wrap(charSequence);
    int averageGuess = (int) (chars.remaining() * encoder.averageBytesPerChar());
    DataBuffer grown = ensureCapacity(averageGuess);
    ByteBuffer bytes = grown.asByteBuffer(writePosition(), writableByteCount());

    for (;;) {
        CoderResult result;
        if (chars.hasRemaining()) {
            result = encoder.encode(chars, bytes, true);
        }
        else {
            result = CoderResult.UNDERFLOW;
        }
        if (result.isUnderflow()) {
            result = encoder.flush(bytes);
            if (result.isUnderflow()) {
                break;
            }
        }
        if (result.isOverflow()) {
            // Commit what has been written, then make room for the worst case of the rest.
            writePosition(writePosition() + bytes.position());
            int worstCase = (int) (chars.remaining() * encoder.maxBytesPerChar());
            ensureCapacity(worstCase);
            bytes = asByteBuffer(writePosition(), writableByteCount());
        }
    }

    writePosition(writePosition() + bytes.position());
    return this;
}
Example 12
Source File: ZipCoder.java From Java8CN with Apache License 2.0 | 5 votes |
byte[] getBytes(String s) { CharsetEncoder ce = encoder().reset(); char[] ca = s.toCharArray(); int len = (int)(ca.length * ce.maxBytesPerChar()); byte[] ba = new byte[len]; if (len == 0) return ba; // UTF-8 only for now. Other ArrayDeocder only handles // CodingErrorAction.REPLACE mode. if (isUTF8 && ce instanceof ArrayEncoder) { int blen = ((ArrayEncoder)ce).encode(ca, 0, ca.length, ba); if (blen == -1) // malformed throw new IllegalArgumentException("MALFORMED"); return Arrays.copyOf(ba, blen); } ByteBuffer bb = ByteBuffer.wrap(ba); CharBuffer cb = CharBuffer.wrap(ca); CoderResult cr = ce.encode(cb, bb, true); if (!cr.isUnderflow()) throw new IllegalArgumentException(cr.toString()); cr = ce.flush(bb); if (!cr.isUnderflow()) throw new IllegalArgumentException(cr.toString()); if (bb.position() == ba.length) // defensive copy? return ba; else return Arrays.copyOf(ba, bb.position()); }
Example 13
Source File: DataBuffer.java From java-technology-stack with MIT License | 5 votes |
/**
 * Write the given {@code CharSequence} using the given {@code Charset},
 * starting at the current writing position.
 *
 * <p>Capacity is first grown by an estimate based on the encoder's average
 * bytes-per-char; whenever the encoder overflows, the bytes written so far are
 * committed and capacity is grown by the worst case for the remaining input.
 * Malformed and unmappable input is replaced.
 * @param charSequence the char sequence to write into this buffer
 * @param charset the charset to encode the char sequence with
 * @return this buffer
 * @since 5.1.4
 */
default DataBuffer write(CharSequence charSequence, Charset charset) {
    Assert.notNull(charSequence, "CharSequence must not be null");
    Assert.notNull(charset, "Charset must not be null");
    if (charSequence.length() != 0) {
        CharsetEncoder charsetEncoder = charset.newEncoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE);
        CharBuffer inBuffer = CharBuffer.wrap(charSequence);
        int estimatedSize = (int) (inBuffer.remaining() * charsetEncoder.averageBytesPerChar());
        ByteBuffer outBuffer = ensureCapacity(estimatedSize)
                .asByteBuffer(writePosition(), writableByteCount());
        while (true) {
            CoderResult cr = (inBuffer.hasRemaining() ?
                    charsetEncoder.encode(inBuffer, outBuffer, true) : CoderResult.UNDERFLOW);
            if (cr.isUnderflow()) {
                cr = charsetEncoder.flush(outBuffer);
            }
            if (cr.isUnderflow()) {
                break;
            }
            if (cr.isOverflow()) {
                // outBuffer is a view that starts at the current write position, so its
                // position is an offset relative to that point, not an absolute index.
                // Advance the write position by that offset instead of overwriting it.
                writePosition(writePosition() + outBuffer.position());
                int maximumSize = (int) (inBuffer.remaining() * charsetEncoder.maxBytesPerChar());
                ensureCapacity(maximumSize);
                outBuffer = asByteBuffer(writePosition(), writableByteCount());
            }
        }
        // Same reasoning as above: commit the bytes written into the final view.
        writePosition(writePosition() + outBuffer.position());
    }
    return this;
}
Example 14
Source File: ZipCoder.java From jdk8u_jdk with GNU General Public License v2.0 | 5 votes |
byte[] getBytes(String s) { CharsetEncoder ce = encoder().reset(); char[] ca = s.toCharArray(); int len = (int)(ca.length * ce.maxBytesPerChar()); byte[] ba = new byte[len]; if (len == 0) return ba; // UTF-8 only for now. Other ArrayDeocder only handles // CodingErrorAction.REPLACE mode. if (isUTF8 && ce instanceof ArrayEncoder) { int blen = ((ArrayEncoder)ce).encode(ca, 0, ca.length, ba); if (blen == -1) // malformed throw new IllegalArgumentException("MALFORMED"); return Arrays.copyOf(ba, blen); } ByteBuffer bb = ByteBuffer.wrap(ba); CharBuffer cb = CharBuffer.wrap(ca); CoderResult cr = ce.encode(cb, bb, true); if (!cr.isUnderflow()) throw new IllegalArgumentException(cr.toString()); cr = ce.flush(bb); if (!cr.isUnderflow()) throw new IllegalArgumentException(cr.toString()); if (bb.position() == ba.length) // defensive copy? return ba; else return Arrays.copyOf(ba, bb.position()); }
Example 15
Source File: ZipCoder.java From jdk8u-dev-jdk with GNU General Public License v2.0 | 5 votes |
byte[] getBytes(String s) { CharsetEncoder ce = encoder().reset(); char[] ca = s.toCharArray(); int len = (int)(ca.length * ce.maxBytesPerChar()); byte[] ba = new byte[len]; if (len == 0) return ba; // UTF-8 only for now. Other ArrayDeocder only handles // CodingErrorAction.REPLACE mode. if (isUTF8 && ce instanceof ArrayEncoder) { int blen = ((ArrayEncoder)ce).encode(ca, 0, ca.length, ba); if (blen == -1) // malformed throw new IllegalArgumentException("MALFORMED"); return Arrays.copyOf(ba, blen); } ByteBuffer bb = ByteBuffer.wrap(ba); CharBuffer cb = CharBuffer.wrap(ca); CoderResult cr = ce.encode(cb, bb, true); if (!cr.isUnderflow()) throw new IllegalArgumentException(cr.toString()); cr = ce.flush(bb); if (!cr.isUnderflow()) throw new IllegalArgumentException(cr.toString()); if (bb.position() == ba.length) // defensive copy? return ba; else return Arrays.copyOf(ba, bb.position()); }
Example 16
Source File: ReversedLinesFileReader.java From aion-germany with GNU General Public License v3.0 | 4 votes |
/** * Creates a ReversedLinesFileReader with the given block size and encoding. * * @param file * the file to be read * @param blockSize * size of the internal buffer (for ideal performance this should * match with the block size of the underlying file system). * @param encoding * the encoding of the file * @throws IOException if an I/O error occurs * @since 2.3 */ public ReversedLinesFileReader(final File file, final int blockSize, final Charset encoding) throws IOException { this.blockSize = blockSize; this.encoding = encoding; randomAccessFile = new RandomAccessFile(file, "r"); totalByteLength = randomAccessFile.length(); int lastBlockLength = (int) (totalByteLength % blockSize); if (lastBlockLength > 0) { totalBlockCount = totalByteLength / blockSize + 1; } else { totalBlockCount = totalByteLength / blockSize; if (totalByteLength > 0) { lastBlockLength = blockSize; } } currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null); // --- check & prepare encoding --- Charset charset = Charsets.toCharset(encoding); CharsetEncoder charsetEncoder = charset.newEncoder(); float maxBytesPerChar = charsetEncoder.maxBytesPerChar(); if(maxBytesPerChar==1f) { // all one byte encodings are no problem byteDecrement = 1; } else if(charset == Charset.forName("UTF-8")) { // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte // http://en.wikipedia.org/wiki/UTF-8 byteDecrement = 1; } else if(charset == Charset.forName("Shift_JIS")) { // Same as for UTF-8 // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html byteDecrement = 1; } else if(charset == Charset.forName("UTF-16BE") || charset == Charset.forName("UTF-16LE")) { // UTF-16 new line sequences are not allowed as second tuple of four byte sequences, // however byte order has to be specified byteDecrement = 2; } else if(charset == Charset.forName("UTF-16")) { throw new UnsupportedEncodingException( "For UTF-16, you need to specify the byte order (use 
UTF-16BE or UTF-16LE)"); } else { throw new UnsupportedEncodingException( "Encoding "+encoding+" is not supported yet (feel free to submit a patch)"); } // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n newLineSequences = new byte[][] { "\r\n".getBytes(encoding), "\n".getBytes(encoding), "\r".getBytes(encoding) }; avoidNewlineSplitBufferSize = newLineSequences[0].length; }
Example 17
Source File: GridReversedLinesFileReader.java From ignite with Apache License 2.0 | 4 votes |
/** * Creates a ReverseLineReader with the given block size and encoding. * * @param file * the file to be read * @param blockSize * size of the internal buffer (for ideal performance this should * match with the block size of the underlying file system). * @param charset * the encoding of the file * @throws IOException if an I/O error occurs * @since 2.3 */ public GridReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException { this.blockSize = blockSize; this.encoding = charset; randomAccessFile = new RandomAccessFile(file, "r"); totalByteLength = randomAccessFile.length(); int lastBlockLength = (int) (totalByteLength % blockSize); if (lastBlockLength > 0) { totalBlockCount = totalByteLength / blockSize + 1; } else { totalBlockCount = totalByteLength / blockSize; if (totalByteLength > 0) { lastBlockLength = blockSize; } } currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null); // --- check & prepare encoding --- CharsetEncoder charsetEncoder = charset.newEncoder(); float maxBytesPerChar = charsetEncoder.maxBytesPerChar(); if (maxBytesPerChar == 1f) { // all one byte encodings are no problem byteDecrement = 1; } else if (charset == Charset.forName("UTF-8")) { // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte // http://en.wikipedia.org/wiki/UTF-8 byteDecrement = 1; } else if (charset == Charset.forName("Shift_JIS")) { // Same as for UTF-8 // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html byteDecrement = 1; } else if (charset == Charset.forName("UTF-16BE") || charset == Charset.forName("UTF-16LE")) { // UTF-16 new line sequences are not allowed as second tuple of four byte sequences, // however byte order has to be specified byteDecrement = 2; } else if (charset == Charset.forName("UTF-16")) { throw new UnsupportedEncodingException( "For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)"); } else { throw new 
UnsupportedEncodingException( "Encoding " + charset + " is not supported yet (feel free to submit a patch)"); } // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n newLineSequences = new byte[][] {"\r\n".getBytes(charset), "\n".getBytes(charset), "\r".getBytes(charset)}; avoidNewlineSplitBufferSize = newLineSequences[0].length; }
Example 18
Source File: IsValidUtf8TestUtil.java From travelguide with Apache License 2.0 | 4 votes |
/**
 * Variation of {@link #testBytes} that allocates less by driving the low-level
 * decoder and encoder directly. Kept because it is handy when debugging attempts
 * to process bytes faster; since it bypasses the actual String class, it could
 * in principle drift from String's behavior (although that is unlikely).
 *
 * <p>Every value in {@code [start, lim)} is laid out big-endian into a
 * {@code numBytes}-long array, decoded, re-encoded, and compared against the
 * verdict of {@code ByteString.isValidUtf8()}.
 *
 * @param numBytes the number of bytes in the byte array
 * @param expectedCount the expected number of roundtrippable permutations
 * @param start the starting bytes encoded as a long as big-endian
 * @param lim the limit of bytes to process encoded as a long as big-endian,
 *     or -1 to mean the max limit for numBytes
 */
void testBytesUsingByteBuffers(
    int numBytes, long expectedCount, long start, long lim)
    throws UnsupportedEncodingException {
  CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder()
      .onMalformedInput(CodingErrorAction.REPLACE)
      .onUnmappableCharacter(CodingErrorAction.REPLACE);
  CharsetEncoder utf8Encoder = Charset.forName("UTF-8").newEncoder()
      .onMalformedInput(CodingErrorAction.REPLACE)
      .onUnmappableCharacter(CodingErrorAction.REPLACE);

  // Worst-case scratch space for the decoded chars and the re-encoded bytes.
  int maxChars = (int) (utf8Decoder.maxCharsPerByte() * numBytes) + 1;
  int maxBytes = (int) (utf8Encoder.maxBytesPerChar() * maxChars) + 1;
  byte[] input = new byte[numBytes];
  char[] decodedChars = new char[maxChars];
  byte[] reencoded = new byte[maxBytes];

  ByteBuffer inputBuf = ByteBuffer.wrap(input);
  CharBuffer charBuf = CharBuffer.wrap(decodedChars);
  ByteBuffer reencodedBuf = ByteBuffer.wrap(reencoded);

  final long end = (lim == -1) ? 1L << (numBytes * 8) : lim;

  long total = 0;
  long roundTripped = 0;
  for (long value = start; value < end; value++) {
    // Reset all buffers to position 0 / full capacity and clear coder state.
    inputBuf.clear();
    charBuf.clear();
    reencodedBuf.clear();
    utf8Encoder.reset();
    utf8Decoder.reset();

    // Spread the counter big-endian over the input bytes.
    long remaining = value;
    for (int idx = input.length - 1; idx >= 0; idx--) {
      input[idx] = (byte) remaining;
      remaining >>= 8;
    }

    boolean expectedValid = ByteString.copyFrom(input).isValidUtf8();

    CoderResult outcome = utf8Decoder.decode(inputBuf, charBuf, true);
    assertFalse(outcome.isError());
    outcome = utf8Decoder.flush(charBuf);
    assertFalse(outcome.isError());

    // Switch the char buffer from writing to reading the decoded chars.
    charBuf.flip();

    outcome = utf8Encoder.encode(charBuf, reencodedBuf, true);
    assertFalse(outcome.isError());
    outcome = utf8Encoder.flush(reencodedBuf);
    assertFalse(outcome.isError());

    int reencodedLen = reencodedBuf.position();
    boolean identical = reencodedLen == numBytes;
    for (int idx = 0; identical && idx < numBytes; idx++) {
      identical = input[idx] == reencoded[idx];
    }

    if (identical != expectedValid) {
      outputFailure(value, input, reencoded, reencodedLen);
    }

    total++;
    if (expectedValid) {
      roundTripped++;
    }
    if (value != 0 && value % 1000000 == 0) {
      logger.info("Processed " + (value / 1000000) + " million characters");
    }
  }
  logger.info("Round tripped " + roundTripped + " of " + total);
  assertEquals(expectedCount, roundTripped);
}
Example 19
Source File: ReversedLinesFileReader.java From lams with GNU General Public License v2.0 | 4 votes |
/** * Creates a ReversedLinesFileReader with the given block size and encoding. * * @param file * the file to be read * @param blockSize * size of the internal buffer (for ideal performance this should * match with the block size of the underlying file system). * @param encoding * the encoding of the file * @throws IOException if an I/O error occurs * @since 2.3 */ @SuppressWarnings("deprecation") // unavoidable until Java 7 public ReversedLinesFileReader(final File file, final int blockSize, final Charset encoding) throws IOException { this.blockSize = blockSize; this.encoding = encoding; // --- check & prepare encoding --- final Charset charset = Charsets.toCharset(encoding); final CharsetEncoder charsetEncoder = charset.newEncoder(); final float maxBytesPerChar = charsetEncoder.maxBytesPerChar(); if (maxBytesPerChar == 1f) { // all one byte encodings are no problem byteDecrement = 1; } else if (charset == Charsets.UTF_8) { // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte // http://en.wikipedia.org/wiki/UTF-8 byteDecrement = 1; } else if(charset == Charset.forName("Shift_JIS") || // Same as for UTF-8 // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese) charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean) charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese) charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese) byteDecrement = 1; } else if (charset == Charsets.UTF_16BE || charset == Charsets.UTF_16LE) { // UTF-16 new line sequences are not allowed as second tuple of four byte sequences, // however byte order has to be specified byteDecrement = 2; } else if (charset == Charsets.UTF_16) { throw new UnsupportedEncodingException("For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)"); } else { 
throw new UnsupportedEncodingException("Encoding " + encoding + " is not supported yet (feel free to " + "submit a patch)"); } // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n newLineSequences = new byte[][] { "\r\n".getBytes(encoding), "\n".getBytes(encoding), "\r".getBytes(encoding) }; avoidNewlineSplitBufferSize = newLineSequences[0].length; // Open file randomAccessFile = new RandomAccessFile(file, "r"); totalByteLength = randomAccessFile.length(); int lastBlockLength = (int) (totalByteLength % blockSize); if (lastBlockLength > 0) { totalBlockCount = totalByteLength / blockSize + 1; } else { totalBlockCount = totalByteLength / blockSize; if (totalByteLength > 0) { lastBlockLength = blockSize; } } currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null); }
Example 20
Source File: PerforceShiftJISCharset.java From p4ic4idea with Apache License 2.0 | 4 votes |
/**
 * Calls the superclass constructor with the Charset object and the
 * encoding sizes (average and maximum bytes per char) taken from the
 * given encoder, then stores that encoder in the {@code encoder} field.
 *
 * @param cs      the charset passed through to the superclass constructor
 * @param encoder the encoder that supplies the size figures and is stored in this instance
 */
Encoder(Charset cs, CharsetEncoder encoder) {
    super(cs, encoder.averageBytesPerChar(), encoder.maxBytesPerChar());
    this.encoder = encoder;
}