Java Code Examples for java.nio.charset.CoderResult#isMalformed()
The following examples show how to use
java.nio.charset.CoderResult#isMalformed().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BasicURLCanonicalizer.java From webarchive-commons with Apache License 2.0 | 6 votes |
/** * Decodes bytes in bbuf as utf-8 and appends decoded characters to sb. If * decoding of any portion fails, appends the un-decodable %xx%xx sequence * extracted from inputStr instead of decoded characters. See "bad unicode" * tests in GoogleCanonicalizerTest#testDecode(). Variables only make sense * within context of {@link #decode(String)}. * * @param sb * StringBuilder to append to * @param bbuf * raw bytes decoded from %-encoded input * @param inputStr * full input string * @param seqStart * start index inclusive within inputStr of %-encoded sequence * @param seqEnd * end index exclusive within inputStr of %-encoded sequence * @param utf8decoder */ private void appendDecodedPctUtf8(StringBuilder sb, ByteBuffer bbuf, String inputStr, int seqStart, int seqEnd, CharsetDecoder utf8decoder) { // assert bbuf.position() * 3 == seqEnd - seqStart; utf8decoder.reset(); CharBuffer cbuf = CharBuffer.allocate(bbuf.position()); bbuf.flip(); while (bbuf.position() < bbuf.limit()) { CoderResult coderResult = utf8decoder.decode(bbuf, cbuf, true); sb.append(cbuf.flip()); if (coderResult.isMalformed()) { // put the malformed %xx%xx into the result un-decoded CharSequence undecodablePctHex = inputStr.subSequence(seqStart + 3 * bbuf.position(), seqStart + 3 * bbuf.position() + 3 * coderResult.length()); sb.append(undecodablePctHex); // there could be more good stuff after the bad bbuf.position(bbuf.position() + coderResult.length()); } cbuf.clear(); } }
Example 2
Source File: BinaryTruncator.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Runs the decoder over {@code buffer} to classify its content, then puts
 * the buffer's position back where it was on entry.
 *
 * @param buffer the bytes to check, starting at its current position
 * @return {@code VALID} when decoding ends in underflow, {@code MALFORMED}
 *         when the decoder reports malformed input, otherwise
 *         {@code UNMAPPABLE}
 */
Validity checkValidity(ByteBuffer buffer) {
    final int savedPosition = buffer.position();

    // The decoded characters are discarded; the scratch buffer is simply
    // emptied and reused for as long as the decoder reports overflow.
    CoderResult outcome;
    do {
        dummyBuffer.clear();
        outcome = decoder.decode(buffer, dummyBuffer, true);
    } while (outcome.isOverflow());

    buffer.position(savedPosition);

    if (outcome.isUnderflow()) {
        return Validity.VALID;
    }
    return outcome.isMalformed() ? Validity.MALFORMED : Validity.UNMAPPABLE;
}
Example 3
Source File: ResettableFileInputStream.java From mt-flume with Apache License 2.0 | 5 votes |
@Override public synchronized int readChar() throws IOException { if (!buf.hasRemaining()) { refillBuf(); } int start = buf.position(); charBuf.clear(); boolean isEndOfInput = false; if (position >= fileSize) { isEndOfInput = true; } CoderResult res = decoder.decode(buf, charBuf, isEndOfInput); if (res.isMalformed() || res.isUnmappable()) { res.throwException(); } int delta = buf.position() - start; charBuf.flip(); if (charBuf.hasRemaining()) { char c = charBuf.get(); // don't increment the persisted location if we are in between a // surrogate pair, otherwise we may never recover if we seek() to this // location! incrPosition(delta, !Character.isHighSurrogate(c)); return c; // there may be a partial character in the decoder buffer } else { incrPosition(delta, false); return -1; } }
Example 4
Source File: UrlCanonicalizer.java From outbackcdx with Apache License 2.0 | 5 votes |
/**
 * Decodes the remaining bytes of {@code bb} as UTF-8 and appends the result
 * to {@code out}. Bytes that are not valid UTF-8 are appended as lowercase
 * {@code %xx} escapes, in the same position they occupied in the input.
 *
 * @param bb  bytes to decode; fully consumed on return
 * @param out destination for decoded characters and escapes
 */
private static void tryDecodeUtf8(ByteBuffer bb, StringBuilder out) {
    CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder();
    CharBuffer cb = CharBuffer.allocate(bb.remaining());
    while (bb.hasRemaining()) {
        CoderResult result = decoder.decode(bb, cb, true);

        // Characters decoded before the malformed bytes must be emitted
        // first; appending the escapes first would reorder the output.
        out.append(cb.flip());
        cb.clear();

        if (result.isMalformed()) {
            // The decoder stops in front of the bad bytes, so consume them
            // here and escape each one.
            for (int i = 0; i < result.length(); i++) {
                out.append('%').append(String.format("%02x", bb.get()));
            }
        }
    }
}
Example 5
Source File: MboxIterator.java From sling-samples with Apache License 2.0 | 5 votes |
/**
 * Decodes the next portion of {@code byteBuffer} into
 * {@code mboxCharBuffer}, refreshes the end-of-input flag and flips the
 * character buffer so it can be read.
 *
 * @throws CharConversionException if the decoder reports malformed input or
 *         an unmappable character
 */
private void decodeNextCharBuffer() throws CharConversionException {
    final CoderResult outcome = DECODER.decode(byteBuffer, mboxCharBuffer, endOfInputFlag);
    updateEndOfInputFlag();
    mboxCharBuffer.flip();

    if (!outcome.isError()) {
        return;
    }
    if (outcome.isMalformed()) {
        throw new CharConversionException("Malformed input!");
    }
    if (outcome.isUnmappable()) {
        throw new CharConversionException("Unmappable character!");
    }
}
Example 6
Source File: InputStreamReader.java From openjdk-jdk9 with GNU General Public License v2.0 | 4 votes |
/** * Reads at most {@code length} characters from this reader and stores them * at position {@code offset} in the character array {@code buf}. Returns * the number of characters actually read or -1 if the end of the reader has * been reached. The bytes are either obtained from converting bytes in this * reader's buffer or by first filling the buffer from the source * InputStream and then reading from the buffer. * * @param buf * the array to store the characters read. * @param offset * the initial position in {@code buf} to store the characters * read from this reader. * @param length * the maximum number of characters to read. * @return the number of characters read or -1 if the end of the reader has * been reached. * @throws IndexOutOfBoundsException * if {@code offset < 0} or {@code length < 0}, or if * {@code offset + length} is greater than the length of * {@code buf}. * @throws IOException * if this reader is closed or some other I/O error occurs. */ @Override public int read(char[] buf, int offset, int length) throws IOException { synchronized (lock) { if (!isOpen()) { throw new IOException("InputStreamReader is closed."); } if (offset < 0 || offset > buf.length - length || length < 0) { throw new IndexOutOfBoundsException(); } if (length == 0) { return 0; } CharBuffer out = CharBuffer.wrap(buf, offset, length); CoderResult result = CoderResult.UNDERFLOW; // bytes.remaining() indicates number of bytes in buffer // when 1-st time entered, it'll be equal to zero boolean needInput = !bytes.hasRemaining(); while (out.hasRemaining()) { // fill the buffer if needed if (needInput) { try { if ((in.available() == 0) && (out.position() > offset)) { // we could return the result without blocking read break; } } catch (IOException e) { // available didn't work so just try the read } int to_read = bytes.capacity() - bytes.limit(); int off = bytes.arrayOffset() + bytes.limit(); int was_red = in.read(bytes.array(), off, to_read); if (was_red == -1) { endOfInput = true; 
break; } else if (was_red == 0) { break; } bytes.limit(bytes.limit() + was_red); needInput = false; } // decode bytes result = decoder.decode(bytes, out, false); if (result.isUnderflow()) { // compact the buffer if no space left if (bytes.limit() == bytes.capacity()) { bytes.compact(); bytes.limit(bytes.position()); bytes.position(0); } needInput = true; } else { break; } } if (result == CoderResult.UNDERFLOW && endOfInput) { result = decoder.decode(bytes, out, true); decoder.flush(out); decoder.reset(); } if (result.isMalformed()) { throw new MalformedInputException(result.length()); } else if (result.isUnmappable()) { throw new UnmappableCharacterException(result.length()); } return out.position() - offset == 0 ? -1 : out.position() - offset; } }
Example 7
Source File: InputStreamReader.java From jtransc with Apache License 2.0 | 4 votes |
/** * Reads up to {@code count} characters from this reader and stores them * at position {@code offset} in the character array {@code buffer}. Returns * the number of characters actually read or -1 if the end of the reader has * been reached. The bytes are either obtained from converting bytes in this * reader's buffer or by first filling the buffer from the source * InputStream and then reading from the buffer. * * @throws IndexOutOfBoundsException * if {@code offset < 0 || count < 0 || offset + count > buffer.length}. * @throws IOException * if this reader is closed or some other I/O error occurs. */ @Override public int read(char[] buffer, int offset, int count) throws IOException { synchronized (lock) { if (!isOpen()) { throw new IOException("InputStreamReader is closed"); } JTranscArrays.checkOffsetAndCount(buffer.length, offset, count); if (count == 0) { return 0; } CharBuffer out = CharBuffer.wrap(buffer, offset, count); CoderResult result = CoderResult.UNDERFLOW; // bytes.remaining() indicates number of bytes in buffer // when 1-st time entered, it'll be equal to zero boolean needInput = !bytes.hasRemaining(); while (out.hasRemaining()) { // fill the buffer if needed if (needInput) { try { if (in.available() == 0 && out.position() > offset) { // we could return the result without blocking read break; } } catch (IOException e) { // available didn't work so just try the read } int desiredByteCount = bytes.capacity() - bytes.limit(); int off = bytes.arrayOffset() + bytes.limit(); int actualByteCount = in.read(bytes.array(), off, desiredByteCount); if (actualByteCount == -1) { endOfInput = true; break; } else if (actualByteCount == 0) { break; } bytes.limit(bytes.limit() + actualByteCount); needInput = false; } // decode bytes result = decoder.decode(bytes, out, false); if (result.isUnderflow()) { // compact the buffer if no space left if (bytes.limit() == bytes.capacity()) { bytes.compact(); bytes.limit(bytes.position()); bytes.position(0); } needInput = 
true; } else { break; } } if (result == CoderResult.UNDERFLOW && endOfInput) { result = decoder.decode(bytes, out, true); decoder.flush(out); decoder.reset(); } if (result.isMalformed() || result.isUnmappable()) { result.throwException(); } return out.position() - offset == 0 ? -1 : out.position() - offset; } }
Example 8
Source File: C2BConverter.java From tomcatsrc with Apache License 2.0 | 4 votes |
/** * Convert the given characters to bytes. * * @param cc char input * @param bc byte output */ public void convert(CharChunk cc, ByteChunk bc) throws IOException { if ((bb == null) || (bb.array() != bc.getBuffer())) { // Create a new byte buffer if anything changed bb = ByteBuffer.wrap(bc.getBuffer(), bc.getEnd(), bc.getBuffer().length - bc.getEnd()); } else { // Initialize the byte buffer bb.limit(bc.getBuffer().length); bb.position(bc.getEnd()); } if ((cb == null) || (cb.array() != cc.getBuffer())) { // Create a new char buffer if anything changed cb = CharBuffer.wrap(cc.getBuffer(), cc.getStart(), cc.getLength()); } else { // Initialize the char buffer cb.limit(cc.getEnd()); cb.position(cc.getStart()); } CoderResult result = null; // Parse leftover if any are present if (leftovers.position() > 0) { int pos = bb.position(); // Loop until one char is encoded or there is a encoder error do { leftovers.put((char) cc.substract()); leftovers.flip(); result = encoder.encode(leftovers, bb, false); leftovers.position(leftovers.limit()); leftovers.limit(leftovers.array().length); } while (result.isUnderflow() && (bb.position() == pos)); if (result.isError() || result.isMalformed()) { result.throwException(); } cb.position(cc.getStart()); leftovers.position(0); } // Do the decoding and get the results into the byte chunk and the char // chunk result = encoder.encode(cb, bb, false); if (result.isError() || result.isMalformed()) { result.throwException(); } else if (result.isOverflow()) { // Propagate current positions to the byte chunk and char chunk bc.setEnd(bb.position()); cc.setOffset(cb.position()); } else if (result.isUnderflow()) { // Propagate current positions to the byte chunk and char chunk bc.setEnd(bb.position()); cc.setOffset(cb.position()); // Put leftovers in the leftovers char buffer if (cc.getLength() > 0) { leftovers.limit(leftovers.array().length); leftovers.position(cc.getLength()); cc.substract(leftovers.array(), 0, cc.getLength()); } } }
Example 9
Source File: B2CConverter.java From tomcatsrc with Apache License 2.0 | 4 votes |
/** * Convert the given bytes to characters. * * @param bc byte input * @param cc char output * @param endOfInput Is this all of the available data */ public void convert(ByteChunk bc, CharChunk cc, boolean endOfInput) throws IOException { if ((bb == null) || (bb.array() != bc.getBuffer())) { // Create a new byte buffer if anything changed bb = ByteBuffer.wrap(bc.getBuffer(), bc.getStart(), bc.getLength()); } else { // Initialize the byte buffer bb.limit(bc.getEnd()); bb.position(bc.getStart()); } if ((cb == null) || (cb.array() != cc.getBuffer())) { // Create a new char buffer if anything changed cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(), cc.getBuffer().length - cc.getEnd()); } else { // Initialize the char buffer cb.limit(cc.getBuffer().length); cb.position(cc.getEnd()); } CoderResult result = null; // Parse leftover if any are present if (leftovers.position() > 0) { int pos = cb.position(); // Loop until one char is decoded or there is a decoder error do { leftovers.put(bc.substractB()); leftovers.flip(); result = decoder.decode(leftovers, cb, endOfInput); leftovers.position(leftovers.limit()); leftovers.limit(leftovers.array().length); } while (result.isUnderflow() && (cb.position() == pos)); if (result.isError() || result.isMalformed()) { result.throwException(); } bb.position(bc.getStart()); leftovers.position(0); } // Do the decoding and get the results into the byte chunk and the char // chunk result = decoder.decode(bb, cb, endOfInput); if (result.isError() || result.isMalformed()) { result.throwException(); } else if (result.isOverflow()) { // Propagate current positions to the byte chunk and char chunk, if // this continues the char buffer will get resized bc.setOffset(bb.position()); cc.setEnd(cb.position()); } else if (result.isUnderflow()) { // Propagate current positions to the byte chunk and char chunk bc.setOffset(bb.position()); cc.setEnd(cb.position()); // Put leftovers in the leftovers byte buffer if (bc.getLength() > 0) { 
leftovers.limit(leftovers.array().length); leftovers.position(bc.getLength()); bc.substract(leftovers.array(), 0, bc.getLength()); } } }
Example 10
Source File: InputStreamReader.java From TorrentEngine with GNU General Public License v3.0 | 4 votes |
/** * Reads at most {@code length} characters from this reader and stores them * at position {@code offset} in the character array {@code buf}. Returns * the number of characters actually read or -1 if the end of the reader has * been reached. The bytes are either obtained from converting bytes in this * reader's buffer or by first filling the buffer from the source * InputStream and then reading from the buffer. * * @param buf * the array to store the characters read. * @param offset * the initial position in {@code buf} to store the characters * read from this reader. * @param length * the maximum number of characters to read. * @return the number of characters read or -1 if the end of the reader has * been reached. * @throws IndexOutOfBoundsException * if {@code offset < 0} or {@code length < 0}, or if * {@code offset + length} is greater than the length of * {@code buf}. * @throws IOException * if this reader is closed or some other I/O error occurs. */ @Override public int read(char[] buf, int offset, int length) throws IOException { synchronized (lock) { if (!isOpen()) { throw new IOException("InputStreamReader is closed."); } if (offset < 0 || offset > buf.length - length || length < 0) { throw new IndexOutOfBoundsException(); } if (length == 0) { return 0; } CharBuffer out = CharBuffer.wrap(buf, offset, length); CoderResult result = CoderResult.UNDERFLOW; // bytes.remaining() indicates number of bytes in buffer // when 1-st time entered, it'll be equal to zero boolean needInput = !bytes.hasRemaining(); while (out.hasRemaining()) { // fill the buffer if needed if (needInput) { try { if ((in.available() == 0) && (out.position() > offset)) { // we could return the result without blocking read break; } } catch (IOException e) { // available didn't work so just try the read } int to_read = bytes.capacity() - bytes.limit(); int off = bytes.arrayOffset() + bytes.limit(); int was_red = in.read(bytes.array(), off, to_read); if (was_red == -1) { endOfInput = true; 
break; } else if (was_red == 0) { break; } bytes.limit(bytes.limit() + was_red); needInput = false; } // decode bytes result = decoder.decode(bytes, out, false); if (result.isUnderflow()) { // compact the buffer if no space left if (bytes.limit() == bytes.capacity()) { bytes.compact(); bytes.limit(bytes.position()); bytes.position(0); } needInput = true; } else { break; } } if (result == CoderResult.UNDERFLOW && endOfInput) { result = decoder.decode(bytes, out, true); decoder.flush(out); decoder.reset(); } if (result.isMalformed()) { throw new MalformedInputException(result.length()); } else if (result.isUnmappable()) { throw new UnmappableCharacterException(result.length()); } return out.position() - offset == 0 ? -1 : out.position() - offset; } }
Example 11
Source File: B2CConverter.java From Tomcat8-Source-Read with MIT License | 4 votes |
/** * Convert the given bytes to characters. * * @param bc byte input * @param cc char output * @param endOfInput Is this all of the available data * * @throws IOException If the conversion can not be completed */ public void convert(ByteChunk bc, CharChunk cc, boolean endOfInput) throws IOException { if ((bb == null) || (bb.array() != bc.getBuffer())) { // Create a new byte buffer if anything changed bb = ByteBuffer.wrap(bc.getBuffer(), bc.getStart(), bc.getLength()); } else { // Initialize the byte buffer bb.limit(bc.getEnd()); bb.position(bc.getStart()); } if ((cb == null) || (cb.array() != cc.getBuffer())) { // Create a new char buffer if anything changed cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(), cc.getBuffer().length - cc.getEnd()); } else { // Initialize the char buffer cb.limit(cc.getBuffer().length); cb.position(cc.getEnd()); } CoderResult result = null; // Parse leftover if any are present if (leftovers.position() > 0) { int pos = cb.position(); // Loop until one char is decoded or there is a decoder error do { leftovers.put(bc.substractB()); leftovers.flip(); result = decoder.decode(leftovers, cb, endOfInput); leftovers.position(leftovers.limit()); leftovers.limit(leftovers.array().length); } while (result.isUnderflow() && (cb.position() == pos)); if (result.isError() || result.isMalformed()) { result.throwException(); } bb.position(bc.getStart()); leftovers.position(0); } // Do the decoding and get the results into the byte chunk and the char // chunk result = decoder.decode(bb, cb, endOfInput); if (result.isError() || result.isMalformed()) { result.throwException(); } else if (result.isOverflow()) { // Propagate current positions to the byte chunk and char chunk, if // this continues the char buffer will get resized bc.setOffset(bb.position()); cc.setEnd(cb.position()); } else if (result.isUnderflow()) { // Propagate current positions to the byte chunk and char chunk bc.setOffset(bb.position()); cc.setEnd(cb.position()); // Put leftovers in the 
leftovers byte buffer if (bc.getLength() > 0) { leftovers.limit(leftovers.array().length); leftovers.position(bc.getLength()); bc.substract(leftovers.array(), 0, bc.getLength()); } } }
Example 12
Source File: BaseFileManager.java From openjdk-jdk9 with GNU General Public License v2.0 | 4 votes |
/**
 * Decodes {@code inbuf} into a new character buffer using the decoder
 * obtained for the configured encoding name.
 *
 * Each malformed or unmappable byte run is reported through {@code log} and
 * replaced by U+FFFD in the output. If the encoding name is illegal or
 * unsupported, an error is logged and an empty buffer is returned.
 *
 * @param inbuf                the bytes to decode
 * @param ignoreEncodingErrors passed through to {@code getDecoder}
 * @return the decoded characters, flipped for reading, with at least one
 *         spare character of capacity beyond the limit
 */
@SuppressWarnings("cast")
public CharBuffer decode(ByteBuffer inbuf, boolean ignoreEncodingErrors) {
    String encName = getEncodingName();
    CharsetDecoder decoder;
    try {
        decoder = getDecoder(encName, ignoreEncodingErrors);
    } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
        log.error("unsupported.encoding", encName);
        return (CharBuffer)CharBuffer.allocate(1).flip();
    }

    // slightly overestimate the buffer size to avoid reallocation.
    float factor =
        decoder.averageCharsPerByte() * 0.8f +
        decoder.maxCharsPerByte() * 0.2f;
    int initialCapacity = 10 + (int)(inbuf.remaining()*factor);
    CharBuffer dest = CharBuffer.allocate(initialCapacity);

    for (;;) {
        final CoderResult outcome = decoder.decode(inbuf, dest, true);
        dest.flip();

        if (outcome.isUnderflow()) {
            // done reading; make sure there is at least one extra character
            if (dest.limit() == dest.capacity()) {
                dest = CharBuffer.allocate(dest.capacity()+1).put(dest);
                dest.flip();
            }
            return dest;
        }

        if (outcome.isOverflow()) {
            // buffer too small; expand and carry over what was decoded
            int grownCapacity =
                10 + dest.capacity() +
                (int)(inbuf.remaining()*decoder.maxCharsPerByte());
            dest = CharBuffer.allocate(grownCapacity).put(dest);
            continue;
        }

        if (!outcome.isMalformed() && !outcome.isUnmappable()) {
            throw new AssertionError(outcome);
        }

        // bad character in input: consume the offending bytes and collect
        // them as hex for the error message
        StringBuilder badBytesHex = new StringBuilder();
        int badByteCount = outcome.length();
        for (int i = 0; i < badByteCount; i++) {
            badBytesHex.append(String.format("%02X", inbuf.get()));
        }

        String charsetName = charset == null ? encName : charset.name();

        log.error(dest.limit(),
                  Errors.IllegalCharForEncoding(badBytesHex.toString(), charsetName));

        // undo the flip() to prepare the output buffer
        // for more translation
        dest.position(dest.limit());
        dest.limit(dest.capacity());
        dest.put((char)0xfffd); // backward compatible
    }
    // unreached
}
Example 13
Source File: C2BConverter.java From Tomcat7.0.67 with Apache License 2.0 | 4 votes |
/** * Convert the given characters to bytes. * * @param cc char input * @param bc byte output */ public void convert(CharChunk cc, ByteChunk bc) throws IOException { if ((bb == null) || (bb.array() != bc.getBuffer())) { // Create a new byte buffer if anything changed bb = ByteBuffer.wrap(bc.getBuffer(), bc.getEnd(), bc.getBuffer().length - bc.getEnd()); } else { // Initialize the byte buffer bb.limit(bc.getBuffer().length); bb.position(bc.getEnd()); } if ((cb == null) || (cb.array() != cc.getBuffer())) { // Create a new char buffer if anything changed cb = CharBuffer.wrap(cc.getBuffer(), cc.getStart(), cc.getLength()); } else { // Initialize the char buffer cb.limit(cc.getEnd()); cb.position(cc.getStart()); } CoderResult result = null; // Parse leftover if any are present if (leftovers.position() > 0) { int pos = bb.position(); // Loop until one char is encoded or there is a encoder error do { leftovers.put((char) cc.substract()); leftovers.flip(); result = encoder.encode(leftovers, bb, false); leftovers.position(leftovers.limit()); leftovers.limit(leftovers.array().length); } while (result.isUnderflow() && (bb.position() == pos)); if (result.isError() || result.isMalformed()) { result.throwException(); } cb.position(cc.getStart()); leftovers.position(0); } // Do the decoding and get the results into the byte chunk and the char // chunk result = encoder.encode(cb, bb, false); if (result.isError() || result.isMalformed()) { result.throwException(); } else if (result.isOverflow()) { // Propagate current positions to the byte chunk and char chunk bc.setEnd(bb.position()); cc.setOffset(cb.position()); } else if (result.isUnderflow()) { // Propagate current positions to the byte chunk and char chunk bc.setEnd(bb.position()); cc.setOffset(cb.position()); // Put leftovers in the leftovers char buffer if (cc.getLength() > 0) { leftovers.limit(leftovers.array().length); leftovers.position(cc.getLength()); cc.substract(leftovers.array(), 0, cc.getLength()); } } }
Example 14
Source File: B2CConverter.java From Tomcat7.0.67 with Apache License 2.0 | 4 votes |
/** * Convert the given bytes to characters. * * @param bc byte input * @param cc char output * @param endOfInput Is this all of the available data */ public void convert(ByteChunk bc, CharChunk cc, boolean endOfInput) throws IOException { if ((bb == null) || (bb.array() != bc.getBuffer())) { // Create a new byte buffer if anything changed bb = ByteBuffer.wrap(bc.getBuffer(), bc.getStart(), bc.getLength()); } else { // Initialize the byte buffer bb.limit(bc.getEnd()); bb.position(bc.getStart()); } if ((cb == null) || (cb.array() != cc.getBuffer())) { // Create a new char buffer if anything changed cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(), cc.getBuffer().length - cc.getEnd()); } else { // Initialize the char buffer cb.limit(cc.getBuffer().length); cb.position(cc.getEnd()); } CoderResult result = null; // Parse leftover if any are present if (leftovers.position() > 0) { int pos = cb.position(); // Loop until one char is decoded or there is a decoder error do { leftovers.put(bc.substractB()); leftovers.flip(); result = decoder.decode(leftovers, cb, endOfInput); leftovers.position(leftovers.limit()); leftovers.limit(leftovers.array().length); } while (result.isUnderflow() && (cb.position() == pos)); if (result.isError() || result.isMalformed()) { result.throwException(); } bb.position(bc.getStart()); leftovers.position(0); } // Do the decoding and get the results into the byte chunk and the char // chunk result = decoder.decode(bb, cb, endOfInput); if (result.isError() || result.isMalformed()) { result.throwException(); } else if (result.isOverflow()) { // Propagate current positions to the byte chunk and char chunk, if // this continues the char buffer will get resized bc.setOffset(bb.position()); cc.setEnd(cb.position()); } else if (result.isUnderflow()) { // Propagate current positions to the byte chunk and char chunk bc.setOffset(bb.position()); cc.setEnd(cb.position()); // Put leftovers in the leftovers byte buffer if (bc.getLength() > 0) { 
leftovers.limit(leftovers.array().length); leftovers.position(bc.getLength()); bc.substract(leftovers.array(), 0, bc.getLength()); } } }
Example 15
Source File: FastMatcher.java From netbeans with Apache License 2.0 | 4 votes |
/**
 * Returns the character at {@code index}, decoding further chunks of the
 * file when the index is not in the current buffer.
 *
 * @param index zero-based character index
 * @return the character at that index
 * @throws IndexOutOfBoundsException if {@code index} is negative or not
 *         less than {@link #length()}
 * @throws IllegalStateException if the character could not be obtained,
 *         for example because decoding failed
 */
@Override
public char charAt(int index) {
    if (index < lastIndex) {
        returns++;
    }
    lastIndex = index;
    // CharSequence.charAt requires 0 <= index < length(); index == length()
    // used to slip through and end in an IllegalStateException.
    if (index < 0 || index >= length()) {
        throw new IndexOutOfBoundsException();
    }
    if (isInBuffer(index)) {
        return getFromBuffer(index);
    } else {
        if (index < currentStart || currentStart == -1) {
            reset();
        }
        retrieves++;
        MappedByteBuffer mappedByteBuffer = null;
        try {
            while (readBytes < fileSize) {
                try {
                    mappedByteBuffer = fileChannel.map(
                            FileChannel.MapMode.READ_ONLY,
                            readBytes,
                            Math.min(SIZE_LIMIT, fileSize - readBytes));
                    maps++;
                    CoderResult result;
                    do {
                        currentStart = currentStart == -1 ? 0
                                : currentStart + currentBuffer.limit();
                        currentBuffer.clear();
                        result = currentDecoder.decode(mappedByteBuffer,
                                currentBuffer,
                                readBytes + SIZE_LIMIT >= fileSize);
                        currentBuffer.flip();
                        int readChars = currentBuffer.limit();
                        if (currentStart + readChars > index) {
                            return getFromBuffer(index);
                        }
                        if (result.isUnmappable() || result.isMalformed()
                                || result.isError()) {
                            throw new IOException("Error decoding file: "
                                    + result.toString() + " ");
                        }
                    } while (result.isOverflow());
                } finally {
                    if (mappedByteBuffer != null) {
                        int readNow = mappedByteBuffer.position();
                        readBytes += readNow;
                        unmap(mappedByteBuffer);
                        // Forget the released mapping. Otherwise a failing
                        // map() on the next pass would count this buffer's
                        // position again and unmap it a second time.
                        mappedByteBuffer = null;
                    }
                }
            }
            boolean repeat;
            do {
                repeat = currentDecoder.flush(currentBuffer).isOverflow();
                int size = currentBuffer.position();
                if (size + currentStart > index) {
                    currentBuffer.flip();
                    return currentBuffer.get(index - currentStart);
                }
                currentBuffer.clear();
                currentStart += size;
            } while (repeat);
        } catch (IOException ex) {
            // Every mapping is released by the inner finally block, so there
            // is nothing left to unmap here.
            Exceptions.printStackTrace(ex);
        }
    }
    throw new IllegalStateException(
            "Cannot get character."); //NOI18N
}
Example 16
Source File: FastMatcher.java From netbeans with Apache License 2.0 | 4 votes |
/**
 * Compute the length of this sequence by decoding the whole file - quite an
 * expensive operation, indeed. The result is cached, so the file is decoded
 * on the first call only.
 *
 * If decoding fails, the stack trace is printed and the number of characters
 * counted up to that point is cached and returned.
 *
 * @return the number of characters counted
 */
@Override
public int length() {
    if (length != -1) {
        return length;
    }
    long start = System.currentTimeMillis();
    int charactersRead = 0;
    long bytesRead = 0;
    MappedByteBuffer mappedByteBuffer = null;
    CharBuffer charBuffer = CharBuffer.allocate(SIZE_LIMIT);
    CharsetDecoder decoder = prepareDecoder(charset);
    decoder.onUnmappableCharacter(CodingErrorAction.IGNORE);
    try {
        while (bytesRead < fileSize) {
            mappedByteBuffer = fileChannel.map(
                    FileChannel.MapMode.READ_ONLY, bytesRead,
                    Math.min(SIZE_LIMIT, fileSize - bytesRead));
            // bytesRead does not change while this chunk is being decoded
            final boolean lastChunk = bytesRead + SIZE_LIMIT >= fileSize;
            CoderResult result;
            do {
                charBuffer.clear();
                result = decoder.decode(
                        mappedByteBuffer, charBuffer, lastChunk);
                if (result.isUnmappable() || result.isMalformed()
                        || result.isError()) {
                    throw new IOException("Error decoding file: "
                            + result.toString() + " ");
                }
                if (lastChunk) {
                    LOG.info("Coding end");
                }
                charactersRead += charBuffer.position();
            } while (result.isOverflow());
            int readNow = mappedByteBuffer.position();
            bytesRead += readNow;
            unmap(mappedByteBuffer);
            // Forget the released mapping so that the catch block below
            // cannot unmap it again if the next map() call fails.
            mappedByteBuffer = null;
        }
        charBuffer.clear();
        boolean repeat;
        do {
            repeat = decoder.flush(charBuffer).isOverflow();
            charactersRead += charBuffer.position();
            charBuffer.clear();
        } while (repeat);
    } catch (IOException ex) {
        // Only a mapping that is still live reaches this point non-null.
        if (mappedByteBuffer != null) {
            unmap(mappedByteBuffer);
        }
        Exceptions.printStackTrace(ex);
    }
    length = charactersRead;
    LOG.log(Level.INFO, "Length computed in {0} ms.", //NOI18N
            System.currentTimeMillis() - start);
    return length;
}
Example 17
Source File: BaseFileManager.java From lua-for-android with BSD 3-Clause "New" or "Revised" License | 4 votes |
/**
 * Decodes {@code inbuf} into a new character buffer using the decoder
 * obtained for the configured encoding name.
 *
 * Each malformed or unmappable byte run is reported through {@code log} and
 * replaced by U+FFFD in the output. If the encoding name is illegal or
 * unsupported, an error is logged and an empty buffer is returned.
 *
 * @param inbuf                the bytes to decode
 * @param ignoreEncodingErrors passed through to {@code getDecoder}
 * @return the decoded characters, flipped for reading, with at least one
 *         spare character of capacity beyond the limit
 */
public CharBuffer decode(ByteBuffer inbuf, boolean ignoreEncodingErrors) {
    String encName = getEncodingName();
    CharsetDecoder decoder;
    try {
        decoder = getDecoder(encName, ignoreEncodingErrors);
    } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
        log.error(Errors.UnsupportedEncoding(encName));
        return (CharBuffer) CharBuffer.allocate(1).flip();
    }

    // slightly overestimate the buffer size to avoid reallocation.
    float factor =
        decoder.averageCharsPerByte() * 0.8f +
        decoder.maxCharsPerByte() * 0.2f;
    CharBuffer dest = CharBuffer.allocate(10 + (int)(inbuf.remaining()*factor));

    for (;;) {
        final CoderResult status = decoder.decode(inbuf, dest, true);
        dest.flip();

        if (status.isUnderflow()) {
            // all input consumed; guarantee one spare character of capacity
            if (dest.limit() == dest.capacity()) {
                dest = CharBuffer.allocate(dest.capacity()+1).put(dest);
                dest.flip();
            }
            return dest;
        }

        if (status.isOverflow()) {
            // output buffer too small: move the decoded part to a larger one
            int newCapacity =
                10 + dest.capacity() +
                (int)(inbuf.remaining()*decoder.maxCharsPerByte());
            dest = CharBuffer.allocate(newCapacity).put(dest);
            continue;
        }

        if (!(status.isMalformed() || status.isUnmappable())) {
            throw new AssertionError(status);
        }

        // bad character in input: skip the offending bytes, keeping their
        // hex form for the diagnostic
        StringBuilder hex = new StringBuilder();
        int skipCount = status.length();
        for (int n = 0; n < skipCount; n++) {
            hex.append(String.format("%02X", inbuf.get()));
        }

        String charsetName = charset == null ? encName : charset.name();

        log.error(dest.limit(),
                  Errors.IllegalCharForEncoding(hex.toString(), charsetName));

        // undo the flip() to prepare the output buffer
        // for more translation
        dest.position(dest.limit());
        dest.limit(dest.capacity());
        dest.put((char)0xfffd); // backward compatible
    }
    // unreached
}
Example 18
Source File: C2BConverter.java From Tomcat8-Source-Read with MIT License | 4 votes |
/** * Convert the given characters to bytes. * * @param cc char input * @param bc byte output * @throws IOException An encoding error occurred */ public void convert(CharBuffer cc, ByteBuffer bc) throws IOException { if ((bb == null) || (bb.array() != bc.array())) { // Create a new byte buffer if anything changed bb = ByteBuffer.wrap(bc.array(), bc.limit(), bc.capacity() - bc.limit()); } else { // Initialize the byte buffer bb.limit(bc.capacity()); bb.position(bc.limit()); } if ((cb == null) || (cb.array() != cc.array())) { // Create a new char buffer if anything changed cb = CharBuffer.wrap(cc.array(), cc.arrayOffset() + cc.position(), cc.remaining()); } else { // Initialize the char buffer cb.limit(cc.limit()); cb.position(cc.position()); } CoderResult result = null; // Parse leftover if any are present if (leftovers.position() > 0) { int pos = bb.position(); // Loop until one char is encoded or there is a encoder error do { leftovers.put(cc.get()); leftovers.flip(); result = encoder.encode(leftovers, bb, false); leftovers.position(leftovers.limit()); leftovers.limit(leftovers.array().length); } while (result.isUnderflow() && (bb.position() == pos)); if (result.isError() || result.isMalformed()) { result.throwException(); } cb.position(cc.position()); leftovers.position(0); } // Do the decoding and get the results into the byte chunk and the char // chunk result = encoder.encode(cb, bb, false); if (result.isError() || result.isMalformed()) { result.throwException(); } else if (result.isOverflow()) { // Propagate current positions to the byte chunk and char chunk bc.limit(bb.position()); cc.position(cb.position()); } else if (result.isUnderflow()) { // Propagate current positions to the byte chunk and char chunk bc.limit(bb.position()); cc.position(cb.position()); // Put leftovers in the leftovers char buffer if (cc.remaining() > 0) { leftovers.limit(leftovers.array().length); leftovers.position(cc.remaining()); cc.get(leftovers.array(), 0, cc.remaining()); } } }
Example 19
Source File: C2BConverter.java From Tomcat8-Source-Read with MIT License | 4 votes |
/** * Convert the given characters to bytes. * * @param cc char input * @param bc byte output * @throws IOException An encoding error occurred */ public void convert(CharChunk cc, ByteChunk bc) throws IOException { if ((bb == null) || (bb.array() != bc.getBuffer())) { // Create a new byte buffer if anything changed bb = ByteBuffer.wrap(bc.getBuffer(), bc.getEnd(), bc.getBuffer().length - bc.getEnd()); } else { // Initialize the byte buffer bb.limit(bc.getBuffer().length); bb.position(bc.getEnd()); } if ((cb == null) || (cb.array() != cc.getBuffer())) { // Create a new char buffer if anything changed cb = CharBuffer.wrap(cc.getBuffer(), cc.getStart(), cc.getLength()); } else { // Initialize the char buffer cb.limit(cc.getEnd()); cb.position(cc.getStart()); } CoderResult result = null; // Parse leftover if any are present if (leftovers.position() > 0) { int pos = bb.position(); // Loop until one char is encoded or there is a encoder error do { leftovers.put((char) cc.substract()); leftovers.flip(); result = encoder.encode(leftovers, bb, false); leftovers.position(leftovers.limit()); leftovers.limit(leftovers.array().length); } while (result.isUnderflow() && (bb.position() == pos)); if (result.isError() || result.isMalformed()) { result.throwException(); } cb.position(cc.getStart()); leftovers.position(0); } // Do the decoding and get the results into the byte chunk and the char // chunk result = encoder.encode(cb, bb, false); if (result.isError() || result.isMalformed()) { result.throwException(); } else if (result.isOverflow()) { // Propagate current positions to the byte chunk and char chunk bc.setEnd(bb.position()); cc.setOffset(cb.position()); } else if (result.isUnderflow()) { // Propagate current positions to the byte chunk and char chunk bc.setEnd(bb.position()); cc.setOffset(cb.position()); // Put leftovers in the leftovers char buffer if (cc.getLength() > 0) { leftovers.limit(leftovers.array().length); leftovers.position(cc.getLength()); 
cc.substract(leftovers.array(), 0, cc.getLength()); } } }
Example 20
Source File: Speller.java From morfologik-stemming with BSD 3-Clause "New" or "Revised" License | 4 votes |
/**
 * Recursively explores every arc leaving {@code node} in {@code fsa}, extending the
 * candidate under construction in {@code candidate} by one decoded character per
 * step, and adds a {@code CandidateData} to {@code candidates} whenever the
 * distance computed for it does not exceed {@code effectEditDistance}.
 *
 * <p>Each arc contributes one label byte. Bytes that do not yet decode to a
 * character are handed to the recursive call through {@code prevBytes}; once a
 * character has been decoded the recursion continues with an empty array.
 *
 * @param candidates output list that accepted candidates are appended to
 * @param depth index used on the {@code hMatrix} diagonal and passed to
 *        {@code cuted}/{@code ed}; only the "general" step recurses with {@code depth + 1}
 * @param node automaton node whose outgoing arcs are enumerated
 * @param prevBytes bytes read on preceding arcs that have not yet formed a complete character
 * @param wordIndex current index into the word being matched
 *        (NOTE(review): inferred from its use together with {@code wordLen} -- confirm)
 * @param candIndex index in {@code candidate} where the decoded character is stored
 */
private void findRepl(List<CandidateData> candidates, final int depth, final int node, final byte[] prevBytes, final int wordIndex, final int candIndex) {
int dist = 0;
for (int arc = fsa.getFirstArc(node); arc != 0; arc = fsa.getNextArc(arc)) {
// Rebuild the byte sequence for this path: carried-over bytes plus this arc's label.
byteBuffer = BufferUtils.clearAndEnsureCapacity(byteBuffer, prevBytes.length + 1);
byteBuffer.put(prevBytes);
byteBuffer.put(fsa.getArcLabel(arc));
// Remember how many bytes were written; flip() resets the position below.
final int bufPos = byteBuffer.position();
byteBuffer.flip();
decoder.reset();
// FIXME: this isn't correct -- no checks for overflows, no decoder flush. I don't think this should be in here
// either; the decoder should run once on the accumulated temporary byte buffer (current path) only when there's
// a potential that this buffer can become a replacement candidate (isEndOfCandidate). Because we assume candidates
// are valid input strings (this is verified when building the dictionary), this would save a lot of conversions.
final CoderResult c = decoder.decode(byteBuffer, charBuffer, true);
if (c.isMalformed()) { // assume that only valid
// encodings are there
// Malformed is treated here as "character not complete yet": copy the bytes
// gathered so far and continue along this arc with them as prevBytes.
final byte[] prev = new byte[bufPos];
byteBuffer.position(0);
byteBuffer.get(prev);
if (!fsa.isArcTerminal(arc)) {
findRepl(candidates, depth, fsa.getEndNode(arc), prev, wordIndex, candIndex); // note: depth is not incremented
}
byteBuffer.clear();
} else if (!c.isError()) { // unmappable characters are silently discarded
// A character was decoded: store the first decoded char at candIndex and
// reset both buffers for the next arc / recursive call.
charBuffer.flip();
candidate[candIndex] = charBuffer.get();
charBuffer.clear();
byteBuffer.clear();
int lengthReplacement;
// replacement "any to two"
if ((lengthReplacement = matchAnyToTwo(wordIndex, candIndex)) > 0) {
// the replacement takes place at the end of the candidate
if (isEndOfCandidate(arc, wordIndex) && (dist = hMatrix.get(depth - 1, depth - 1)) <= effectEditDistance) {
if (Math.abs(wordLen - 1 - (wordIndex + lengthReplacement - 2)) > 0) {
// there are extra letters in the word after the replacement
dist = dist + Math.abs(wordLen - 1 - (wordIndex + lengthReplacement - 2));
}
if (dist <= effectEditDistance) {
candidates.add(new CandidateData(String.valueOf(candidate, 0, candIndex + 1), dist));
}
}
if (isArcNotTerminal(arc, candIndex)) {
// Temporarily overwrite the diagonal cell with the previous diagonal value for
// the duration of the recursive call, then restore the saved value.
int x = hMatrix.get(depth, depth);
hMatrix.set(depth, depth, hMatrix.get(depth - 1, depth - 1));
// Math.max(0, depth) passes depth through unchanged for any non-negative depth.
findRepl(candidates, Math.max(0, depth), fsa.getEndNode(arc), new byte[0], wordIndex + lengthReplacement - 1,
candIndex + 1);
hMatrix.set(depth, depth, x);
}
}
//replacement "any to one"
if ((lengthReplacement = matchAnyToOne(wordIndex, candIndex)) > 0) {
// the replacement takes place at the end of the candidate
if (isEndOfCandidate(arc, wordIndex) && (dist = hMatrix.get(depth, depth)) <= effectEditDistance) {
if (Math.abs(wordLen - 1 - (wordIndex + lengthReplacement - 1)) > 0) {
// there are extra letters in the word after the replacement
dist = dist + Math.abs(wordLen - 1 - (wordIndex + lengthReplacement - 1));
}
if (dist <= effectEditDistance) {
candidates.add(new CandidateData(String.valueOf(candidate, 0, candIndex + 1), dist));
}
}
if (isArcNotTerminal(arc, candIndex)) {
// Continue past the replaced span; depth stays the same.
findRepl(candidates, depth, fsa.getEndNode(arc), new byte[0], wordIndex + lengthReplacement, candIndex + 1);
}
}
//general
// Only continue while the value returned by cuted() is within the allowed distance.
if (cuted(depth, wordIndex, candIndex) <= effectEditDistance) {
if ((isEndOfCandidate(arc, wordIndex))
&& (dist = ed(wordLen - 1 - (wordIndex - depth), depth, wordLen - 1, candIndex)) <= effectEditDistance) {
candidates.add(new CandidateData(String.valueOf(candidate, 0, candIndex + 1), dist));
}
if (isArcNotTerminal(arc, candIndex)) {
// The only recursion that advances depth, wordIndex and candIndex together.
findRepl(candidates, depth + 1, fsa.getEndNode(arc), new byte[0], wordIndex + 1, candIndex + 1);
}
}
}
}
}