java.nio.charset.CoderResult#isMalformed

Source File: BasicURLCanonicalizer.java From webarchive-commons with Apache License 2.0

6 votes

/**
 * Appends the utf-8 decoding of the bytes collected in {@code bbuf} to
 * {@code sb}. Wherever the decoder reports malformed input, the matching
 * %xx text is copied verbatim from {@code inputStr} in place of decoded
 * characters, and decoding then continues with the bytes that follow. See
 * the "bad unicode" tests in GoogleCanonicalizerTest#testDecode(). The
 * arguments only make sense within the context of {@link #decode(String)}.
 * 
 * @param sb
 *            StringBuilder that receives the output
 * @param bbuf
 *            raw bytes taken from %-encoded input; expected in write mode
 *            (it is flipped here before being read)
 * @param inputStr
 *            the complete input string
 * @param seqStart
 *            index within inputStr (inclusive) where the %-encoded
 *            sequence begins
 * @param seqEnd
 *            index within inputStr (exclusive) where the %-encoded
 *            sequence ends; not read by this method
 * @param utf8decoder
 *            decoder to use; it is reset before decoding starts
 */
private void appendDecodedPctUtf8(StringBuilder sb, ByteBuffer bbuf,
		String inputStr, int seqStart, int seqEnd,
		CharsetDecoder utf8decoder) {
	utf8decoder.reset();
	CharBuffer decoded = CharBuffer.allocate(bbuf.position());
	bbuf.flip();
	while (bbuf.hasRemaining()) {
		CoderResult outcome = utf8decoder.decode(bbuf, decoded, true);

		// emit whatever was decoded before the decoder stopped
		decoded.flip();
		sb.append(decoded);
		decoded.clear();

		if (!outcome.isMalformed()) {
			continue;
		}

		// each raw byte corresponds to three characters ("%xx") of input
		int badByteCount = outcome.length();
		int from = seqStart + 3 * bbuf.position();
		int to = from + 3 * badByteCount;
		sb.append(inputStr.subSequence(from, to));

		// step over the bad bytes; good ones may follow
		bbuf.position(bbuf.position() + badByteCount);
	}
}

Source File: BinaryTruncator.java From parquet-mr with Apache License 2.0

6 votes

/**
 * Classifies the bytes remaining in {@code buffer} by running them through
 * {@code decoder} as complete input. The buffer's position is put back to
 * where it was before this method returns.
 */
Validity checkValidity(ByteBuffer buffer) {
  final int savedPosition = buffer.position();
  CoderResult outcome;
  do {
    // the decoded characters are discarded; only the result type matters
    dummyBuffer.clear();
    outcome = decoder.decode(buffer, dummyBuffer, true);
  } while (outcome.isOverflow());
  buffer.position(savedPosition);

  if (outcome.isUnderflow()) {
    return Validity.VALID;
  }
  return outcome.isMalformed() ? Validity.MALFORMED : Validity.UNMAPPABLE;
}

Source File: ResettableFileInputStream.java From mt-flume with Apache License 2.0

5 votes

/**
 * Decodes from {@code buf} into {@code charBuf} and returns the first
 * decoded character, or -1 when the decode step produced no character.
 * The tracked position is advanced by the number of bytes consumed.
 *
 * @throws IOException raised via {@code CoderResult.throwException()} when
 *         the decoder reports malformed or unmappable input
 */
@Override
public synchronized int readChar() throws IOException {
  if (!buf.hasRemaining()) {
    refillBuf();
  }

  final int startPos = buf.position();
  charBuf.clear();

  // tell the decoder the input is complete once position has reached fileSize
  final boolean atEndOfInput = position >= fileSize;

  final CoderResult outcome = decoder.decode(buf, charBuf, atEndOfInput);
  if (outcome.isError()) {
    outcome.throwException();
  }

  final int consumed = buf.position() - startPos;

  charBuf.flip();
  if (!charBuf.hasRemaining()) {
    // nothing decoded; there may be a partial character in the decoder buffer
    incrPosition(consumed, false);
    return -1;
  }

  final char c = charBuf.get();
  // don't increment the persisted location in the middle of a surrogate
  // pair, otherwise a later seek() to that location may never recover
  incrPosition(consumed, !Character.isHighSurrogate(c));
  return c;
}

Source File: UrlCanonicalizer.java From outbackcdx with Apache License 2.0

5 votes

/**
 * Decodes the remaining bytes of {@code bb} as UTF-8 and appends the result
 * to {@code out}. Bytes the decoder reports as malformed are appended as
 * lowercase {@code %xx} escapes at the position where they occurred, and
 * decoding resumes with the bytes that follow them.
 *
 * @param bb  bytes to decode; fully consumed on return
 * @param out receives decoded characters and escapes, in input order
 */
private static void tryDecodeUtf8(ByteBuffer bb, StringBuilder out) {
    CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder();
    CharBuffer cb = CharBuffer.allocate(bb.remaining());
    while (bb.hasRemaining()) {
        CoderResult result = decoder.decode(bb, cb, true);

        // Flush the characters decoded so far BEFORE emitting the escapes:
        // the decoder stops at the first malformed byte, so everything in cb
        // precedes it in the input and must precede it in the output too.
        cb.flip();
        out.append(cb);
        cb.clear();

        if (result.isMalformed()) {
            // the malformed bytes are still unread in bb; consume and escape them
            for (int i = 0; i < result.length(); i++) {
                out.append('%').append(String.format("%02x", bb.get()));
            }
        }
    }
}

Source File: MboxIterator.java From sling-samples with Apache License 2.0

5 votes

/**
 * Runs one decode step from {@code byteBuffer} into {@code mboxCharBuffer},
 * refreshes the end-of-input flag and flips the char buffer for reading.
 *
 * @throws CharConversionException if the decoder reported malformed or
 *         unmappable input
 */
private void decodeNextCharBuffer() throws CharConversionException {
    final CoderResult outcome = DECODER.decode(byteBuffer, mboxCharBuffer, endOfInputFlag);
    updateEndOfInputFlag();
    mboxCharBuffer.flip();

    if (outcome.isMalformed()) {
        throw new CharConversionException("Malformed input!");
    }
    if (outcome.isUnmappable()) {
        throw new CharConversionException("Unmappable character!");
    }
}

Source File: InputStreamReader.java From openjdk-jdk9 with GNU General Public License v2.0

4 votes

/**
 * Decodes up to {@code length} characters into {@code buf}, beginning at
 * index {@code offset}. Characters come from bytes already held in this
 * reader's byte buffer; when that buffer is exhausted it is refilled from
 * the underlying InputStream before decoding continues.
 *
 * @param buf
 *            destination array for the decoded characters.
 * @param offset
 *            index in {@code buf} at which the first character is stored.
 * @param length
 *            upper bound on the number of characters to store.
 * @return the number of characters stored, or -1 if none were stored
 *         (a request with {@code length == 0} returns 0).
 * @throws IndexOutOfBoundsException
 *             if {@code offset < 0} or {@code length < 0}, or if
 *             {@code offset + length} exceeds the length of {@code buf}.
 * @throws IOException
 *             if this reader is closed, the input is malformed or
 *             unmappable, or some other I/O error occurs.
 */
@Override
public int read(char[] buf, int offset, int length) throws IOException {
    synchronized (lock) {
        if (!isOpen()) {
            throw new IOException("InputStreamReader is closed.");
        }
        if (offset < 0 || offset > buf.length - length || length < 0) {
            throw new IndexOutOfBoundsException();
        }
        if (length == 0) {
            return 0;
        }

        final CharBuffer target = CharBuffer.wrap(buf, offset, length);
        CoderResult status = CoderResult.UNDERFLOW;

        // no buffered bytes left means the first step has to be a refill
        boolean mustFill = !bytes.hasRemaining();

        while (target.hasRemaining()) {
            if (mustFill) {
                try {
                    // some output is ready and a read could block: hand it back now
                    if (in.available() == 0 && target.position() > offset) {
                        break;
                    }
                } catch (IOException e) {
                    // available() failed; fall through and attempt the read
                }

                final int room = bytes.capacity() - bytes.limit();
                final int writeIndex = bytes.arrayOffset() + bytes.limit();
                final int got = in.read(bytes.array(), writeIndex, room);

                if (got == -1) {
                    endOfInput = true;
                    break;
                }
                if (got == 0) {
                    break;
                }
                bytes.limit(bytes.limit() + got);
                mustFill = false;
            }

            status = decoder.decode(bytes, target, false);
            if (!status.isUnderflow()) {
                break;
            }

            // decoder wants more input: if the buffer is filled to capacity,
            // shift the undecoded tail to the front to make room
            if (bytes.limit() == bytes.capacity()) {
                bytes.compact();
                bytes.limit(bytes.position());
                bytes.position(0);
            }
            mustFill = true;
        }

        if (status == CoderResult.UNDERFLOW && endOfInput) {
            // final decode pass with end-of-input signalled, then flush and reset
            status = decoder.decode(bytes, target, true);
            decoder.flush(target);
            decoder.reset();
        }
        if (status.isMalformed()) {
            throw new MalformedInputException(status.length());
        }
        if (status.isUnmappable()) {
            throw new UnmappableCharacterException(status.length());
        }

        final int produced = target.position() - offset;
        return produced == 0 ? -1 : produced;
    }
}

Source File: InputStreamReader.java From jtransc with Apache License 2.0

4 votes

/**
 * Decodes up to {@code count} characters into {@code buffer}, beginning at
 * index {@code offset}. Characters come from bytes already held in this
 * reader's byte buffer; when that buffer is exhausted it is refilled from
 * the underlying InputStream before decoding continues. Returns the number
 * of characters stored, or -1 if none were stored (a request with
 * {@code count == 0} returns 0).
 *
 * @throws IndexOutOfBoundsException
 *     if {@code offset < 0 || count < 0 || offset + count > buffer.length}.
 * @throws IOException
 *             if this reader is closed, the input is malformed or
 *             unmappable, or some other I/O error occurs.
 */
@Override
public int read(char[] buffer, int offset, int count) throws IOException {
    synchronized (lock) {
        if (!isOpen()) {
            throw new IOException("InputStreamReader is closed");
        }

        JTranscArrays.checkOffsetAndCount(buffer.length, offset, count);
        if (count == 0) {
            return 0;
        }

        final CharBuffer sink = CharBuffer.wrap(buffer, offset, count);
        CoderResult outcome = CoderResult.UNDERFLOW;

        // no buffered bytes left means the first step has to be a refill
        boolean refill = !bytes.hasRemaining();

        while (sink.hasRemaining()) {
            if (refill) {
                try {
                    // some output is ready and a read could block: hand it back now
                    if (in.available() == 0 && sink.position() > offset) {
                        break;
                    }
                } catch (IOException e) {
                    // available() failed; fall through and attempt the read
                }

                final int freeBytes = bytes.capacity() - bytes.limit();
                final int fillIndex = bytes.arrayOffset() + bytes.limit();
                final int bytesRead = in.read(bytes.array(), fillIndex, freeBytes);

                if (bytesRead == -1) {
                    endOfInput = true;
                    break;
                }
                if (bytesRead == 0) {
                    break;
                }
                bytes.limit(bytes.limit() + bytesRead);
                refill = false;
            }

            outcome = decoder.decode(bytes, sink, false);
            if (!outcome.isUnderflow()) {
                break;
            }

            // decoder wants more input: if the buffer is filled to capacity,
            // shift the undecoded tail to the front to make room
            if (bytes.limit() == bytes.capacity()) {
                bytes.compact();
                bytes.limit(bytes.position());
                bytes.position(0);
            }
            refill = true;
        }

        if (outcome == CoderResult.UNDERFLOW && endOfInput) {
            // final decode pass with end-of-input signalled, then flush and reset
            outcome = decoder.decode(bytes, sink, true);
            decoder.flush(sink);
            decoder.reset();
        }
        if (outcome.isError()) {
            outcome.throwException();
        }

        final int produced = sink.position() - offset;
        return produced == 0 ? -1 : produced;
    }
}

Source File: C2BConverter.java From tomcatsrc with Apache License 2.0

4 votes

/**
 * Encodes characters taken from {@code cc} into the unused tail of the
 * byte array backing {@code bc}, then writes the new end/offset positions
 * back to both chunks. Characters held in {@code leftovers} from an earlier
 * call are encoded first.
 * 
 * @param cc char input
 * @param bc byte output
 * @throws IOException raised via {@code CoderResult.throwException()} when
 *         the encoder reports an error
 */
public void convert(CharChunk cc, ByteChunk bc)
        throws IOException {
    // Bind the output buffer: reuse the wrapper while it still wraps bc's array
    if ((bb != null) && (bb.array() == bc.getBuffer())) {
        bb.limit(bc.getBuffer().length);
        bb.position(bc.getEnd());
    } else {
        bb = ByteBuffer.wrap(bc.getBuffer(), bc.getEnd(),
                bc.getBuffer().length - bc.getEnd());
    }
    // Bind the input buffer the same way
    if ((cb != null) && (cb.array() == cc.getBuffer())) {
        cb.limit(cc.getEnd());
        cb.position(cc.getStart());
    } else {
        cb = CharBuffer.wrap(cc.getBuffer(), cc.getStart(),
                cc.getLength());
    }

    // Deal with pending leftovers before touching the main input
    if (leftovers.position() > 0) {
        final int bytesBefore = bb.position();
        CoderResult partial;
        // Add one char at a time until output is produced or the encoder
        // stops for another reason
        do {
            leftovers.put((char) cc.substract());
            leftovers.flip();
            partial = encoder.encode(leftovers, bb, false);
            leftovers.position(leftovers.limit());
            leftovers.limit(leftovers.array().length);
        } while (partial.isUnderflow() && (bb.position() == bytesBefore));
        if (partial.isError()) {
            partial.throwException();
        }
        cb.position(cc.getStart());
        leftovers.position(0);
    }

    // Main encoding pass
    final CoderResult result = encoder.encode(cb, bb, false);
    if (result.isError()) {
        result.throwException();
    }
    final boolean inputExhausted = result.isUnderflow();
    if (inputExhausted || result.isOverflow()) {
        // Propagate current positions to the byte chunk and char chunk
        bc.setEnd(bb.position());
        cc.setOffset(cb.position());
        // On underflow, park whatever input remains in the leftovers buffer
        if (inputExhausted && cc.getLength() > 0) {
            leftovers.limit(leftovers.array().length);
            leftovers.position(cc.getLength());
            cc.substract(leftovers.array(), 0, cc.getLength());
        }
    }
}

Source File: B2CConverter.java From tomcatsrc with Apache License 2.0

4 votes

/**
 * Decodes bytes taken from {@code bc} into the unused tail of the char
 * array backing {@code cc}, then writes the new offset/end positions back
 * to both chunks. Bytes held in {@code leftovers} from an earlier call are
 * decoded first.
 * 
 * @param bc byte input
 * @param cc char output
 * @param endOfInput    Is this all of the available data
 * @throws IOException raised via {@code CoderResult.throwException()} when
 *         the decoder reports an error
 */
public void convert(ByteChunk bc, CharChunk cc, boolean endOfInput)
        throws IOException {
    // Bind the input buffer: reuse the wrapper while it still wraps bc's array
    if ((bb != null) && (bb.array() == bc.getBuffer())) {
        bb.limit(bc.getEnd());
        bb.position(bc.getStart());
    } else {
        bb = ByteBuffer.wrap(bc.getBuffer(), bc.getStart(), bc.getLength());
    }
    // Bind the output buffer the same way
    if ((cb != null) && (cb.array() == cc.getBuffer())) {
        cb.limit(cc.getBuffer().length);
        cb.position(cc.getEnd());
    } else {
        cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(),
                cc.getBuffer().length - cc.getEnd());
    }

    // Deal with pending leftovers before touching the main input
    if (leftovers.position() > 0) {
        final int charsBefore = cb.position();
        CoderResult partial;
        // Add one byte at a time until a char is produced or the decoder
        // stops for another reason
        do {
            leftovers.put(bc.substractB());
            leftovers.flip();
            partial = decoder.decode(leftovers, cb, endOfInput);
            leftovers.position(leftovers.limit());
            leftovers.limit(leftovers.array().length);
        } while (partial.isUnderflow() && (cb.position() == charsBefore));
        if (partial.isError()) {
            partial.throwException();
        }
        bb.position(bc.getStart());
        leftovers.position(0);
    }

    // Main decoding pass
    final CoderResult result = decoder.decode(bb, cb, endOfInput);
    if (result.isError()) {
        result.throwException();
    }
    final boolean inputExhausted = result.isUnderflow();
    if (inputExhausted || result.isOverflow()) {
        // Propagate current positions to the byte chunk and char chunk; on
        // overflow, if this continues the char buffer will get resized
        bc.setOffset(bb.position());
        cc.setEnd(cb.position());
        // On underflow, park whatever input remains in the leftovers buffer
        if (inputExhausted && bc.getLength() > 0) {
            leftovers.limit(leftovers.array().length);
            leftovers.position(bc.getLength());
            bc.substract(leftovers.array(), 0, bc.getLength());
        }
    }
}

Source File: InputStreamReader.java From TorrentEngine with GNU General Public License v3.0

4 votes

/**
 * Fills {@code buf} with at most {@code length} decoded characters,
 * starting at index {@code offset}. Bytes already buffered by this reader
 * are decoded first; once they run out, the byte buffer is topped up from
 * the source InputStream and decoding resumes.
 * 
 * @param buf
 *            array that receives the characters.
 * @param offset
 *            index of the first slot in {@code buf} to write.
 * @param length
 *            maximum number of characters to write.
 * @return how many characters were written, or -1 if none were written
 *         (a request with {@code length == 0} returns 0).
 * @throws IndexOutOfBoundsException
 *             if {@code offset < 0} or {@code length < 0}, or if
 *             {@code offset + length} exceeds the length of {@code buf}.
 * @throws IOException
 *             if this reader is closed, the input is malformed or
 *             unmappable, or some other I/O error occurs.
 */
@Override
public int read(char[] buf, int offset, int length) throws IOException {
    synchronized (lock) {
        if (!isOpen()) {
            throw new IOException("InputStreamReader is closed.");
        }
        if (offset < 0 || offset > buf.length - length || length < 0) {
            throw new IndexOutOfBoundsException();
        }
        if (length == 0) {
            return 0;
        }

        final CharBuffer dst = CharBuffer.wrap(buf, offset, length);
        CoderResult state = CoderResult.UNDERFLOW;

        // with nothing buffered, the loop has to start by reading bytes
        boolean wantBytes = !bytes.hasRemaining();

        while (dst.hasRemaining()) {
            if (wantBytes) {
                try {
                    // return what we have rather than risk a blocking read
                    if (in.available() == 0 && dst.position() > offset) {
                        break;
                    }
                } catch (IOException e) {
                    // available() failed; go ahead and try the read
                }

                final int space = bytes.capacity() - bytes.limit();
                final int at = bytes.arrayOffset() + bytes.limit();
                final int n = in.read(bytes.array(), at, space);

                if (n == -1) {
                    endOfInput = true;
                    break;
                }
                if (n == 0) {
                    break;
                }
                bytes.limit(bytes.limit() + n);
                wantBytes = false;
            }

            state = decoder.decode(bytes, dst, false);
            if (!state.isUnderflow()) {
                break;
            }

            // more input needed: when the buffer is filled to capacity, move
            // the undecoded remainder to the front so there is room to read
            if (bytes.limit() == bytes.capacity()) {
                bytes.compact();
                bytes.limit(bytes.position());
                bytes.position(0);
            }
            wantBytes = true;
        }

        if (state == CoderResult.UNDERFLOW && endOfInput) {
            // last decode pass with end-of-input signalled, then flush and reset
            state = decoder.decode(bytes, dst, true);
            decoder.flush(dst);
            decoder.reset();
        }
        if (state.isMalformed()) {
            throw new MalformedInputException(state.length());
        }
        if (state.isUnmappable()) {
            throw new UnmappableCharacterException(state.length());
        }

        final int written = dst.position() - offset;
        return written == 0 ? -1 : written;
    }
}

Source File: B2CConverter.java From Tomcat8-Source-Read with MIT License

4 votes

/**
 * Decodes bytes from {@code bc} and stores the resulting characters after
 * the current end of {@code cc}, updating the offset/end of both chunks.
 * Any bytes parked in {@code leftovers} by a previous call are handled
 * before the main input.
 *
 * @param bc byte input
 * @param cc char output
 * @param endOfInput    Is this all of the available data
 *
 * @throws IOException If the conversion can not be completed
 */
public void convert(ByteChunk bc, CharChunk cc, boolean endOfInput)
        throws IOException {
    // Input side: keep the existing wrapper if it still wraps bc's array
    if ((bb != null) && (bb.array() == bc.getBuffer())) {
        bb.limit(bc.getEnd());
        bb.position(bc.getStart());
    } else {
        bb = ByteBuffer.wrap(bc.getBuffer(), bc.getStart(), bc.getLength());
    }
    // Output side: same rule for the char wrapper
    if ((cb != null) && (cb.array() == cc.getBuffer())) {
        cb.limit(cc.getBuffer().length);
        cb.position(cc.getEnd());
    } else {
        cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(),
                cc.getBuffer().length - cc.getEnd());
    }

    // Pending leftovers come first
    if (leftovers.position() > 0) {
        final int outStart = cb.position();
        CoderResult step;
        // Feed single bytes until one char is decoded or the decoder
        // stops for another reason
        do {
            leftovers.put(bc.substractB());
            leftovers.flip();
            step = decoder.decode(leftovers, cb, endOfInput);
            leftovers.position(leftovers.limit());
            leftovers.limit(leftovers.array().length);
        } while (step.isUnderflow() && (cb.position() == outStart));
        if (step.isError()) {
            step.throwException();
        }
        bb.position(bc.getStart());
        leftovers.position(0);
    }

    // Decode the main input
    final CoderResult result = decoder.decode(bb, cb, endOfInput);
    if (result.isError()) {
        result.throwException();
    }
    final boolean underflow = result.isUnderflow();
    if (underflow || result.isOverflow()) {
        // Propagate current positions to the byte chunk and char chunk; on
        // overflow, if this continues the char buffer will get resized
        bc.setOffset(bb.position());
        cc.setEnd(cb.position());
        // After an underflow, stash any remaining input bytes as leftovers
        if (underflow && bc.getLength() > 0) {
            leftovers.limit(leftovers.array().length);
            leftovers.position(bc.getLength());
            bc.substract(leftovers.array(), 0, bc.getLength());
        }
    }
}

Source File: BaseFileManager.java From openjdk-jdk9 with GNU General Public License v2.0

4 votes

/**
 * Decodes all of {@code inbuf} into a newly allocated CharBuffer using the
 * decoder obtained for {@code getEncodingName()}.
 *
 * If no decoder can be obtained for that encoding name, an error is logged
 * and an empty buffer is returned. Malformed or unmappable input is logged,
 * the offending bytes are skipped, and U+FFFD is written in their place.
 *
 * @param inbuf the bytes to decode; consumed by this call
 * @param ignoreEncodingErrors passed through to {@code getDecoder}
 * @return the decoded characters, flipped for reading, with at least one
 *         unused slot of capacity past the limit on the normal path
 */
@SuppressWarnings("cast")
public CharBuffer decode(ByteBuffer inbuf, boolean ignoreEncodingErrors) {
    String encName = getEncodingName();
    CharsetDecoder decoder;
    try {
        decoder = getDecoder(encName, ignoreEncodingErrors);
    } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
        log.error("unsupported.encoding", encName);
        // capacity-1 buffer flipped at position 0: nothing to read
        return (CharBuffer)CharBuffer.allocate(1).flip();
    }

    // slightly overestimate the buffer size to avoid reallocation.
    float factor =
        decoder.averageCharsPerByte() * 0.8f +
        decoder.maxCharsPerByte() * 0.2f;
    CharBuffer dest = CharBuffer.
        allocate(10 + (int)(inbuf.remaining()*factor));

    while (true) {
        CoderResult result = decoder.decode(inbuf, dest, true);
        // switch dest to read mode; the error branch below undoes this
        dest.flip();

        if (result.isUnderflow()) { // done reading
            // make sure there is at least one extra character
            if (dest.limit() == dest.capacity()) {
                dest = CharBuffer.allocate(dest.capacity()+1).put(dest);
                dest.flip();
            }
            return dest;
        } else if (result.isOverflow()) { // buffer too small; expand
            int newCapacity =
                10 + dest.capacity() +
                (int)(inbuf.remaining()*decoder.maxCharsPerByte());
            // put() copies the decoded chars and leaves the new buffer
            // positioned after them, ready for the next decode call
            dest = CharBuffer.allocate(newCapacity).put(dest);
        } else if (result.isMalformed() || result.isUnmappable()) {
            // bad character in input
            StringBuilder unmappable = new StringBuilder();
            int len = result.length();

            // consume the offending bytes from inbuf, collecting them as
            // uppercase hex for the error message
            for (int i = 0; i < len; i++) {
                unmappable.append(String.format("%02X", inbuf.get()));
            }

            String charsetName = charset == null ? encName : charset.name();

            // dest.limit() is the number of chars decoded so far
            log.error(dest.limit(),
                      Errors.IllegalCharForEncoding(unmappable.toString(), charsetName));

            // undo the flip() to prepare the output buffer
            // for more translation
            dest.position(dest.limit());
            dest.limit(dest.capacity());
            dest.put((char)0xfffd); // backward compatible
        } else {
            throw new AssertionError(result);
        }
    }
    // unreached
}

Source File: C2BConverter.java From Tomcat7.0.67 with Apache License 2.0

4 votes

/**
 * Encodes characters from {@code cc} and stores the resulting bytes after
 * the current end of {@code bc}, updating the end/offset of both chunks.
 * Any characters parked in {@code leftovers} by a previous call are handled
 * before the main input.
 * 
 * @param cc char input
 * @param bc byte output
 * @throws IOException raised via {@code CoderResult.throwException()} when
 *         the encoder reports an error
 */
public void convert(CharChunk cc, ByteChunk bc)
        throws IOException {
    // Output side: keep the existing wrapper if it still wraps bc's array
    if ((bb != null) && (bb.array() == bc.getBuffer())) {
        bb.limit(bc.getBuffer().length);
        bb.position(bc.getEnd());
    } else {
        bb = ByteBuffer.wrap(bc.getBuffer(), bc.getEnd(),
                bc.getBuffer().length - bc.getEnd());
    }
    // Input side: same rule for the char wrapper
    if ((cb != null) && (cb.array() == cc.getBuffer())) {
        cb.limit(cc.getEnd());
        cb.position(cc.getStart());
    } else {
        cb = CharBuffer.wrap(cc.getBuffer(), cc.getStart(),
                cc.getLength());
    }

    // Pending leftovers come first
    if (leftovers.position() > 0) {
        final int outStart = bb.position();
        CoderResult step;
        // Feed single chars until output is produced or the encoder
        // stops for another reason
        do {
            leftovers.put((char) cc.substract());
            leftovers.flip();
            step = encoder.encode(leftovers, bb, false);
            leftovers.position(leftovers.limit());
            leftovers.limit(leftovers.array().length);
        } while (step.isUnderflow() && (bb.position() == outStart));
        if (step.isError()) {
            step.throwException();
        }
        cb.position(cc.getStart());
        leftovers.position(0);
    }

    // Encode the main input
    final CoderResult result = encoder.encode(cb, bb, false);
    if (result.isError()) {
        result.throwException();
    }
    final boolean underflow = result.isUnderflow();
    if (underflow || result.isOverflow()) {
        // Propagate current positions to the byte chunk and char chunk
        bc.setEnd(bb.position());
        cc.setOffset(cb.position());
        // After an underflow, stash any remaining input chars as leftovers
        if (underflow && cc.getLength() > 0) {
            leftovers.limit(leftovers.array().length);
            leftovers.position(cc.getLength());
            cc.substract(leftovers.array(), 0, cc.getLength());
        }
    }
}

Source File: B2CConverter.java From Tomcat7.0.67 with Apache License 2.0

4 votes

/**
 * Turns the bytes available in {@code bc} into characters appended to
 * {@code cc}, and records in both chunks how far the conversion got. Bytes
 * kept in {@code leftovers} from the previous invocation are consumed
 * before the new input.
 * 
 * @param bc byte input
 * @param cc char output
 * @param endOfInput    Is this all of the available data
 * @throws IOException raised via {@code CoderResult.throwException()} when
 *         the decoder reports an error
 */
public void convert(ByteChunk bc, CharChunk cc, boolean endOfInput)
        throws IOException {
    // Reuse the byte wrapper only while it wraps bc's current array
    if ((bb != null) && (bb.array() == bc.getBuffer())) {
        bb.limit(bc.getEnd());
        bb.position(bc.getStart());
    } else {
        bb = ByteBuffer.wrap(bc.getBuffer(), bc.getStart(), bc.getLength());
    }
    // Reuse the char wrapper only while it wraps cc's current array
    if ((cb != null) && (cb.array() == cc.getBuffer())) {
        cb.limit(cc.getBuffer().length);
        cb.position(cc.getEnd());
    } else {
        cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(),
                cc.getBuffer().length - cc.getEnd());
    }

    // Leftover bytes are completed first
    if (leftovers.position() > 0) {
        final int charPosBefore = cb.position();
        CoderResult leftoverResult;
        // Append one input byte per iteration until a char comes out or
        // the decoder stops for another reason
        do {
            leftovers.put(bc.substractB());
            leftovers.flip();
            leftoverResult = decoder.decode(leftovers, cb, endOfInput);
            leftovers.position(leftovers.limit());
            leftovers.limit(leftovers.array().length);
        } while (leftoverResult.isUnderflow() && (cb.position() == charPosBefore));
        if (leftoverResult.isError()) {
            leftoverResult.throwException();
        }
        bb.position(bc.getStart());
        leftovers.position(0);
    }

    // Bulk decode of the remaining input
    final CoderResult result = decoder.decode(bb, cb, endOfInput);
    if (result.isError()) {
        result.throwException();
    }
    final boolean ranOutOfInput = result.isUnderflow();
    if (ranOutOfInput || result.isOverflow()) {
        // Propagate current positions to the byte chunk and char chunk; on
        // overflow, if this continues the char buffer will get resized
        bc.setOffset(bb.position());
        cc.setEnd(cb.position());
        // Undecoded trailing bytes are kept for the next call
        if (ranOutOfInput && bc.getLength() > 0) {
            leftovers.limit(leftovers.array().length);
            leftovers.position(bc.getLength());
            bc.substract(leftovers.array(), 0, bc.getLength());
        }
    }
}

Source File: FastMatcher.java From netbeans with Apache License 2.0

4 votes

/**
 * Returns the character at the given position, decoding the file chunk by
 * chunk until the chunk holding that position is in the buffer.
 *
 * @param index zero-based position of the requested character
 * @return the character at {@code index}
 * @throws IndexOutOfBoundsException if {@code index} is negative or is
 * not less than {@link #length()}
 * @throws IllegalStateException if the character could not be obtained,
 * for example because decoding failed (the I/O error is only reported)
 */
@Override
public char charAt(int index) {

    // Access statistics: count reads that move backwards
    if (index < lastIndex) {
        returns++;
    }
    lastIndex = index;
    // Valid positions are 0 .. length() - 1, as CharSequence requires
    if (index < 0 || index >= length()) {
        throw new IndexOutOfBoundsException();
    }
    if (isInBuffer(index)) {
        return getFromBuffer(index);
    } else {
        // reset() is called when the position lies before the current
        // chunk; currentStart == -1 appears to mean "nothing decoded yet"
        if (index < currentStart || currentStart == -1) {
            reset();
        }
        retrieves++;
        MappedByteBuffer mappedByteBuffer = null;
        try {
            while (readBytes < fileSize) {
                try {
                    mappedByteBuffer = fileChannel.map(
                            FileChannel.MapMode.READ_ONLY,
                            readBytes,
                            Math.min(SIZE_LIMIT, fileSize - readBytes));
                    maps++;
                    CoderResult result;
                    do {
                        // The new chunk starts where the previous one ended
                        currentStart = currentStart == -1 ? 0
                                : currentStart + currentBuffer.limit();
                        currentBuffer.clear();
                        result = currentDecoder.decode(mappedByteBuffer,
                                currentBuffer,
                                readBytes + SIZE_LIMIT >= fileSize);
                        currentBuffer.flip();
                        int readChars = currentBuffer.limit();
                        if (currentStart + readChars > index) {
                            return getFromBuffer(index);
                        }
                        if (result.isUnmappable() || result.isMalformed()
                                || result.isError()) {
                            throw new IOException("Error decoding file: "
                                    + result.toString() + " ");
                        }
                    } while (result.isOverflow());
                } finally {
                    if (mappedByteBuffer != null) {
                        int readNow = mappedByteBuffer.position();
                        readBytes += readNow;
                        unmap(mappedByteBuffer);
                        // Forget the buffer so that a later failure can
                        // neither unmap it nor count its bytes again
                        mappedByteBuffer = null;
                    }
                }
            }
            // All input consumed; drain whatever the decoder still holds
            boolean repeat;
            do {
                repeat = currentDecoder.flush(currentBuffer).isOverflow();
                int size = currentBuffer.position();
                if (size + currentStart > index) {
                    currentBuffer.flip();
                    return currentBuffer.get(index - currentStart);
                }
                currentBuffer.clear();
                currentStart += size;
            } while (repeat);
        } catch (IOException ex) {
            // The mapped buffer was already released by the inner finally
            Exceptions.printStackTrace(ex);
        }
    }

    throw new IllegalStateException(
            "Cannot get character.");   //NOI18N
}

Source File: FastMatcher.java From netbeans with Apache License 2.0

4 votes

/**
 * Compute length of this sequence - quite expensive operation, indeed.
 * <p>
 * The whole file is decoded once, chunk by chunk, and the number of
 * decoded characters is cached, so later calls return immediately. If
 * reading or decoding fails, the error is reported and the number of
 * characters counted up to that point is cached as the length.
 *
 * @return number of characters in this sequence
 */
@Override
public int length() {
    if (length != -1) {
        return length;
    }
    long start = System.currentTimeMillis();
    int charactersRead = 0;
    long bytesRead = 0;
    CharBuffer charBuffer = CharBuffer.allocate(SIZE_LIMIT);
    CharsetDecoder decoder = prepareDecoder(charset);
    decoder.onUnmappableCharacter(CodingErrorAction.IGNORE);

    try {
        while (bytesRead < fileSize) {
            MappedByteBuffer mappedByteBuffer = fileChannel.map(
                    FileChannel.MapMode.READ_ONLY, bytesRead,
                    Math.min(SIZE_LIMIT, fileSize - bytesRead));
            // Release every mapping exactly once, whatever happens while
            // the chunk is being decoded
            try {
                CoderResult result;
                do {
                    charBuffer.clear();
                    result = decoder.decode(
                            mappedByteBuffer, charBuffer,
                            bytesRead + SIZE_LIMIT >= fileSize);
                    if (result.isUnmappable() || result.isMalformed()
                            || result.isError()) {
                        throw new IOException("Error decoding file: "
                                + result.toString() + " ");
                    }
                    if (bytesRead + SIZE_LIMIT >= fileSize) {
                        LOG.info("Coding end");
                    }
                    charactersRead += charBuffer.position();
                } while (result.isOverflow());

                // Continue the next mapping after the bytes consumed here
                bytesRead += mappedByteBuffer.position();
            } finally {
                unmap(mappedByteBuffer);
            }
        }
        // All input consumed; count what the decoder still emits on flush
        charBuffer.clear();
        boolean repeat;
        do {
            repeat = decoder.flush(charBuffer).isOverflow();
            charactersRead += charBuffer.position();
            charBuffer.clear();
        } while (repeat);
    } catch (IOException ex) {
        Exceptions.printStackTrace(ex);
    }
    length = charactersRead;
    LOG.log(Level.INFO, "Length computed in {0} ms.", //NOI18N
            System.currentTimeMillis() - start);
    return length;
}

Source File: BaseFileManager.java From lua-for-android with BSD 3-Clause "New" or "Revised" License

4 votes

/**
 * Decodes the bytes of {@code inbuf} into characters using the decoder
 * obtained for the current encoding name.
 * <p>
 * Malformed or unmappable input is reported through the log, replaced by
 * U+FFFD in the output, and decoding continues after the offending bytes.
 * If the encoding is not supported, an error is logged and an empty
 * buffer is returned.
 *
 * @param inbuf bytes to decode; consumed by this call
 * @param ignoreEncodingErrors passed on to {@code getDecoder}
 * @return the decoded characters, flipped for reading, with at least one
 * unused slot beyond the limit
 */
public CharBuffer decode(ByteBuffer inbuf, boolean ignoreEncodingErrors) {
    String encodingName = getEncodingName();
    CharsetDecoder decoder;
    try {
        decoder = getDecoder(encodingName, ignoreEncodingErrors);
    } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
        log.error(Errors.UnsupportedEncoding(encodingName));
        return (CharBuffer) CharBuffer.allocate(1).flip();
    }

    // Size the output from a blend of the average and worst-case expansion
    // ratios, so that growing the buffer is rarely needed.
    float charsPerByte =
        decoder.averageCharsPerByte() * 0.8f +
        decoder.maxCharsPerByte() * 0.2f;
    CharBuffer out = CharBuffer.
        allocate(10 + (int)(inbuf.remaining()*charsPerByte));

    for (;;) {
        CoderResult status = decoder.decode(inbuf, out, true);
        out.flip();

        if (status.isUnderflow()) {
            // Input exhausted. Callers get one spare slot past the limit.
            if (out.limit() == out.capacity()) {
                CharBuffer roomier = CharBuffer.allocate(out.capacity()+1);
                roomier.put(out);
                roomier.flip();
                out = roomier;
            }
            return out;
        }

        if (status.isOverflow()) {
            // Output full: copy what we have into a larger buffer and retry.
            int grownCapacity =
                10 + out.capacity() +
                (int)(inbuf.remaining()*decoder.maxCharsPerByte());
            CharBuffer bigger = CharBuffer.allocate(grownCapacity);
            bigger.put(out);
            out = bigger;
            continue;
        }

        if (!status.isMalformed() && !status.isUnmappable()) {
            throw new AssertionError(status);
        }

        // Bad input: consume the offending bytes and render them as hex
        // for the diagnostic.
        StringBuilder badBytesHex = new StringBuilder();
        for (int left = status.length(); left > 0; left--) {
            badBytesHex.append(String.format("%02X", inbuf.get()));
        }

        String charsetName = charset == null ? encodingName : charset.name();

        log.error(out.limit(),
                  Errors.IllegalCharForEncoding(badBytesHex.toString(), charsetName));

        // Reverse the flip() so the buffer accepts further output, then
        // emit the replacement character (backward compatible).
        out.position(out.limit());
        out.limit(out.capacity());
        out.put((char)0xfffd);
    }
}

Source File: C2BConverter.java From Tomcat8-Source-Read with MIT License

4 votes

/**
 * Encodes the remaining characters of {@code cc} into bytes written after
 * the current limit of {@code bc}.
 * <p>
 * Characters that could not be encoded by the previous call are kept in
 * the leftovers buffer and are completed first with characters taken from
 * {@code cc}. Characters left unconsumed at the end of this call are moved
 * into the leftovers buffer.
 *
 * @param cc char input
 * @param bc byte output
 * @throws IOException An encoding error occurred
 */
public void convert(CharBuffer cc, ByteBuffer bc) throws IOException {
    // Reuse the cached output view when it wraps the same array,
    // otherwise wrap the free space after the current limit of bc
    if ((bb != null) && (bb.array() == bc.array())) {
        bb.limit(bc.capacity());
        bb.position(bc.limit());
    } else {
        bb = ByteBuffer.wrap(bc.array(), bc.limit(), bc.capacity() - bc.limit());
    }
    // Same for the input view over the unread part of cc
    if ((cb != null) && (cb.array() == cc.array())) {
        cb.limit(cc.limit());
        cb.position(cc.position());
    } else {
        cb = CharBuffer.wrap(cc.array(), cc.arrayOffset() + cc.position(), cc.remaining());
    }

    // Finish the characters left over from the previous call, if any
    if (leftovers.position() > 0) {
        final int outputMark = bb.position();
        CoderResult partial;
        // Feed one more char at a time until something is written to the
        // output or the encoder reports something other than underflow
        do {
            leftovers.put(cc.get());
            leftovers.flip();
            partial = encoder.encode(leftovers, bb, false);
            // Back to "append" mode for the next round
            leftovers.position(leftovers.limit());
            leftovers.limit(leftovers.array().length);
        } while (partial.isUnderflow() && (bb.position() == outputMark));
        if (partial.isError() || partial.isMalformed()) {
            partial.throwException();
        }
        // Skip the chars that were consumed through the leftovers buffer
        cb.position(cc.position());
        leftovers.position(0);
    }

    // Do the encoding and publish the resulting positions to both buffers
    final CoderResult outcome = encoder.encode(cb, bb, false);
    if (outcome.isError() || outcome.isMalformed()) {
        outcome.throwException();
    } else if (outcome.isOverflow() || outcome.isUnderflow()) {
        bc.limit(bb.position());
        cc.position(cb.position());
        if (outcome.isUnderflow()) {
            // Park chars that were not consumed for the next call
            final int pending = cc.remaining();
            if (pending > 0) {
                leftovers.limit(leftovers.array().length);
                leftovers.position(pending);
                cc.get(leftovers.array(), 0, pending);
            }
        }
    }
}

Source File: C2BConverter.java From Tomcat8-Source-Read with MIT License

4 votes

/**
 * Encodes the characters held by {@code cc} into bytes appended at the
 * end of {@code bc}.
 * <p>
 * Characters that could not be encoded by the previous call are kept in
 * the leftovers buffer and are completed first with characters taken from
 * {@code cc}. Characters left unconsumed at the end of this call are moved
 * into the leftovers buffer.
 *
 * @param cc char input
 * @param bc byte output
 * @throws IOException An encoding error occurred
 */
public void convert(CharChunk cc, ByteChunk bc) throws IOException {
    // Reuse the cached output view when it wraps the same array,
    // otherwise wrap the free space after the end of bc
    if ((bb != null) && (bb.array() == bc.getBuffer())) {
        bb.limit(bc.getBuffer().length);
        bb.position(bc.getEnd());
    } else {
        bb = ByteBuffer.wrap(bc.getBuffer(), bc.getEnd(), bc.getBuffer().length - bc.getEnd());
    }
    // Same for the input view over the content of cc
    if ((cb != null) && (cb.array() == cc.getBuffer())) {
        cb.limit(cc.getEnd());
        cb.position(cc.getStart());
    } else {
        cb = CharBuffer.wrap(cc.getBuffer(), cc.getStart(), cc.getLength());
    }

    // Finish the characters left over from the previous call, if any
    if (leftovers.position() > 0) {
        final int outputMark = bb.position();
        CoderResult partial;
        // Feed one more char at a time until something is written to the
        // output or the encoder reports something other than underflow
        do {
            leftovers.put((char) cc.substract());
            leftovers.flip();
            partial = encoder.encode(leftovers, bb, false);
            // Back to "append" mode for the next round
            leftovers.position(leftovers.limit());
            leftovers.limit(leftovers.array().length);
        } while (partial.isUnderflow() && (bb.position() == outputMark));
        if (partial.isError() || partial.isMalformed()) {
            partial.throwException();
        }
        // Skip the chars that were consumed through the leftovers buffer
        cb.position(cc.getStart());
        leftovers.position(0);
    }

    // Do the encoding and publish the resulting positions to both chunks
    final CoderResult outcome = encoder.encode(cb, bb, false);
    if (outcome.isError() || outcome.isMalformed()) {
        outcome.throwException();
    } else if (outcome.isOverflow() || outcome.isUnderflow()) {
        bc.setEnd(bb.position());
        cc.setOffset(cb.position());
        // Park chars that were not consumed for the next call
        if (outcome.isUnderflow() && cc.getLength() > 0) {
            leftovers.limit(leftovers.array().length);
            leftovers.position(cc.getLength());
            cc.substract(leftovers.array(), 0, cc.getLength());
        }
    }
}

Source File: Speller.java From morfologik-stemming with BSD 3-Clause "New" or "Revised" License

4 votes

/**
 * Recursively follows the arcs leaving {@code node} in the dictionary
 * automaton, decodes the arc labels into characters of the current
 * candidate, and adds every candidate whose computed distance does not
 * exceed {@code effectEditDistance} to {@code candidates}.
 * <p>
 * For each decoded character three continuations are tried in turn: an
 * "any to two" replacement, an "any to one" replacement, and the general
 * edit-distance step.
 *
 * @param candidates list that accepted candidates are appended to
 * @param depth index used to address {@code hMatrix}; only the general
 *        step recurses with {@code depth + 1}
 * @param node automaton node whose outgoing arcs are examined
 * @param prevBytes bytes collected from preceding arcs that have not yet
 *        been decoded into a character; empty when starting a new character
 * @param wordIndex position passed to the replacement matchers and the
 *        end-of-candidate check; presumably an index into the word being
 *        corrected -- TODO confirm against the caller
 * @param candIndex index in {@code candidate} at which the character
 *        decoded from the current arc is stored
 */
private void findRepl(List<CandidateData> candidates, final int depth, final int node, final byte[] prevBytes, final int wordIndex, final int candIndex) {
  int dist = 0;
  for (int arc = fsa.getFirstArc(node); arc != 0; arc = fsa.getNextArc(arc)) {
    // Rebuild the byte sequence for this arc: pending bytes plus the arc's label.
    byteBuffer = BufferUtils.clearAndEnsureCapacity(byteBuffer, prevBytes.length + 1);
    byteBuffer.put(prevBytes);
    byteBuffer.put(fsa.getArcLabel(arc));
    // Remember how many bytes were written; needed to copy them out if decoding fails.
    final int bufPos = byteBuffer.position();
    byteBuffer.flip();
    decoder.reset();
    // FIXME: this isn't correct -- no checks for overflows, no decoder flush. I don't think this should be in here
    // too, the decoder should run once on accumulated temporary byte buffer (current path) only when there's
    // a potential that this buffer can become a replacement candidate (isEndOfCandidate). Because we assume candidates
    // are valid input strings (this is verified when building the dictionary), it would save a lot of conversions.
    final CoderResult c = decoder.decode(byteBuffer, charBuffer, true);
    if (c.isMalformed()) { // assume that only valid
      // encodings are there, so a malformed result is treated as "character not complete yet":
      // carry all bytes collected so far over to the next arc
      final byte[] prev = new byte[bufPos];
      byteBuffer.position(0);
      byteBuffer.get(prev);
      if (!fsa.isArcTerminal(arc)) {
        findRepl(candidates, depth, fsa.getEndNode(arc), prev, wordIndex, candIndex); // note: depth is not incremented
      }
      byteBuffer.clear();
    } else if (!c.isError()) { // unmappable characters are silently discarded
      // A character was decoded: store it at the current candidate position.
      charBuffer.flip();
      candidate[candIndex] = charBuffer.get();
      charBuffer.clear();
      byteBuffer.clear();

      int lengthReplacement;
      // replacement "any to two"
      if ((lengthReplacement = matchAnyToTwo(wordIndex, candIndex)) > 0) {
        // the replacement takes place at the end of the candidate
        if (isEndOfCandidate(arc, wordIndex) && (dist = hMatrix.get(depth - 1, depth - 1)) <= effectEditDistance) {
          if (Math.abs(wordLen - 1 - (wordIndex + lengthReplacement - 2)) > 0) {
            // there are extra letters in the word after the replacement
            dist = dist + Math.abs(wordLen - 1 - (wordIndex + lengthReplacement - 2));
          }
          if (dist <= effectEditDistance) {
            candidates.add(new CandidateData(String.valueOf(candidate, 0, candIndex + 1), dist));
          }
        }
        if (isArcNotTerminal(arc, candIndex)) {
          // Temporarily overwrite the diagonal cell for the recursive call and restore it afterwards.
          int x = hMatrix.get(depth, depth);
          hMatrix.set(depth, depth, hMatrix.get(depth - 1, depth - 1));
          findRepl(candidates, Math.max(0, depth), fsa.getEndNode(arc), new byte[0], wordIndex + lengthReplacement - 1,
              candIndex + 1);
          hMatrix.set(depth, depth, x);
        }
      }
      //replacement "any to one"
      if ((lengthReplacement = matchAnyToOne(wordIndex, candIndex)) > 0) {
        // the replacement takes place at the end of the candidate
        if (isEndOfCandidate(arc, wordIndex) && (dist = hMatrix.get(depth, depth)) <= effectEditDistance) {
          if (Math.abs(wordLen - 1 - (wordIndex + lengthReplacement - 1)) > 0) {
            // there are extra letters in the word after the replacement
            dist = dist + Math.abs(wordLen - 1 - (wordIndex + lengthReplacement - 1));
          }
          if (dist <= effectEditDistance) {
            candidates.add(new CandidateData(String.valueOf(candidate, 0, candIndex + 1), dist));
          }
        }
        if (isArcNotTerminal(arc, candIndex)) {
          findRepl(candidates, depth, fsa.getEndNode(arc), new byte[0], wordIndex + lengthReplacement, candIndex + 1);
        }
      }
      //general
      // Continue only while cuted(...) stays within the allowed distance.
      if (cuted(depth, wordIndex, candIndex) <= effectEditDistance) {
        if ((isEndOfCandidate(arc, wordIndex))
            && (dist = ed(wordLen - 1 - (wordIndex - depth), depth, wordLen - 1, candIndex)) <= effectEditDistance) {
          candidates.add(new CandidateData(String.valueOf(candidate, 0, candIndex + 1), dist));
        }
        if (isArcNotTerminal(arc, candIndex)) {
          // The only continuation that advances depth, wordIndex and candIndex together.
          findRepl(candidates, depth + 1, fsa.getEndNode(arc), new byte[0], wordIndex + 1, candIndex + 1);
        }
      }
    }
  }
}

Java Code Examples for java.nio.charset.CoderResult#isMalformed()