java.nio.charset.CharsetDecoder#reset

Source File: SynonymGraphFilterFactory.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Builds a {@link SynonymMap} by running every configured synonyms file through a
 * {@link SynonymMap.Parser} implementation that is looked up by class name.
 *
 * <p>The parser class must expose a {@code (boolean, boolean, Analyzer)} constructor; any
 * failure to instantiate it is rethrown wrapped in a {@link RuntimeException}. Files are
 * read as UTF-8 and malformed or unmappable input is reported rather than replaced.
 *
 * @param loader   used to resolve the parser class and to open each synonyms resource
 * @param cname    name of the {@link SynonymMap.Parser} class to instantiate
 * @param dedup    first constructor argument handed to the parser
 * @param analyzer third constructor argument handed to the parser
 * @return the result of the parser's {@code build()} after all files were parsed
 */
protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
  final CharsetDecoder utf8Decoder = StandardCharsets.UTF_8.newDecoder();
  utf8Decoder.onMalformedInput(CodingErrorAction.REPORT);
  utf8Decoder.onUnmappableCharacter(CodingErrorAction.REPORT);

  final Class<? extends SynonymMap.Parser> parserClass = loader.findClass(cname, SynonymMap.Parser.class);
  SynonymMap.Parser synonymParser;
  try {
    synonymParser = parserClass
        .getConstructor(boolean.class, boolean.class, Analyzer.class)
        .newInstance(dedup, expand, analyzer);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  for (String fileName : splitFileNames(synonyms)) {
    // One decoder is shared by all files, so clear its state before each new stream.
    utf8Decoder.reset();
    final InputStreamReader source = new InputStreamReader(loader.openResource(fileName), utf8Decoder);
    synonymParser.parse(source);
  }
  return synonymParser.build();
}

Source File: CBUtil.java From stratio-cassandra with Apache License 2.0

6 votes

/**
 * Decodes the remaining bytes of {@code src} into a string using the decoder supplied by
 * {@code decoder.get()}.
 *
 * @param src buffer whose remaining bytes are decoded
 * @return the decoded text
 * @throws CharacterCodingException if decoding or flushing ends in anything but underflow
 */
private static String decodeString(ByteBuffer src) throws CharacterCodingException
{
    // A decoder carries state between uses, so it is reset before every decode
    // (which is why each thread works with its own copy).
    final CharsetDecoder charsetDecoder = decoder.get();
    charsetDecoder.reset();

    // Size the output for the worst case the decoder reports for the bytes still unread.
    final int capacity = (int) ((double) src.remaining() * charsetDecoder.maxCharsPerByte());
    final CharBuffer chars = CharBuffer.allocate(capacity);

    final CoderResult decodeOutcome = charsetDecoder.decode(src, chars, true);
    if (!decodeOutcome.isUnderflow())
    {
        decodeOutcome.throwException();
    }

    final CoderResult flushOutcome = charsetDecoder.flush(chars);
    if (!flushOutcome.isUnderflow())
    {
        flushOutcome.throwException();
    }

    chars.flip();
    return chars.toString();
}

Source File: CharsetUtil.java From netty4.0.27Learn with Apache License 2.0

6 votes

/**
 * Returns the {@link CharsetDecoder} cached for the specified {@code charset} in the map
 * obtained from {@code InternalThreadLocalMap.get().charsetDecoderCache()}, creating and
 * caching one on first use.
 *
 * <p>The returned decoder is always configured to replace malformed input and unmappable
 * characters; a decoder taken from the cache is reset first.
 *
 * @param charset the charset to decode with; must not be {@code null}
 * @return a decoder ready for a new decoding operation
 * @throws NullPointerException if {@code charset} is {@code null}
 */
public static CharsetDecoder getDecoder(Charset charset) {
    if (charset == null) {
        throw new NullPointerException("charset");
    }

    final Map<Charset, CharsetDecoder> cache = InternalThreadLocalMap.get().charsetDecoderCache();
    CharsetDecoder decoder = cache.get(charset);
    if (decoder == null) {
        decoder = charset.newDecoder();
        cache.put(charset, decoder);
    } else {
        // A reused decoder may still hold state from its previous operation.
        decoder.reset();
    }

    decoder.onMalformedInput(CodingErrorAction.REPLACE);
    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    return decoder;
}

Source File: CharsetUtil.java From simple-netty-source with Apache License 2.0

6 votes

/**
 * Returns the {@link CharsetDecoder} cached for the specified {@code charset} in the map
 * supplied by {@code decoders.get()}, creating and caching a new one when none exists yet.
 *
 * <p>Whether cached or new, the decoder is configured to replace malformed input and
 * unmappable characters. A cached decoder is reset before it is handed out.
 *
 * @param charset the charset to decode with; must not be {@code null}
 * @return a decoder ready for a new decoding operation
 * @throws NullPointerException if {@code charset} is {@code null}
 */
public static CharsetDecoder getDecoder(Charset charset) {
    if (charset == null) {
        throw new NullPointerException("charset");
    }

    final Map<Charset, CharsetDecoder> perThread = decoders.get();
    final CharsetDecoder cached = perThread.get(charset);
    if (cached != null) {
        cached.reset();
        // The configuration setters return the decoder itself, so they can be chained.
        return cached
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE);
    }

    final CharsetDecoder fresh = charset.newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    perThread.put(charset, fresh);
    return fresh;
}

Source File: JsonUtils.java From BigApp_Discuz_Android with Apache License 2.0

6 votes

/**
 * Decodes {@code len} bytes of {@code input} starting at {@code off} with the given decoder
 * and delegates the resulting characters to the {@code char[]}-based {@code parseObject}.
 *
 * @param input          bytes holding the encoded text
 * @param off            index of the first byte to decode
 * @param len            number of bytes to decode
 * @param charsetDecoder decoder to use; it is reset before decoding
 * @param clazz          target type forwarded to the delegate
 * @param features       parser features forwarded to the delegate
 * @return the delegate's result, cast to {@code T}
 */
@SuppressWarnings("unchecked")
public static final <T> T parseObject(byte[] input, int off, int len, CharsetDecoder charsetDecoder, Type clazz,
                                      Feature... features) {
    charsetDecoder.reset();

    // Upper bound on the number of chars the decoder can emit for len bytes.
    final int maxChars = (int) (len * (double) charsetDecoder.maxCharsPerByte());
    final char[] decoded = ThreadLocalCache.getChars(maxChars);

    final ByteBuffer source = ByteBuffer.wrap(input, off, len);
    final CharBuffer target = CharBuffer.wrap(decoded);
    IOUtils.decode(charsetDecoder, source, target);

    // The target's position after decoding is passed on as the length of the decoded text.
    final int decodedLength = target.position();
    return (T) parseObject(decoded, decodedLength, clazz, features);
}

Source File: CharsetUtil.java From android-netty with Apache License 2.0

6 votes

/**
 * Returns the {@link CharsetDecoder} cached for the specified {@code charset} in the map
 * supplied by {@code decoders.get()}; a missing entry is created and stored.
 *
 * <p>A decoder found in the cache is reset. In both cases the decoder is set to replace
 * malformed input and unmappable characters before it is returned.
 *
 * @param charset the charset to decode with; must not be {@code null}
 * @return a decoder ready for a new decoding operation
 * @throws NullPointerException if {@code charset} is {@code null}
 */
public static CharsetDecoder getDecoder(Charset charset) {
    if (charset == null) {
        throw new NullPointerException("charset");
    }

    final Map<Charset, CharsetDecoder> cache = decoders.get();
    CharsetDecoder decoder = cache.get(charset);
    final boolean cacheHit = decoder != null;

    if (cacheHit) {
        decoder.reset();
    } else {
        decoder = charset.newDecoder();
    }

    decoder.onMalformedInput(CodingErrorAction.REPLACE);
    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);

    if (!cacheHit) {
        cache.put(charset, decoder);
    }
    return decoder;
}

Source File: UTF7CharsetTest.java From ph-commons with Apache License 2.0

6 votes

/**
 * Decodes two inputs with one decoder, each into an output buffer sized exactly for the
 * expected text, resetting the decoder between the two runs.
 */
@Test
public void testDecodeLimitedOutput () throws Exception
{
  final CharsetDecoder aDecoder = tested.newDecoder ();

  // First run: three expected characters into a three-character buffer.
  final ByteBuffer aFirstInput = CharsetTestHelper.wrap ("+IKwA4QDp-");
  final CharBuffer aFirstOutput = CharBuffer.allocate (3);
  assertEquals (CoderResult.UNDERFLOW, aDecoder.decode (aFirstInput, aFirstOutput, true));
  assertEquals (CoderResult.UNDERFLOW, aDecoder.flush (aFirstOutput));
  aFirstOutput.flip ();
  assertEquals ("€áé", aFirstOutput.toString ());

  // Second run: reuse the decoder after a reset; four expected characters, four slots.
  aDecoder.reset ();
  final ByteBuffer aSecondInput = CharsetTestHelper.wrap ("A+ImIDkQ.");
  final CharBuffer aSecondOutput = CharBuffer.allocate (4);
  assertEquals (CoderResult.UNDERFLOW, aDecoder.decode (aSecondInput, aSecondOutput, true));
  aSecondOutput.flip ();
  assertEquals ("A\u2262\u0391.", aSecondOutput.toString ());
}

Source File: JSON.java From uavstack with Apache License 2.0

6 votes

/**
 * Decodes a slice of {@code input} with the supplied decoder and hands the decoded
 * characters to the {@code char[]}-based {@code parseObject} overload.
 *
 * @param input          bytes holding the encoded text
 * @param off            index of the first byte to decode
 * @param len            number of bytes to decode
 * @param charsetDecoder decoder to use; it is reset before decoding
 * @param clazz          target type forwarded to the delegate
 * @param features       parser features forwarded to the delegate
 * @return the delegate's result, cast to {@code T}
 */
@SuppressWarnings("unchecked")
public static <T> T parseObject(byte[] input, //
                                int off, //
                                int len, //
                                CharsetDecoder charsetDecoder, //
                                Type clazz, //
                                Feature... features) {
    charsetDecoder.reset();

    // Largest number of chars the decoder may produce for len bytes.
    final double charsPerByte = charsetDecoder.maxCharsPerByte();
    final char[] decoded = allocateChars((int) (len * charsPerByte));

    final ByteBuffer source = ByteBuffer.wrap(input, off, len);
    final CharBuffer target = CharBuffer.wrap(decoded);
    IOUtils.decode(charsetDecoder, source, target);

    // Everything before the target's position was written by the decoder.
    return (T) parseObject(decoded, target.position(), clazz, features);
}

Source File: Encode.java From keycloak with Apache License 2.0

6 votes

/**
 * Rebuilds {@code path} with every region matched by {@code encodedCharsMulti} replaced by
 * the result of {@code decodeBytes} applied to the match's first capture group; text
 * between matches is copied unchanged.
 *
 * @param path the path to decode
 * @return the path with all matched regions decoded
 */
public static String decodePath(String path)
{
   final Matcher encodedRun = encodedCharsMulti.matcher(path);
   final CharsetDecoder utf8Decoder = Charset.forName(UTF_8).newDecoder();
   final StringBuilder result = new StringBuilder();

   // Index of the first character of path that has not been copied to the result yet.
   int copiedUpTo = 0;
   while (encodedRun.find())
   {
      result.append(path, copiedUpTo, encodedRun.start());

      // The decoder is shared across matches, so clear its state for each one.
      utf8Decoder.reset();
      result.append(decodeBytes(encodedRun.group(1), utf8Decoder));

      copiedUpTo = encodedRun.end();
   }

   return result.append(path, copiedUpTo, path.length()).toString();
}

Source File: GremlinAPI.java From hugegraph with Apache License 2.0

6 votes

/**
 * Derives a name from the first line of the gremlin script, truncated so that its
 * encoding in {@code CHARSET} does not exceed {@code MAX_NAME_LENGTH} bytes.
 *
 * @return the first line, or a decoded prefix of it when the line is too long
 */
public String name() {
    final String head = this.gremlin.split("\r\n|\r|\n", 2)[0];
    final Charset cs = Charset.forName(CHARSET);
    final byte[] encoded = head.getBytes(cs);

    if (encoded.length > MAX_NAME_LENGTH) {
        /*
         * Cut on a byte boundary and decode again, ignoring malformed input.
         * Reference https://stackoverflow.com/questions/3576754/truncating-strings-by-bytes
         */
        final CharsetDecoder truncatingDecoder = cs.newDecoder()
                                                   .onMalformedInput(CodingErrorAction.IGNORE)
                                                   .reset();
        final ByteBuffer prefix = ByteBuffer.wrap(encoded, 0, MAX_NAME_LENGTH);
        try {
            return truncatingDecoder.decode(prefix).toString();
        } catch (CharacterCodingException e) {
            throw new HugeException("Failed to decode truncated bytes of " +
                                    "gremlin first line", e);
        }
    }

    // Short enough already: use the first line as is.
    return head;
}

Source File: UTF7CharsetModifiedTest.java From ph-commons with Apache License 2.0

6 votes

/**
 * Checks that two inputs ending inside an unclosed shifted section decode without output
 * and that the problem is reported by {@code flush} as malformed input of length 1.
 */
@Test
public void testDecodeNoClosing () throws Exception
{
  final CharsetDecoder aDecoder = tested.newDecoder ();

  // Case 1: a lone shift character.
  final ByteBuffer aLoneShift = CharsetTestHelper.wrap ("&");
  final CharBuffer aFirstOutput = CharBuffer.allocate (1024);
  final CoderResult aFirstDecode = aDecoder.decode (aLoneShift, aFirstOutput, true);
  assertEquals (CoderResult.UNDERFLOW, aFirstDecode);
  final CoderResult aFirstFlush = aDecoder.flush (aFirstOutput);
  assertEquals (CoderResult.malformedForLength (1), aFirstFlush);
  assertEquals (1, aLoneShift.position ());
  assertEquals (0, aFirstOutput.position ());

  // Case 2: shift character followed by data but no terminator; decoder reused after reset.
  final ByteBuffer aUnclosed = CharsetTestHelper.wrap ("&AO");
  final CharBuffer aSecondOutput = CharBuffer.allocate (1024);
  aDecoder.reset ();
  final CoderResult aSecondDecode = aDecoder.decode (aUnclosed, aSecondOutput, true);
  assertEquals (CoderResult.UNDERFLOW, aSecondDecode);
  final CoderResult aSecondFlush = aDecoder.flush (aSecondOutput);
  assertEquals (CoderResult.malformedForLength (1), aSecondFlush);
  assertEquals (3, aUnclosed.position ());
  assertEquals (0, aSecondOutput.position ());
}

Source File: IOUtils.java From apm-agent-java with Apache License 2.0

5 votes

/**
 * Decodes {@code buffer} into {@code charBuffer} as a complete input using the decoder
 * from {@code threadLocalCharsetDecoder}.
 *
 * <p>Regardless of the outcome, the byte buffer is cleared and the decoder is reset
 * before this method returns. The result of the flush step is not inspected.
 *
 * @param charBuffer destination for the decoded characters
 * @param buffer     source bytes; cleared on exit
 * @return the result of the decode step
 */
private static CoderResult decode(CharBuffer charBuffer, ByteBuffer buffer) {
    final CharsetDecoder sharedDecoder = threadLocalCharsetDecoder.get();
    try {
        final CoderResult decodeOutcome = sharedDecoder.decode(buffer, charBuffer, true);
        sharedDecoder.flush(charBuffer);
        return decodeOutcome;
    } finally {
        // Leave both the buffer and the shared decoder ready for the next call.
        ((Buffer) buffer).clear();
        sharedDecoder.reset();
    }
}

Source File: CharsetDecoderTest.java From j2objc with Apache License 2.0

5 votes

/**
 * Decodes UTF-16 bytes from a sliced, array-backed buffer that covers the whole array and
 * expects the original text back.
 */
public void test_ByteArray_decode_no_offset() throws Exception {
    final byte[] utf16Bytes = encode("UTF-16", "Android");
    final CharsetDecoder utf16Decoder = Charset.forName("UTF-16").newDecoder();

    final ByteBuffer source = ByteBuffer.wrap(utf16Bytes, 0, utf16Bytes.length).slice();
    final CharBuffer target = CharBuffer.allocate(utf16Bytes.length);

    utf16Decoder.reset();
    final CoderResult outcome = utf16Decoder.decode(source, target, true);
    assertFalse(outcome.toString(), outcome.isError());
    utf16Decoder.flush(target);

    target.flip();
    assertEquals("Android", target.toString().trim());
}

Source File: LuceneUtil.java From nifi with Apache License 2.0

5 votes

/**
 * Shortens a field value so that its encoded form fits within
 * {@code IndexWriter.MAX_TERM_LENGTH} bytes.
 *
 * <p>The value is encoded with the platform default charset, cut at the byte limit and
 * decoded again with malformed input ignored.
 *
 * <p>NOTE(review): the limit is compared against bytes in the platform default charset;
 * confirm this matches the encoding the index uses for term lengths.
 *
 * @param field the string to be indexed; may be {@code null}
 * @return {@code field} itself when it is {@code null} or already short enough, the
 *         truncated string otherwise, or {@code null} if decoding the truncated bytes fails
 */
public static String truncateIndexField(String field) {
    if (field == null) {
        return null;
    }

    final Charset platformCharset = Charset.defaultCharset();
    final byte[] encoded = field.getBytes(platformCharset);
    if (encoded.length <= IndexWriter.MAX_TERM_LENGTH) {
        return field;
    }

    // Keep only the leading bytes that fit within the limit.
    final ByteBuffer head = ByteBuffer.wrap(encoded, 0, IndexWriter.MAX_TERM_LENGTH);

    // Malformed input (such as a character cut in half by the truncation) is ignored.
    final CharsetDecoder lenientDecoder = platformCharset.newDecoder();
    lenientDecoder.onMalformedInput(CodingErrorAction.IGNORE);
    lenientDecoder.reset();

    try {
        return lenientDecoder.decode(head).toString();
    } catch (CharacterCodingException shouldNotHappen) {
        // Signalled to the caller as null, as documented above.
        return null;
    }
}

Source File: AbstractAdaptiveByteBuffer.java From craft-atom with MIT License

4 votes

/**
 * {@inheritDoc}
 *
 * <p>Reads a zero-terminated string starting at the current position and decodes it with
 * the given decoder. For charsets whose name starts with {@code "UTF-16"} the terminator
 * is a pair of zero bytes; otherwise it is a single zero byte. When no terminator is
 * found, everything up to the limit is decoded (rounded down to an even byte count for
 * UTF-16). On success the position is moved past the string and, if present, its
 * terminator; the limit is restored. On a decoding error both position and limit are
 * restored before the exception is thrown.
 *
 * @param decoder the decoder to use; it is reset before decoding
 * @return the decoded string, or {@code ""} when nothing remains or the string is empty
 * @throws CharacterCodingException if the decoder reports an error
 */
@Override
public String getString(CharsetDecoder decoder) throws CharacterCodingException {
    if (!hasRemaining()) {
        return "";
    }

    boolean utf16 = decoder.charset().name().startsWith("UTF-16");

    int oldPos = position();
    int oldLimit = limit();
    // end: index just past the last byte to decode; -1 while no terminator has been found.
    int end = -1;
    // newPos: where the position is left once the string has been consumed.
    int newPos;

    if (!utf16) {
        // Single-byte terminator. A negative result from indexOf is treated as "not found".
        end = indexOf((byte) 0x00);
        if (end < 0) {
            newPos = end = oldLimit;
        } else {
            // Skip the terminator byte as well.
            newPos = end + 1;
        }
    } else {
        // Two-byte terminator: look for two consecutive zero bytes.
        // NOTE(review): when the first byte of a pair is non-zero and the second is zero,
        // i advances by one only, so the next iteration starts on the second byte of that
        // pair — confirm this is intended.
        int i = oldPos;
        for (;;) {
            boolean wasZero = get(i) == 0;
            i++;

            if (i >= oldLimit) {
                break;
            }

            if (get(i) != 0) {
                i++;
                if (i >= oldLimit) {
                    break;
                }

                continue;
            }

            if (wasZero) {
                end = i - 1;
                break;
            }
        }

        if (end < 0) {
            // No terminator: take the remaining bytes, with the count rounded down to even.
            newPos = end = oldPos + (oldLimit - oldPos & 0xFFFFFFFE);
        } else {
            // Skip the two terminator bytes when both lie within the old limit.
            if (end + 2 <= oldLimit) {
                newPos = end + 2;
            } else {
                newPos = end;
            }
        }
    }

    // Empty string: only the terminator (if any) has to be consumed.
    if (oldPos == end) {
        position(newPos);
        return "";
    }

    // Temporarily narrow the buffer to the string's bytes.
    limit(end);
    decoder.reset();

    // Initial output size estimate; it is also the increment used when the output overflows.
    int expectedLength = (int) (remaining() * decoder.averageCharsPerByte()) + 1;
    CharBuffer out = CharBuffer.allocate(expectedLength);
    for (;;) {
        CoderResult cr;
        // Decode while input is left; once it is exhausted, flush the decoder instead.
        if (hasRemaining()) {
            cr = decoder.decode(buf(), out, true);
        } else {
            cr = decoder.flush(out);
        }

        if (cr.isUnderflow()) {
            break;
        }

        if (cr.isOverflow()) {
            // Output full: copy what has been decoded into a larger buffer and go on.
            CharBuffer o = CharBuffer.allocate(out.capacity() + expectedLength);
            out.flip();
            o.put(out);
            out = o;
            continue;
        }

        if (cr.isError()) {
            // Revert the buffer back to the previous state.
            limit(oldLimit);
            position(oldPos);
            cr.throwException();
        }
    }

    // Restore the limit and step over the consumed string (and terminator).
    limit(oldLimit);
    position(newPos);
    return out.flip().toString();
}

Source File: AbstractIoBuffer.java From neoscada with Eclipse Public License 1.0

4 votes

/**
 * {@inheritDoc}
 *
 * <p>Reads a string from a fixed-size field of {@code fieldSize} bytes starting at the
 * current position. The string ends at the first terminator inside the field (a single
 * zero byte, or an aligned pair of zero bytes for charsets whose name starts with
 * {@code "UTF-16"}) or at the end of the field. On success the position is always
 * advanced by the full field size and the limit is restored. On a decoding error both
 * position and limit are restored before the exception is thrown.
 *
 * @param fieldSize size of the field in bytes; must be even for UTF-16 charsets
 * @param decoder   the decoder to use; it is reset before decoding
 * @return the decoded string, or {@code ""} when the field size is zero, nothing
 *         remains, or the field starts with a terminator
 * @throws CharacterCodingException if the decoder reports an error
 * @throws IllegalArgumentException if the charset is UTF-16 and {@code fieldSize} is odd
 * @throws BufferUnderflowException if fewer than {@code fieldSize} bytes remain
 */
@Override
public String getString(int fieldSize, CharsetDecoder decoder) throws CharacterCodingException {
    checkFieldSize(fieldSize);

    if (fieldSize == 0) {
        return "";
    }

    if (!hasRemaining()) {
        return "";
    }

    boolean utf16 = decoder.charset().name().startsWith("UTF-16");

    if (utf16 && (fieldSize & 1) != 0) {
        throw new IllegalArgumentException("fieldSize is not even.");
    }

    int oldPos = position();
    int oldLimit = limit();
    // end: index just past the last byte of the field.
    int end = oldPos + fieldSize;

    if (oldLimit < end) {
        throw new BufferUnderflowException();
    }

    int i;

    if (!utf16) {
        // Find the first zero byte inside the field, if any.
        for (i = oldPos; i < end; i++) {
            if (get(i) == 0) {
                break;
            }
        }

        // Narrow the limit to the terminator, or to the field end when none was found.
        if (i == end) {
            limit(end);
        } else {
            limit(i);
        }
    } else {
        // Step through the field two bytes at a time looking for a zero pair.
        for (i = oldPos; i < end; i += 2) {
            if (get(i) == 0 && get(i + 1) == 0) {
                break;
            }
        }

        // Narrow the limit to the terminator, or to the field end when none was found.
        if (i == end) {
            limit(end);
        } else {
            limit(i);
        }
    }

    // Empty string: still consume the whole field.
    if (!hasRemaining()) {
        limit(oldLimit);
        position(end);
        return "";
    }
    decoder.reset();

    // Initial output size estimate; it is also the increment used when the output overflows.
    int expectedLength = (int) (remaining() * decoder.averageCharsPerByte()) + 1;
    CharBuffer out = CharBuffer.allocate(expectedLength);
    for (;;) {
        CoderResult cr;
        // Decode while input is left; once it is exhausted, flush the decoder instead.
        if (hasRemaining()) {
            cr = decoder.decode(buf(), out, true);
        } else {
            cr = decoder.flush(out);
        }

        if (cr.isUnderflow()) {
            break;
        }

        if (cr.isOverflow()) {
            // Output full: copy what has been decoded into a larger buffer and go on.
            CharBuffer o = CharBuffer.allocate(out.capacity() + expectedLength);
            out.flip();
            o.put(out);
            out = o;
            continue;
        }

        if (cr.isError()) {
            // Revert the buffer back to the previous state.
            limit(oldLimit);
            position(oldPos);
            cr.throwException();
        }
    }

    // Restore the limit and skip the entire field, including any padding after the string.
    limit(oldLimit);
    position(end);
    return out.flip().toString();
}

Source File: AbstractIoBuffer.java From neoscada with Eclipse Public License 1.0

4 votes

/**
 * {@inheritDoc}
 *
 * <p>Reads a zero-terminated string starting at the current position and decodes it with
 * the given decoder. The terminator is a single zero byte, or a pair of zero bytes for
 * charsets whose name starts with {@code "UTF-16"}. Without a terminator, all bytes up to
 * the limit are decoded (an even number of them for UTF-16). On success the position ends
 * up after the string and its terminator, and the limit is restored. On a decoding error
 * both position and limit are restored before the exception is thrown.
 *
 * @param decoder the decoder to use; it is reset before decoding
 * @return the decoded string, or {@code ""} when nothing remains or the string is empty
 * @throws CharacterCodingException if the decoder reports an error
 */
@Override
public String getString(CharsetDecoder decoder) throws CharacterCodingException {
    if (!hasRemaining()) {
        return "";
    }

    boolean utf16 = decoder.charset().name().startsWith("UTF-16");

    int oldPos = position();
    int oldLimit = limit();
    // end: index just past the last byte to decode; stays -1 until a terminator is found.
    int end = -1;
    // newPos: position to leave the buffer at after the string has been read.
    int newPos;

    if (!utf16) {
        // A negative result from indexOf is treated as "no zero byte found".
        end = indexOf((byte) 0x00);
        if (end < 0) {
            newPos = end = oldLimit;
        } else {
            // Consume the terminator together with the string.
            newPos = end + 1;
        }
    } else {
        // Search for two consecutive zero bytes.
        // NOTE(review): if the first byte of a pair is non-zero while the second is zero,
        // i is advanced by one only and the next iteration begins on that second byte —
        // confirm this is intended.
        int i = oldPos;
        for (;;) {
            boolean wasZero = get(i) == 0;
            i++;

            if (i >= oldLimit) {
                break;
            }

            if (get(i) != 0) {
                i++;
                if (i >= oldLimit) {
                    break;
                }

                continue;
            }

            if (wasZero) {
                end = i - 1;
                break;
            }
        }

        if (end < 0) {
            // No terminator: use the remaining byte count rounded down to an even number.
            newPos = end = oldPos + (oldLimit - oldPos & 0xFFFFFFFE);
        } else {
            // Consume both terminator bytes when they fit within the old limit.
            if (end + 2 <= oldLimit) {
                newPos = end + 2;
            } else {
                newPos = end;
            }
        }
    }

    // Nothing to decode: just move past the terminator, if there is one.
    if (oldPos == end) {
        position(newPos);
        return "";
    }

    // Restrict the readable range to the string's bytes while decoding.
    limit(end);
    decoder.reset();

    // Initial output size estimate; it is also the increment used when the output overflows.
    int expectedLength = (int) (remaining() * decoder.averageCharsPerByte()) + 1;
    CharBuffer out = CharBuffer.allocate(expectedLength);
    for (;;) {
        CoderResult cr;
        // Decode while input is left; once it is exhausted, flush the decoder instead.
        if (hasRemaining()) {
            cr = decoder.decode(buf(), out, true);
        } else {
            cr = decoder.flush(out);
        }

        if (cr.isUnderflow()) {
            break;
        }

        if (cr.isOverflow()) {
            // Output full: move the decoded chars into a larger buffer and try again.
            CharBuffer o = CharBuffer.allocate(out.capacity() + expectedLength);
            out.flip();
            o.put(out);
            out = o;
            continue;
        }

        if (cr.isError()) {
            // Revert the buffer back to the previous state.
            limit(oldLimit);
            position(oldPos);
            cr.throwException();
        }
    }

    // Restore the limit and advance past the string (and terminator).
    limit(oldLimit);
    position(newPos);
    return out.flip().toString();
}

Source File: JsonUtils.java From BigApp_Discuz_Android with Apache License 2.0

4 votes

/**
 * Decodes {@code len} bytes of {@code input} starting at {@code off} with the given
 * decoder and parses the decoded characters with a {@code DefaultJSONParser}.
 *
 * @param input          bytes holding the encoded text
 * @param off            index of the first byte to decode
 * @param len            number of bytes to decode
 * @param charsetDecoder decoder to use; it is reset before decoding
 * @param features       feature flags handed to the parser
 * @return the value produced by the parser
 */
public static final Object parse(byte[] input, int off, int len, CharsetDecoder charsetDecoder, int features) {
    charsetDecoder.reset();

    // Upper bound on the number of chars the decoder can emit for len bytes.
    final int maxChars = (int) (len * (double) charsetDecoder.maxCharsPerByte());
    final char[] scratch = ThreadLocalCache.getChars(maxChars);

    final ByteBuffer source = ByteBuffer.wrap(input, off, len);
    final CharBuffer target = CharBuffer.wrap(scratch);
    IOUtils.decode(charsetDecoder, source, target);

    // The target's position after decoding is passed on as the length of the decoded text.
    final DefaultJSONParser jsonParser =
            new DefaultJSONParser(scratch, target.position(), ParserConfig.getGlobalInstance(), features);
    final Object result = jsonParser.parse();
    handleResovleTask(jsonParser, result);
    jsonParser.close();

    return result;
}

Source File: TerminalEmulator.java From Ansole with GNU General Public License v2.0

4 votes

/**
 * Feeds one input byte into the incremental UTF-8 handling.
 *
 * <p>{@code mUTF8ToFollow} holds the number of continuation bytes still expected for the
 * sequence collected in {@code mUTF8ByteBuffer}. A lead byte starts a new sequence, each
 * continuation byte is appended, and once the last one arrives the sequence is decoded
 * and either emitted or, for a C1 control character, passed to {@code process}. Invalid
 * lead bytes and interrupted sequences cause {@code UNICODE_REPLACEMENT_CHAR} to be
 * emitted.
 *
 * @param b the next input byte
 * @return {@code false} if {@code b} is an ASCII byte with no sequence pending (including
 *         the case where it has just aborted a pending sequence), meaning this method did
 *         not handle it; {@code true} otherwise
 */
private boolean handleUTF8Sequence(byte b) {
    if (mUTF8ToFollow == 0 && (b & 0x80) == 0) {
        // ASCII character -- we don't need to handle this
        return false;
    }

    if (mUTF8ToFollow > 0) {
        // In the middle of a multi-byte sequence: b has to be a continuation byte.
        if ((b & 0xc0) != 0x80) {
            /* Not a UTF-8 continuation byte (doesn't begin with 0b10)
               Replace the entire sequence with the replacement char */
            mUTF8ToFollow = 0;
            mUTF8ByteBuffer.clear();
            emit(UNICODE_REPLACEMENT_CHAR);

            /* The Unicode standard (section 3.9, definition D93) requires
             * that we now attempt to process this byte as though it were
             * the beginning of another possibly-valid sequence */
            return handleUTF8Sequence(b);
        }

        mUTF8ByteBuffer.put(b);
        if (--mUTF8ToFollow == 0) {
            // Sequence complete -- decode and emit it
            ByteBuffer byteBuf = mUTF8ByteBuffer;
            CharBuffer charBuf = mInputCharBuffer;
            CharsetDecoder decoder = mUTF8Decoder;

            // NOTE(review): rewind() moves the position back to zero but leaves the limit
            // untouched — confirm the decoder is meant to read up to the limit rather than
            // only the bytes just collected.
            byteBuf.rewind();
            decoder.reset();
            decoder.decode(byteBuf, charBuf, true);
            decoder.flush(charBuf);

            // Only the first decoded char decides between the two paths below.
            char[] chars = charBuf.array();
            if (chars[0] >= 0x80 && chars[0] <= 0x9f) {
                /* Sequence decoded to a C1 control character which needs
                   to be sent through process() again */
                process((byte) chars[0], false);
            } else {
                emit(chars);
            }

            // Make both scratch buffers ready for the next sequence.
            byteBuf.clear();
            charBuf.clear();
        }
    } else {
        // No sequence pending: the lead byte tells how many continuation bytes follow.
        if ((b & 0xe0) == 0xc0) { // 0b110 -- two-byte sequence
            mUTF8ToFollow = 1;
        } else if ((b & 0xf0) == 0xe0) { // 0b1110 -- three-byte sequence
            mUTF8ToFollow = 2;
        } else if ((b & 0xf8) == 0xf0) { // 0b11110 -- four-byte sequence
            mUTF8ToFollow = 3;
        } else {
            // Not a valid UTF-8 sequence start -- replace this char
            emit(UNICODE_REPLACEMENT_CHAR);
            return true;
        }

        // Keep the lead byte; the continuation bytes are appended on later calls.
        mUTF8ByteBuffer.put(b);
    }

    return true;
}

Source File: VisorTaskUtils.java From ignite with Apache License 2.0

3 votes

/**
 * Decode file charset.
 *
 * <p>Reads up to {@code DFLT_BUFFER_SIZE} bytes from the start of the file and returns the
 * first charset whose decoder accepts those bytes without error. The platform default
 * charset, US-ASCII, UTF-8, UTF-16BE and UTF-16LE are tried first (when available),
 * followed by all other available charsets. If no charset decodes the sample, the
 * platform default charset is returned.
 *
 * @param f File to process.
 * @return File charset.
 * @throws IOException in case of error.
 */
public static Charset decode(File f) throws IOException {
    SortedMap<String, Charset> charsets = Charset.availableCharsets();

    String[] firstCharsets = {Charset.defaultCharset().name(), "US-ASCII", "UTF-8", "UTF-16BE", "UTF-16LE"};

    // Insertion-ordered set: preferred charsets first, duplicates from the full list dropped.
    Collection<Charset> orderedCharsets = U.newLinkedHashSet(charsets.size());

    for (String c : firstCharsets)
        if (charsets.containsKey(c))
            orderedCharsets.add(charsets.get(c));

    orderedCharsets.addAll(charsets.values());

    try (RandomAccessFile raf = new RandomAccessFile(f, "r")) {
        FileChannel ch = raf.getChannel();

        ByteBuffer buf = ByteBuffer.allocate(DFLT_BUFFER_SIZE);

        ch.read(buf);

        buf.flip();

        for (Charset charset : orderedCharsets) {
            CharsetDecoder decoder = charset.newDecoder();

            decoder.reset();

            try {
                // Decoding advances the position of the buffer it is given, also when it
                // fails part-way. Decode a duplicate so that every candidate charset is
                // checked against the complete sample, not just the bytes left over after
                // an earlier failed attempt.
                decoder.decode(buf.duplicate());

                return charset;
            }
            catch (CharacterCodingException ignored) {
                // This charset cannot represent the sample; try the next candidate.
            }
        }
    }

    return Charset.defaultCharset();
}

Java Code Examples for java.nio.charset.CharsetDecoder#reset()