Java Code Examples for java.nio.charset.CharsetDecoder#onMalformedInput()
The following examples show how to use
java.nio.charset.CharsetDecoder#onMalformedInput().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Text.java From hadoop-gpu with Apache License 2.0 | 6 votes |
private static String decode(ByteBuffer utf8, boolean replace) throws CharacterCodingException { CharsetDecoder decoder = DECODER_FACTORY.get(); if (replace) { decoder.onMalformedInput( java.nio.charset.CodingErrorAction.REPLACE); decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); } String str = decoder.decode(utf8).toString(); // set decoder back to its default value: REPORT if (replace) { decoder.onMalformedInput(CodingErrorAction.REPORT); decoder.onUnmappableCharacter(CodingErrorAction.REPORT); } return str; }
Example 2
Source File: UnicodeHelper.java From p4ic4idea with Apache License 2.0 | 6 votes |
/** * Try to determine whether a byte buffer's character encoding is that of the * passed-in charset. Uses inefficient * heuristics that will be revisited when we're more familiar with likely * usage patterns. * * Note this has been heavily changed since inception and will * almost certainly disappear in the 10.x timeframe -- HR. */ public static boolean inferCharset(byte[] bytes, int bytesRead, Charset clientCharset) { ByteBuffer byteBuf = ByteBuffer.wrap(bytes, 0, bytesRead); CharBuffer charBuf = CharBuffer.allocate(byteBuf.capacity() * 2); if (clientCharset != null) { CharsetDecoder decoder = clientCharset.newDecoder(); decoder.onMalformedInput(CodingErrorAction.REPORT); decoder.onUnmappableCharacter(CodingErrorAction.REPORT); CoderResult coderResult = decoder.decode(byteBuf, charBuf, false); if (coderResult != null) { if (coderResult.isError()) { // Wasn't this one... return false; } else { return true; // Still only *probably* true, dammit... } } } return true; }
Example 3
Source File: CharsetUtil.java From android-netty with Apache License 2.0 | 6 votes |
/**
 * Returns a cached thread-local {@link CharsetDecoder} for the specified
 * {@code charset}. The returned decoder always replaces malformed input and
 * unmappable characters; a decoder taken from the cache is reset first.
 *
 * @param charset the charset to decode with; must not be {@code null}
 * @return the decoder cached for {@code charset}, created on first use
 * @throws NullPointerException if {@code charset} is {@code null}
 */
public static CharsetDecoder getDecoder(Charset charset) {
    if (charset == null) {
        throw new NullPointerException("charset");
    }

    Map<Charset, CharsetDecoder> cache = decoders.get();
    CharsetDecoder decoder = cache.get(charset);
    final boolean wasCached = decoder != null;

    if (wasCached) {
        decoder.reset();
    } else {
        decoder = charset.newDecoder();
    }

    decoder.onMalformedInput(CodingErrorAction.REPLACE);
    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);

    if (!wasCached) {
        cache.put(charset, decoder);
    }
    return decoder;
}
Example 4
Source File: Text.java From hadoop with Apache License 2.0 | 6 votes |
private static String decode(ByteBuffer utf8, boolean replace) throws CharacterCodingException { CharsetDecoder decoder = DECODER_FACTORY.get(); if (replace) { decoder.onMalformedInput( java.nio.charset.CodingErrorAction.REPLACE); decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); } String str = decoder.decode(utf8).toString(); // set decoder back to its default value: REPORT if (replace) { decoder.onMalformedInput(CodingErrorAction.REPORT); decoder.onUnmappableCharacter(CodingErrorAction.REPORT); } return str; }
Example 5
Source File: TracingManagedHttpClientConnectionFactory.java From caravan with Apache License 2.0 | 6 votes |
/**
 * Creates a tracing HTTP client connection configured from {@code config}.
 *
 * <p>When {@code config} is {@code null}, {@code ConnectionConfig.DEFAULT} is
 * used. A charset decoder/encoder pair is built only when the configuration
 * names a charset; coding-error actions missing from the configuration
 * default to {@code CodingErrorAction.REPORT}.
 *
 * @param route the route the connection is created for (not read here)
 * @param config connection configuration, or {@code null} for the default
 * @return a new connection with an id of the form {@code http-outgoing-N}
 */
@Override
public ManagedHttpClientConnection create(final HttpRoute route, final ConnectionConfig config) {
    final ConnectionConfig effectiveConfig = (config == null) ? ConnectionConfig.DEFAULT : config;
    final Charset charset = effectiveConfig.getCharset();

    CodingErrorAction onMalformed = effectiveConfig.getMalformedInputAction();
    if (onMalformed == null) {
        onMalformed = CodingErrorAction.REPORT;
    }
    CodingErrorAction onUnmappable = effectiveConfig.getUnmappableInputAction();
    if (onUnmappable == null) {
        onUnmappable = CodingErrorAction.REPORT;
    }

    CharsetDecoder decoder = null;
    CharsetEncoder encoder = null;
    if (charset != null) {
        decoder = charset.newDecoder();
        decoder.onMalformedInput(onMalformed);
        decoder.onUnmappableCharacter(onUnmappable);

        encoder = charset.newEncoder();
        encoder.onMalformedInput(onMalformed);
        encoder.onUnmappableCharacter(onUnmappable);
    }

    final String connectionId = "http-outgoing-" + COUNTER.getAndIncrement();
    return new TracingManagedHttpClientConnection(
            connectionId,
            effectiveConfig.getBufferSize(),
            effectiveConfig.getFragmentSizeHint(),
            decoder,
            encoder,
            effectiveConfig.getMessageConstraints(),
            incomingContentStrategy,
            outgoingContentStrategy,
            requestWriterFactory,
            responseParserFactory,
            logFunc);
}
Example 6
Source File: NetStringUtil.java From cronet with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Convert text in a given character set to a Unicode string. Any invalid * characters are replaced with U+FFFD. Returns null if the character set * is not recognized. * @param text ByteBuffer containing the character array to convert. * @param charsetName Character set it's in encoded in. * @return: Unicode string on success, null on failure. */ @CalledByNative private static String convertToUnicodeWithSubstitutions( ByteBuffer text, String charsetName) { try { Charset charset = Charset.forName(charsetName); // TODO(mmenke): Investigate if Charset.decode() can be used // instead. The question is whether it uses the proper replace // character. JDK CharsetDecoder docs say U+FFFD is the default, // but Charset.decode() docs say it uses the "charset's default // replacement byte array". CharsetDecoder decoder = charset.newDecoder(); decoder.onMalformedInput(CodingErrorAction.REPLACE); decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); decoder.replaceWith("\uFFFD"); return decoder.decode(text).toString(); } catch (Exception e) { return null; } }
Example 7
Source File: Text.java From RDFS with Apache License 2.0 | 6 votes |
private static String decode(ByteBuffer utf8, boolean replace) throws CharacterCodingException { CharsetDecoder decoder = DECODER_FACTORY.get(); if (replace) { decoder.onMalformedInput( java.nio.charset.CodingErrorAction.REPLACE); decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); } String str = decoder.decode(utf8).toString(); // set decoder back to its default value: REPORT if (replace) { decoder.onMalformedInput(CodingErrorAction.REPORT); decoder.onUnmappableCharacter(CodingErrorAction.REPORT); } return str; }
Example 8
Source File: UnicodeHelper.java From p4ic4idea with Apache License 2.0 | 6 votes |
/** * Try to determine whether a byte buffer's character encoding is that of the * passed-in charset. Uses inefficient * heuristics that will be revisited when we're more familiar with likely * usage patterns. * * Note this has been heavily changed since inception and will * almost certainly disappear in the 10.x timeframe -- HR. */ public static boolean inferCharset(byte[] bytes, int bytesRead, Charset clientCharset) { ByteBuffer byteBuf = ByteBuffer.wrap(bytes, 0, bytesRead); CharBuffer charBuf = CharBuffer.allocate(byteBuf.capacity() * 2); if (clientCharset != null) { CharsetDecoder decoder = clientCharset.newDecoder(); decoder.onMalformedInput(CodingErrorAction.REPORT); decoder.onUnmappableCharacter(CodingErrorAction.REPORT); CoderResult coderResult = decoder.decode(byteBuf, charBuf, false); if (coderResult != null) { if (coderResult.isError()) { // Wasn't this one... return false; } else { return true; // Still only *probably* true, dammit... } } } return true; }
Example 9
Source File: Text.java From Canova with Apache License 2.0 | 6 votes |
private static String decode(ByteBuffer utf8, boolean replace) throws CharacterCodingException { CharsetDecoder decoder = DECODER_FACTORY.get(); if (replace) { decoder.onMalformedInput( java.nio.charset.CodingErrorAction.REPLACE); decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); } String str = decoder.decode(utf8).toString(); // set decoder back to its default value: REPORT if (replace) { decoder.onMalformedInput(CodingErrorAction.REPORT); decoder.onUnmappableCharacter(CodingErrorAction.REPORT); } return str; }
Example 10
Source File: StringUtils.java From Pydev with Eclipse Public License 1.0 | 6 votes |
public static String safeDecodeByteArray(byte[] b, String baseCharset) { try { if (baseCharset == null) { return new String(b, StandardCharsets.ISO_8859_1); } return new String(b, baseCharset); } catch (Exception e) { try { //If it fails, go for something which shouldn't fail! CharsetDecoder decoder = Charset.forName(baseCharset).newDecoder(); decoder.onMalformedInput(CodingErrorAction.IGNORE); decoder.onUnmappableCharacter(CodingErrorAction.IGNORE); CharBuffer parsed = decoder.decode(ByteBuffer.wrap(b, 0, b.length)); return parsed.toString(); } catch (Exception e2) { Log.log(e2); //Shouldn't ever happen! return new String("Unable to decode bytearray from Python."); } } }
Example 11
Source File: Text.java From Bats with Apache License 2.0 | 6 votes |
private static String decode(ByteBuffer utf8, boolean replace) throws CharacterCodingException { CharsetDecoder decoder = DECODER_FACTORY.get(); if (replace) { decoder.onMalformedInput( java.nio.charset.CodingErrorAction.REPLACE); decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); } String str = decoder.decode(utf8).toString(); // set decoder back to its default value: REPORT if (replace) { decoder.onMalformedInput(CodingErrorAction.REPORT); decoder.onUnmappableCharacter(CodingErrorAction.REPORT); } return str; }
Example 12
Source File: LuceneUtil.java From localization_nifi with Apache License 2.0 | 5 votes |
/** * Truncate a single field so that it does not exceed Lucene's byte size limit on indexed terms. * * @param field the string to be indexed * @return a string that can be indexed which is within Lucene's byte size limit, or null if anything goes wrong */ public static String truncateIndexField(String field) { if (field == null) { return field; } Charset charset = Charset.defaultCharset(); byte[] bytes = field.getBytes(charset); if (bytes.length <= IndexWriter.MAX_TERM_LENGTH) { return field; } // chop the field to maximum allowed byte length ByteBuffer bbuf = ByteBuffer.wrap(bytes, 0, IndexWriter.MAX_TERM_LENGTH); try { // decode the chopped byte buffer back into original charset CharsetDecoder decoder = charset.newDecoder(); decoder.onMalformedInput(CodingErrorAction.IGNORE); decoder.reset(); CharBuffer cbuf = decoder.decode(bbuf); return cbuf.toString(); } catch (CharacterCodingException shouldNotHappen) {} // if we get here, something bad has happened return null; }
Example 13
Source File: AbstractMatcher.java From netbeans with Apache License 2.0 | 5 votes |
/**
 * Creates a decoder for {@code charset} whose error handling follows the
 * {@code strict} flag: when strict, both malformed input and unmappable
 * characters are reported; otherwise malformed input is ignored and
 * unmappable characters are replaced.
 *
 * @param charset the charset to create a decoder for
 * @return the newly created, configured decoder
 */
public CharsetDecoder prepareDecoder(Charset charset) {
    final CharsetDecoder configured = charset.newDecoder();
    final CodingErrorAction malformedAction =
            strict ? CodingErrorAction.REPORT : CodingErrorAction.IGNORE;
    final CodingErrorAction unmappableAction =
            strict ? CodingErrorAction.REPORT : CodingErrorAction.REPLACE;
    return configured
            .onMalformedInput(malformedAction)
            .onUnmappableCharacter(unmappableAction);
}
Example 14
Source File: Message.java From SI with BSD 2-Clause "Simplified" License | 5 votes |
/**
 * Renders the payload for tracing output.
 *
 * <p>An absent or empty payload yields {@code "no payload"}. A payload with
 * no byte below the space character other than tab, line feed or carriage
 * return is decoded with {@code CoAP.UTF8_CHARSET} into at most 24 chars and
 * returned in quotes (with the total byte count appended when it did not
 * fit). Anything else, including a payload that fails to decode, is rendered
 * through {@code Utils.toHexText}.
 *
 * @return the textual representation described above
 */
public String getPayloadTracingString() {
    if (payload == null || payload.length == 0) {
        return "no payload";
    }

    // Bytes are signed, so values of 0x80 and above also compare below ' '.
    boolean looksLikeText = true;
    for (byte current : payload) {
        boolean allowedControl = current == '\t' || current == '\n' || current == '\r';
        if (current < ' ' && !allowedControl) {
            looksLikeText = false;
            break;
        }
    }

    if (looksLikeText) {
        CharsetDecoder decoder = CoAP.UTF8_CHARSET.newDecoder();
        decoder.onMalformedInput(CodingErrorAction.REPORT);
        decoder.onUnmappableCharacter(CodingErrorAction.REPORT);

        ByteBuffer source = ByteBuffer.wrap(payload);
        CharBuffer preview = CharBuffer.allocate(24);
        CoderResult outcome = decoder.decode(source, preview, true);
        decoder.flush(preview);
        preview.flip();

        if (outcome == CoderResult.OVERFLOW) {
            return "\"" + preview + "\".. " + payload.length + " bytes";
        }
        if (!outcome.isError()) {
            return "\"" + preview + "\"";
        }
    }
    return Utils.toHexText(payload, 256);
}
Example 15
Source File: JsonReader.java From jsondb-core with MIT License | 5 votes |
/**
 * Opens {@code collectionFile} for reading while holding a file lock.
 *
 * <p>The lock is taken on a companion file named
 * {@code <collection file name>.lock} inside a {@code lock} directory next to
 * the collection file; the directory and lock file are created if missing.
 * The collection file is read through a decoder for the configured charset
 * that reports malformed input and unmappable characters.
 *
 * @param dbConfig configuration supplying the charset of the collection file
 * @param collectionFile the collection file to read
 * @throws JsonFileLockException if the file lock cannot be obtained
 * @throws IOException if the lock file or the collection file cannot be opened
 */
public JsonReader(JsonDBConfig dbConfig, File collectionFile) throws IOException {
    this.collectionFile = collectionFile;
    this.lockFilesLocation = new File(collectionFile.getParentFile(), "lock");
    this.fileLockLocation = new File(lockFilesLocation, collectionFile.getName() + ".lock");

    if (!lockFilesLocation.exists()) {
        lockFilesLocation.mkdirs();
    }
    if (!fileLockLocation.exists()) {
        fileLockLocation.createNewFile();
    }

    CharsetDecoder decoder = dbConfig.getCharset().newDecoder();
    decoder.onMalformedInput(CodingErrorAction.REPORT);
    decoder.onUnmappableCharacter(CodingErrorAction.REPORT);

    raf = new RandomAccessFile(fileLockLocation, "rw");
    channel = raf.getChannel();
    try {
        lock = channel.lock();
    } catch (IOException | OverlappingFileLockException e) {
        try {
            channel.close();
            raf.close();
        } catch (IOException e1) {
            logger.error("Failed while closing RandomAccessFile for collection file {}", collectionFile.getName());
        }
        throw new JsonFileLockException("JsonReader failed to obtain a file lock for file " + fileLockLocation, e);
    }

    try {
        fis = new FileInputStream(collectionFile);
    } catch (IOException e) {
        // The lock is already held here; give it back before propagating so
        // a missing or unreadable collection file does not leave the lock
        // file open and locked.
        try {
            lock.release();
            channel.close();
            raf.close();
        } catch (IOException e1) {
            logger.error("Failed while closing RandomAccessFile for collection file {}", collectionFile.getName());
        }
        throw e;
    }
    isr = new InputStreamReader(fis, decoder);
    reader = new BufferedReader(isr);
}
Example 16
Source File: InputStreamReaderTest.java From j2objc with Apache License 2.0 | 5 votes |
public void test_read_1() throws IOException { // if the decoder is constructed by InputStreamReader itself, the // decoder's default error action is REPLACE InputStreamReader isr = new InputStreamReader(new ByteArrayInputStream( new byte[] { -32, -96 }), "UTF-8"); assertEquals("read() return incorrect value", 65533, isr.read()); InputStreamReader isr2 = new InputStreamReader( new ByteArrayInputStream(new byte[] { -32, -96 }), Charset .forName("UTF-8")); assertEquals("read() return incorrect value", 65533, isr2.read()); // if the decoder is passed in, keep its status intact CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder(); decoder.onMalformedInput(CodingErrorAction.REPORT); InputStreamReader isr3 = new InputStreamReader( new ByteArrayInputStream(new byte[] { -32, -96 }), decoder); try { isr3.read(); fail("Should throw MalformedInputException"); } catch (MalformedInputException e) { // expected } CharsetDecoder decoder2 = Charset.forName("UTF-8").newDecoder(); decoder2.onMalformedInput(CodingErrorAction.IGNORE); InputStreamReader isr4 = new InputStreamReader( new ByteArrayInputStream(new byte[] { -32, -96 }), decoder2); assertEquals("read() return incorrect value", -1, isr4.read()); CharsetDecoder decoder3 = Charset.forName("UTF-8").newDecoder(); decoder3.onMalformedInput(CodingErrorAction.REPLACE); InputStreamReader isr5 = new InputStreamReader( new ByteArrayInputStream(new byte[] { -32, -96 }), decoder3); assertEquals("read() return incorrect value", 65533, isr5.read()); }
Example 17
Source File: CharacterEncodingExamples.java From tutorials with MIT License | 5 votes |
/**
 * Encodes {@code input} as UTF-8 and decodes the first line of the result
 * with {@code charset}, applying {@code codingErrorAction} to malformed input.
 *
 * @param input the text to round-trip
 * @param charset the charset used for decoding
 * @param codingErrorAction what the decoder does on malformed input
 * @return the first decoded line, or {@code null} if there is none
 * @throws IOException if decoding fails, e.g. malformed input under
 *     {@code CodingErrorAction.REPORT}
 */
static String decodeText(String input, Charset charset, CodingErrorAction codingErrorAction)
        throws IOException {
    CharsetDecoder charsetDecoder = charset.newDecoder();
    charsetDecoder.onMalformedInput(codingErrorAction);

    // Encode with an explicit charset so the result does not depend on the
    // platform default.
    byte[] encoded = input.getBytes(java.nio.charset.StandardCharsets.UTF_8);

    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(new ByteArrayInputStream(encoded), charsetDecoder))) {
        return reader.readLine();
    }
}
Example 18
Source File: AbstractTextParser.java From CloverETL-Engine with GNU Lesser General Public License v2.1 | 4 votes |
/**
 * Configures the given decoder so that both malformed input and unmappable
 * characters are replaced.
 *
 * @param decoder the decoder to configure
 */
protected static void setLenientDecoder(CharsetDecoder decoder) {
    decoder
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
}
Example 19
Source File: NonBlockingFetcher.java From SEAL with Apache License 2.0 | 4 votes |
private static void processDocuments() { CharsetDecoder decoder = charset.newDecoder(); decoder.onMalformedInput(CodingErrorAction.IGNORE); decoder.onUnmappableCharacter(CodingErrorAction.IGNORE); // perform 1 to 2-pass decoding on every document for (int i = 0; i < works.length; i++) { documents.add(null); if (works[i] == null) continue; URL url = works[i].url; log.debug("[" + (i + 1) + "/" + urls.length + "] Processing: " + url); String encoding = DEFAULT_ENCODING; String doc = null; ByteBuffer buffer = works[i].buffer; buffer.flip(); try { // try to use default encoding to decode the document doc = decoder.decode(buffer).toString(); // identify encoding by looking into the <meta> tag Matcher m = CHARSET_PAT.matcher(doc); if (m.find()) { encoding = m.group(1).toUpperCase(); log.debug("Encoding identified as: " + encoding); } else { log.debug("Encoding could not be identified! Using the default: " + DEFAULT_ENCODING); } // if the identified encoding is different from the default encoding if (!encoding.equals(DEFAULT_ENCODING)) { // decode again using the identified encoding CharsetDecoder d = Charset.forName(encoding).newDecoder(); d.onUnmappableCharacter(CodingErrorAction.IGNORE); d.onMalformedInput(CodingErrorAction.IGNORE); buffer.flip(); doc = d.decode(buffer).toString(); } } catch (Exception e) { log.error("Character coding error: " + e); continue; } documents.set(i, removeHTTPHeader(doc)); // doc = removeHTTPHeader(doc); // doc = CacheRecoverer.recover(url, doc); // documents.set(i, doc); } }
Example 20
Source File: ConvertCharacterSet.java From localization_nifi with Apache License 2.0 | 4 votes |
/**
 * Re-encodes the content of the next FlowFile from the charset named by
 * {@code INPUT_CHARSET} to the charset named by {@code OUTPUT_CHARSET}.
 *
 * <p>Malformed input and unmappable characters are replaced with {@code ?}
 * on both the decoding and the encoding side. On success the FlowFile is
 * transferred to {@code REL_SUCCESS}; any failure is rethrown wrapped in a
 * {@code ProcessException}.
 *
 * @param context supplies the charset properties
 * @param session supplies the FlowFile and receives the converted content
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final ComponentLog logger = getLogger();

    final Charset inputCharset = Charset.forName(context.getProperty(INPUT_CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    final Charset outputCharset = Charset.forName(context.getProperty(OUTPUT_CHARSET).evaluateAttributeExpressions(flowFile).getValue());
    final CharBuffer charBuffer = CharBuffer.allocate(MAX_BUFFER_SIZE);

    final CharsetDecoder decoder = inputCharset.newDecoder();
    decoder.onMalformedInput(CodingErrorAction.REPLACE);
    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    decoder.replaceWith("?");

    final CharsetEncoder encoder = outputCharset.newEncoder();
    encoder.onMalformedInput(CodingErrorAction.REPLACE);
    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    encoder.replaceWith("?".getBytes(outputCharset));

    try {
        final StopWatch stopWatch = new StopWatch(true);
        flowFile = session.write(flowFile, new StreamCallback() {
            @Override
            public void process(final InputStream rawIn, final OutputStream rawOut) throws IOException {
                try (final BufferedReader reader = new BufferedReader(new InputStreamReader(rawIn, decoder), MAX_BUFFER_SIZE);
                        final BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(rawOut, encoder), MAX_BUFFER_SIZE)) {
                    int charsRead;
                    while ((charsRead = reader.read(charBuffer)) != -1) {
                        // Each read starts at position 0 of the backing array.
                        writer.write(charBuffer.array(), 0, charsRead);
                        // Reset position and limit so the next read can use
                        // the whole buffer again.
                        charBuffer.clear();
                    }
                    writer.flush();
                }
            }
        });

        session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
        logger.info("successfully converted characters from {} to {} for {}",
                new Object[]{inputCharset, outputCharset, flowFile});
        session.transfer(flowFile, REL_SUCCESS);
    } catch (final Exception e) {
        throw new ProcessException(e);
    }
}