Java Code Examples for org.apache.commons.io.ByteOrderMark#UTF_8
The following examples show how to use
org.apache.commons.io.ByteOrderMark#UTF_8.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CsvInput.java From hop with Apache License 2.0 | 5 votes |
/**
 * Reads the first line of the given file and splits it into field names.
 *
 * <p>The delimiter, enclosure and encoding configured on {@code csvInputMeta} are passed through
 * {@code environmentSubstitute} before use. The file is opened through a {@link BOMInputStream}
 * configured with the UTF-8, UTF-16LE and UTF-16BE byte order marks. When no encoding is
 * configured, the reader is created without an explicit charset.
 *
 * @param fileName     name of the file to open through {@code HopVfs}
 * @param csvInputMeta metadata providing delimiter, enclosure, encoding and escape character
 * @return the field names guessed from the first line, with enclosures removed (when an
 *         enclosure is configured) and passed through {@code trimFieldNames}
 * @throws HopException if an {@link IOException} occurs while opening or reading the file
 */
String[] readFieldNamesFromFile( String fileName, CsvInputMeta csvInputMeta ) throws HopException {
  String resolvedDelimiter = environmentSubstitute( csvInputMeta.getDelimiter() );
  String resolvedEnclosure = environmentSubstitute( csvInputMeta.getEnclosure() );
  String resolvedEncoding = environmentSubstitute( csvInputMeta.getEncoding() );

  try ( FileObject sourceFile = HopVfs.getFileObject( fileName );
        BOMInputStream bomAwareStream = new BOMInputStream(
          HopVfs.getInputStream( sourceFile ),
          ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE ) ) {

    // Use the configured encoding if there is one; otherwise let the reader pick its default.
    InputStreamReader headerReader = Utils.isEmpty( resolvedEncoding )
      ? new InputStreamReader( bomAwareStream )
      : new InputStreamReader( bomAwareStream, resolvedEncoding );

    EncodingType detectedType = EncodingType.guessEncodingType( headerReader.getEncoding() );
    String headerLine = TextFileInput.getLine(
      log, headerReader, detectedType, TextFileInputMeta.FILE_FORMAT_UNIX, new StringBuilder( 1000 ) );

    String[] names = TextFileLineUtil.guessStringsFromLine(
      log, headerLine, resolvedDelimiter, resolvedEnclosure, csvInputMeta.getEscapeCharacter() );

    if ( !Utils.isEmpty( csvInputMeta.getEnclosure() ) ) {
      removeEnclosure( names, csvInputMeta.getEnclosure() );
    }
    trimFieldNames( names );
    return names;
  } catch ( IOException ioe ) {
    throw new HopFileException(
      BaseMessages.getString( PKG, "CsvInput.Exception.CreateFieldMappingError" ), ioe );
  }
}
Example 2
Source File: XMLUtils.java From modernmt with Apache License 2.0 | 5 votes |
/**
 * Creates an {@link XMLEventReader} over the given stream, choosing the charset from the
 * stream's byte order mark when one is present.
 *
 * <p>The stream is wrapped in a {@link BOMInputStream} configured for the UTF-8, UTF-16BE and
 * UTF-16LE byte order marks. If a BOM is detected its charset is used; otherwise the charset
 * returned by {@code UTF8Charset.get()} is used.
 *
 * @param stream the raw XML input
 * @return an event reader over the decoded characters
 * @throws XMLStreamException if BOM detection fails with an {@link IOException}, or if the
 *                            event reader cannot be created
 */
public static XMLEventReader createEventReader(InputStream stream) throws XMLStreamException {
    Charset fallbackCharset = UTF8Charset.get();
    BOMInputStream bomAwareStream = new BOMInputStream(stream, false,
            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE);

    Charset effectiveCharset;
    try {
        effectiveCharset = bomAwareStream.hasBOM()
                ? Charset.forName(bomAwareStream.getBOMCharsetName())
                : fallbackCharset;
    } catch (IOException ioe) {
        throw new XMLStreamException(ioe);
    }

    return XMLInputFactory.newInstance()
            .createXMLEventReader(new XMLFixInputStreamReader(bomAwareStream, effectiveCharset));
}
Example 3
Source File: MD5Digester.java From p4ic4idea with Apache License 2.0 | 5 votes |
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset, boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding) throws IOException { try (BOMInputStream unicodeInputStream = new BOMInputStream(inStream, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE); InputStreamReader encodedStreamReader = new InputStreamReader(unicodeInputStream, charset)) { CharsetEncoder utf8CharsetEncoder = CharsetDefs.UTF8.newEncoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); char[] buffer = new char[bufferSize]; int read; while ((read = encodedStreamReader.read(buffer)) > 0) { // Convert encoded stream to UTF8 since server digest is UTF8 ByteBuffer utf8ByteBuffer = utf8CharsetEncoder .encode(CharBuffer.wrap(buffer, 0, read)); if (isRequireLineEndingConvert) { ByteBuffer convert = findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert( encodedStreamReader, utf8CharsetEncoder, utf8ByteBuffer, clientLineEnding); update(convert.array(), convert.arrayOffset(), convert.limit()); } else { update(utf8ByteBuffer.array(), utf8ByteBuffer.arrayOffset(), utf8ByteBuffer.limit()); } } } }
Example 4
Source File: MD5Digester.java From p4ic4idea with Apache License 2.0 | 5 votes |
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset, boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding) throws IOException { try (BOMInputStream unicodeInputStream = new BOMInputStream(inStream, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE); InputStreamReader encodedStreamReader = new InputStreamReader(unicodeInputStream, charset)) { CharsetEncoder utf8CharsetEncoder = CharsetDefs.UTF8.newEncoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); char[] buffer = new char[bufferSize]; int read; while ((read = encodedStreamReader.read(buffer)) > 0) { // Convert encoded stream to UTF8 since server digest is UTF8 ByteBuffer utf8ByteBuffer = utf8CharsetEncoder .encode(CharBuffer.wrap(buffer, 0, read)); if (isRequireLineEndingConvert) { ByteBuffer convert = findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert( encodedStreamReader, utf8CharsetEncoder, utf8ByteBuffer, clientLineEnding); update(convert.array(), convert.arrayOffset(), convert.limit()); } else { update(utf8ByteBuffer.array(), utf8ByteBuffer.arrayOffset(), utf8ByteBuffer.limit()); } } } }
Example 5
Source File: MD5Digester.java From p4ic4idea with Apache License 2.0 | 5 votes |
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset, boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding) throws IOException { try (BOMInputStream unicodeInputStream = new BOMInputStream(inStream, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE); InputStreamReader encodedStreamReader = new InputStreamReader(unicodeInputStream, charset)) { CharsetEncoder utf8CharsetEncoder = CharsetDefs.UTF8.newEncoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); char[] buffer = new char[bufferSize]; int read; while ((read = encodedStreamReader.read(buffer)) > 0) { // Convert encoded stream to UTF8 since server digest is UTF8 ByteBuffer utf8ByteBuffer = utf8CharsetEncoder .encode(CharBuffer.wrap(buffer, 0, read)); if (isRequireLineEndingConvert) { ByteBuffer convert = findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert( encodedStreamReader, utf8CharsetEncoder, utf8ByteBuffer, clientLineEnding); update(convert.array(), convert.arrayOffset(), convert.limit()); } else { update(utf8ByteBuffer.array(), utf8ByteBuffer.arrayOffset(), utf8ByteBuffer.limit()); } } } }
Example 6
Source File: MD5Digester.java From p4ic4idea with Apache License 2.0 | 5 votes |
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset, boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding) throws IOException { try (BOMInputStream unicodeInputStream = new BOMInputStream(inStream, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE); InputStreamReader encodedStreamReader = new InputStreamReader(unicodeInputStream, charset)) { CharsetEncoder utf8CharsetEncoder = CharsetDefs.UTF8.newEncoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); char[] buffer = new char[bufferSize]; int read; while ((read = encodedStreamReader.read(buffer)) > 0) { // Convert encoded stream to UTF8 since server digest is UTF8 ByteBuffer utf8ByteBuffer = utf8CharsetEncoder .encode(CharBuffer.wrap(buffer, 0, read)); if (isRequireLineEndingConvert) { ByteBuffer convert = findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert( encodedStreamReader, utf8CharsetEncoder, utf8ByteBuffer, clientLineEnding); update(convert.array(), convert.arrayOffset(), convert.limit()); } else { update(utf8ByteBuffer.array(), utf8ByteBuffer.arrayOffset(), utf8ByteBuffer.limit()); } } } }
Example 7
Source File: StreamDecoder.java From batfish with Apache License 2.0 | 5 votes |
/**
 * Wraps the given stream in a {@link BOMInputStream} configured with the UTF-8, UTF-16BE,
 * UTF-16LE, UTF-32BE and UTF-32LE byte order marks.
 *
 * @param inputStream the stream to wrap
 * @return the wrapping BOM-aware stream
 */
private static @Nonnull BOMInputStream bomInputStream(@Nonnull InputStream inputStream) {
  ByteOrderMark[] recognizedMarks = {
    ByteOrderMark.UTF_8,
    ByteOrderMark.UTF_16BE,
    ByteOrderMark.UTF_16LE,
    ByteOrderMark.UTF_32BE,
    ByteOrderMark.UTF_32LE
  };
  return new BOMInputStream(inputStream, recognizedMarks);
}
Example 8
Source File: StreamUtil.java From iaf with Apache License 2.0 | 5 votes |
/**
 * Returns a Reader that decodes the InputStream using the character set named by its byte
 * order mark. When no BOM is found, {@code defaultCharset} is used instead.
 *
 * <p>Only the UTF-8, UTF-16LE and UTF-16BE byte order marks are checked.
 *
 * @param inputStream    the raw input
 * @param defaultCharset name of the charset to use when the stream has no recognised BOM
 * @return a reader over a buffered view of the BOM-aware stream
 * @throws IOException if the BOM cannot be read or the charset name is not supported
 */
public static Reader getCharsetDetectingInputStreamReader(InputStream inputStream, String defaultCharset) throws IOException {
	BOMInputStream bomAwareStream = new BOMInputStream(inputStream,
			ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE);
	ByteOrderMark detectedBom = bomAwareStream.getBOM();

	String charsetName;
	if (detectedBom != null) {
		charsetName = detectedBom.getCharsetName();
	} else {
		charsetName = defaultCharset;
	}

	InputStream bufferedStream = new BufferedInputStream(bomAwareStream);
	return new InputStreamReader(bufferedStream, charsetName);
}
Example 9
Source File: BOMInputStream.java From aion-germany with GNU General Public License v3.0 | 2 votes |
/**
 * Creates a BOM InputStream that checks for a {@link ByteOrderMark#UTF_8} BOM and excludes it.
 *
 * <p>Delegates to the three-argument constructor with {@code include} set to {@code false}.
 *
 * @param delegate
 *            the InputStream to delegate to
 */
public BOMInputStream(InputStream delegate) {
    this(delegate, false, ByteOrderMark.UTF_8);
}
Example 10
Source File: BOMInputStream.java From aion-germany with GNU General Public License v3.0 | 2 votes |
/**
 * Creates a BOM InputStream that detects a {@link ByteOrderMark#UTF_8} BOM and optionally
 * includes it.
 *
 * <p>Delegates to the three-argument constructor, passing {@code include} through unchanged.
 *
 * @param delegate
 *            the InputStream to delegate to
 * @param include
 *            true to include the UTF-8 BOM or false to exclude it
 */
public BOMInputStream(InputStream delegate, boolean include) {
    this(delegate, include, ByteOrderMark.UTF_8);
}
Example 11
Source File: BOMInputStream.java From lams with GNU General Public License v2.0 | 2 votes |
/**
 * Creates a BOM InputStream that checks for a {@link ByteOrderMark#UTF_8} BOM and excludes it.
 *
 * <p>Delegates to the three-argument constructor with {@code include} set to {@code false}.
 *
 * @param delegate
 *            the InputStream to delegate to
 */
public BOMInputStream(final InputStream delegate) {
    this(delegate, false, ByteOrderMark.UTF_8);
}
Example 12
Source File: BOMInputStream.java From lams with GNU General Public License v2.0 | 2 votes |
/**
 * Creates a BOM InputStream that detects a {@link ByteOrderMark#UTF_8} BOM and optionally
 * includes it.
 *
 * <p>Delegates to the three-argument constructor, passing {@code include} through unchanged.
 *
 * @param delegate
 *            the InputStream to delegate to
 * @param include
 *            true to include the UTF-8 BOM or false to exclude it
 */
public BOMInputStream(final InputStream delegate, final boolean include) {
    this(delegate, include, ByteOrderMark.UTF_8);
}