Java Code Examples for org.apache.commons.io.ByteOrderMark#UTF_16LE
The following examples show how to use
org.apache.commons.io.ByteOrderMark#UTF_16LE.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CsvInput.java From hop with Apache License 2.0 | 5 votes |
/**
 * Reads one line from the given file and splits it into field names.
 *
 * <p>The delimiter, enclosure and encoding are taken from {@code csvInputMeta} after
 * environment substitution. The file is opened through a {@link BOMInputStream} configured
 * for UTF-8, UTF-16LE and UTF-16BE byte order marks. When the configured encoding is empty
 * the reader is created without an explicit charset; otherwise the configured encoding is
 * used.
 *
 * @param fileName     name of the file to read, resolved via {@code HopVfs}
 * @param csvInputMeta metadata supplying delimiter, enclosure, encoding and escape character
 * @return the field names after enclosure removal (when an enclosure is configured) and trimming
 * @throws HopException a {@code HopFileException} wrapping any {@link IOException} raised while reading
 */
String[] readFieldNamesFromFile( String fileName, CsvInputMeta csvInputMeta ) throws HopException {
  final String delimiter = environmentSubstitute( csvInputMeta.getDelimiter() );
  final String enclosure = environmentSubstitute( csvInputMeta.getEnclosure() );
  final String realEncoding = environmentSubstitute( csvInputMeta.getEncoding() );

  try ( FileObject fileObject = HopVfs.getFileObject( fileName );
        BOMInputStream bomStream = new BOMInputStream( HopVfs.getInputStream( fileObject ),
          ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE ) ) {

    // The reader is not closed on its own; the stream it wraps is closed by try-with-resources.
    final InputStreamReader headerReader = Utils.isEmpty( realEncoding )
      ? new InputStreamReader( bomStream )
      : new InputStreamReader( bomStream, realEncoding );

    final EncodingType encodingType = EncodingType.guessEncodingType( headerReader.getEncoding() );

    final String headerLine = TextFileInput.getLine(
      log, headerReader, encodingType, TextFileInputMeta.FILE_FORMAT_UNIX, new StringBuilder( 1000 ) );

    final String[] names = TextFileLineUtil.guessStringsFromLine(
      log, headerLine, delimiter, enclosure, csvInputMeta.getEscapeCharacter() );

    // Enclosure stripping uses the raw (unsubstituted) enclosure from the metadata.
    if ( !Utils.isEmpty( csvInputMeta.getEnclosure() ) ) {
      removeEnclosure( names, csvInputMeta.getEnclosure() );
    }
    trimFieldNames( names );
    return names;
  } catch ( IOException ioe ) {
    throw new HopFileException(
      BaseMessages.getString( PKG, "CsvInput.Exception.CreateFieldMappingError" ), ioe );
  }
}
Example 2
Source File: XMLUtils.java From modernmt with Apache License 2.0 | 5 votes |
/**
 * Creates an {@link XMLEventReader} over the given stream, choosing the charset from a
 * leading byte order mark when one is present.
 *
 * <p>UTF-8, UTF-16BE and UTF-16LE marks are recognised. Without a BOM the charset returned
 * by {@code UTF8Charset.get()} is used.
 *
 * @param stream the raw XML input
 * @return an event reader over the decoded content
 * @throws XMLStreamException if BOM detection fails with an {@link IOException}, or the
 *                            reader cannot be created
 */
public static XMLEventReader createEventReader(InputStream stream) throws XMLStreamException {
    Charset fallback = UTF8Charset.get();
    BOMInputStream bomAware = new BOMInputStream(stream, false,
            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE);

    Charset charset;
    try {
        charset = bomAware.hasBOM()
                ? Charset.forName(bomAware.getBOMCharsetName())
                : fallback;
    } catch (IOException e) {
        throw new XMLStreamException(e);
    }

    // NOTE(review): the factory is used with its default settings; if this stream can carry
    // untrusted XML, consider whether DTD/external-entity support should be disabled.
    return XMLInputFactory.newInstance()
            .createXMLEventReader(new XMLFixInputStreamReader(bomAware, charset));
}
Example 3
Source File: MD5Digester.java From p4ic4idea with Apache License 2.0 | 5 votes |
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset, boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding) throws IOException { try (BOMInputStream unicodeInputStream = new BOMInputStream(inStream, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE); InputStreamReader encodedStreamReader = new InputStreamReader(unicodeInputStream, charset)) { CharsetEncoder utf8CharsetEncoder = CharsetDefs.UTF8.newEncoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); char[] buffer = new char[bufferSize]; int read; while ((read = encodedStreamReader.read(buffer)) > 0) { // Convert encoded stream to UTF8 since server digest is UTF8 ByteBuffer utf8ByteBuffer = utf8CharsetEncoder .encode(CharBuffer.wrap(buffer, 0, read)); if (isRequireLineEndingConvert) { ByteBuffer convert = findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert( encodedStreamReader, utf8CharsetEncoder, utf8ByteBuffer, clientLineEnding); update(convert.array(), convert.arrayOffset(), convert.limit()); } else { update(utf8ByteBuffer.array(), utf8ByteBuffer.arrayOffset(), utf8ByteBuffer.limit()); } } } }
Example 4
Source File: SubmitAndSyncUtf16FileTypeTest.java From p4ic4idea with Apache License 2.0 | 5 votes |
/**
 * Returns the size of the given file's content after converting it from {@code utf16} to
 * UTF-8.
 *
 * <p>The file is read through a {@link BOMInputStream} configured with {@code include=false}
 * for the UTF-16LE and UTF-16BE byte order marks; the bytes read are then converted with a
 * {@code CharsetConverter} and the limit of the converted buffer is returned.
 *
 * @param testResourceFile file to measure
 * @param utf16            source charset handed to the converter
 * @return the limit of the converted UTF-8 buffer
 * @throws Exception if the file cannot be read or converted
 */
private long getUtf16FileSizeAfterRemoveBomAndEncodedByUtf8(File testResourceFile, Charset utf16)
        throws Exception {
    try (BOMInputStream withoutBom = new BOMInputStream(new FileInputStream(testResourceFile),
            false, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE)) {
        ByteBuffer utf16Payload = ByteBuffer.wrap(IOUtils.toByteArray(withoutBom));
        return new CharsetConverter(utf16, CharsetDefs.UTF8).convert(utf16Payload).limit();
    }
}
Example 5
Source File: MD5Digester.java From p4ic4idea with Apache License 2.0 | 5 votes |
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset, boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding) throws IOException { try (BOMInputStream unicodeInputStream = new BOMInputStream(inStream, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE); InputStreamReader encodedStreamReader = new InputStreamReader(unicodeInputStream, charset)) { CharsetEncoder utf8CharsetEncoder = CharsetDefs.UTF8.newEncoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); char[] buffer = new char[bufferSize]; int read; while ((read = encodedStreamReader.read(buffer)) > 0) { // Convert encoded stream to UTF8 since server digest is UTF8 ByteBuffer utf8ByteBuffer = utf8CharsetEncoder .encode(CharBuffer.wrap(buffer, 0, read)); if (isRequireLineEndingConvert) { ByteBuffer convert = findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert( encodedStreamReader, utf8CharsetEncoder, utf8ByteBuffer, clientLineEnding); update(convert.array(), convert.arrayOffset(), convert.limit()); } else { update(utf8ByteBuffer.array(), utf8ByteBuffer.arrayOffset(), utf8ByteBuffer.limit()); } } } }
Example 6
Source File: SubmitAndSyncUtf16FileTypeTest.java From p4ic4idea with Apache License 2.0 | 5 votes |
/**
 * Returns the size of the given file's content after converting it from {@code utf16} to
 * UTF-8.
 *
 * <p>The file is read through a {@link BOMInputStream} configured with {@code include=false}
 * for the UTF-16LE and UTF-16BE byte order marks; the bytes read are then converted with a
 * {@code CharsetConverter} and the limit of the converted buffer is returned.
 *
 * @param testResourceFile file to measure
 * @param utf16            source charset handed to the converter
 * @return the limit of the converted UTF-8 buffer
 * @throws Exception if the file cannot be read or converted
 */
private long getUtf16FileSizeAfterRemoveBomAndEncodedByUtf8(File testResourceFile, Charset utf16)
        throws Exception {
    try (BOMInputStream withoutBom = new BOMInputStream(new FileInputStream(testResourceFile),
            false, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE)) {
        ByteBuffer utf16Payload = ByteBuffer.wrap(IOUtils.toByteArray(withoutBom));
        return new CharsetConverter(utf16, CharsetDefs.UTF8).convert(utf16Payload).limit();
    }
}
Example 7
Source File: MD5Digester.java From p4ic4idea with Apache License 2.0 | 5 votes |
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset, boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding) throws IOException { try (BOMInputStream unicodeInputStream = new BOMInputStream(inStream, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE); InputStreamReader encodedStreamReader = new InputStreamReader(unicodeInputStream, charset)) { CharsetEncoder utf8CharsetEncoder = CharsetDefs.UTF8.newEncoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); char[] buffer = new char[bufferSize]; int read; while ((read = encodedStreamReader.read(buffer)) > 0) { // Convert encoded stream to UTF8 since server digest is UTF8 ByteBuffer utf8ByteBuffer = utf8CharsetEncoder .encode(CharBuffer.wrap(buffer, 0, read)); if (isRequireLineEndingConvert) { ByteBuffer convert = findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert( encodedStreamReader, utf8CharsetEncoder, utf8ByteBuffer, clientLineEnding); update(convert.array(), convert.arrayOffset(), convert.limit()); } else { update(utf8ByteBuffer.array(), utf8ByteBuffer.arrayOffset(), utf8ByteBuffer.limit()); } } } }
Example 8
Source File: SubmitAndSyncUtf16FileTypeTest.java From p4ic4idea with Apache License 2.0 | 5 votes |
/**
 * Returns the size of the given file's content after converting it from {@code utf16} to
 * UTF-8.
 *
 * <p>The file is read through a {@link BOMInputStream} configured with {@code include=false}
 * for the UTF-16LE and UTF-16BE byte order marks; the bytes read are then converted with a
 * {@code CharsetConverter} and the limit of the converted buffer is returned.
 *
 * @param testResourceFile file to measure
 * @param utf16            source charset handed to the converter
 * @return the limit of the converted UTF-8 buffer
 * @throws Exception if the file cannot be read or converted
 */
private long getUtf16FileSizeAfterRemoveBomAndEncodedByUtf8(File testResourceFile, Charset utf16)
        throws Exception {
    try (BOMInputStream withoutBom = new BOMInputStream(new FileInputStream(testResourceFile),
            false, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE)) {
        ByteBuffer utf16Payload = ByteBuffer.wrap(IOUtils.toByteArray(withoutBom));
        return new CharsetConverter(utf16, CharsetDefs.UTF8).convert(utf16Payload).limit();
    }
}
Example 9
Source File: MD5Digester.java From p4ic4idea with Apache License 2.0 | 5 votes |
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset, boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding) throws IOException { try (BOMInputStream unicodeInputStream = new BOMInputStream(inStream, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE); InputStreamReader encodedStreamReader = new InputStreamReader(unicodeInputStream, charset)) { CharsetEncoder utf8CharsetEncoder = CharsetDefs.UTF8.newEncoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); char[] buffer = new char[bufferSize]; int read; while ((read = encodedStreamReader.read(buffer)) > 0) { // Convert encoded stream to UTF8 since server digest is UTF8 ByteBuffer utf8ByteBuffer = utf8CharsetEncoder .encode(CharBuffer.wrap(buffer, 0, read)); if (isRequireLineEndingConvert) { ByteBuffer convert = findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert( encodedStreamReader, utf8CharsetEncoder, utf8ByteBuffer, clientLineEnding); update(convert.array(), convert.arrayOffset(), convert.limit()); } else { update(utf8ByteBuffer.array(), utf8ByteBuffer.arrayOffset(), utf8ByteBuffer.limit()); } } } }
Example 10
Source File: SubmitAndSyncUtf16FileTypeTest.java From p4ic4idea with Apache License 2.0 | 5 votes |
/**
 * Returns the size of the given file's content after converting it from {@code utf16} to
 * UTF-8.
 *
 * <p>The file is read through a {@link BOMInputStream} configured with {@code include=false}
 * for the UTF-16LE and UTF-16BE byte order marks; the bytes read are then converted with a
 * {@code CharsetConverter} and the limit of the converted buffer is returned.
 *
 * @param testResourceFile file to measure
 * @param utf16            source charset handed to the converter
 * @return the limit of the converted UTF-8 buffer
 * @throws Exception if the file cannot be read or converted
 */
private long getUtf16FileSizeAfterRemoveBomAndEncodedByUtf8(File testResourceFile, Charset utf16)
        throws Exception {
    try (BOMInputStream withoutBom = new BOMInputStream(new FileInputStream(testResourceFile),
            false, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE)) {
        ByteBuffer utf16Payload = ByteBuffer.wrap(IOUtils.toByteArray(withoutBom));
        return new CharsetConverter(utf16, CharsetDefs.UTF8).convert(utf16Payload).limit();
    }
}
Example 11
Source File: StreamDecoder.java From batfish with Apache License 2.0 | 5 votes |
/**
 * Wraps the given stream in a {@link BOMInputStream} configured for the UTF-8, UTF-16BE,
 * UTF-16LE, UTF-32BE and UTF-32LE byte order marks.
 *
 * @param inputStream the stream to wrap
 * @return the wrapping BOM-aware stream
 */
private static @Nonnull BOMInputStream bomInputStream(@Nonnull InputStream inputStream) {
    // Order is kept exactly as passed to the constructor originally.
    ByteOrderMark[] recognizedBoms = {
        ByteOrderMark.UTF_8,
        ByteOrderMark.UTF_16BE,
        ByteOrderMark.UTF_16LE,
        ByteOrderMark.UTF_32BE,
        ByteOrderMark.UTF_32LE,
    };
    return new BOMInputStream(inputStream, recognizedBoms);
}
Example 12
Source File: StreamUtil.java From iaf with Apache License 2.0 | 5 votes |
/**
 * Returns a Reader over the InputStream that decodes with the character set named by the
 * stream's byte order mark (UTF-8, UTF-16LE or UTF-16BE). When no BOM is found, the supplied
 * default character set is used instead.
 *
 * @param inputStream    the stream to read
 * @param defaultCharset name of the charset to use when the stream has no recognised BOM
 * @return a reader over a buffered view of the BOM-aware stream
 * @throws IOException if the BOM cannot be read or the charset name is not supported
 */
public static Reader getCharsetDetectingInputStreamReader(InputStream inputStream, String defaultCharset) throws IOException {
    BOMInputStream bomStream = new BOMInputStream(inputStream,
            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE);

    ByteOrderMark detected = bomStream.getBOM();
    String charsetName;
    if (detected != null) {
        charsetName = detected.getCharsetName();
    } else {
        charsetName = defaultCharset;
    }

    return new InputStreamReader(new BufferedInputStream(bomStream), charsetName);
}