org.apache.commons.io.ByteOrderMark Java Examples
The following examples show how to use
org.apache.commons.io.ByteOrderMark.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: EncodingSniffer.java From htmlunit with Apache License 2.0 | 6 votes |
/**
 * Tries to determine a character encoding by checking whether the given bytes begin with a
 * <a href="http://en.wikipedia.org/wiki/Byte_Order_Mark">Byte Order Mark</a>.
 * The UTF-8, UTF-16BE and UTF-16LE marks are tested, in that order.
 *
 * @param bytes the bytes to check for a Byte Order Mark
 * @return the encoding sniffed from the specified bytes, or {@code null} if the encoding
 *         could not be determined (which includes a {@code null} argument)
 */
static Charset sniffEncodingFromUnicodeBom(final byte[] bytes) {
    if (bytes == null) {
        return null;
    }

    final Charset detected;
    if (startsWith(bytes, ByteOrderMark.UTF_8)) {
        detected = UTF_8;
    }
    else if (startsWith(bytes, ByteOrderMark.UTF_16BE)) {
        detected = UTF_16BE;
    }
    else if (startsWith(bytes, ByteOrderMark.UTF_16LE)) {
        detected = UTF_16LE;
    }
    else {
        // None of the tested marks is present: nothing to log, nothing to return.
        return null;
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("Encoding found in Unicode Byte Order Mark: '" + detected + "'.");
    }
    return detected;
}
Example #2
Source File: HtmlScript2Test.java From htmlunit with Apache License 2.0 | 6 votes |
/**
 * Loads a page whose external script is served as UTF-8 with a leading UTF-8 byte order
 * mark while the {@code charset} attribute of the script tag names ISO-8859-1, and expects
 * the non-ASCII alert text to arrive unchanged.
 *
 * @throws Exception if the test fails
 */
@Test
@Alerts("\u0623\u0647\u0644\u0627\u064b\u0623\u0647\u0644\u0627"
        + "\u064b\u0623\u0647\u0644\u0627\u064b\u0623\u0647\u0644\u0627\u064b")
public void incorrectCharset() throws Exception {
    final String html
        = "<html><head>\n"
        + " <script src='" + URL_SECOND + "' charset='" + ISO_8859_1 + "'></script>\n"
        + "</head>\n"
        + "<body></body>\n"
        + "</html>";

    // Decode the BOM bytes explicitly as UTF-8. The no-charset String constructor uses the
    // platform default encoding, which on a non-UTF-8 platform turns the three BOM bytes into
    // three unrelated characters, so the response would no longer start with a real BOM.
    final String script = new String(ByteOrderMark.UTF_8.getBytes(), UTF_8)
        + "alert('"
        + "\u0623\u0647\u0644\u0627\u064b\u0623\u0647\u0644\u0627"
        + "\u064b\u0623\u0647\u0644\u0627\u064b\u0623\u0647\u0644\u0627\u064b"
        + "');";
    getMockWebConnection().setResponse(URL_SECOND, script, MimeType.APPLICATION_JAVASCRIPT, UTF_8);
    loadPageWithAlerts2(html);
}
Example #3
Source File: TextInput.java From dremio-oss with Apache License 2.0 | 6 votes |
private final boolean checkBom(ByteOrderMark bom) { int bomLength = bom.length(); if (bufferPtr + bomLength >= length) { // Not enough bytes from the current position to the end of the buffer return false; } if (BoundsChecking.BOUNDS_CHECKING_ENABLED) { buffer.checkBytes(bufferPtr - 1, bufferPtr + bomLength); } byte[] bomBytes = bom.getBytes(); for (int i = 0; i < bomLength; i++) { byte nextChar = PlatformDependent.getByte(bStartMinus1 + bufferPtr + i); if (nextChar != bomBytes[i]) { // No BOM. Position is unchanged return false; } } return true; }
Example #4
Source File: EncodingSniffer.java From HtmlUnit-Android with Apache License 2.0 | 6 votes |
/**
 * Tries to determine a character encoding by checking whether the given bytes begin with a
 * <a href="http://en.wikipedia.org/wiki/Byte_Order_Mark">Byte Order Mark</a>.
 * The UTF-8, UTF-16BE and UTF-16LE marks are tested, in that order.
 *
 * @param bytes the bytes to check for a Byte Order Mark
 * @return the encoding sniffed from the specified bytes, or {@code null} if the encoding
 *         could not be determined (which includes a {@code null} argument)
 */
static Charset sniffEncodingFromUnicodeBom(final byte[] bytes) {
    if (bytes == null) {
        return null;
    }

    final Charset detected;
    if (startsWith(bytes, ByteOrderMark.UTF_8)) {
        detected = UTF_8;
    }
    else if (startsWith(bytes, ByteOrderMark.UTF_16BE)) {
        detected = UTF_16BE;
    }
    else if (startsWith(bytes, ByteOrderMark.UTF_16LE)) {
        detected = UTF_16LE;
    }
    else {
        // None of the tested marks is present: nothing to log, nothing to return.
        return null;
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("Encoding found in Unicode Byte Order Mark: '" + detected + "'.");
    }
    return detected;
}
Example #5
Source File: TestNewTextReader.java From dremio-oss with Apache License 2.0 | 6 votes |
@Test public void testBomUtf8() throws Exception { // Simple .csv file with a UTF-8 BOM. Should read successfully File testFolder = tempDir.newFolder("testUtf8Folder"); File testFile = new File(testFolder, "utf8.csv"); PrintStream p = new PrintStream(testFile); p.write(ByteOrderMark.UTF_8.getBytes(), 0, ByteOrderMark.UTF_8.length()); p.print("A,B\n"); p.print("5,7\n"); p.close(); testBuilder() .sqlQuery(String.format("select * from table(dfs.\"%s\" (type => 'text', " + "fieldDelimiter => ',', lineDelimiter => '\n', extractHeader => true))", testFile.getAbsolutePath())) .unOrdered() .baselineColumns("A","B") .baselineValues("5", "7") .go(); }
Example #6
Source File: TestNewTextReader.java From dremio-oss with Apache License 2.0 | 6 votes |
@Test public void testErrorBomUtf16() throws Exception { // UTF-16 BOM should cause a dataReadError user exception File testFolder = tempDir.newFolder("testUtf16Folder"); File testFile = new File(testFolder, "utf16.csv"); PrintStream p = new PrintStream(testFile); p.write(ByteOrderMark.UTF_16LE.getBytes(), 0, ByteOrderMark.UTF_16LE.length()); p.print("A,B\n"); p.print("5,7\n"); p.close(); thrownException.expect(new UserExceptionMatcher(UserBitShared.DremioPBError.ErrorType.DATA_READ, "DATA_READ ERROR: UTF-16 files not supported")); // NB: using test() instead of testBuilder() because it unwraps the thrown RpcException and re-throws the // underlying UserException (which is then matched with the UserExceptionMatcher) test(String.format("select * from table(dfs.\"%s\" (type => 'text', " + "fieldDelimiter => ',', lineDelimiter => '\n', extractHeader => true))", testFile.getAbsolutePath())); }
Example #7
Source File: SubmitAndSyncUtf16FileTypeTest.java From p4ic4idea with Apache License 2.0 | 5 votes |
/**
 * Reads the given file with any leading UTF-16LE/UTF-16BE byte order mark excluded, runs the
 * remaining bytes through a {@code CharsetConverter} built from {@code utf16} to
 * {@code CharsetDefs.UTF8}, and returns the limit of the converted buffer.
 *
 * @param testResourceFile the file to read
 * @param utf16 the source charset handed to the converter
 * @return the limit of the buffer returned by the converter
 * @throws Exception if reading or converting fails
 */
private long getUtf16FileSizeAfterRemoveBomAndEncodedByUtf8(File testResourceFile, Charset utf16) throws Exception {
    try (BOMInputStream bomFreeStream = new BOMInputStream(
            new FileInputStream(testResourceFile),
            false,
            ByteOrderMark.UTF_16LE,
            ByteOrderMark.UTF_16BE)) {
        // include == false: a detected mark is not part of the bytes read below
        ByteBuffer content = ByteBuffer.wrap(IOUtils.toByteArray(bomFreeStream));
        CharsetConverter toUtf8 = new CharsetConverter(utf16, CharsetDefs.UTF8);
        return toUtf8.convert(content).limit();
    }
}
Example #8
Source File: BOMInputStream.java From lams with GNU General Public License v2.0 | 5 votes |
/** * Return the BOM (Byte Order Mark). * * @return The BOM or null if none * @throws IOException * if an error reading the first bytes of the stream occurs */ public ByteOrderMark getBOM() throws IOException { if (firstBytes == null) { fbLength = 0; // BOMs are sorted from longest to shortest final int maxBomSize = boms.get(0).length(); firstBytes = new int[maxBomSize]; // Read first maxBomSize bytes for (int i = 0; i < firstBytes.length; i++) { firstBytes[i] = in.read(); fbLength++; if (firstBytes[i] < 0) { break; } } // match BOM in firstBytes byteOrderMark = find(); if (byteOrderMark != null) { if (!include) { if (byteOrderMark.length() < firstBytes.length) { fbIndex = byteOrderMark.length(); } else { fbLength = 0; } } } } return byteOrderMark; }
Example #9
Source File: BOMInputStream.java From lams with GNU General Public License v2.0 | 5 votes |
/**
 * Find a BOM with the specified bytes: walks the configured BOMs in list order and
 * picks the first one accepted by {@code matches}.
 *
 * @return The matched BOM or null if none matched
 */
private ByteOrderMark find() {
    ByteOrderMark matched = null;
    for (final ByteOrderMark candidate : boms) {
        if (matches(candidate)) {
            matched = candidate;
            break;
        }
    }
    return matched;
}
Example #10
Source File: BOMInputStream.java From lams with GNU General Public License v2.0 | 5 votes |
/** * Check if the bytes match a BOM. * * @param bom * The BOM * @return true if the bytes match the bom, otherwise false */ private boolean matches(final ByteOrderMark bom) { // if (bom.length() != fbLength) { // return false; // } // firstBytes may be bigger than the BOM bytes for (int i = 0; i < bom.length(); i++) { if (bom.get(i) != firstBytes[i]) { return false; } } return true; }
Example #11
Source File: TextRecordWriter.java From dremio-oss with Apache License 2.0 | 5 votes |
@Override public void startPartition(WritePartition partition) throws Exception { if(this.partition != null){ close(); } this.partition = partition; // open a new file for writing data with new schema try { this.path = fs.canonicalizePath(partition.qualified(location, prefix + "_" + index + "." + extension)); dos = new DataOutputStream(fs.create(path)); stream = new PrintStream(dos); stream.write(ByteOrderMark.UTF_8.getBytes(), 0, ByteOrderMark.UTF_8.length()); logger.debug("Created file: {}", path); } catch (IOException e) { throw UserException.dataWriteError(e) .message("Failure while attempting to write file %s.", path) .build(logger); } index++; String columns = Joiner.on(fieldDelimiter).join(columnNames); stream.print(columns); stream.print(lineDelimiter); }
Example #12
Source File: TextInput.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Advances the read position past a UTF-8 byte order mark if one is present at the
 * current position, and rejects input that starts with a UTF-16 (LE or BE) mark.
 *
 * @throws IOException declared by the signature; not thrown directly by this method
 */
private final void skipOptionalBOM() throws IOException {
  if (checkBom(ByteOrderMark.UTF_8)) {
    bufferPtr += ByteOrderMark.UTF_8.length();
    return;
  }

  final boolean startsWithUtf16Mark =
      checkBom(ByteOrderMark.UTF_16LE) || checkBom(ByteOrderMark.UTF_16BE);
  if (startsWithUtf16Mark) {
    throw UserException.dataReadError()
      .message("UTF-16 files not supported")
      .build(logger);
  }
}
Example #13
Source File: XMLUtils.java From modernmt with Apache License 2.0 | 5 votes |
/**
 * Creates an {@link XMLEventReader} over the given stream. The charset comes from a leading
 * UTF-8, UTF-16BE or UTF-16LE byte order mark when one is present (the mark itself is
 * excluded from the data); otherwise the charset returned by {@code UTF8Charset.get()} is used.
 *
 * @param stream the XML input
 * @return an event reader over the decoded stream
 * @throws XMLStreamException if probing for the mark fails with an I/O error or the reader
 *         cannot be created
 */
public static XMLEventReader createEventReader(InputStream stream) throws XMLStreamException {
    Charset effectiveCharset = UTF8Charset.get();
    BOMInputStream bomAwareStream = new BOMInputStream(stream, false,
            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE);

    try {
        if (bomAwareStream.hasBOM()) {
            effectiveCharset = Charset.forName(bomAwareStream.getBOMCharsetName());
        }
    } catch (IOException e) {
        throw new XMLStreamException(e);
    }

    XMLFixInputStreamReader decoded = new XMLFixInputStreamReader(bomAwareStream, effectiveCharset);
    return XMLInputFactory.newInstance().createXMLEventReader(decoded);
}
Example #14
Source File: MD5Digester.java From p4ic4idea with Apache License 2.0 | 5 votes |
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset, boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding) throws IOException { try (BOMInputStream unicodeInputStream = new BOMInputStream(inStream, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE); InputStreamReader encodedStreamReader = new InputStreamReader(unicodeInputStream, charset)) { CharsetEncoder utf8CharsetEncoder = CharsetDefs.UTF8.newEncoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); char[] buffer = new char[bufferSize]; int read; while ((read = encodedStreamReader.read(buffer)) > 0) { // Convert encoded stream to UTF8 since server digest is UTF8 ByteBuffer utf8ByteBuffer = utf8CharsetEncoder .encode(CharBuffer.wrap(buffer, 0, read)); if (isRequireLineEndingConvert) { ByteBuffer convert = findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert( encodedStreamReader, utf8CharsetEncoder, utf8ByteBuffer, clientLineEnding); update(convert.array(), convert.arrayOffset(), convert.limit()); } else { update(utf8ByteBuffer.array(), utf8ByteBuffer.arrayOffset(), utf8ByteBuffer.limit()); } } } }
Example #15
Source File: SubmitAndSyncUtf16FileTypeTest.java From p4ic4idea with Apache License 2.0 | 5 votes |
/**
 * Reads the given file with any leading UTF-16LE/UTF-16BE byte order mark excluded, runs the
 * remaining bytes through a {@code CharsetConverter} built from {@code utf16} to
 * {@code CharsetDefs.UTF8}, and returns the limit of the converted buffer.
 *
 * @param testResourceFile the file to read
 * @param utf16 the source charset handed to the converter
 * @return the limit of the buffer returned by the converter
 * @throws Exception if reading or converting fails
 */
private long getUtf16FileSizeAfterRemoveBomAndEncodedByUtf8(File testResourceFile, Charset utf16) throws Exception {
    try (BOMInputStream bomFreeStream = new BOMInputStream(
            new FileInputStream(testResourceFile),
            false,
            ByteOrderMark.UTF_16LE,
            ByteOrderMark.UTF_16BE)) {
        // include == false: a detected mark is not part of the bytes read below
        ByteBuffer content = ByteBuffer.wrap(IOUtils.toByteArray(bomFreeStream));
        CharsetConverter toUtf8 = new CharsetConverter(utf16, CharsetDefs.UTF8);
        return toUtf8.convert(content).limit();
    }
}
Example #16
Source File: MD5Digester.java From p4ic4idea with Apache License 2.0 | 5 votes |
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset, boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding) throws IOException { try (BOMInputStream unicodeInputStream = new BOMInputStream(inStream, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE); InputStreamReader encodedStreamReader = new InputStreamReader(unicodeInputStream, charset)) { CharsetEncoder utf8CharsetEncoder = CharsetDefs.UTF8.newEncoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); char[] buffer = new char[bufferSize]; int read; while ((read = encodedStreamReader.read(buffer)) > 0) { // Convert encoded stream to UTF8 since server digest is UTF8 ByteBuffer utf8ByteBuffer = utf8CharsetEncoder .encode(CharBuffer.wrap(buffer, 0, read)); if (isRequireLineEndingConvert) { ByteBuffer convert = findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert( encodedStreamReader, utf8CharsetEncoder, utf8ByteBuffer, clientLineEnding); update(convert.array(), convert.arrayOffset(), convert.limit()); } else { update(utf8ByteBuffer.array(), utf8ByteBuffer.arrayOffset(), utf8ByteBuffer.limit()); } } } }
Example #17
Source File: WebResponse.java From htmlunit with Apache License 2.0 | 5 votes |
/** * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br> * * Returns the response content as a string, using the specified charset, * rather than the charset/encoding specified in the server response. * If there is a bom header the charset parameter will be overwritten by the bom. * @param encoding the charset/encoding to use to convert the response content into a string * @param ignoreUtf8Bom if true utf8 bom header will be ignored * @return the response content as a string or null if the content retrieval was failing */ public String getContentAsString(final Charset encoding, final boolean ignoreUtf8Bom) { if (responseData_ != null) { try (InputStream in = responseData_.getInputStreamWithBomIfApplicable(BOM_HEADERS)) { if (in instanceof BOMInputStream) { try (BOMInputStream bomIn = (BOMInputStream) in) { // there seems to be a bug in BOMInputStream // we have to call this before hasBOM(ByteOrderMark) if (bomIn.hasBOM()) { if (!ignoreUtf8Bom && bomIn.hasBOM(ByteOrderMark.UTF_8)) { return IOUtils.toString(bomIn, UTF_8); } if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) { return IOUtils.toString(bomIn, UTF_16BE); } if (bomIn.hasBOM(ByteOrderMark.UTF_16LE)) { return IOUtils.toString(bomIn, UTF_16LE); } } return IOUtils.toString(bomIn, encoding); } } return IOUtils.toString(in, encoding); } catch (final IOException e) { LOG.warn(e.getMessage(), e); } } return null; }
Example #18
Source File: MD5Digester.java From p4ic4idea with Apache License 2.0 | 5 votes |
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset, boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding) throws IOException { try (BOMInputStream unicodeInputStream = new BOMInputStream(inStream, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE); InputStreamReader encodedStreamReader = new InputStreamReader(unicodeInputStream, charset)) { CharsetEncoder utf8CharsetEncoder = CharsetDefs.UTF8.newEncoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); char[] buffer = new char[bufferSize]; int read; while ((read = encodedStreamReader.read(buffer)) > 0) { // Convert encoded stream to UTF8 since server digest is UTF8 ByteBuffer utf8ByteBuffer = utf8CharsetEncoder .encode(CharBuffer.wrap(buffer, 0, read)); if (isRequireLineEndingConvert) { ByteBuffer convert = findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert( encodedStreamReader, utf8CharsetEncoder, utf8ByteBuffer, clientLineEnding); update(convert.array(), convert.arrayOffset(), convert.limit()); } else { update(utf8ByteBuffer.array(), utf8ByteBuffer.arrayOffset(), utf8ByteBuffer.limit()); } } } }
Example #19
Source File: SubmitAndSyncUtf16FileTypeTest.java From p4ic4idea with Apache License 2.0 | 5 votes |
/**
 * Reads the given file with any leading UTF-16LE/UTF-16BE byte order mark excluded, runs the
 * remaining bytes through a {@code CharsetConverter} built from {@code utf16} to
 * {@code CharsetDefs.UTF8}, and returns the limit of the converted buffer.
 *
 * @param testResourceFile the file to read
 * @param utf16 the source charset handed to the converter
 * @return the limit of the buffer returned by the converter
 * @throws Exception if reading or converting fails
 */
private long getUtf16FileSizeAfterRemoveBomAndEncodedByUtf8(File testResourceFile, Charset utf16) throws Exception {
    try (BOMInputStream bomFreeStream = new BOMInputStream(
            new FileInputStream(testResourceFile),
            false,
            ByteOrderMark.UTF_16LE,
            ByteOrderMark.UTF_16BE)) {
        // include == false: a detected mark is not part of the bytes read below
        ByteBuffer content = ByteBuffer.wrap(IOUtils.toByteArray(bomFreeStream));
        CharsetConverter toUtf8 = new CharsetConverter(utf16, CharsetDefs.UTF8);
        return toUtf8.convert(content).limit();
    }
}
Example #20
Source File: MD5Digester.java From p4ic4idea with Apache License 2.0 | 5 votes |
private void digestEncodedStreamToUtf8(@Nonnull InputStream inStream, @Nonnull Charset charset, boolean isRequireLineEndingConvert, @Nullable ClientLineEnding clientLineEnding) throws IOException { try (BOMInputStream unicodeInputStream = new BOMInputStream(inStream, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE); InputStreamReader encodedStreamReader = new InputStreamReader(unicodeInputStream, charset)) { CharsetEncoder utf8CharsetEncoder = CharsetDefs.UTF8.newEncoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); char[] buffer = new char[bufferSize]; int read; while ((read = encodedStreamReader.read(buffer)) > 0) { // Convert encoded stream to UTF8 since server digest is UTF8 ByteBuffer utf8ByteBuffer = utf8CharsetEncoder .encode(CharBuffer.wrap(buffer, 0, read)); if (isRequireLineEndingConvert) { ByteBuffer convert = findAndReplaceEncodedClientLineEndingIfRequireLineEndingCovert( encodedStreamReader, utf8CharsetEncoder, utf8ByteBuffer, clientLineEnding); update(convert.array(), convert.arrayOffset(), convert.limit()); } else { update(utf8ByteBuffer.array(), utf8ByteBuffer.arrayOffset(), utf8ByteBuffer.limit()); } } } }
Example #21
Source File: SubmitAndSyncUtf16FileTypeTest.java From p4ic4idea with Apache License 2.0 | 5 votes |
/**
 * Reads the given file with any leading UTF-16LE/UTF-16BE byte order mark excluded, runs the
 * remaining bytes through a {@code CharsetConverter} built from {@code utf16} to
 * {@code CharsetDefs.UTF8}, and returns the limit of the converted buffer.
 *
 * @param testResourceFile the file to read
 * @param utf16 the source charset handed to the converter
 * @return the limit of the buffer returned by the converter
 * @throws Exception if reading or converting fails
 */
private long getUtf16FileSizeAfterRemoveBomAndEncodedByUtf8(File testResourceFile, Charset utf16) throws Exception {
    try (BOMInputStream bomFreeStream = new BOMInputStream(
            new FileInputStream(testResourceFile),
            false,
            ByteOrderMark.UTF_16LE,
            ByteOrderMark.UTF_16BE)) {
        // include == false: a detected mark is not part of the bytes read below
        ByteBuffer content = ByteBuffer.wrap(IOUtils.toByteArray(bomFreeStream));
        CharsetConverter toUtf8 = new CharsetConverter(utf16, CharsetDefs.UTF8);
        return toUtf8.convert(content).limit();
    }
}
Example #22
Source File: StreamDecoder.java From batfish with Apache License 2.0 | 5 votes |
/**
 * Wraps the given stream in a {@code BOMInputStream} configured with the UTF-8, UTF-16
 * (BE/LE) and UTF-32 (BE/LE) byte order marks.
 *
 * @param inputStream the stream to wrap
 * @return the wrapping stream
 */
private static @Nonnull BOMInputStream bomInputStream(@Nonnull InputStream inputStream) {
  // A fresh array per call, handed to the varargs constructor as-is.
  ByteOrderMark[] detectableMarks = {
    ByteOrderMark.UTF_8,
    ByteOrderMark.UTF_16BE,
    ByteOrderMark.UTF_16LE,
    ByteOrderMark.UTF_32BE,
    ByteOrderMark.UTF_32LE
  };
  return new BOMInputStream(inputStream, detectableMarks);
}
Example #23
Source File: ChakraTest.java From es6draft with MIT License | 5 votes |
/**
 * Maps the byte order mark detected on the given stream to a charset.
 * UTF-8 is the result both for a UTF-8 mark and when none of the three tested marks applies.
 *
 * @param bis the stream to query for its byte order mark
 * @return UTF-16LE or UTF-16BE for the corresponding mark, otherwise UTF-8
 * @throws IOException if the mark cannot be read from the stream
 */
private static Charset charsetFor(BOMInputStream bis) throws IOException {
    final ByteOrderMark detected = bis.getBOM();

    final Charset result;
    if (ByteOrderMark.UTF_8.equals(detected)) {
        result = StandardCharsets.UTF_8;
    } else if (ByteOrderMark.UTF_16LE.equals(detected)) {
        result = StandardCharsets.UTF_16LE;
    } else if (ByteOrderMark.UTF_16BE.equals(detected)) {
        result = StandardCharsets.UTF_16BE;
    } else {
        // No mark (or one that is not tested above): fall back to UTF-8.
        result = StandardCharsets.UTF_8;
    }
    return result;
}
Example #24
Source File: StreamUtil.java From iaf with Apache License 2.0 | 5 votes |
/**
 * Returns a Reader that decodes the InputStream with the character set named by its byte
 * order mark (UTF-8, UTF-16LE or UTF-16BE). If no BOM is found, {@code defaultCharset} is used.
 *
 * @param inputStream the stream to read
 * @param defaultCharset the charset name to use when no BOM is detected
 * @return a reader over a buffered view of the stream
 * @throws IOException if the BOM cannot be read or the charset name is not supported
 */
public static Reader getCharsetDetectingInputStreamReader(InputStream inputStream, String defaultCharset) throws IOException {
	BOMInputStream bomAwareStream = new BOMInputStream(
			inputStream, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE);

	ByteOrderMark detected = bomAwareStream.getBOM();
	String charsetName;
	if (detected != null) {
		charsetName = detected.getCharsetName();
	} else {
		charsetName = defaultCharset;
	}

	BufferedInputStream buffered = new BufferedInputStream(bomAwareStream);
	return new InputStreamReader(buffered, charsetName);
}
Example #25
Source File: CharsetIdentification.java From storm-crawler with Apache License 2.0 | 5 votes |
/**
 * Detects a leading UTF-8, UTF-16 (LE/BE) or UTF-32 (LE/BE) byte order mark and returns
 * the name of the corresponding charset.
 *
 * @param byteData the content to inspect; may be {@code null}
 * @return the charset name of the detected BOM, or {@code null} if {@code byteData} is
 *         {@code null}, no BOM is present or the data cannot be read
 */
private static String getCharsetFromBOM(final byte[] byteData) {
    if (byteData == null) {
        return null;
    }
    // The marks are listed explicitly: the one-argument BOMInputStream constructor
    // only looks for the UTF-8 mark, so UTF-16/UTF-32 content would go undetected.
    try (BOMInputStream bomIn = new BOMInputStream(
            new ByteArrayInputStream(byteData),
            ByteOrderMark.UTF_8,
            ByteOrderMark.UTF_16LE,
            ByteOrderMark.UTF_16BE,
            ByteOrderMark.UTF_32LE,
            ByteOrderMark.UTF_32BE)) {
        ByteOrderMark bom = bomIn.getBOM();
        if (bom != null) {
            return bom.getCharsetName();
        }
    } catch (IOException e) {
        // Detection is best effort: an unreadable head is treated as "no BOM".
        return null;
    }
    return null;
}
Example #26
Source File: BOMInputStream.java From aion-germany with GNU General Public License v3.0 | 5 votes |
/**
 * Orders byte order marks by descending length.
 *
 * @param bom1 the first mark
 * @param bom2 the second mark
 * @return -1 if {@code bom1} is longer, 1 if {@code bom2} is longer, 0 if both have the same length
 */
public int compare(ByteOrderMark bom1, ByteOrderMark bom2) {
    final int firstLength = bom1.length();
    final int secondLength = bom2.length();
    // The longer mark sorts first.
    return firstLength > secondLength ? -1 : (secondLength > firstLength ? 1 : 0);
}
Example #27
Source File: CsvInput.java From hop with Apache License 2.0 | 5 votes |
/**
 * Reads the first line of the given file (with a leading UTF-8/UTF-16 byte order mark
 * excluded) and splits it into field names using the delimiter, enclosure and escape
 * character from the metadata. Enclosures are removed when one is configured, and the
 * names are passed through {@code trimFieldNames}.
 *
 * @param fileName the file to read the header line from
 * @param csvInputMeta the CSV settings (delimiter, enclosure, encoding, escape character)
 * @return the field names found on the first line
 * @throws HopException if the file cannot be read
 */
String[] readFieldNamesFromFile( String fileName, CsvInputMeta csvInputMeta ) throws HopException {
  String delimiter = environmentSubstitute( csvInputMeta.getDelimiter() );
  String enclosure = environmentSubstitute( csvInputMeta.getEnclosure() );
  String realEncoding = environmentSubstitute( csvInputMeta.getEncoding() );

  try ( FileObject fileObject = HopVfs.getFileObject( fileName );
        BOMInputStream inputStream = new BOMInputStream( HopVfs.getInputStream( fileObject ),
          ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE ) ) {
    // Without a configured encoding the reader uses the platform default charset.
    InputStreamReader headerReader = Utils.isEmpty( realEncoding )
      ? new InputStreamReader( inputStream )
      : new InputStreamReader( inputStream, realEncoding );

    EncodingType encodingType = EncodingType.guessEncodingType( headerReader.getEncoding() );
    String headerLine = TextFileInput.getLine( log, headerReader, encodingType,
      TextFileInputMeta.FILE_FORMAT_UNIX, new StringBuilder( 1000 ) );

    String[] names = TextFileLineUtil.guessStringsFromLine( log, headerLine, delimiter, enclosure,
      csvInputMeta.getEscapeCharacter() );
    if ( !Utils.isEmpty( csvInputMeta.getEnclosure() ) ) {
      removeEnclosure( names, csvInputMeta.getEnclosure() );
    }
    trimFieldNames( names );
    return names;
  } catch ( IOException e ) {
    throw new HopFileException(
      BaseMessages.getString( PKG, "CsvInput.Exception.CreateFieldMappingError" ), e );
  }
}
Example #28
Source File: WebResponse.java From HtmlUnit-Android with Apache License 2.0 | 5 votes |
/** * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br> * * Returns the response content as a string, using the specified charset, * rather than the charset/encoding specified in the server response. * If there is a bom header the charset parameter will be overwritten by the bom. * @param encoding the charset/encoding to use to convert the response content into a string * @param ignoreUtf8Bom if true utf8 bom header will be ignored * @return the response content as a string or null if the content retrieval was failing */ public String getContentAsString(final Charset encoding, final boolean ignoreUtf8Bom) { if (responseData_ != null) { try (InputStream in = responseData_.getInputStream()) { if (in != null) { try (BOMInputStream bomIn = new BOMInputStream(in, BOM_HEADERS)) { // there seems to be a bug in BOMInputStream // we have to call this before hasBOM(ByteOrderMark) if (bomIn.hasBOM()) { if (!ignoreUtf8Bom && bomIn.hasBOM(ByteOrderMark.UTF_8)) { return IOUtils.toString(bomIn, UTF_8); } if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) { return IOUtils.toString(bomIn, UTF_16BE); } if (bomIn.hasBOM(ByteOrderMark.UTF_16LE)) { return IOUtils.toString(bomIn, UTF_16LE); } } return IOUtils.toString(bomIn, encoding); } } } catch (final IOException e) { LOG.warn(e); } } return null; }
Example #29
Source File: BOMInputStream.java From aion-germany with GNU General Public License v3.0 | 5 votes |
/** * Constructs a new BOM InputStream that detects the specified BOMs and optionally includes them. * * @param delegate * the InputStream to delegate to * @param include * true to include the specified BOMs or false to exclude them * @param boms * The BOMs to detect and optionally exclude */ public BOMInputStream(InputStream delegate, boolean include, ByteOrderMark... boms) { super(delegate); if (boms == null || boms.length == 0) { throw new IllegalArgumentException("No BOMs specified"); } this.include = include; // Sort the BOMs to match the longest BOM first because some BOMs have the same starting two bytes. Arrays.sort(boms, ByteOrderMarkLengthComparator); this.boms = Arrays.asList(boms); }
Example #30
Source File: BOMInputStream.java From aion-germany with GNU General Public License v3.0 | 5 votes |
/** * Return the BOM (Byte Order Mark). * * @return The BOM or null if none * @throws IOException * if an error reading the first bytes of the stream occurs */ public ByteOrderMark getBOM() throws IOException { if (firstBytes == null) { fbLength = 0; // BOMs are sorted from longest to shortest final int maxBomSize = boms.get(0).length(); firstBytes = new int[maxBomSize]; // Read first maxBomSize bytes for (int i = 0; i < firstBytes.length; i++) { firstBytes[i] = in.read(); fbLength++; if (firstBytes[i] < 0) { break; } } // match BOM in firstBytes byteOrderMark = find(); if (byteOrderMark != null) { if (!include) { if (byteOrderMark.length() < firstBytes.length) { fbIndex = byteOrderMark.length(); } else { fbLength = 0; } } } } return byteOrderMark; }