Java Code Examples for org.mozilla.universalchardet.UniversalDetector#isDone()
The following examples show how to use
org.mozilla.universalchardet.UniversalDetector#isDone().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CrawlUtils.java From Asqatasun with GNU Affero General Public License v3.0 | 7 votes |
/**
 * Detects the character encoding of the bytes available from the given stream.
 *
 * <p>The stream is read in 4096-byte chunks that are handed to a
 * {@code UniversalDetector} until the stream is exhausted or the detector
 * reports that it is done. The stream is consumed but not closed here.
 *
 * @param is the stream whose content is inspected
 * @return the detected charset name when one was found and
 *         {@code CrawlUtils.isValidCharset} accepts it; otherwise
 *         {@code DEFAULT_CHARSET}
 * @throws java.io.IOException if reading from the stream fails
 */
public static String extractCharset(InputStream is) throws java.io.IOException {
    final byte[] chunk = new byte[4096];
    final UniversalDetector detector = new UniversalDetector(null);

    // Read first, then test the detector, exactly one chunk at a time.
    int count = is.read(chunk);
    while (count > 0 && !detector.isDone()) {
        detector.handleData(chunk, 0, count);
        count = is.read(chunk);
    }
    detector.dataEnd();

    final String detected = detector.getDetectedCharset();
    LOGGER.debug(detected == null
            ? "No encoding detected."
            : "Detected encoding = " + detected);
    detector.reset();

    final boolean usable = detected != null && CrawlUtils.isValidCharset(detected);
    return usable ? detected : DEFAULT_CHARSET;
}
Example 2
Source File: CsvImporter.java From fingen with Apache License 2.0 | 6 votes |
/**
 * Detects the character encoding of the file named by {@code mFileName}.
 *
 * <p>Detection is best effort: any failure while opening, reading or
 * analysing the file is printed and the fallback value is returned instead
 * of propagating the error.
 *
 * @return the detected charset name, or {@code "UTF-8"} when nothing was
 *         detected or detection failed
 * @throws IOException declared for callers; failures inside the method are
 *         caught and reported rather than rethrown
 */
private String detectCharset() throws IOException {
    String result = "UTF-8";
    UniversalDetector detector = new UniversalDetector(null);
    byte[] buf = new byte[4096];
    // try-with-resources closes the stream even when read() or the detector throws;
    // previously close() was only reached on the happy path.
    try (FileInputStream fis = new FileInputStream(mFileName)) {
        int nread;
        while ((nread = fis.read(buf)) > 0 && !detector.isDone()) {
            detector.handleData(buf, 0, nread);
        }
        detector.dataEnd();
        String encoding = detector.getDetectedCharset();
        if (encoding != null) {
            result = encoding;
        }
        detector.reset();
    } catch (Exception e) {
        // Deliberately best effort: keep the fallback (or already detected) value.
        e.printStackTrace();
    }
    return result;
}
Example 3
Source File: UniversalEncodingDetector.java From onedev with MIT License | 6 votes |
public static boolean isBinary(InputStream in) throws IOException { byte[] buf = new byte[4]; in.mark(5); int len = in.read(buf); in.reset(); UniversalDetector detector = new UniversalDetector(null); detector.handleData(buf, 0, len); if (detector.isDone()) { return false; } //Not UTF check ASCII text in.mark(LOOKAHEAD); len = 0; int b; while ((b = in.read()) != -1 && len < (LOOKAHEAD - 192)) { len++; if (b == 0) { in.reset(); return true; } } in.reset(); return false; }
Example 4
Source File: Charset.java From dualsub with GNU General Public License v3.0 | 6 votes |
public static String detect(InputStream inputStream) throws IOException { UniversalDetector detector = Charset.getSingleton() .getCharsetDetector(); byte[] buf = new byte[4096]; int nread; while ((nread = inputStream.read(buf)) > 0 && !detector.isDone()) { detector.handleData(buf, 0, nread); } detector.dataEnd(); String encoding = detector.getDetectedCharset(); detector.reset(); inputStream.close(); if (encoding == null) { // If none encoding is detected, we assume UTF-8 encoding = UTF8; } return encoding; }
Example 5
Source File: TaskIo.java From jdotxt with GNU General Public License v3.0 | 6 votes |
/**
 * Detects the character encoding of the given file.
 *
 * @param file the file to inspect
 * @return the detected charset name, or {@code DEFAULT_ENCODING} when none
 *         was detected
 * @throws IOException if the file cannot be opened or read
 */
private static String detectEncoding(File file) throws IOException {
    UniversalDetector detector = new UniversalDetector(null);
    FileInputStream fis = new FileInputStream(file);
    try {
        byte[] buf = new byte[4096];
        int nread;
        while ((nread = fis.read(buf)) > 0 && !detector.isDone()) {
            detector.handleData(buf, 0, nread);
        }
    } finally {
        // Close on every path; previously a failing read() leaked the stream.
        Util.closeStream(fis);
    }
    detector.dataEnd();
    String encoding = detector.getDetectedCharset();
    return encoding != null ? encoding : DEFAULT_ENCODING;
}
Example 6
Source File: LyricView.java From MusicPlayer_XiangDa with GNU General Public License v3.0 | 5 votes |
/**
 * Selects the lyric file to use, detecting its character encoding first.
 *
 * <p>A {@code null} or missing file resets the view and clears the
 * remembered path. A file whose path equals the remembered one is ignored.
 * Otherwise the path is remembered, the view is reset, and the work is
 * delegated to {@code setLyricFile(File, String)} with the detected
 * encoding, or {@code "UTF-8"} when none was detected. I/O failures are
 * printed and not propagated.
 *
 * @param file the lyric file, may be {@code null}
 */
public void setLyricFile(File file) {
    if (file == null || !file.exists()) {
        reset();
        mCurrentLyricFilePath = "";
        return;
    }
    if (file.getPath().equals(mCurrentLyricFilePath)) {
        return;
    }
    mCurrentLyricFilePath = file.getPath();
    reset();

    try {
        String encoding;
        // Close the probe stream on every path and before the file is handed
        // on; previously it leaked on exceptions and stayed open during the
        // delegated call.
        try (FileInputStream fis = new FileInputStream(file)) {
            byte[] buf = new byte[1024];
            UniversalDetector detector = new UniversalDetector(null);
            int nread;
            while ((nread = fis.read(buf)) > 0 && !detector.isDone()) {
                detector.handleData(buf, 0, nread);
            }
            detector.dataEnd();
            encoding = detector.getDetectedCharset();
            detector.reset();
        }
        setLyricFile(file, encoding != null ? encoding : "UTF-8");
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Example 7
Source File: LyricView.java From RetroMusicPlayer with GNU General Public License v3.0 | 5 votes |
/**
 * Selects the lyric file to use, detecting its character encoding first.
 *
 * <p>A {@code null} or missing file resets the view and clears the
 * remembered path. A file whose path equals the remembered one is ignored.
 * Otherwise the path is remembered, the view is reset, and the work is
 * delegated to {@code setLyricFile(File, String)} with the detected
 * encoding, or {@code "UTF-8"} when none was detected. I/O failures are
 * printed and not propagated.
 *
 * @param file the lyric file, may be {@code null}
 */
public void setLyricFile(File file) {
    if (file == null || !file.exists()) {
        reset();
        mCurrentLyricFilePath = "";
        return;
    }
    if (file.getPath().equals(mCurrentLyricFilePath)) {
        return;
    }
    mCurrentLyricFilePath = file.getPath();
    reset();

    try {
        String encoding;
        // Close the probe stream on every path and before the file is handed
        // on; previously it leaked on exceptions and stayed open during the
        // delegated call.
        try (FileInputStream fis = new FileInputStream(file)) {
            byte[] buf = new byte[1024];
            UniversalDetector detector = new UniversalDetector(null);
            int nread;
            while ((nread = fis.read(buf)) > 0 && !detector.isDone()) {
                detector.handleData(buf, 0, nread);
            }
            detector.dataEnd();
            encoding = detector.getDetectedCharset();
            detector.reset();
        }
        setLyricFile(file, encoding != null ? encoding : "UTF-8");
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Example 8
Source File: LocalDocReader.java From TranskribusCore with GNU General Public License v3.0 | 5 votes |
public static String readTextFromFile(File txtFile) throws IOException { byte[] buf = new byte[4096]; java.io.FileInputStream fis = new FileInputStream(txtFile); // (1) UniversalDetector detector = new UniversalDetector(null); // (2) int nread; while ((nread = fis.read(buf)) > 0 && !detector.isDone()) { detector.handleData(buf, 0, nread); } fis.close(); // (3) detector.dataEnd(); // (4) String encoding = detector.getDetectedCharset(); if (encoding != null) { logger.debug("Detected encoding = " + encoding); } else { logger.debug("No encoding detected - use utf-8"); encoding = "utf-8"; } // (5) detector.reset(); String text = FileUtils.readFileToString(txtFile, encoding); //String text = FileUtils.readFileToString(txtFile, "ISO-8859-1"); //logger.debug("text = "+text); return text; }
Example 9
Source File: FileInfoReader.java From editorconfig-netbeans with MIT License | 5 votes |
/**
 * Guesses the charset of the given file object's content.
 *
 * <p>The content is fed to a {@code UniversalDetector}. When the detector
 * yields no name, {@code "ISO-8859-1"} is looked up instead. When reading
 * fails or the name cannot be resolved to a charset, the problem is reported
 * through {@code Exceptions.printStackTrace} and UTF-8 is returned.
 *
 * @param fo the file object whose input stream is inspected
 * @return the resolved charset; UTF-8 unless a charset was resolved before
 *         the failure occurred
 */
protected static Charset guessCharset(FileObject fo) {
    Charset resolved = StandardCharsets.UTF_8;
    try (InputStream in = fo.getInputStream()) {
        final UniversalDetector detector = new UniversalDetector(null);
        final byte[] chunk = new byte[4096];
        for (int count = in.read(chunk);
                count > 0 && !detector.isDone();
                count = in.read(chunk)) {
            detector.handleData(chunk, 0, count);
        }
        detector.dataEnd();

        final String detected = detector.getDetectedCharset();
        final String charsetName = (detected == null) ? "ISO-8859-1" : detected;
        detector.reset();
        resolved = Charset.forName(charsetName);
    } catch (IllegalArgumentException | IOException ex) {
        Exceptions.printStackTrace(ex);
    }
    return resolved;
}
Example 10
Source File: Utils.java From Man-Man with GNU General Public License v3.0 | 5 votes |
/**
 * Detects the character encoding of the decompressed content of a gzip file.
 *
 * @param gzipped the gzip-compressed file to inspect
 * @return the detected charset name, or {@code null} when the detector
 *         reports none
 * @throws IOException if the file cannot be opened, is not readable as
 *         gzip, or reading fails
 */
public static String detectEncodingOfArchive(File gzipped) throws IOException {
    UniversalDetector detector = new UniversalDetector(null);
    // Both streams are declared as resources so the file stream is released
    // even when the GZIPInputStream constructor rejects the file, and both
    // are closed when read() throws.
    try (FileInputStream fis = new FileInputStream(gzipped);
         GZIPInputStream gis = new GZIPInputStream(fis)) {
        byte[] buf = new byte[4096];
        int read;
        while ((read = gis.read(buf)) > 0 && !detector.isDone()) {
            detector.handleData(buf, 0, read);
        }
    }
    detector.dataEnd();
    return detector.getDetectedCharset();
}
Example 11
Source File: FileStorable.java From Readily with MIT License | 5 votes |
/**
 * Guesses the character encoding of the bytes available from the stream.
 *
 * <p>The stream is read in chunks of
 * {@code Constants.ENCODING_HELPER_BUFFER_SIZE} bytes until it is exhausted
 * or the detector reports that it is done. The stream is not closed here.
 *
 * @param is the stream to inspect
 * @return the detected charset name, or {@code Constants.DEFAULT_ENCODING}
 *         when none was detected
 * @throws IOException if reading from the stream fails
 */
public static String guessCharset(InputStream is) throws IOException {
    final UniversalDetector detector = new UniversalDetector(null);
    final byte[] chunk = new byte[Constants.ENCODING_HELPER_BUFFER_SIZE];

    for (int count = is.read(chunk);
            count > 0 && !detector.isDone();
            count = is.read(chunk)) {
        detector.handleData(chunk, 0, count);
    }
    detector.dataEnd();

    final String detected = detector.getDetectedCharset();
    detector.reset();
    return (detected == null) ? Constants.DEFAULT_ENCODING : detected;
}