com.optimaize.langdetect.LanguageDetector Java Examples
The following examples show how to use
com.optimaize.langdetect.LanguageDetector.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Translator.java From KaellyBot with GNU General Public License v3.0 | 6 votes |
/**
 * Returns the shared language detector, building it on first use.
 *
 * <p>Profiles are read for the lower-cased abbreviation of every {@code Language} value.
 * When reading the profiles fails with an {@link IOException}, the error is logged and the
 * cached field is left as it was, so this method can return {@code null}.
 *
 * <p>No synchronization is performed here.
 *
 * @return the cached detector, or {@code null} if it has not been built successfully
 */
private static LanguageDetector getLanguageDetector() {
    if (languageDetector == null) {
        try {
            List<String> languages = new ArrayList<>();
            for (Language lg : Language.values()) {
                // Locale.ROOT keeps the profile names independent of the JVM default locale
                // (under a Turkish locale "I" would otherwise lower-case to a dotless "ı").
                languages.add(lg.getAbrev().toLowerCase(java.util.Locale.ROOT));
            }

            List<LanguageProfile> languageProfiles = new LanguageProfileReader().read(languages);
            languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                    .withProfiles(languageProfiles)
                    .build();
        } catch (IOException e) {
            LOG.error("Translator.getLanguageDetector", e);
        }
    }
    return languageDetector;
}
Example #2
Source File: AbstractOptimaizeFilter.java From modernmt with Apache License 2.0 | 6 votes |
/**
 * Returns the shared detector instance, creating it on first use.
 *
 * <p>The field is checked once without a lock and, if still unset, checked again while
 * holding the {@code AbstractOptimaizeFilter.class} monitor before the detector is built
 * from all built-in profiles with the short-text algorithm set to {@code 0}.
 *
 * <p>NOTE(review): the unlocked read is only safe if {@code detectorInstance} is declared
 * {@code volatile}; the declaration is not visible here - confirm.
 *
 * @return the shared detector instance
 * @throws RuntimeIOException wrapping the {@link IOException} raised while reading profiles
 */
protected final LanguageDetector getLanguageDetector() {
    // Fast path: already initialised, no locking needed.
    if (detectorInstance != null) {
        return detectorInstance;
    }

    synchronized (AbstractOptimaizeFilter.class) {
        // Re-check under the lock; another thread may have built it in the meantime.
        if (detectorInstance == null) {
            try {
                detectorInstance = LanguageDetectorBuilder.create(NgramExtractors.standard())
                        .shortTextAlgorithm(0)
                        .withProfiles(new LanguageProfileReader().readAllBuiltIn())
                        .build();
            } catch (IOException e) {
                throw new RuntimeIOException(e);
            }
        }
        return detectorInstance;
    }
}
Example #3
Source File: AbstractOptimaizeFilter.java From modernmt with Apache License 2.0 | 6 votes |
/**
 * Guesses the language of {@code text} from the first entry reported by the detector.
 *
 * <p>NOTE(review): this relies on {@code getProbabilities} listing the most probable
 * language first - confirm against the library documentation.
 *
 * @param text           the text to analyse
 * @param largeText      {@code true} to use the large-text factory; otherwise a factory
 *                       that only applies the URL text filter is used
 * @param minProbability threshold applied to the first candidate, but only when the
 *                       detector reported more than one candidate
 * @return the language string of the first candidate's locale, or {@code null} when no
 *         candidate was reported or the first of several candidates is below the threshold
 */
protected String guessLanguage(CharSequence text, boolean largeText, float minProbability) {
    LanguageDetector detector = getLanguageDetector();

    TextObjectFactory factory = largeText
            ? CommonTextObjectFactories.forDetectingOnLargeText()
            : new TextObjectFactoryBuilder().withTextFilter(UrlTextFilter.getInstance()).build();

    List<DetectedLanguage> candidates = detector.getProbabilities(factory.create().append(text));
    if (candidates.isEmpty()) {
        return null;
    }

    DetectedLanguage best = candidates.get(0);
    // A sole candidate is accepted whatever its probability; the threshold only
    // applies when there are alternatives.
    boolean hasAlternatives = candidates.size() > 1;
    if (hasAlternatives && best.getProbability() < minProbability) {
        return null;
    }

    return best.getLocale().getLanguage();
}
Example #4
Source File: CommandLineInterface.java From jstarcraft-nlp with Apache License 2.0 | 5 votes |
/**
 * Language detection test for each file (--detectlang option)
 *
 * <pre>
 * usage: --detectlang -d [profile directory] -a [alpha] -s [seed] [test file(s)]
 * </pre>
 *
 * <p>Every file named in the argument list is decoded as UTF-8, fed to the detector, and
 * the resulting probabilities are printed to standard output as
 * {@code filename:probabilities}.
 *
 * @throws IOException if building the detector or reading one of the files fails
 */
public void detectLang() throws IOException {
    LanguageDetector languageDetector = makeDetector();
    TextObjectFactory textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
    for (String filename : arglist) {
        // The charset constant avoids the runtime lookup by name (and the checked
        // UnsupportedEncodingException) that the "utf-8" string form implies.
        try (BufferedReader is = new BufferedReader(new InputStreamReader(
                new FileInputStream(filename), java.nio.charset.StandardCharsets.UTF_8))) {
            TextObject textObject = textObjectFactory.create().append(is);
            List<DetectedLanguage> probabilities = languageDetector.getProbabilities(textObject);
            System.out.println(filename + ":" + probabilities);
        }
    }
}
Example #5
Source File: CommandLineInterface.java From jstarcraft-nlp with Apache License 2.0 | 5 votes |
/**
 * Builds a detector from every language profile found in the configured directory.
 *
 * <p>Reads the {@code alpha} parameter (falling back to {@code DEFAULT_ALPHA}), the
 * required {@code directory} parameter and the optional {@code seed} parameter, and
 * passes {@code 50} to the builder's {@code shortTextAlgorithm} setting.
 *
 * @return the configured detector
 * @throws IOException if the profiles cannot be read from the directory
 */
private LanguageDetector makeDetector() throws IOException {
    double smoothing = getParamDouble("alpha", DEFAULT_ALPHA);
    String profilePath = requireParamString("directory") + "/";
    Optional<Long> randomSeed = Optional.fromNullable(getParamLongOrNull("seed"));

    List<LanguageProfile> profiles = new LanguageProfileReader().readAll(new File(profilePath));

    return LanguageDetectorBuilder.create(NgramExtractors.standard())
            .alpha(smoothing)
            .seed(randomSeed)
            .shortTextAlgorithm(50)
            .withProfiles(profiles)
            .build();
}
Example #6
Source File: CommandLineInterface.java From language-detector with Apache License 2.0 | 5 votes |
/**
 * Language detection test for each file (--detectlang option)
 *
 * <pre>
 * usage: --detectlang -d [profile directory] -a [alpha] -s [seed] [test file(s)]
 * </pre>
 *
 * <p>Each file from the argument list is read as UTF-8 text and handed to the detector;
 * the probabilities it reports are written to standard output in the form
 * {@code filename:probabilities}.
 *
 * @throws IOException if the detector cannot be created or a file cannot be read
 */
public void detectLang() throws IOException {
    LanguageDetector languageDetector = makeDetector();
    TextObjectFactory textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
    for (String filename : arglist) {
        // Use the UTF-8 charset constant rather than the "utf-8" name, which would be
        // resolved at runtime and can raise UnsupportedEncodingException.
        try (BufferedReader is = new BufferedReader(new InputStreamReader(
                new FileInputStream(filename), java.nio.charset.StandardCharsets.UTF_8))) {
            TextObject textObject = textObjectFactory.create().append(is);
            List<DetectedLanguage> probabilities = languageDetector.getProbabilities(textObject);
            System.out.println(filename + ":" + probabilities);
        }
    }
}
Example #7
Source File: CommandLineInterface.java From language-detector with Apache License 2.0 | 5 votes |
/**
 * Creates a detector that uses all language profiles from the given directory.
 *
 * <p>The {@code alpha} parameter defaults to {@code DEFAULT_ALPHA}, the {@code directory}
 * parameter is required, and {@code seed} may be absent. The builder's
 * {@code shortTextAlgorithm} setting is fixed at {@code 50}.
 *
 * @return the detector built from the directory's profiles
 * @throws IOException if reading the profiles fails
 */
private LanguageDetector makeDetector() throws IOException {
    double alphaParam = getParamDouble("alpha", DEFAULT_ALPHA);
    String directoryParam = requireParamString("directory") + "/";
    Optional<Long> seedParam = Optional.fromNullable(getParamLongOrNull("seed"));

    File profileDir = new File(directoryParam);
    List<LanguageProfile> loadedProfiles = new LanguageProfileReader().readAll(profileDir);

    return LanguageDetectorBuilder.create(NgramExtractors.standard())
            .alpha(alphaParam)
            .seed(seedParam)
            .shortTextAlgorithm(50)
            .withProfiles(loadedProfiles)
            .build();
}
Example #8
Source File: VideoBlocker.java From SkyTube with GNU General Public License v3.0 | 4 votes |
/**
 * Returns the value of the {@code languageDetector} field as is; no null check or
 * initialisation is performed here.
 *
 * @return the current {@code languageDetector} value
 */
LanguageDetector getLanguageDetector() {
    return languageDetector;
}