com.optimaize.langdetect.LanguageDetector Java Examples

The following examples show how to use com.optimaize.langdetect.LanguageDetector. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Translator.java    From KaellyBot with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Returns the shared language detector, building it on first use.
 *
 * <p>The detector is built from the profiles read for the lower-cased
 * abbreviation of every {@link Language} value. If reading the profiles
 * fails, the error is logged, the cached field stays {@code null}, and the
 * next call tries again.
 *
 * @return the cached detector, or {@code null} if the profiles could not be read
 */
private static LanguageDetector getLanguageDetector(){
    if (languageDetector == null){
        try {
            List<String> languages = new ArrayList<>();
            for (Language lg : Language.values()) {
                // Locale.ROOT keeps the profile names independent of the JVM's default
                // locale (e.g. under a Turkish locale "I" would lower-case to a dotless i).
                languages.add(lg.getAbrev().toLowerCase(java.util.Locale.ROOT));
            }

            List<LanguageProfile> languageProfiles = new LanguageProfileReader().read(languages);
            languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                            .withProfiles(languageProfiles).build();
        }
        catch (IOException e) {
            LOG.error("Translator.getLanguageDetector", e);
        }
    }
    return languageDetector;
}
 
Example #2
Source File: AbstractOptimaizeFilter.java    From modernmt with Apache License 2.0 6 votes vote down vote up
/**
 * Hands out the detector held in {@code detectorInstance}, creating it from
 * all built-in language profiles when the field is still {@code null}.
 *
 * @return the detector stored in {@code detectorInstance}
 * @throws RuntimeIOException wrapping the {@link IOException} raised while building the detector
 */
protected final LanguageDetector getLanguageDetector() {
    // Already built: skip the lock entirely.
    if (detectorInstance != null)
        return detectorInstance;

    synchronized (AbstractOptimaizeFilter.class) {
        // Check again while holding the class lock before building.
        if (detectorInstance == null) {
            try {
                LanguageDetectorBuilder builder = LanguageDetectorBuilder.create(NgramExtractors.standard())
                        .shortTextAlgorithm(0);
                detectorInstance = builder
                        .withProfiles(new LanguageProfileReader().readAllBuiltIn())
                        .build();
            } catch (IOException ioFailure) {
                throw new RuntimeIOException(ioFailure);
            }
        }
    }

    return detectorInstance;
}
 
Example #3
Source File: AbstractOptimaizeFilter.java    From modernmt with Apache License 2.0 6 votes vote down vote up
/**
 * Guesses the language of the given text.
 *
 * @param text           the text to analyse
 * @param largeText      when {@code true} the large-text factory is used; otherwise a
 *                       factory configured only with the URL text filter
 * @param minProbability threshold applied to the first candidate's probability
 * @return the language of the first candidate's locale, or {@code null} when the
 *         detector returns no candidates, or when the first candidate is below
 *         {@code minProbability} and more than one candidate was returned
 */
protected String guessLanguage(CharSequence text, boolean largeText, float minProbability) {
    LanguageDetector languageDetector = getLanguageDetector();

    TextObjectFactory textFactory = largeText
            ? CommonTextObjectFactories.forDetectingOnLargeText()
            : new TextObjectFactoryBuilder()
                    .withTextFilter(UrlTextFilter.getInstance())
                    .build();

    TextObject input = textFactory.create().append(text);
    List<DetectedLanguage> candidates = languageDetector.getProbabilities(input);

    if (candidates.isEmpty())
        return null;

    DetectedLanguage first = candidates.get(0);
    boolean belowThreshold = first.getProbability() < minProbability;
    // A sole candidate is accepted even when it falls below the threshold.
    if (belowThreshold && candidates.size() > 1)
        return null;

    return first.getLocale().getLanguage();
}
 
Example #4
Source File: CommandLineInterface.java    From jstarcraft-nlp with Apache License 2.0 5 votes vote down vote up
/**
 * Runs language detection on every file in the argument list (--detectlang option)
 * and prints, per file, the file name followed by the detected probabilities.
 *
 * <pre>
 * usage: --detectlang -d [profile directory] -a [alpha] -s [seed] [test file(s)]
 * </pre>
 *
 * @throws IOException if the detector cannot be built or a file cannot be read
 */
public void detectLang() throws IOException {
    LanguageDetector detector = makeDetector();
    TextObjectFactory largeTextFactory = CommonTextObjectFactories.forDetectingOnLargeText();

    for (String path : arglist) {
        // Each file is read as UTF-8 and closed before moving on to the next one.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(path), "utf-8"))) {
            TextObject content = largeTextFactory.create().append(reader);
            System.out.println(path + ":" + detector.getProbabilities(content));
        }
    }
}
 
Example #5
Source File: CommandLineInterface.java    From jstarcraft-nlp with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a detector from all language profiles read from the directory named by
 * the "directory" parameter, configured with the "alpha" and "seed" parameters.
 *
 * @return the configured detector
 * @throws IOException if the profiles cannot be read
 */
private LanguageDetector makeDetector() throws IOException {
    double alphaValue = getParamDouble("alpha", DEFAULT_ALPHA);
    File profileDir = new File(requireParamString("directory") + "/");
    Optional<Long> seedValue = Optional.fromNullable(getParamLongOrNull("seed"));

    List<LanguageProfile> profiles = new LanguageProfileReader().readAll(profileDir);

    return LanguageDetectorBuilder.create(NgramExtractors.standard())
            .alpha(alphaValue)
            .seed(seedValue)
            .shortTextAlgorithm(50)
            .withProfiles(profiles)
            .build();
}
 
Example #6
Source File: CommandLineInterface.java    From language-detector with Apache License 2.0 5 votes vote down vote up
/**
 * Detects the language of each file named in the argument list (--detectlang option),
 * printing one line per file: the file name, a colon, and the detected probabilities.
 *
 * <pre>
 * usage: --detectlang -d [profile directory] -a [alpha] -s [seed] [test file(s)]
 * </pre>
 *
 * @throws IOException if the detector cannot be built or a file cannot be read
 */
public void detectLang() throws IOException {
    LanguageDetector detector = makeDetector();
    TextObjectFactory factory = CommonTextObjectFactories.forDetectingOnLargeText();

    for (String inputFile : arglist) {
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(
                        new FileInputStream(inputFile), "utf-8"))) {
            TextObject fileText = factory.create().append(in);
            List<DetectedLanguage> detected = detector.getProbabilities(fileText);
            String report = inputFile + ":" + detected;
            System.out.println(report);
        }
    }
}
 
Example #7
Source File: CommandLineInterface.java    From language-detector with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a detector that uses every language profile read from the directory
 * supplied through the "directory" parameter.
 *
 * @return the detector built with the "alpha" and "seed" parameters applied
 * @throws IOException if the profiles cannot be read
 */
private LanguageDetector makeDetector() throws IOException {
    double alphaParam = getParamDouble("alpha", DEFAULT_ALPHA);
    String directoryPath = requireParamString("directory") + "/";
    Optional<Long> seedParam = Optional.fromNullable(getParamLongOrNull("seed"));

    // Profiles are loaded before any builder is created.
    List<LanguageProfile> loadedProfiles = new LanguageProfileReader().readAll(new File(directoryPath));

    LanguageDetectorBuilder configured = LanguageDetectorBuilder.create(NgramExtractors.standard())
            .alpha(alphaParam)
            .seed(seedParam)
            .shortTextAlgorithm(50);
    return configured.withProfiles(loadedProfiles).build();
}
 
Example #8
Source File: VideoBlocker.java    From SkyTube with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Returns the {@code languageDetector} field as-is; no initialisation happens here.
 *
 * @return the current value of {@code languageDetector}
 */
LanguageDetector getLanguageDetector() {
	return languageDetector;
}