com.optimaize.langdetect.profiles.LanguageProfileReader Java Examples
The following examples show how to use
com.optimaize.langdetect.profiles.LanguageProfileReader.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: OptimaizeDetector.java From vespa with Apache License 2.0 | 6 votes |
static private void initOptimaize() { synchronized (initGuard) { if ((textObjectFactory != null) && (languageDetector != null)) return; // origin: https://github.com/optimaize/language-detector // load all languages: List<LanguageProfile> languageProfiles; try { languageProfiles = new LanguageProfileReader().readAllBuiltIn(); } catch (IOException e) { throw new UncheckedIOException(e); } //build language detector: languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard()) .withProfiles(languageProfiles) .build(); //create a text object factory textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText(); } }
Example #2
Source File: Translator.java From KaellyBot with GNU General Public License v3.0 | 6 votes |
/**
 * Returns the shared language detector, building it on first use from the profiles
 * named by the lower-cased abbreviation of every {@link Language} value.
 *
 * @return the detector, or {@code null} when the language profiles could not be read;
 *         the failure is logged and the field stays unset, so a later call tries again
 */
private static LanguageDetector getLanguageDetector() {
    if (languageDetector == null) {
        try {
            List<String> languages = new ArrayList<>();
            for (Language lg : Language.values()) {
                // Locale.ROOT keeps the profile names locale-independent: the default-locale
                // toLowerCase() turns "I" into a dotless i under e.g. a Turkish locale,
                // which would produce a profile name that does not exist.
                languages.add(lg.getAbrev().toLowerCase(java.util.Locale.ROOT));
            }

            List<LanguageProfile> languageProfiles = new LanguageProfileReader().read(languages);
            languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                    .withProfiles(languageProfiles)
                    .build();
        } catch (IOException e) {
            LOG.error("Translator.getLanguageDetector", e);
        }
    }
    return languageDetector;
}
Example #3
Source File: AbstractOptimaizeFilter.java From modernmt with Apache License 2.0 | 6 votes |
/**
 * Returns the detector stored in {@code detectorInstance}, creating it on first use.
 * Creation happens while holding the {@code AbstractOptimaizeFilter} class monitor,
 * and the field is re-checked inside the monitor before it is assigned.
 *
 * @return the language detector built from all built-in profiles
 * @throws RuntimeIOException if reading the built-in profiles fails
 */
protected final LanguageDetector getLanguageDetector() {
    // Fast path: nothing to build.
    if (detectorInstance != null) {
        return detectorInstance;
    }

    synchronized (AbstractOptimaizeFilter.class) {
        if (detectorInstance == null) {
            try {
                LanguageDetectorBuilder builder = LanguageDetectorBuilder
                        .create(NgramExtractors.standard())
                        .shortTextAlgorithm(0);
                detectorInstance = builder
                        .withProfiles(new LanguageProfileReader().readAllBuiltIn())
                        .build();
            } catch (IOException e) {
                throw new RuntimeIOException(e);
            }
        }
    }
    return detectorInstance;
}
Example #4
Source File: CommandLineInterface.java From jstarcraft-nlp with Apache License 2.0 | 5 votes |
/**
 * Builds a detector from every language profile found in the directory named by the
 * {@code directory} parameter, configured with the {@code alpha} parameter
 * (falling back to {@code DEFAULT_ALPHA}) and the optional {@code seed} parameter.
 *
 * @return the configured language detector
 * @throws IOException if the profiles cannot be read from the directory
 */
private LanguageDetector makeDetector() throws IOException {
    final double smoothing = getParamDouble("alpha", DEFAULT_ALPHA);
    final String directoryPath = requireParamString("directory") + "/";
    final Optional<Long> randomSeed = Optional.fromNullable(getParamLongOrNull("seed"));

    final File profileDir = new File(directoryPath);
    final List<LanguageProfile> profiles = new LanguageProfileReader().readAll(profileDir);

    return LanguageDetectorBuilder.create(NgramExtractors.standard())
            .alpha(smoothing)
            .seed(randomSeed)
            .shortTextAlgorithm(50)
            .withProfiles(profiles)
            .build();
}
Example #5
Source File: LanguageCheckerImpl.java From translator with MIT License | 5 votes |
/**
 * Creates the checker: builds a language detector from all built-in profiles and a
 * text object factory for large texts.
 *
 * <p>If the profiles cannot be read, the failure is logged and {@code languageDetector}
 * is left unassigned; the text object factory is still created.
 */
public LanguageCheckerImpl() {
    // build language detector:
    try {
        languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                .withProfiles(new LanguageProfileReader().readAllBuiltIn()) // load all languages
                .build();
    } catch (IOException e) {
        // Pass the exception itself as the trailing argument so the stack trace and
        // cause are logged, not just the message text.
        log.error("LanguageCheckerImpl {}", e.getMessage(), e);
    }

    // create a text object factory
    textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
}
Example #6
Source File: NonEnglishCheckerService.java From Natty with GNU General Public License v3.0 | 5 votes |
public NonEnglishCheckerService() { try { languageProfiles = new LanguageProfileReader().readAllBuiltIn(); optimaizeDetector = LanguageDetectorBuilder.create(NgramExtractors.standard()) .withProfiles(languageProfiles) .build(); textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText(); tikaDetector = new OptimaizeLangDetector().loadModels(); writer = new LanguageWriter(tikaDetector); } catch (IOException e) { e.printStackTrace(); } }
Example #7
Source File: VideoBlocker.java From SkyTube with GNU General Public License v3.0 | 5 votes |
private LanguageDetectionSingleton() throws IOException { // load all languages List<LanguageProfile> languageProfiles = new LanguageProfileReader().readAllBuiltIn(); // build language detector languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard()) .withProfiles(languageProfiles) .build(); // create a text object factory textObjectFactory = CommonTextObjectFactories.forDetectingShortCleanText(); }
Example #8
Source File: DocumentLanguage.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Initializes the annotator by building the language detector from all built-in
 * profiles and creating a text object factory for large texts.
 *
 * @param aContext the UIMA context (not read by this method)
 * @throws ResourceInitializationException if the built-in profiles cannot be read
 */
@Override
public void doInitialize(UimaContext aContext) throws ResourceInitializationException {
    final List<LanguageProfile> builtInProfiles;
    try {
        builtInProfiles = new LanguageProfileReader().readAllBuiltIn();
    } catch (IOException ioe) {
        throw new ResourceInitializationException(ioe);
    }

    languageDetector = LanguageDetectorBuilder
            .create(NgramExtractors.standard())
            .withProfiles(builtInProfiles)
            .build();
    textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
}
Example #9
Source File: CommandLineInterface.java From language-detector with Apache License 2.0 | 5 votes |
/**
 * Creates a language detector that uses all language profiles from the directory
 * given by the {@code directory} parameter.
 *
 * <p>The {@code alpha} parameter (default {@code DEFAULT_ALPHA}) and the optional
 * {@code seed} parameter are passed on to the builder.
 *
 * @return the configured language detector
 * @throws IOException if reading the profiles fails
 */
private LanguageDetector makeDetector() throws IOException {
    double alphaValue = getParamDouble("alpha", DEFAULT_ALPHA);
    String profilePath = requireParamString("directory") + "/";
    Optional<Long> seedValue = Optional.fromNullable(getParamLongOrNull("seed"));

    List<LanguageProfile> loadedProfiles =
            new LanguageProfileReader().readAll(new File(profilePath));

    LanguageDetectorBuilder builder = LanguageDetectorBuilder.create(NgramExtractors.standard());
    builder = builder.alpha(alphaValue);
    builder = builder.seed(seedValue);
    builder = builder.shortTextAlgorithm(50);
    builder = builder.withProfiles(loadedProfiles);
    return builder.build();
}
Example #10
Source File: DataLanguageDetectorImplTest.java From language-detector with Apache License 2.0 | 5 votes |
public DataLanguageDetectorImplTest() throws IOException { List<LanguageProfile> languageProfiles = new LanguageProfileReader().readAllBuiltIn(); shortDetector = LanguageDetectorBuilder.create(NgramExtractors.standard()) .shortTextAlgorithm(100) .withProfiles(languageProfiles) .build(); longDetector = LanguageDetectorBuilder.create(NgramExtractors.standard()) .shortTextAlgorithm(0) .withProfiles(new LanguageProfileReader().readAllBuiltIn()) .build(); }
Example #11
Source File: LanguageProfileValidator.java From jstarcraft-nlp with Apache License 2.0 | 4 votes |
/**
 * Appends every {@link LanguageProfile} returned by
 * {@link LanguageProfileReader#readAllBuiltIn()} to this validator's profiles.
 *
 * @return this validator, for call chaining
 * @throws IOException if the built-in profiles cannot be read
 */
public LanguageProfileValidator loadAllBuiltInLanguageProfiles() throws IOException {
    final LanguageProfileReader profileReader = new LanguageProfileReader();
    this.languageProfiles.addAll(profileReader.readAllBuiltIn());
    return this;
}
Example #12
Source File: NgramFrequencyDataTest.java From jstarcraft-nlp with Apache License 2.0 | 4 votes |
/**
 * Creates n-gram frequency data over all built-in language profiles, restricted to a
 * single gram size.
 *
 * @param gramSize the only n-gram length to include
 * @return the frequency data for the built-in profiles
 * @throws IOException if the built-in profiles cannot be read
 */
private static NgramFrequencyData forAll(int gramSize) throws IOException {
    final LanguageProfileReader profileReader = new LanguageProfileReader();
    final List<LanguageProfile> builtInProfiles = profileReader.readAllBuiltIn();
    return NgramFrequencyData.create(builtInProfiles, ImmutableSet.of(gramSize));
}
Example #13
Source File: OptimaizeLanguageGuesser.java From datashare with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Creates a guesser backed by a language detector built from all built-in profiles.
 *
 * @throws IOException if the built-in language profiles cannot be read
 */
public OptimaizeLanguageGuesser() throws IOException {
    LanguageDetectorBuilder detectorBuilder =
            LanguageDetectorBuilder.create(NgramExtractors.standard());
    // Profiles are read after the builder has been created, as in the chained form.
    detectorBuilder = detectorBuilder.withProfiles(new LanguageProfileReader().readAllBuiltIn());
    this.languageDetector = detectorBuilder.build();
}
Example #14
Source File: LanguageProfileValidator.java From language-detector with Apache License 2.0 | 4 votes |
/**
 * Loads the profiles available through {@link LanguageProfileReader#readAllBuiltIn()}
 * and adds all of these {@link LanguageProfile}s to the ones already held.
 *
 * @return this validator, so further calls can be chained
 * @throws IOException if reading the built-in profiles fails
 */
public LanguageProfileValidator loadAllBuiltInLanguageProfiles() throws IOException {
    List<LanguageProfile> builtIn = new LanguageProfileReader().readAllBuiltIn();
    this.languageProfiles.addAll(builtIn);
    return this;
}
Example #15
Source File: NgramFrequencyDataTest.java From language-detector with Apache License 2.0 | 4 votes |
/**
 * Builds {@link NgramFrequencyData} from every built-in language profile, using only
 * n-grams of the given size.
 *
 * @param gramSize the single gram size passed to the frequency data
 * @return frequency data created from the built-in profiles
 * @throws IOException if reading the built-in profiles fails
 */
private static NgramFrequencyData forAll(int gramSize) throws IOException {
    return NgramFrequencyData.create(
            new LanguageProfileReader().readAllBuiltIn(),
            ImmutableSet.of(gramSize));
}
Example #16
Source File: DataLanguageDetectorImplTest.java From jstarcraft-nlp with Apache License 2.0 | 3 votes |
/**
 * Creates the short-text detector ({@code shortTextAlgorithm(100)}) and the long-text
 * detector ({@code shortTextAlgorithm(0)}), each built from the built-in profiles.
 *
 * @throws IOException if reading the built-in language profiles fails
 */
public DataLanguageDetectorImplTest() throws IOException {
    List<LanguageProfile> builtInProfiles = new LanguageProfileReader().readAllBuiltIn();

    shortDetector = LanguageDetectorBuilder
            .create(NgramExtractors.standard())
            .shortTextAlgorithm(100)
            .withProfiles(builtInProfiles)
            .build();

    // The long-text detector reads the built-in profiles again; it does not reuse the list above.
    longDetector = LanguageDetectorBuilder
            .create(NgramExtractors.standard())
            .shortTextAlgorithm(0)
            .withProfiles(new LanguageProfileReader().readAllBuiltIn())
            .build();
}