opennlp.tools.util.ObjectStream Java Examples
The following examples show how to use
opennlp.tools.util.ObjectStream.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AbstractTaggerTrainer.java From ixa-pipe-pos with Apache License 2.0 | 6 votes |
/**
 * Builds a tagger trainer from the supplied training parameters.
 *
 * <p>The language, the "TrainSet" and "TestSet" data locations, and the
 * dictionary and ngram cutoffs are all looked up through {@code Flags}. The
 * training data is opened twice: once for the training samples and once more
 * for the samples passed to {@code setDictSamples}.
 *
 * @param params
 *          the training parameters
 * @throws IOException
 *           if an I/O error is reported while the constructor runs
 */
public AbstractTaggerTrainer(final TrainingParameters params)
    throws IOException {
  this.lang = Flags.getLanguage(params);
  final String trainingPath = Flags.getDataSet("TrainSet", params);
  final String testingPath = Flags.getDataSet("TestSet", params);

  // Training and test samples each get their own line stream.
  this.trainSamples = new MorphoSampleStream(
      InputOutputUtils.readFileIntoMarkableStreamFactory(trainingPath));
  this.testSamples = new MorphoSampleStream(
      InputOutputUtils.readFileIntoMarkableStreamFactory(testingPath));

  // The dictionary samples are read from a separate stream over the training data.
  setDictSamples(new MorphoSampleStream(
      InputOutputUtils.readFileIntoMarkableStreamFactory(trainingPath)));

  this.dictCutOff = Flags.getAutoDictFeatures(params);
  this.ngramCutOff = Flags.getNgramDictFeatures(params);
}
Example #2
Source File: AbstractTaggerTrainer.java From ixa-pipe-pos with Apache License 2.0 | 6 votes |
/**
 * Automatically create a tag dictionary from training data.
 *
 * <p>Nothing is done when {@code aDictCutOff} equals
 * {@code Flags.DEFAULT_DICT_CUTOFF}. Otherwise the tag dictionary of the POS
 * tagger factory is fetched (an empty one is created and installed on the
 * factory when it has none), populated from {@code aDictSamples}, and the
 * sample stream is reset afterwards.
 *
 * @param aDictSamples
 *          the dictSamples created from training data
 * @param aDictCutOff
 *          the cutoff to create the dictionary
 * @throws IllegalArgumentException
 *           if the factory's tag dictionary is not a
 *           {@code MutableTagDictionary}
 * @throws TerminateToolException
 *           if an {@code IOException} occurs while populating the dictionary
 *           or resetting the stream
 */
protected final void createAutomaticDictionary(
    final ObjectStream<POSSample> aDictSamples, final int aDictCutOff) {
  if (aDictCutOff == Flags.DEFAULT_DICT_CUTOFF) {
    return;
  }
  try {
    TagDictionary dict = getPosTaggerFactory().getTagDictionary();
    if (dict == null) {
      dict = getPosTaggerFactory().createEmptyTagDictionary();
      getPosTaggerFactory().setTagDictionary(dict);
    }
    if (!(dict instanceof MutableTagDictionary)) {
      throw new IllegalArgumentException("Can't extend a POSDictionary"
          + " that does not implement MutableTagDictionary.");
    }
    POSTaggerME.populatePOSDictionary(aDictSamples,
        (MutableTagDictionary) dict, aDictCutOff);
    // Reset the stream that was just consumed. The original reset the
    // dictSamples field instead, which is only correct when the caller
    // passes that very field as the argument.
    aDictSamples.reset();
  } catch (final IOException e) {
    throw new TerminateToolException(-1,
        "IO error while creating/extending POS Dictionary: "
            + e.getMessage(), e);
  }
}
Example #3
Source File: AbstractTaggerTrainer.java From ixa-pipe-pos with Apache License 2.0 | 6 votes |
/**
 * Create ngram dictionary from training data.
 *
 * <p>Progress is reported on {@code System.err}. After the dictionary has
 * been built, the sample stream is reset.
 *
 * @param aDictSamples
 *          the training data
 * @param aNgramCutoff
 *          the cutoff
 * @return the ngram dictionary, or {@code null} when {@code aNgramCutoff}
 *         equals {@code Flags.DEFAULT_DICT_CUTOFF}
 * @throws TerminateToolException
 *           if an {@code IOException} occurs while building the dictionary
 *           or resetting the stream
 */
protected final Dictionary createNgramDictionary(
    final ObjectStream<POSSample> aDictSamples, final int aNgramCutoff) {
  if (aNgramCutoff == Flags.DEFAULT_DICT_CUTOFF) {
    return null;
  }
  System.err.print("Building ngram dictionary ... ");
  final Dictionary ngramDict;
  try {
    ngramDict = POSTaggerME.buildNGramDictionary(aDictSamples, aNgramCutoff);
    // Reset the stream that was just consumed. The original reset the
    // dictSamples field instead, which is only correct when the caller
    // passes that very field as the argument.
    aDictSamples.reset();
  } catch (final IOException e) {
    throw new TerminateToolException(-1,
        "IO error while building NGram Dictionary: " + e.getMessage(), e);
  }
  System.err.println("done");
  return ngramDict;
}
Example #4
Source File: LanguageDetectorAndTrainingDataUnitTest.java From tutorials with MIT License | 6 votes |
/**
 * Trains a language detector on {@code DoccatSample.txt} with the Naive Bayes
 * algorithm and checks the languages predicted for a Portuguese sentence.
 *
 * @throws IOException
 *           if the training data cannot be read
 */
@Test
public void givenLanguageDictionary_whenLanguageDetect_thenLanguageIsDetected()
    throws IOException {
  InputStreamFactory dataIn = new MarkableFileInputStreamFactory(
      new File("src/main/resources/models/DoccatSample.txt"));

  TrainingParameters params = new TrainingParameters();
  params.put(TrainingParameters.ITERATIONS_PARAM, 100);
  params.put(TrainingParameters.CUTOFF_PARAM, 5);
  params.put("DataIndexer", "TwoPass");
  params.put(TrainingParameters.ALGORITHM_PARAM, "NAIVEBAYES");

  LanguageDetectorModel model;
  // try-with-resources so the training stream is closed once training is
  // over, even when training throws.
  try (LanguageDetectorSampleStream sampleStream =
      new LanguageDetectorSampleStream(
          new PlainTextByLineStream(dataIn, "UTF-8"))) {
    model = LanguageDetectorME.train(sampleStream, params,
        new LanguageDetectorFactory());
  }

  LanguageDetector ld = new LanguageDetectorME(model);
  Language[] languages =
      ld.predictLanguages("estava em uma marcenaria na Rua Bruno");

  // NOTE(review): these are exact floating-point confidences; they presumably
  // depend on the training file and parameters above - confirm before
  // changing either.
  assertThat(Arrays.asList(languages))
      .extracting("lang", "confidence")
      .contains(
          tuple("pob", 0.9999999950605625),
          tuple("ita", 4.939427661577956E-9),
          tuple("spa", 9.665954064665144E-15),
          tuple("fra", 8.250349924885834E-25));
}
Example #5
Source File: Chapter4.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License | 5 votes |
/**
 * Trains an OpenNLP name finder model of type "person" for language "en" from
 * the file {@code en-ner-person.train} and serializes it to {@code modelFile}.
 *
 * <p>Any {@code IOException} is caught and its stack trace printed.
 */
private static void trainingOpenNLPNERModel() {
  // Both files are managed by try-with-resources; the original never closed
  // the training input. The output is still opened first, as before.
  try (OutputStream modelOutputStream = new BufferedOutputStream(
          new FileOutputStream(new File("modelFile")));
      FileInputStream trainingInput =
          new FileInputStream("en-ner-person.train")) {
    ObjectStream<String> lineStream =
        new PlainTextByLineStream(trainingInput, "UTF-8");
    ObjectStream<NameSample> sampleStream =
        new NameSampleDataStream(lineStream);

    TokenNameFinderModel model =
        NameFinderME.train("en", "person", sampleStream, null, 100, 5);
    model.serialize(modelOutputStream);
  } catch (IOException ex) {
    ex.printStackTrace();
  }
}
Example #6
Source File: POSCrossValidator.java From ixa-pipe-pos with Apache License 2.0 | 5 votes |
/**
 * Builds a cross validator from the supplied training parameters.
 *
 * <p>The language, the "TrainSet" data location, the dictionary cutoff and
 * the number of folds are looked up through {@code Flags}. The training data
 * is wrapped in a {@code WordTagSampleStream}, after which
 * {@code createPOSFactory} and {@code getEvalListeners} are invoked with the
 * same parameters.
 *
 * @param params
 *          the training parameters
 * @throws IOException
 *           if an I/O error is reported while the constructor runs
 */
public POSCrossValidator(final TrainingParameters params)
    throws IOException {
  this.lang = Flags.getLanguage(params);

  final String trainingPath = Flags.getDataSet("TrainSet", params);
  this.trainSamples = new WordTagSampleStream(
      InputOutputUtils.readFileIntoMarkableStreamFactory(trainingPath));

  this.dictCutOff = Flags.getAutoDictFeatures(params);
  this.folds = Flags.getFolds(params);

  createPOSFactory(params);
  getEvalListeners(params);
}
Example #7
Source File: IntentDocumentSampleStream.java From org.openhab.ui.habot with Eclipse Public License 1.0 | 4 votes |
/**
 * Creates the sample stream, keeping the given category and source stream.
 *
 * @param category
 *          the category stored on this instance
 * @param stream
 *          the stream of strings stored on this instance
 */
public IntentDocumentSampleStream(final String category,
    final ObjectStream<String> stream) {
  this.stream = stream;
  this.category = category;
}
Example #8
Source File: IntentDocumentSampleStream.java From nlp-intent-toolkit with The Unlicense | 4 votes |
/**
 * Creates the sample stream, keeping the given category and source stream.
 *
 * @param category
 *          the category stored on this instance
 * @param stream
 *          the stream of strings stored on this instance
 */
public IntentDocumentSampleStream(final String category,
    final ObjectStream<String> stream) {
  this.stream = stream;
  this.category = category;
}
Example #9
Source File: LemmaSampleSequenceStream.java From ixa-pipe-pos with Apache License 2.0 | 4 votes |
/**
 * Creates the sequence stream, keeping the given samples and context
 * generator.
 *
 * @param samples
 *          the stream of lemma samples stored on this instance
 * @param contextGenerator
 *          the context generator stored on this instance
 */
public LemmaSampleSequenceStream(final ObjectStream<LemmaSample> samples,
    final LemmatizerContextGenerator contextGenerator) {
  this.contextGenerator = contextGenerator;
  this.samples = samples;
}
Example #10
Source File: LemmaSampleStream.java From ixa-pipe-pos with Apache License 2.0 | 4 votes |
/**
 * Creates the sample stream by handing the given stream to the superclass
 * constructor.
 *
 * @param samples
 *          the stream of strings passed on to the superclass
 */
public LemmaSampleStream(final ObjectStream<String> samples) {
  super(samples);
}
Example #11
Source File: MorphoSampleStream.java From ixa-pipe-pos with Apache License 2.0 | 4 votes |
/**
 * Creates the sample stream by handing the given stream to the superclass
 * constructor.
 *
 * @param samples
 *          the stream of strings passed on to the superclass
 */
public MorphoSampleStream(final ObjectStream<String> samples) {
  super(samples);
}
Example #12
Source File: AbstractLemmatizerTrainer.java From ixa-pipe-pos with Apache License 2.0 | 3 votes |
/**
 * Builds a lemmatizer trainer from the supplied training parameters.
 *
 * <p>The language and the "TrainSet" and "TestSet" data locations are looked
 * up through {@code Flags}; each data set is then wrapped in a
 * {@code LemmaSampleStream}.
 *
 * @param params
 *          the training parameters
 * @throws IOException
 *           if an I/O error is reported while the constructor runs
 */
public AbstractLemmatizerTrainer(final TrainingParameters params)
    throws IOException {
  this.lang = Flags.getLanguage(params);
  final String trainingPath = Flags.getDataSet("TrainSet", params);
  final String testingPath = Flags.getDataSet("TestSet", params);

  this.trainSamples = new LemmaSampleStream(
      InputOutputUtils.readFileIntoMarkableStreamFactory(trainingPath));
  this.testSamples = new LemmaSampleStream(
      InputOutputUtils.readFileIntoMarkableStreamFactory(testingPath));
}
Example #13
Source File: LemmaSampleEventStream.java From ixa-pipe-pos with Apache License 2.0 | 2 votes |
/**
 * Builds an event stream over the given data stream, using the given context
 * generator.
 *
 * @param d
 *          the data stream, passed on to the superclass constructor
 * @param cg
 *          the context generator stored on this instance
 */
public LemmaSampleEventStream(final ObjectStream<LemmaSample> d,
    final LemmatizerContextGenerator cg) {
  super(d);
  this.contextGenerator = cg;
}