cc.mallet.classify.Classifier Java Examples
The following examples show how to use cc.mallet.classify.Classifier.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: MalletClassifierTrainerTest.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Verifies the serialized classifier found at {@code modelPath}: the file must exist, the
 * label alphabet must contain both "pos" and "neg", and each of two sample sentences must be
 * assigned one of those two labels.
 */
private void validateModel() {
  ImmutableSet<String> expectedLabels = ImmutableSet.of("pos", "neg");

  File serialized = modelPath.toFile();
  assertTrue(serialized.exists());

  Classifier model = new FileObject<Classifier>(serialized.getPath()).object();
  assertTrue(model.getLabelAlphabet().contains("pos"));
  assertTrue(model.getLabelAlphabet().contains("neg"));

  // Push the sample sentences through the classifier's own pipe.
  InstanceList samples = new InstanceList(model.getInstancePipe());
  samples.addThruPipe(new Instance("I love this amazing awesome classifier.", "", null, null));
  samples.addThruPipe(new Instance("I can't stand this horrible test.", "", null, null));

  for (int idx = 0; idx < 2; idx++) {
    String predicted = model.classify(samples.get(idx)).getLabeling().getBestLabel().toString();
    assertTrue(expectedLabels.contains(predicted));
  }
}
Example #2
Source File: MaxEntClassifierTrainerTest.java From baleen with Apache License 2.0 | 6 votes |
/**
 * Loads the classifier serialized at {@code modelPath} and checks that it knows the labels
 * "pos" and "neg", labels the first sample sentence "pos" and the second one "neg".
 */
@Test
public void testTaskProducesValidModelFile() throws Exception {
  File serialized = modelPath.toFile();
  assertTrue(serialized.exists());

  Classifier model = new FileObject<Classifier>(serialized.getPath()).object();
  assertTrue(model.getLabelAlphabet().contains("pos"));
  assertTrue(model.getLabelAlphabet().contains("neg"));

  // Push the sample sentences through the classifier's own pipe.
  InstanceList samples = new InstanceList(model.getInstancePipe());
  samples.addThruPipe(new Instance("I love this amazing awesome classifier.", null, null, null));
  samples.addThruPipe(new Instance("I can't stand this horrible test.", null, null, null));

  String firstPrediction =
      model.classify(samples.get(0)).getLabeling().getBestLabel().toString();
  assertEquals("pos", firstPrediction);

  String secondPrediction =
      model.classify(samples.get(1)).getLabeling().getBestLabel().toString();
  assertEquals("neg", secondPrediction);
}
Example #3
Source File: ReferencesClassifierAnnotator.java From bluima with Apache License 2.0 | 6 votes |
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); try { // load model for inference File modelfile = new File(ReferencesHelper.REFERENCES_RESOURCES + "models/" + modelName); checkArgument(modelfile.exists(), "no modelFile at " + modelName); ObjectInputStream s = new ObjectInputStream(new FileInputStream( modelfile)); classifier = (Classifier) s.readObject(); s.close(); checkArgument(classifier != null); pipes = classifier.getInstancePipe(); } catch (Exception e) { throw new ResourceInitializationException(e); } }
Example #4
Source File: EngineMBMalletClass.java From gateplugin-LearningFramework with GNU Lesser General Public License v2.1 | 5 votes |
@Override public List<ModelApplication> applyModel( AnnotationSet instanceAS, AnnotationSet inputAS, AnnotationSet sequenceAS, String parms) { // NOTE: the crm should be of type CorpusRepresentationMalletClass for this to work! if(!(corpusRepresentation instanceof CorpusRepresentationMalletTarget)) { throw new GateRuntimeException("Cannot perform classification with data from "+corpusRepresentation.getClass()); } CorpusRepresentationMalletTarget data = (CorpusRepresentationMalletTarget)corpusRepresentation; data.stopGrowth(); List<ModelApplication> gcs = new ArrayList<>(); LFPipe pipe = (LFPipe)data.getRepresentationMallet().getPipe(); Classifier classifier = (Classifier)model; // iterate over the instance annotations and create mallet instances for(Annotation instAnn : instanceAS.inDocumentOrder()) { Instance inst = data.extractIndependentFeatures(instAnn, inputAS); inst = pipe.instanceFrom(inst); Classification classification = classifier.classify(inst); Labeling labeling = classification.getLabeling(); LabelVector labelvec = labeling.toLabelVector(); List<String> classes = new ArrayList<>(labelvec.numLocations()); List<Double> confidences = new ArrayList<>(labelvec.numLocations()); for(int i=0; i<labelvec.numLocations(); i++) { classes.add(labelvec.getLabelAtRank(i).toString()); confidences.add(labelvec.getValueAtRank(i)); } ModelApplication gc = new ModelApplication(instAnn, labeling.getBestLabel().toString(), labeling.getBestValue(), classes, confidences); //System.err.println("ADDING GC "+gc); // now save the class in our special class feature on the instance as well instAnn.getFeatures().put("gate.LF.target",labeling.getBestLabel().toString()); gcs.add(gc); } data.startGrowth(); return gcs; }
Example #5
Source File: EngineMBTopicsLDA.java From gateplugin-LearningFramework with GNU Lesser General Public License v2.1 | 5 votes |
/**
 * Loads the serialized topic model from {@code FILENAME_MODEL} inside the given directory and
 * stores it in {@code model}.
 *
 * @param directory URL of the directory containing the model file
 * @param parms not used by this method
 * @throws GateRuntimeException if the model cannot be read or its class cannot be found
 */
@Override
protected void loadModel(URL directory, String parms) {
  URL modelFile = newURL(directory, FILENAME_MODEL);
  try (InputStream is = modelFile.openStream();
      ObjectInputStream ois = new ObjectInputStream(is)) {
    ParallelTopicModel ptm = (ParallelTopicModel) ois.readObject();
    model = ptm;
  } catch (IOException | ClassNotFoundException ex) {
    throw new GateRuntimeException("Could not load Mallet model", ex);
  }
}
Example #6
Source File: ClassifierTrainerFactory.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Builds the {@link ClassifierTrainer} described by {@code trainerDescriptor}.
 *
 * <p>The descriptor is split on commas. The first field is passed to
 * {@code resolveTrainerClassName} to select the trainer to instantiate; the complete field
 * array is then handed to {@code setParameterValues} together with the new trainer.
 *
 * @param <T> the classifier type the trainer produces
 * @return the configured trainer
 */
@SuppressWarnings("unchecked")
public <T extends Classifier> ClassifierTrainer<T> createTrainer() {
  final String[] descriptorParts = trainerDescriptor.split(",");
  final ClassifierTrainer<T> configured =
      (ClassifierTrainer<T>) createTrainer(resolveTrainerClassName(descriptorParts[0]));
  setParameterValues(descriptorParts, configured);
  return configured;
}
Example #7
Source File: MalletClassifierTrainer.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Serializes the classifier via {@code ObjectFile}; does nothing when {@code modelFile} is
 * null.
 *
 * <p>When more than one trainer definition is present, the output name is {@code modelFile}
 * followed by {@code _<i>_<classifier simple class name>.mallet}; otherwise the classifier is
 * written to {@code modelFile} itself.
 *
 * @param i index included in the output name when several trainers are defined
 * @param classifier the classifier to write
 */
private void writeClassifierToModelPath(int i, Classifier classifier) {
  if (modelFile == null) {
    return;
  }

  if (trainerDefinition.length <= 1) {
    new ObjectFile(classifier, modelFile).write();
    return;
  }

  String indexedTarget =
      modelFile + "_" + i + "_" + classifier.getClass().getSimpleName() + ".mallet";
  new ObjectFile(classifier, indexedTarget).write();
}
Example #8
Source File: MalletClassifierTrainer.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Logs the overall accuracy of a trial, followed by the F1 and precision of every label in the
 * classifier's label alphabet.
 *
 * @param classifier supplies the label alphabet to iterate over
 * @param trial the evaluation the metrics are read from
 */
private void logAccuracyMetrics(Classifier classifier, Trial trial) {
  getMonitor().info("Accuracy: {}", trial.getAccuracy());

  String[] labelNames = (String[]) classifier.getLabelAlphabet().toArray(new String[0]);
  for (int k = 0; k < labelNames.length; k++) {
    String current = labelNames[k];
    getMonitor().info("F1 for class '{}': {}", current, trial.getF1(current));
    getMonitor().info("Precision for class '{}' : {}", current, trial.getPrecision(current));
  }
}
Example #9
Source File: MalletClassifierTrainer.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Builds the header row: four fixed column names followed by {@code <label>_F1},
 * {@code <label>_P} and {@code <label>_R} for every label in the classifier's label alphabet.
 *
 * @param classifier supplies the label alphabet
 * @return the column names, in order
 */
private List<String> createTitle(Classifier classifier) {
  List<String> header = new ArrayList<>();

  // NOTE(review): "Trail" looks like a typo, but it is runtime output and is kept verbatim.
  for (String fixedColumn : new String[] {"Trainer", "Training", "Trail", "Accuracy"}) {
    header.add(fixedColumn);
  }

  String[] labelNames = (String[]) classifier.getLabelAlphabet().toArray(new String[0]);
  for (int k = 0; k < labelNames.length; k++) {
    header.add(labelNames[k] + "_F1");
    header.add(labelNames[k] + "_P");
    header.add(labelNames[k] + "_R");
  }
  return header;
}
Example #10
Source File: MalletClassifierTrainer.java From baleen with Apache License 2.0 | 5 votes |
/**
 * Builds one result row: the given name, the training and testing set sizes, the trial
 * accuracy, and then F1, precision and recall for every label in the classifier's label
 * alphabet.
 *
 * @param training the training instances (only their count is used)
 * @param testing the testing instances (only their count is used)
 * @param e value of the first column
 * @param classifier supplies the label alphabet
 * @param trial the evaluation the metrics are read from
 * @return the row values as strings, in column order
 */
private List<String> createRow(
    InstanceList training, InstanceList testing, String e, Classifier classifier, Trial trial) {
  List<String> cells = new ArrayList<>();
  cells.add(e);
  cells.add(String.valueOf(training.size()));
  cells.add(String.valueOf(testing.size()));
  cells.add(String.valueOf(trial.getAccuracy()));

  String[] labelNames = (String[]) classifier.getLabelAlphabet().toArray(new String[0]);
  for (int k = 0; k < labelNames.length; k++) {
    String current = labelNames[k];
    cells.add(String.valueOf(trial.getF1(current)));
    cells.add(String.valueOf(trial.getPrecision(current)));
    cells.add(String.valueOf(trial.getRecall(current)));
  }
  return cells;
}
Example #11
Source File: ReferencesClassifierTrainer.java From bluima with Apache License 2.0 | 5 votes |
public static Trial testTrainSplit(InstanceList instances) { InstanceList[] instanceLists = instances.split(new Randoms(), new double[] { 0.9, 0.1, 0.0 }); // LOG.debug("{} training instance, {} testing instances", // instanceLists[0].size(), instanceLists[1].size()); @SuppressWarnings("rawtypes") ClassifierTrainer trainer = new MaxEntTrainer(); Classifier classifier = trainer.train(instanceLists[TRAINING]); return new Trial(classifier, instanceLists[TESTING]); }
Example #12
Source File: SpamDetector.java From Machine-Learning-in-Java with MIT License | 4 votes |
/**
 * Trains a Naive Bayes classifier on the e-mails under {@code data/ex6DataEmails/train},
 * evaluates it on {@code data/ex6DataEmails/test} and prints accuracy, F1 for the label "spam",
 * and precision and recall for the label at index 1.
 *
 * @param args not used
 */
public static void main(String[] args){
    String stopwordsPath = "data/stoplists/en.txt";
    String trainDir = "data/ex6DataEmails/train";
    String testDir = "data/ex6DataEmails/test";

    // Text-to-feature-vector pipeline: decode, tokenize, lowercase, drop stop words, index.
    Pattern wordPattern = Pattern.compile("[\\p{L}\\p{N}_]+");
    ArrayList<Pipe> stages = new ArrayList<Pipe>();
    stages.add(new Input2CharSequence("UTF-8"));
    stages.add(new CharSequence2TokenSequence(wordPattern));
    stages.add(new TokenSequenceLowercase());
    stages.add(new TokenSequenceRemoveStopwords(new File(stopwordsPath), "utf-8", false, false, false));
    stages.add(new TokenSequence2FeatureSequence());
    stages.add(new FeatureSequence2FeatureVector());
    stages.add(new Target2Label());
    SerialPipes featurePipe = new SerialPipes(stages);

    // Training data: the target is taken from the last directory of each file's path.
    File[] trainRoots = new File[] {new File(trainDir)};
    FileIterator fileSource =
            new FileIterator(trainRoots, new TxtFilter(), FileIterator.LAST_DIRECTORY);
    InstanceList trainSet = new InstanceList(featurePipe);
    trainSet.addThruPipe(fileSource);

    ClassifierTrainer bayesTrainer = new NaiveBayesTrainer();
    Classifier model = bayesTrainer.train(trainSet);

    // Test data goes through the pipe the classifier was trained with.
    InstanceList testSet = new InstanceList(model.getInstancePipe());
    File[] testRoots = new File[] {new File(testDir)};
    fileSource = new FileIterator(testRoots, new TxtFilter(), FileIterator.LAST_DIRECTORY);
    testSet.addThruPipe(fileSource);

    Trial evaluation = new Trial(model, testSet);
    System.out.println("Accuracy: " + evaluation.getAccuracy());
    System.out.println("F1 for class 'spam': " + evaluation.getF1("spam"));
    System.out.println(
            "Precision for class '" + model.getLabelAlphabet().lookupLabel(1) + "': "
                    + evaluation.getPrecision(1));
    System.out.println(
            "Recall for class '" + model.getLabelAlphabet().lookupLabel(1) + "': "
                    + evaluation.getRecall(1));
}
Example #13
Source File: MaxEntClassifierTrainer.java From baleen with Apache License 2.0 | 4 votes |
@Override protected void execute(JobSettings settings) throws AnalysisEngineProcessException { Pipe pipe = new MaxEntClassifierPipe(labelsAndFeatures.keySet(), stopwords); InstanceList instances = new InstanceList(pipe); instances.addThruPipe(getDocumentsFromMongoWithRandonLabelAssignement()); Alphabet targetAlphabet = instances.getTargetAlphabet(); HashMap<Integer, ArrayList<Integer>> featuresAndLabels = mapFeaturesToLabels(instances.getDataAlphabet(), targetAlphabet); int numLabels = targetAlphabet.size(); HashMap<Integer, double[]> constraintsMap = FeatureConstraintUtil.setTargetsUsingHeuristic(featuresAndLabels, numLabels, 0.9); MaxEntKLFLGEConstraints geConstraints = new MaxEntKLFLGEConstraints(instances.getDataAlphabet().size(), numLabels, false); constraintsMap .entrySet() .forEach(e -> geConstraints.addConstraint(e.getKey(), e.getValue(), 1)); ArrayList<MaxEntGEConstraint> constraints = new ArrayList<>(); constraints.add(geConstraints); // Create a classifier trainer, and use it to create a classifier MaxEntGETrainer trainer = new MaxEntGETrainer(constraints); trainer.setMaxIterations(numIterations); trainer.setGaussianPriorVariance(variance); instances.forEach( i -> { i.unLock(); i.setTarget(null); i.lock(); }); Classifier classifier = trainer.train(instances); List<Classification> classify = classifier.classify(instances); writeClassificationToMongo(classify); new ObjectFile(classifier, modelFile).write(); }
Example #14
Source File: ReferencesClassifierTrainer.java From bluima with Apache License 2.0 | 4 votes |
public static void main(String[] args) { // pipe instances InstanceList instanceList = new InstanceList( new SerialPipes(getPipes())); FileIterator iterator = new FileIterator(new File[] { CORPUS }, new TxtFilter(), LAST_DIRECTORY); instanceList.addThruPipe(iterator); // //////////////////////////////////////////////////////////////// // cross-validate System.out.println("trial\tprec\trecall\tF-score"); double f1s = 0; for (int i = 0; i < trials; i++) { Trial trial = testTrainSplit(instanceList); System.out.println(join(new Object[] {// i, trial.getPrecision(TESTING), trial.getRecall(TESTING), trial.getF1(TESTING) }, "\t")); f1s += trial.getF1(TESTING); } System.out.println("mean F1 = " + (f1s / (trials + 0d))); // //////////////////////////////////////////////////////////////// // train ClassifierTrainer trainer = new MaxEntTrainer(); Classifier c = trainer.train(instanceList); String txt = "in the entorhinal cortex of the rat\n" + "II: phase relations between unit discharges and theta field potentials.\n" + "J. Comp. Neurol. 67, 502–509.\n" + "Alonso, A., and Klink, R. (1993).\n" + "Differential electroresponsiveness of\n" + "stellate and pyramidal-like cells of\n" + "medial entorhinal cortex layer II.\n" + "J. Neurophysiol. 70, 128–143.\n" + "Alonso, A., and Köhler, C. (1984).\n" + "A study of the reciprocal connections between the septum and the\n" + "entorhinal area using anterograde\n" + "and retrograde axonal transport\n" + "methods in the rat brain. J. Comp.\n" + "Neurol. 225, 327–343.\n" + "Alonso, A., and Llinás, R. (1989).\n" + "Subthreshold sodium-dependent\n" + "theta-like rhythmicity in stellate\n" + "cells of entorhinal cortex layer II.\n" + "Nature 342, 175–177.\n" + "Amaral, D. G., and Kurz, J. 
(1985).\n" + "An analysis of the origins of\n" + ""; Classification classification = c.classify(c.getInstancePipe() .instanceFrom(new Instance(txt, null, null, null))); System.out.println("LABELL " + classification.getLabeling()); c.print(); try { ObjectOutputStream oos = new ObjectOutputStream( new FileOutputStream("target/classifier_" + currentTimeMillis() + ".model")); oos.writeObject(c); oos.close(); } catch (Exception e) { e.fillInStackTrace(); } // ////////////////////////////////////////////////////////////////// // train test for (String goldLabel : new String[] { "I", "O" }) { ClassifierTrainer trainer2 = new MaxEntTrainer(); Classifier c2 = trainer2.train(instanceList); FileIterator iteratorI = new FileIterator(new File[] { new File( CORPUS, "../annots1/" + goldLabel + "/") }, new TxtFilter(), LAST_DIRECTORY); Iterator<Instance> instancesI = c2.getInstancePipe() .newIteratorFrom(iteratorI); Histogram<String> h = new Histogram<String>(); while (instancesI.hasNext()) { Instance inst = instancesI.next(); Labeling labeling = c2.classify(inst).getLabeling(); Label bestLabel = labeling.getBestLabel(); h.add(bestLabel.toString()); // if (!bestLabel.toString().equals(goldLabel)) { // LOG.debug( // "\n\n\nMISSCLASSIFIED as {} but gold:{} :: " // + inst.getSource(), bestLabel, goldLabel); // } } System.out.println("\nlabel " + goldLabel + "\n" + h); } }