Java Code Examples for cc.mallet.types.InstanceList#addThruPipe()

The following examples show how to use cc.mallet.types.InstanceList#addThruPipe(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TopicModel.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Infers a topic distribution for the document text, picks the topic with the highest score and
 * stores the words of that topic as a {@code Metadata} annotation under {@code metadataKey}.
 *
 * @param jCas the document being processed
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {
  // Run the raw document text through the pipe so the inferencer gets a processed instance.
  Instance document = new Instance(jCas.getDocumentText(), null, "from jcas", null);
  InstanceList processed = new InstanceList(pipe);
  processed.addThruPipe(document);

  double[] distribution =
      model
          .getInferencer()
          .getSampledDistribution(processed.get(0), iterations, thining, burnIn);

  // Only the best-scoring topic is reported.
  int bestTopic = new MaximumIndex(distribution).find();
  String topicLabel = topicWords.forTopic(bestTopic).toString();

  Metadata topicMetadata = new Metadata(jCas);
  topicMetadata.setKey(metadataKey);
  topicMetadata.setValue(topicLabel);
  addToJCasIndex(topicMetadata);
}
 
Example 2
Source File: MalletClassifierTrainer.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the documents through a {@code ClassifierPipe} and hands them to
 * {@code processTrainerDefinitions}, optionally holding back a fraction for testing.
 *
 * <p>When {@code forTesting} is greater than zero the instances are split into a training part
 * ({@code 1 - forTesting}) and a testing part ({@code forTesting}); otherwise all instances are
 * used for training and the testing list is {@code null}.
 *
 * @param settings the job settings (not read by this method)
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
protected void execute(JobSettings settings) throws AnalysisEngineProcessException {
  InstanceList instances = new InstanceList(new ClassifierPipe(stopwords));
  instances.addThruPipe(getDocumentsFromMongo());

  // Default: train on everything, no held-out data.
  InstanceList training = instances;
  InstanceList testing = null;

  if (forTesting > 0.0) {
    double[] proportions = {1 - forTesting, forTesting};
    InstanceList[] parts = instances.split(proportions);
    training = parts[0];
    testing = parts[1];
  }

  processTrainerDefinitions(training, testing);
}
 
Example 3
Source File: MalletClassifierTrainerTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the model file exists, that the stored classifier knows the labels "pos" and
 * "neg", and that classifying two sample sentences yields one of those labels each time.
 */
private void validateModel() {
  File modelFile = modelPath.toFile();
  assertTrue(modelFile.exists());

  Classifier classifier = new FileObject<Classifier>(modelFile.getPath()).object();
  for (String expectedLabel : new String[] {"pos", "neg"}) {
    assertTrue(classifier.getLabelAlphabet().contains(expectedLabel));
  }

  // Feed the sample sentences through the same pipe the classifier was trained with.
  InstanceList instanceList = new InstanceList(classifier.getInstancePipe());
  String[] sentences = {
    "I love this amazing awesome classifier.", "I can't stand this horrible test."
  };
  for (String sentence : sentences) {
    instanceList.addThruPipe(new Instance(sentence, "", null, null));
  }

  ImmutableSet<String> labels = ImmutableSet.of("pos", "neg");
  for (int index = 0; index < sentences.length; index++) {
    String predicted =
        classifier.classify(instanceList.get(index)).getLabeling().getBestLabel().toString();
    assertTrue(labels.contains(predicted));
  }
}
 
Example 4
Source File: MaxEntClassifierTrainerTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that the model file exists, that the stored classifier knows the labels "pos" and
 * "neg", and that it labels a positive sample sentence "pos" and a negative one "neg".
 */
@Test
public void testTaskProducesValidModelFile() throws Exception {
  File modelFile = modelPath.toFile();
  assertTrue(modelFile.exists());

  Classifier classifier = new FileObject<Classifier>(modelFile.getPath()).object();
  for (String knownLabel : new String[] {"pos", "neg"}) {
    assertTrue(classifier.getLabelAlphabet().contains(knownLabel));
  }

  // Unlabelled instances, processed with the pipe stored alongside the classifier.
  InstanceList instanceList = new InstanceList(classifier.getInstancePipe());
  Instance positiveSample =
      new Instance("I love this amazing awesome classifier.", null, null, null);
  instanceList.addThruPipe(positiveSample);
  Instance negativeSample = new Instance("I can't stand this horrible test.", null, null, null);
  instanceList.addThruPipe(negativeSample);

  String firstPrediction =
      classifier.classify(instanceList.get(0)).getLabeling().getBestLabel().toString();
  assertEquals("pos", firstPrediction);

  String secondPrediction =
      classifier.classify(instanceList.get(1)).getLabeling().getBestLabel().toString();
  assertEquals("neg", secondPrediction);
}
 
Example 5
Source File: MalletClassifier.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Classifies the document text with {@code classifierModel} and stores the best label as a
 * {@code Metadata} annotation under {@code metadataKey}.
 *
 * @param jCas the document being processed
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {
  // The text has to go through the classifier's own pipe before it can be classified.
  InstanceList piped = new InstanceList(classifierModel.getInstancePipe());
  Instance document = new Instance(jCas.getDocumentText(), "", "from jcas", null);
  piped.addThruPipe(document);

  Classification result = classifierModel.classify(piped.get(0));

  Metadata labelMetadata = new Metadata(jCas);
  labelMetadata.setKey(metadataKey);
  String bestLabel = result.getLabeling().getBestLabel().toString();
  labelMetadata.setValue(bestLabel);
  addToJCasIndex(labelMetadata);
}
 
Example 6
Source File: BrainRegionPipesTest.java    From bluima with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code txt} through the {@code BrainRegionPipes} pipeline and asserts that the i-th
 * resulting feature vector contains every feature named in the i-th entry of {@code features}.
 * Feature vectors beyond the number of supplied lists are not checked.
 *
 * @param txt the text to tokenize and pipe
 * @param features expected feature names, one list per feature vector, in order
 * @throws Exception if building the test CAS or piping fails
 */
private void pipe(String txt, List<String>... features) throws Exception {
    // it might not have all the aes, though...
    JCas jCas = getOpenNlpTokenizedTestCas(txt);

    SerialPipes pipeline = new SerialPipes(BrainRegionPipes.getPipes());
    InstanceList il = new InstanceList(pipeline);
    il.addThruPipe(new Instance(jCas, null, 1, jCas));

    // Only one instance was added, so the first element is the piped result.
    Instance pipedInstance = il.iterator().next();
    FeatureVectorSequence data = (FeatureVectorSequence) pipedInstance
            .getData();

    java.util.Iterator<List<String>> expectedIt = asList(features)
            .iterator();
    // NOTE(review): Iterator here is presumably the FeatureVectorSequence
    // iterator type rather than java.util.Iterator -- confirm in imports.
    for (Iterator vectors = data.iterator(); vectors.hasNext();) {
        FeatureVector featureVector = vectors.next();

        if (!expectedIt.hasNext()) {
            // no expectations left; keep walking the remaining vectors
            continue;
        }

        for (String expectedFeature : expectedIt.next()) {
            String message = "could not find expected feature '"
                    + expectedFeature + "', FeatureVector = \n"
                    + featureVector;
            assertTrue(message, featureVector.contains(expectedFeature));
        }
    }
}
 
Example 7
Source File: LDA.java    From topic-detection with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a list of Mallet instances from a list of documents.
 * <p>
 * Every document is passed through these pipes, in this order:
 * {@code CharSequence2TokenSequence}, {@code TokenSequenceLowercase},
 * {@code TokenSequenceRemoveStopwords}, {@code TokenSequence2FeatureSequence}.
 *
 * @param texts a list of documents
 * @return a list of Mallet instances, one per piped document
 * @throws IOException declared for callers; nothing in this method throws it directly
 */
private InstanceList createInstanceList(List<String> texts) throws IOException
{
	ArrayList<Pipe> stages = new ArrayList<Pipe>();
	stages.add(new CharSequence2TokenSequence());
	stages.add(new TokenSequenceLowercase());
	stages.add(new TokenSequenceRemoveStopwords());
	stages.add(new TokenSequence2FeatureSequence());

	SerialPipes pipeline = new SerialPipes(stages);
	InstanceList result = new InstanceList(pipeline);
	result.addThruPipe(new ArrayIterator(texts));
	return result;
}
 
Example 8
Source File: MaxEntClassifierTrainer.java    From baleen with Apache License 2.0 4 votes vote down vote up
/**
 * Trains a MaxEnt classifier with generalized-expectation (GE) constraints, classifies the
 * training documents with it, writes the classifications back to Mongo and saves the model.
 *
 * @param settings the job settings (not read by this method)
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
protected void execute(JobSettings settings) throws AnalysisEngineProcessException {

  InstanceList instances =
      new InstanceList(new MaxEntClassifierPipe(labelsAndFeatures.keySet(), stopwords));
  instances.addThruPipe(getDocumentsFromMongoWithRandonLabelAssignement());

  Alphabet targetAlphabet = instances.getTargetAlphabet();
  HashMap<Integer, ArrayList<Integer>> featuresAndLabels =
      mapFeaturesToLabels(instances.getDataAlphabet(), targetAlphabet);

  int numLabels = targetAlphabet.size();
  HashMap<Integer, double[]> targetsByFeature =
      FeatureConstraintUtil.setTargetsUsingHeuristic(featuresAndLabels, numLabels, 0.9);

  // One constraint per feature, each with weight 1.
  MaxEntKLFLGEConstraints geConstraints =
      new MaxEntKLFLGEConstraints(instances.getDataAlphabet().size(), numLabels, false);
  targetsByFeature.forEach(
      (feature, targets) -> geConstraints.addConstraint(feature, targets, 1));

  ArrayList<MaxEntGEConstraint> constraints = new ArrayList<>();
  constraints.add(geConstraints);

  // Create a classifier trainer, and use it to create a classifier
  MaxEntGETrainer trainer = new MaxEntGETrainer(constraints);
  trainer.setMaxIterations(numIterations);
  trainer.setGaussianPriorVariance(variance);

  // Clear the target of every instance before training; instances must be
  // unlocked to be modified and are locked again afterwards.
  instances.forEach(
      instance -> {
        instance.unLock();
        instance.setTarget(null);
        instance.lock();
      });

  Classifier classifier = trainer.train(instances);

  List<Classification> classifications = classifier.classify(instances);
  writeClassificationToMongo(classifications);

  new ObjectFile(classifier, modelFile).write();
}
 
Example 9
Source File: ReferencesClassifierTrainer.java    From bluima with Apache License 2.0 4 votes vote down vote up
/**
 * Trains and evaluates the references classifier from the command line.
 * <p>
 * Steps, in order:
 * <ol>
 * <li>pipes all text files found under {@code CORPUS} into an instance list;</li>
 * <li>runs {@code trials} train/test splits and prints precision, recall
 * and F1 for each, followed by the mean F1;</li>
 * <li>trains a MaxEnt classifier on all instances, classifies a sample
 * text, prints the result and the classifier;</li>
 * <li>serializes the classifier to
 * {@code target/classifier_<millis>.model} (best effort: a failure is
 * reported on stderr and the run continues);</li>
 * <li>for each gold label ("I", "O"), trains again, classifies the files
 * under {@code CORPUS/../annots1/<label>/} and prints a histogram of the
 * predicted labels.</li>
 * </ol>
 *
 * @param args ignored
 */
public static void main(String[] args) {

        // pipe instances
        InstanceList instanceList = new InstanceList(
                new SerialPipes(getPipes()));
        FileIterator iterator = new FileIterator(new File[] { CORPUS },
                new TxtFilter(), LAST_DIRECTORY);
        instanceList.addThruPipe(iterator);

        // ////////////////////////////////////////////////////////////////
        // cross-validate
        System.out.println("trial\tprec\trecall\tF-score");
        double f1s = 0;
        for (int i = 0; i < trials; i++) {
            Trial trial = testTrainSplit(instanceList);
            System.out.println(join(new Object[] {//
                    i, trial.getPrecision(TESTING), trial.getRecall(TESTING),
                            trial.getF1(TESTING) }, "\t"));
            f1s += trial.getF1(TESTING);
        }
        // "+ 0d" forces floating-point division
        System.out.println("mean F1 = " + (f1s / (trials + 0d)));

        // ////////////////////////////////////////////////////////////////
        // train
        ClassifierTrainer trainer = new MaxEntTrainer();
        Classifier c = trainer.train(instanceList);

        // sample text used as a smoke test for the trained classifier
        String txt = "in the entorhinal cortex of the rat\n"
                + "II: phase relations between unit discharges and theta field potentials.\n"
                + "J. Comp. Neurol. 67, 502–509.\n"
                + "Alonso, A., and Klink, R. (1993).\n"
                + "Differential electroresponsiveness of\n"
                + "stellate and pyramidal-like cells of\n"
                + "medial entorhinal cortex layer II.\n"
                + "J. Neurophysiol. 70, 128–143.\n"
                + "Alonso, A., and Köhler, C. (1984).\n"
                + "A study of the reciprocal connections between the septum and the\n"
                + "entorhinal area using anterograde\n"
                + "and retrograde axonal transport\n"
                + "methods in the rat brain. J. Comp.\n"
                + "Neurol. 225, 327–343.\n"
                + "Alonso, A., and Llinás, R. (1989).\n"
                + "Subthreshold sodium-dependent\n"
                + "theta-like rhythmicity in stellate\n"
                + "cells of entorhinal cortex layer II.\n"
                + "Nature 342, 175–177.\n"
                + "Amaral, D. G., and Kurz, J. (1985).\n"
                + "An analysis of the origins of\n" + "";
        Classification classification = c.classify(c.getInstancePipe()
                .instanceFrom(new Instance(txt, null, null, null)));
        System.out.println("LABELL " + classification.getLabeling());
        c.print();

        // Persist the model. try-with-resources closes both streams even
        // when writeObject fails, so no file handle is leaked.
        try (FileOutputStream fos = new FileOutputStream("target/classifier_"
                + currentTimeMillis() + ".model");
                ObjectOutputStream oos = new ObjectOutputStream(fos)) {
            oos.writeObject(c);
        } catch (Exception e) {
            // Saving is best effort: report the failure instead of silently
            // dropping it, then carry on with the evaluation below.
            System.err.println("could not write classifier model: " + e);
            e.printStackTrace();
        }

        // //////////////////////////////////////////////////////////////////
        // train test
        for (String goldLabel : new String[] { "I", "O" }) {
            ClassifierTrainer trainer2 = new MaxEntTrainer();
            Classifier c2 = trainer2.train(instanceList);

            // files annotated with this gold label
            FileIterator iteratorI = new FileIterator(new File[] { new File(
                    CORPUS, "../annots1/" + goldLabel + "/") },
                    new TxtFilter(), LAST_DIRECTORY);
            Iterator<Instance> instancesI = c2.getInstancePipe()
                    .newIteratorFrom(iteratorI);

            // counts how often each label was predicted for this gold label
            Histogram<String> h = new Histogram<String>();
            while (instancesI.hasNext()) {
                Instance inst = instancesI.next();
                Labeling labeling = c2.classify(inst).getLabeling();
                Label bestLabel = labeling.getBestLabel();
                h.add(bestLabel.toString());
            }
            System.out.println("\nlabel " + goldLabel + "\n" + h);
        }
    }