Java Code Examples for cc.mallet.types.InstanceList#size()

The following examples show how to use cc.mallet.types.InstanceList#size(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CorpusRepresentationLibSVM.java    From gateplugin-LearningFramework with GNU Lesser General Public License v2.1 5 votes vote down vote up
/**
 * Create libsvm representation from Mallet.
 *
 * <p>Every Mallet instance becomes one row of the returned problem:
 * <ul>
 *   <li>the target goes to {@code prob.y[i]}: a {@code Label} contributes its
 *       index, a {@code Double} is used as-is;</li>
 *   <li>the feature vector goes to {@code prob.x[i]} as one {@code svm_node}
 *       per stored location, with the feature index shifted by one.</li>
 * </ul>
 *
 * @param crm mallet representation
 * @return libsvm representation
 * @throws GateRuntimeException if an instance target is neither a
 *         {@code Label} nor a {@code Double}
 */
public static svm_problem getFromMallet(CorpusRepresentationMallet crm) {
  InstanceList instances = crm.getRepresentationMallet();
  svm_problem prob = new svm_problem();
  int numTrainingInstances = instances.size();
  prob.l = numTrainingInstances;
  prob.y = new double[prob.l];
  prob.x = new svm_node[prob.l][];

  for (int i = 0; i < numTrainingInstances; i++) {
    Instance instance = instances.get(i);

    // Labels
    // convert the target: if we get a label, convert to index,
    // if we get a double, use it directly
    Object tobj = instance.getTarget();
    if (tobj instanceof Label) {
      prob.y[i] = ((Label) tobj).getIndex();
    } else if (tobj instanceof Double) {
      prob.y[i] = (Double) tobj;
    } else {
      throw new GateRuntimeException("Odd target in mallet instance, cannot convert to LIBSVM: " + tobj);
    }

    // Features
    // Use the location-based accessors instead of the raw getIndices() /
    // getValues() arrays: per the Mallet SparseVector API those arrays may be
    // null (binary vectors have no value array, dense vectors no index
    // array), which would fail here with a NullPointerException.
    SparseVector data = (SparseVector) instance.getData();
    int numLocations = data.numLocations();
    prob.x[i] = new svm_node[numLocations];
    for (int j = 0; j < numLocations; j++) {
      svm_node node = new svm_node();
      node.index = data.indexAtLocation(j) + 1; // NOTE: LibSVM location indices have to start with 1
      node.value = data.valueAtLocation(j);
      prob.x[i][j] = node;
    }
  }
  return prob;
}
 
Example 2
Source File: MultiSegmentationEvaluator.java    From bluima with Apache License 2.0 4 votes vote down vote up
/**
 * Evaluates the trainer's transducer on {@code data} and prints token accuracy
 * plus per-tag and overall segment precision/recall/F1 to {@code System.err}.
 *
 * <p>A segment counts as correct when truth and prediction start the same tag
 * type at the same position and agree on where that segment stops. Tag types
 * come from the {@code segmentStartTags} / {@code segmentContinueTags} fields
 * (declared outside this method).
 *
 * @param tt trainer whose current transducer is evaluated
 * @param data instances whose data and target are both sequences
 * @param description prefix put on every printed result line
 */
public void evaluateInstanceList(TransducerTrainer tt, InstanceList data,
        String description) {
    Transducer transducer = tt.getTransducer();

    // One counter slot per segment tag type, plus a trailing slot that
    // aggregates over all types. Fresh int arrays are already zero-filled.
    final int overallSlot = segmentStartTags.length;
    int[] trueCounts = new int[overallSlot + 1];
    int[] predictedCounts = new int[overallSlot + 1];
    int[] correctCounts = new int[overallSlot + 1];
    int tokenCount = 0;
    int correctTokenCount = 0;

    for (int i = 0; i < data.size(); i++) {
        Instance instance = data.get(i);
        Sequence input = (Sequence) instance.getData();
        Sequence gold = (Sequence) instance.getTarget();
        assert (input.size() == gold.size());
        Sequence predicted = transducer.transduce(input);
        assert (predicted.size() == gold.size());

        for (int pos = 0; pos < gold.size(); pos++) {
            tokenCount++;
            if (gold.get(pos).equals(predicted.get(pos))) {
                correctTokenCount++;
            }

            // Which tag type (if any) starts here in the truth; -1 = none.
            // Only the first matching start tag is counted.
            int goldTag = -1;
            for (int t = 0; t < segmentStartTags.length; t++) {
                if (segmentStartTags[t].equals(gold.get(pos))) {
                    trueCounts[t]++;
                    trueCounts[overallSlot]++;
                    goldTag = t;
                    break;
                }
            }

            // Same for the prediction; here every matching start tag is
            // counted and the last match wins.
            int predictedTag = -1;
            for (int t = 0; t < segmentStartTags.length; t++) {
                if (segmentStartTags[t].equals(predicted.get(pos))) {
                    predictedCounts[t]++;
                    predictedCounts[overallSlot]++;
                    predictedTag = t;
                }
            }

            if (goldTag == -1 || goldTag != predictedTag) {
                continue;
            }

            // Truth and prediction both start the same tag type here. Walk
            // forward until either side stops continuing; the segment is
            // correct if both stop together. Reaching the end of the sequence
            // with both still continuing (or with no further tokens) also
            // counts as agreement.
            boolean boundariesAgree = true;
            for (int next = pos + 1; next < gold.size(); next++) {
                boolean goldContinues = segmentContinueTags[predictedTag]
                        .equals(gold.get(next));
                boolean predictedContinues = segmentContinueTags[predictedTag]
                        .equals(predicted.get(next));
                if (!(goldContinues && predictedContinues)) {
                    boundariesAgree = (goldContinues == predictedContinues);
                    break;
                }
            }
            if (boundariesAgree) {
                correctCounts[predictedTag]++;
                correctCounts[overallSlot]++;
            }
        }
    }

    DecimalFormat fmt = new DecimalFormat("0.####");
    double tokenAccuracy = ((double) correctTokenCount) / tokenCount;
    System.err.println(description + " tokenaccuracy="
            + fmt.format(tokenAccuracy));

    for (int n = 0; n < correctCounts.length; n++) {
        String heading;
        if (n < overallSlot) {
            heading = segmentStartTags[n].toString();
        } else {
            heading = "OVERALL";
        }
        System.err.println(heading + ' ');

        // With nothing predicted (or nothing to find) the score defaults to 1.
        double precision;
        if (predictedCounts[n] == 0) {
            precision = 1;
        } else {
            precision = ((double) correctCounts[n]) / predictedCounts[n];
        }
        double recall;
        if (trueCounts[n] == 0) {
            recall = 1;
        } else {
            recall = ((double) correctCounts[n]) / trueCounts[n];
        }
        double f1;
        if (recall + precision == 0.0) {
            f1 = 0.0;
        } else {
            f1 = (2.0 * recall * precision) / (recall + precision);
        }

        int misses = trueCounts[n] - correctCounts[n];
        int alarms = predictedCounts[n] - correctCounts[n];
        System.err.println(" " + description + " segments true="
                + trueCounts[n] + " pred=" + predictedCounts[n]
                + " correct=" + correctCounts[n] + " misses="
                + misses + " alarms="
                + alarms);
        System.err.println(" " + description + " precision="
                + fmt.format(precision) + " recall=" + fmt.format(recall)
                + " f1=" + fmt.format(f1));
    }

}