Java Code Examples for cc.mallet.types.InstanceList#size()
The following examples show how to use
cc.mallet.types.InstanceList#size().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CorpusRepresentationLibSVM.java From gateplugin-LearningFramework with GNU Lesser General Public License v2.1 | 5 votes |
/** * Create libsvm representation from Mallet. * * @param crm mallet representation * @return libsvm representation */ public static svm_problem getFromMallet(CorpusRepresentationMallet crm) { InstanceList instances = crm.getRepresentationMallet(); svm_problem prob = new svm_problem(); int numTrainingInstances = instances.size(); prob.l = numTrainingInstances; prob.y = new double[prob.l]; prob.x = new svm_node[prob.l][]; for (int i = 0; i < numTrainingInstances; i++) { Instance instance = instances.get(i); //Labels // convert the target: if we get a label, convert to index, // if we get a double, use it directly Object tobj = instance.getTarget(); if (tobj instanceof Label) { prob.y[i] = ((Label) instance.getTarget()).getIndex(); } else if (tobj instanceof Double) { prob.y[i] = (double) tobj; } else { throw new GateRuntimeException("Odd target in mallet instance, cannot convert to LIBSVM: " + tobj); } //Features SparseVector data = (SparseVector) instance.getData(); int[] indices = data.getIndices(); double[] values = data.getValues(); prob.x[i] = new svm_node[indices.length]; for (int j = 0; j < indices.length; j++) { svm_node node = new svm_node(); node.index = indices[j]+1; // NOTE: LibSVM location indices have to start with 1 node.value = values[j]; prob.x[i][j] = node; } } return prob; }
Example 2
Source File: MultiSegmentationEvaluator.java From bluima with Apache License 2.0 | 4 votes |
public void evaluateInstanceList(TransducerTrainer tt, InstanceList data, String description) { Transducer model = tt.getTransducer(); int numCorrectTokens, totalTokens; int[] numTrueSegments, numPredictedSegments, numCorrectSegments; int allIndex = segmentStartTags.length; numTrueSegments = new int[allIndex + 1]; numPredictedSegments = new int[allIndex + 1]; numCorrectSegments = new int[allIndex + 1]; totalTokens = numCorrectTokens = 0; for (int n = 0; n < numTrueSegments.length; n++) numTrueSegments[n] = numPredictedSegments[n] = numCorrectSegments[n] = 0; for (int i = 0; i < data.size(); i++) { Instance instance = data.get(i); Sequence input = (Sequence) instance.getData(); // String tokens = null; // if (instance.getSource() != null) // tokens = (String) instance.getSource().toString(); Sequence trueOutput = (Sequence) instance.getTarget(); assert (input.size() == trueOutput.size()); Sequence predOutput = model.transduce(input); assert (predOutput.size() == trueOutput.size()); int trueStart, predStart; // -1 for non-start, otherwise index into // segmentStartTag for (int j = 0; j < trueOutput.size(); j++) { totalTokens++; if (trueOutput.get(j).equals(predOutput.get(j))) numCorrectTokens++; trueStart = predStart = -1; // Count true segment starts for (int n = 0; n < segmentStartTags.length; n++) { if (segmentStartTags[n].equals(trueOutput.get(j))) { numTrueSegments[n]++; numTrueSegments[allIndex]++; trueStart = n; break; } } // Count predicted segment starts for (int n = 0; n < segmentStartTags.length; n++) { if (segmentStartTags[n].equals(predOutput.get(j))) { numPredictedSegments[n]++; numPredictedSegments[allIndex]++; predStart = n; } } if (trueStart != -1 && trueStart == predStart) { // Truth and Prediction both agree that the same segment // tag-type is starting now int m; boolean trueContinue = false; boolean predContinue = false; for (m = j + 1; m < trueOutput.size(); m++) { trueContinue = segmentContinueTags[predStart] .equals(trueOutput.get(m)); 
predContinue = segmentContinueTags[predStart] .equals(predOutput.get(m)); if (!trueContinue || !predContinue) { if (trueContinue == predContinue) { // They agree about a segment is ending somehow numCorrectSegments[predStart]++; numCorrectSegments[allIndex]++; } break; } } // for the case of the end of the sequence if (m == trueOutput.size()) { if (trueContinue == predContinue) { numCorrectSegments[predStart]++; numCorrectSegments[allIndex]++; } } } } } DecimalFormat f = new DecimalFormat("0.####"); System.err.println(description + " tokenaccuracy=" + f.format(((double) numCorrectTokens) / totalTokens)); for (int n = 0; n < numCorrectSegments.length; n++) { System.err.println((n < allIndex ? segmentStartTags[n].toString() : "OVERALL") + ' '); double precision = numPredictedSegments[n] == 0 ? 1 : ((double) numCorrectSegments[n]) / numPredictedSegments[n]; double recall = numTrueSegments[n] == 0 ? 1 : ((double) numCorrectSegments[n]) / numTrueSegments[n]; double f1 = recall + precision == 0.0 ? 0.0 : (2.0 * recall * precision) / (recall + precision); System.err.println(" " + description + " segments true=" + numTrueSegments[n] + " pred=" + numPredictedSegments[n] + " correct=" + numCorrectSegments[n] + " misses=" + (numTrueSegments[n] - numCorrectSegments[n]) + " alarms=" + (numPredictedSegments[n] - numCorrectSegments[n])); System.err.println(" " + description + " precision=" + f.format(precision) + " recall=" + f.format(recall) + " f1=" + f.format(f1)); } }