Java Code Examples for cc.mallet.types.Instance#getTarget()

The following examples show how to use cc.mallet.types.Instance#getTarget(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: EngineMBPythonNetworksBase.java    From gateplugin-LearningFramework with GNU Lesser General Public License v2.1 5 votes vote down vote up
protected AbstractMap.SimpleEntry<String,Integer> findOutMode(CorpusRepresentationMalletTarget crm)  {
  InstanceList instances = crm.getRepresentationMallet();
  // we pass on a "mode" for the learning problem, which is one of the following:
  // - classind: predict the index of a class
  // - classcosts: targets are vectors of class costs
  // - regr: regression
  // we also pass on another parameter which provides details of the learning problem:
  // - the number of class indices in case of classind and classcosts
  // - 0 as a dummy value in case of "regr"
  
  int nrClasses = 0;
  String mode = "regr";
  Alphabet ta = crm.getPipe().getTargetAlphabet();
  
  if(ta != null) {
    // if this is invoked for training, we should have a first instance, but for 
    // application, we do not have any instances yet. If we do not have any instances, we 
    // just use dummy values for now since at the moment we do not need this information
    // at application time. Should we ever need it we need to store this in the pipe!
    if(instances==null || instances.isEmpty()) {
      mode="classind";
      nrClasses=-1;
    } else {
      Instance firstInstance = instances.get(0);
      Object targetObj = firstInstance.getTarget();
      if(targetObj instanceof NominalTargetWithCosts) {
        NominalTargetWithCosts target = (NominalTargetWithCosts)targetObj;
        nrClasses = target.getCosts().length;
        mode = "classcosts";
      } else {
        mode = "classind";
        nrClasses = ta.size();
      }
    }
  } 
  AbstractMap.SimpleEntry<String,Integer> ret = new AbstractMap.SimpleEntry<>(mode,nrClasses);
  return ret;
}
 
Example 2
Source File: CorpusRepresentationLibSVM.java    From gateplugin-LearningFramework with GNU Lesser General Public License v2.1 5 votes vote down vote up
/**
 * Create libsvm representation from Mallet.
 *
 * @param crm mallet representation
 * @return libsvm representation
 */
public static svm_problem getFromMallet(CorpusRepresentationMallet crm) {
  InstanceList instances = crm.getRepresentationMallet();
  svm_problem prob = new svm_problem();
  int numTrainingInstances = instances.size();
  prob.l = numTrainingInstances;
  prob.y = new double[prob.l];
  prob.x = new svm_node[prob.l][];

  for (int i = 0; i < numTrainingInstances; i++) {
    Instance instance = instances.get(i);

    //Labels
    // convert the target: if we get a label, convert to index,
    // if we get a double, use it directly
    Object tobj = instance.getTarget();
    if (tobj instanceof Label) {
      prob.y[i] = ((Label) instance.getTarget()).getIndex();
    } else if (tobj instanceof Double) {
      prob.y[i] = (double) tobj;
    } else {
      throw new GateRuntimeException("Odd target in mallet instance, cannot convert to LIBSVM: " + tobj);
    }

    //Features
    SparseVector data = (SparseVector) instance.getData();
    int[] indices = data.getIndices();
    double[] values = data.getValues();
    prob.x[i] = new svm_node[indices.length];
    for (int j = 0; j < indices.length; j++) {
      svm_node node = new svm_node();
      node.index = indices[j]+1; // NOTE: LibSVM location indices have to start with 1
      node.value = values[j];
      prob.x[i][j] = node;
    }
  }
  return prob;
}
 
Example 3
Source File: CorpusExporterMRJsonTarget.java    From gateplugin-LearningFramework with GNU Lesser General Public License v2.1 5 votes vote down vote up
/**
 * Convert instance to string. 
 * 
 * @param inst instance
 * @param targetAlphabet target alphabet
 * @param attrs attributes
 * @param nrFeatures number of features
 * @param asString represent as quoted string
 * @param filterMV filter missing values
 * @return string representation
 */
public String instance2String(
        Instance inst,
        LabelAlphabet targetAlphabet,
        Attributes attrs,
        int nrFeatures,
        boolean asString,
        boolean filterMV) {
  StringBuilder sb = new StringBuilder();
  sb.append("[");  // outermost list 
  FeatureVector fv = (FeatureVector)inst.getData();
  Object targetObject = inst.getTarget();
  if (filterMV) {
    Object ignore = inst.getProperty(FeatureExtractionMalletSparse.PROP_IGNORE_HAS_MV);
    if (ignore != null && ignore.equals(true)) {
        return null;
    }
  }
  sb.append(featureVector2String(fv, nrFeatures, attrs, asString));
  // for now, we always try to output the target, even if it is null, this may change 
  // in the future
  if (targetObject!=null) {
    sb.append(", ");
    sb.append(target2String(targetObject, targetAlphabet, asString));
  }
  sb.append("]");  // close outer list
  return sb.toString();
}
 
Example 4
Source File: CorpusExporterMRARFF.java    From gateplugin-LearningFramework with GNU Lesser General Public License v2.1 4 votes vote down vote up
private String instance2WekaArffLine(Instance inst, Attributes attrs, boolean filterMVs) {
  StringBuilder sb = new StringBuilder();
  
  if(filterMVs) {
    Object ignore = inst.getProperty(FeatureExtractionMalletSparse.PROP_IGNORE_HAS_MV);    
    // If the flag says the instance should get ignored, return null
    // to indicate to the caller that this is an ignored instance.
    if(ignore != null && ignore.equals(true)) {
      return null;
    }
  }
  Double instanceWeight = (Double)inst.getProperty("instanceWeight");
  Object data = inst.getData();
  if(data instanceof FeatureVector) {
    FeatureVector vector = (FeatureVector)data;
    sb.append("{");
    boolean first = true;
    // TODO: maybe it is easier to do 
    // for(int idx : vector.getIndices) 
    for(int i=0; i<vector.numLocations(); i++) {   
      int idx = vector.indexAtLocation(i);
      if(first) { 
        first = false;
      } else {
        sb.append(", ");
      } 
      sb.append(idx);
      sb.append(" ");
      double value = vector.valueAtLocation(i);
      if(Double.isNaN(value)) {
        sb.append("?");
      } else {
        // TODO: proper handling of missing values!!!
        // Also: codeas may be null sometimes, make sure if we have a datatype
        // where codeas is relevant, we ALWAYS have codeas set to the correct value!
        Attribute attr = attrs.getAttribute(idx);
        if(attr.datatype==Datatype.numeric || (attr.datatype==Datatype.nominal && attr.codeAs!=CodeAs.number)) {
          sb.append(value);
        } else if(attr.datatype==Datatype.bool) {
          // TODO: check for missing value, also use the special alphabet we created?
          if(value<0.5) { sb.append("false"); } else { sb.append("true"); }
        } else if(attr.datatype==Datatype.nominal) {
          // TODO: check for how to exactly handling missing values, for now we simply output
          // the Weka missing value placeholder
          if(((int)value)==-1) {
            sb.append("?");
          } else {
            sb.append(escape4Arff((String)attr.alphabet.lookupObject((int) value)));
          }
        } else {
          // guard for forgetting about here when we add datatypes later
          sb.append("GOTCHA!!!! DATATYPE NOT SUPPORTED IN THE EXPORT CODE");
        }                  
      }
    } // for 
    // Now also add location and value for the target, if we have one
    Object target = inst.getTarget();
    if(target!=null) {
      Attribute targetAttr = attrs.getTargetAttribute();
      sb.append(", ");        
      sb.append(targetAttr.index);
      sb.append(" ");
      // we expect this to be either a Label instance or something that can be cast to double
      if(target instanceof Label) {
        if(targetAttr.datatype != Datatype.nominal) {
          throw new RuntimeException("Target is a label but datatype for attribute is not nominal");
        }
        Label malletLabel = (Label)target;
        String targetString = malletLabel.toString();
        sb.append(escape4Arff(targetString));
        // TODO: could check here if the label index is the same as expected from
        // the attribute defintion!
      } else {
        if(targetAttr.datatype != Datatype.numeric) {
          throw new RuntimeException("Target is a number but datatype for attribute is not  numeric");
        }
        double targetValue = (double)target;
        sb.append(targetValue);
      }
    } else {
      // target is null: do nothing, simply create the row without a target 
      // TODO: not sure what I was thinking here, but admittedly, exporting without
      // a target or a missing target could have its uses, so we leave this as it is
    }
    sb.append("}");
    if(instanceWeight!=null) {
      sb.append(", {");
      sb.append(instanceWeight);
      sb.append("}");
    }
  } else {
    throw new RuntimeException("Cannot export, instance is not a feature vector but "+data.getClass());
  }
  return sb.toString();
}
 
Example 5
Source File: MultiSegmentationEvaluator.java    From bluima with Apache License 2.0 4 votes vote down vote up
public void evaluateInstanceList(TransducerTrainer tt, InstanceList data,
        String description) {
    Transducer model = tt.getTransducer();
    int numCorrectTokens, totalTokens;
    int[] numTrueSegments, numPredictedSegments, numCorrectSegments;
    int allIndex = segmentStartTags.length;
    numTrueSegments = new int[allIndex + 1];
    numPredictedSegments = new int[allIndex + 1];
    numCorrectSegments = new int[allIndex + 1];

    totalTokens = numCorrectTokens = 0;
    for (int n = 0; n < numTrueSegments.length; n++)
        numTrueSegments[n] = numPredictedSegments[n] = numCorrectSegments[n] = 0;
    for (int i = 0; i < data.size(); i++) {
        Instance instance = data.get(i);
        Sequence input = (Sequence) instance.getData();
        // String tokens = null;
        // if (instance.getSource() != null)
        // tokens = (String) instance.getSource().toString();
        Sequence trueOutput = (Sequence) instance.getTarget();
        assert (input.size() == trueOutput.size());
        Sequence predOutput = model.transduce(input);
        assert (predOutput.size() == trueOutput.size());
        int trueStart, predStart; // -1 for non-start, otherwise index into
                                  // segmentStartTag
        for (int j = 0; j < trueOutput.size(); j++) {
            totalTokens++;
            if (trueOutput.get(j).equals(predOutput.get(j)))
                numCorrectTokens++;
            trueStart = predStart = -1;
            // Count true segment starts
            for (int n = 0; n < segmentStartTags.length; n++) {
                if (segmentStartTags[n].equals(trueOutput.get(j))) {
                    numTrueSegments[n]++;
                    numTrueSegments[allIndex]++;
                    trueStart = n;
                    break;
                }
            }
            // Count predicted segment starts
            for (int n = 0; n < segmentStartTags.length; n++) {
                if (segmentStartTags[n].equals(predOutput.get(j))) {
                    numPredictedSegments[n]++;
                    numPredictedSegments[allIndex]++;
                    predStart = n;
                }
            }
            if (trueStart != -1 && trueStart == predStart) {
                // Truth and Prediction both agree that the same segment
                // tag-type is starting now
                int m;
                boolean trueContinue = false;
                boolean predContinue = false;
                for (m = j + 1; m < trueOutput.size(); m++) {
                    trueContinue = segmentContinueTags[predStart]
                            .equals(trueOutput.get(m));
                    predContinue = segmentContinueTags[predStart]
                            .equals(predOutput.get(m));
                    if (!trueContinue || !predContinue) {
                        if (trueContinue == predContinue) {
                            // They agree about a segment is ending somehow
                            numCorrectSegments[predStart]++;
                            numCorrectSegments[allIndex]++;
                        }
                        break;
                    }
                }
                // for the case of the end of the sequence
                if (m == trueOutput.size()) {
                    if (trueContinue == predContinue) {
                        numCorrectSegments[predStart]++;
                        numCorrectSegments[allIndex]++;
                    }
                }
            }
        }
    }
    DecimalFormat f = new DecimalFormat("0.####");
    System.err.println(description + " tokenaccuracy="
            + f.format(((double) numCorrectTokens) / totalTokens));
    for (int n = 0; n < numCorrectSegments.length; n++) {
        System.err.println((n < allIndex ? segmentStartTags[n].toString()
                : "OVERALL") + ' ');
        double precision = numPredictedSegments[n] == 0 ? 1
                : ((double) numCorrectSegments[n])
                        / numPredictedSegments[n];
        double recall = numTrueSegments[n] == 0 ? 1
                : ((double) numCorrectSegments[n]) / numTrueSegments[n];
        double f1 = recall + precision == 0.0 ? 0.0
                : (2.0 * recall * precision) / (recall + precision);
        System.err.println(" " + description + " segments true="
                + numTrueSegments[n] + " pred=" + numPredictedSegments[n]
                + " correct=" + numCorrectSegments[n] + " misses="
                + (numTrueSegments[n] - numCorrectSegments[n]) + " alarms="
                + (numPredictedSegments[n] - numCorrectSegments[n]));
        System.err.println(" " + description + " precision="
                + f.format(precision) + " recall=" + f.format(recall)
                + " f1=" + f.format(f1));
    }

}