weka.core.SparseInstance Java Examples
The following examples show how to use
weka.core.SparseInstance.
You can vote up the examples you like or vote down the ones you don't like,
and navigate to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example #1
Source File: Model.java From AIDR with GNU Affero General Public License v3.0 | 6 votes |
Instance wordsToInstance(WordSet words) { Instance item = new SparseInstance( attributeSpecification.numAttributes()); item.setDataset(attributeSpecification); // Words for (String word : words.getWords()) { Attribute attribute = attributeSpecification.attribute(word); if (attribute != null) { item.setValue(attribute, 1); } } item.replaceMissingValues(missingVal); return item; }
Example #2
Source File: TweetPreprocessor.java From sentiment-analysis with Apache License 2.0 | 6 votes |
private void setLexiconInstances(){ ArrayList<Attribute> atts = new ArrayList<Attribute>(6); ArrayList<String> classVal = new ArrayList<String>(); classVal.add("positive"); classVal.add("negative"); atts.add(new Attribute("verb")); atts.add(new Attribute("noun")); atts.add(new Attribute("adj")); atts.add(new Attribute("adv")); atts.add(new Attribute("wordnet")); atts.add(new Attribute("polarity")); atts.add(new Attribute("sentimentClassAttribute",classVal)); Instances textRaw = new Instances("TextInstances",atts,0); double[] vals = lp.getProcessed(tweet, tagger); textRaw.add(new SparseInstance(1.0, vals)); lexicon_instances = new Instances(textRaw); }
Example #3
Source File: TweetPreprocessor.java From sentiment-analysis with Apache License 2.0 | 6 votes |
/**Instantiates the complex-based Instances*/ private String getComplexInstances(String processed_text){ ArrayList<Attribute> atts = new ArrayList<Attribute>(2); ArrayList<String> classVal = new ArrayList<String>(); classVal.add("positive"); classVal.add("negative"); atts.add(new Attribute("sentimentClassAttribute",classVal)); atts.add(new Attribute("text",(ArrayList<String>)null)); Instances textRaw = new Instances("TextInstances",atts,0); double[] instanceValue1 = new double[textRaw.numAttributes()]; String tmp_cmplx = cp.getProcessed(processed_text, tagger); instanceValue1[1] = textRaw.attribute(1).addStringValue(tmp_cmplx); textRaw.add(new SparseInstance(1.0, instanceValue1)); complex_instances = new Instances(textRaw); return tmp_cmplx; }
Example #4
Source File: TweetPreprocessor.java From sentiment-analysis with Apache License 2.0 | 6 votes |
/**Instantiates the text-based Instances*/ private String getTextInstances(){ ArrayList<Attribute> atts = new ArrayList<Attribute>(2); ArrayList<String> classVal = new ArrayList<String>(); classVal.add("positive"); classVal.add("negative"); atts.add(new Attribute("sentimentClassAttribute",classVal)); atts.add(new Attribute("text",(ArrayList<String>)null)); Instances textRaw = new Instances("TextInstances",atts,0); double[] instanceValue1 = new double[textRaw.numAttributes()]; String tmp_txt = tp.getProcessed(tweet); instanceValue1[1] = textRaw.attribute(1).addStringValue(tmp_txt); textRaw.add(new SparseInstance(1.0, instanceValue1)); text_instances = new Instances(textRaw); return tmp_txt; }
Example #5
Source File: CLOPE.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * Add instance to cluster */ public void AddInstance(Instance inst) { if (inst instanceof SparseInstance) { // System.out.println("AddSparceInstance"); for (int i = 0; i < inst.numValues(); i++) { AddItem(inst.index(i)); // for(int i=0;i<inst.numAttributes();int++){ // AddItem(inst.index(i)+inst.value(i)); } } else { for (int i = 0; i < inst.numAttributes(); i++) { if (!inst.isMissing(i)) { AddItem(i + inst.toString(i)); } } } this.W = this.occ.size(); this.N++; }
Example #6
Source File: CLOPE.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * Delete instance from cluster */ public void DeleteInstance(Instance inst) { if (inst instanceof SparseInstance) { // System.out.println("DeleteSparceInstance"); for (int i = 0; i < inst.numValues(); i++) { DeleteItem(inst.index(i)); } } else { for (int i = 0; i <= inst.numAttributes() - 1; i++) { if (!inst.isMissing(i)) { DeleteItem(i + inst.toString(i)); } } } this.W = this.occ.size(); this.N--; }
Example #7
Source File: FPGrowth.java From tsml with GNU General Public License v3.0 | 6 votes |
private void processSingleton(Instance current, ArrayList<BinaryItem> singletons) throws Exception { if (current instanceof SparseInstance) { for (int j = 0; j < current.numValues(); j++) { int attIndex = current.index(j); singletons.get(attIndex).increaseFrequency(); } } else { for (int j = 0; j < current.numAttributes(); j++) { if (!current.isMissing(j)) { if (current.attribute(j).numValues() == 1 || current.value(j) == m_positiveIndex - 1) { singletons.get(j).increaseFrequency(); } } } } }
Example #8
Source File: SentimentAnalyser.java From sentiment-analysis with Apache License 2.0 | 6 votes |
/**Decides upon a "disagreed" document by applying the learned model based on the last 1,000 "agreed" documents.*/ private String clarifyOnSlidingWindow(String tweet){ String out = ""; double[] instanceValues = new double[train.numAttributes()]; instanceValues[0] = train.attribute(0).addStringValue(tweet); train.add(new SparseInstance(1.0, instanceValues)); try { stwv.setInputFormat(train); Instances newData = Filter.useFilter(train, stwv); Instances train_ins = new Instances(newData, 0, train.size()-1); Instances test_ins = new Instances(newData, train.size()-1, 1); Classifier mnb = (Classifier)new NaiveBayesMultinomial(); mnb.buildClassifier(train_ins); double[] preds = mnb.distributionForInstance(test_ins.get(0)); if (preds[0]>0.5) out = "positive"; else out = "negative"; } catch (Exception e) { e.printStackTrace(); } train.remove(train.numInstances()-1); return out; }
Example #9
Source File: PartitionMembership.java From tsml with GNU General Public License v3.0 | 6 votes |
/** * Convert a single instance over. The converted instance is added to * the end of the output queue. * * @param instance the instance to convert * @throws Exception if something goes wrong */ protected void convertInstance(Instance instance) throws Exception { // Make copy and set weight to one Instance cp = (Instance)instance.copy(); cp.setWeight(1.0); // Set up values double [] instanceVals = new double[outputFormatPeek().numAttributes()]; double [] vals = m_partitionGenerator.getMembershipValues(cp); System.arraycopy(vals, 0, instanceVals, 0, vals.length); if (instance.classIndex() >= 0) { instanceVals[instanceVals.length - 1] = instance.classValue(); } push(new SparseInstance(instance.weight(), instanceVals)); }
Example #10
Source File: WekaHierarchicalClustering2.java From Java-Data-Analysis with MIT License | 5 votes |
private static Instances load(double[][] data) { ArrayList<Attribute> attributes = new ArrayList<Attribute>(); attributes.add(new Attribute("X")); attributes.add(new Attribute("Y")); Instances dataset = new Instances("Dataset", attributes, M); for (double[] datum : data) { Instance instance = new SparseInstance(2); instance.setValue(0, datum[0]); instance.setValue(1, datum[1]); dataset.add(instance); } return dataset; }
Example #11
Source File: ClusteringTask.java From mzmine3 with GNU General Public License v2.0 | 5 votes |
/** * Creates the weka data set for clustering of variables (metabolites) * * @param rawData Data extracted from selected Raw data files and rows. * @return Weka library data set */ private Instances createVariableWekaDataset(double[][] rawData) { FastVector attributes = new FastVector(); for (int i = 0; i < this.selectedRawDataFiles.length; i++) { String varName = "Var" + i; Attribute var = new Attribute(varName); attributes.addElement(var); } if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) { Attribute name = new Attribute("name", (FastVector) null); attributes.addElement(name); } Instances data = new Instances("Dataset", attributes, 0); for (int i = 0; i < selectedRows.length; i++) { double[] values = new double[data.numAttributes()]; System.arraycopy(rawData[i], 0, values, 0, rawData[0].length); if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) { DecimalFormat twoDForm = new DecimalFormat("#.##"); double MZ = Double.valueOf(twoDForm.format(selectedRows[i].getAverageMZ())); double RT = Double.valueOf(twoDForm.format(selectedRows[i].getAverageRT())); String rowName = "MZ->" + MZ + "/RT->" + RT; values[data.numAttributes() - 1] = data.attribute("name").addStringValue(rowName); } Instance inst = new SparseInstance(1.0, values); data.add(inst); } return data; }
Example #12
Source File: ClusteringTask.java From mzmine2 with GNU General Public License v2.0 | 5 votes |
/** * Creates the weka data set for clustering of samples * * @param rawData Data extracted from selected Raw data files and rows. * @return Weka library data set */ private Instances createSampleWekaDataset(double[][] rawData) { FastVector attributes = new FastVector(); for (int i = 0; i < rawData[0].length; i++) { String varName = "Var" + i; Attribute var = new Attribute(varName); attributes.addElement(var); } if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) { Attribute name = new Attribute("name", (FastVector) null); attributes.addElement(name); } Instances data = new Instances("Dataset", attributes, 0); for (int i = 0; i < rawData.length; i++) { double[] values = new double[data.numAttributes()]; System.arraycopy(rawData[i], 0, values, 0, rawData[0].length); if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) { values[data.numAttributes() - 1] = data.attribute("name").addStringValue(this.selectedRawDataFiles[i].getName()); } Instance inst = new SparseInstance(1.0, values); data.add(inst); } return data; }
Example #13
Source File: ClusteringTask.java From mzmine2 with GNU General Public License v2.0 | 5 votes |
/** * Creates the weka data set for clustering of variables (metabolites) * * @param rawData Data extracted from selected Raw data files and rows. * @return Weka library data set */ private Instances createVariableWekaDataset(double[][] rawData) { FastVector attributes = new FastVector(); for (int i = 0; i < this.selectedRawDataFiles.length; i++) { String varName = "Var" + i; Attribute var = new Attribute(varName); attributes.addElement(var); } if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) { Attribute name = new Attribute("name", (FastVector) null); attributes.addElement(name); } Instances data = new Instances("Dataset", attributes, 0); for (int i = 0; i < selectedRows.length; i++) { double[] values = new double[data.numAttributes()]; System.arraycopy(rawData[i], 0, values, 0, rawData[0].length); if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) { DecimalFormat twoDForm = new DecimalFormat("#.##"); double MZ = Double.valueOf(twoDForm.format(selectedRows[i].getAverageMZ())); double RT = Double.valueOf(twoDForm.format(selectedRows[i].getAverageRT())); String rowName = "MZ->" + MZ + "/RT->" + RT; values[data.numAttributes() - 1] = data.attribute("name").addStringValue(rowName); } Instance inst = new SparseInstance(1.0, values); data.add(inst); } return data; }
Example #14
Source File: NutchOnlineClassifier.java From anthelion with Apache License 2.0 | 5 votes |
/** * Converts an {@link AnthURL} into an {@link Instance} which can be handled * by the {@link Classifier}. * * @param url * the {@link AnthURL} which should be transformed/converted. * @return the resulting {@link Instance}. */ private static Instance convert(AnthURL url) { if (url != null) { Instance inst = new SparseInstance(dimension); inst.replaceMissingValues(replaceMissingValues); inst.setDataset(instances); inst.setValue(attributesIndex.get("class"), (url.sem ? "sem" : "nonsem")); inst.setValue(attributesIndex.get("sempar"), (url.semFather ? 1 : 0)); inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0)); inst.setValue(attributesIndex.get("semsib"), (url.semSibling ? 1 : 0)); inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0)); inst.setValue(attributesIndex.get("domain"), url.uri.getHost()); Set<String> tokens = new HashSet<String>(); tokens.addAll(tokenizer(url.uri.getPath())); tokens.addAll(tokenizer(url.uri.getQuery())); tokens.addAll(tokenizer(url.uri.getFragment())); for (String tok : tokens) { inst.setValue(attributesIndex.get(getAttributeNameOfHash(getHash(tok, hashTrickSize))), 1); } return inst; } else { System.out.println("Input AnthURL for convertion into instance was null."); return null; } }
Example #15
Source File: NutchOnlineClassifier.java From anthelion with Apache License 2.0 | 5 votes |
/** * Converts an {@link AnthURL} into an {@link Instance} which can be handled * by the {@link Classifier}. * * @param url * the {@link AnthURL} which should be transformed/converted. * @return the resulting {@link Instance}. */ private static Instance convert(AnthURL url) { if (url != null) { Instance inst = new SparseInstance(dimension); inst.replaceMissingValues(replaceMissingValues); inst.setDataset(instances); inst.setValue(attributesIndex.get("class"), (url.sem ? "sem" : "nonsem")); inst.setValue(attributesIndex.get("sempar"), (url.semFather ? 1 : 0)); inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0)); inst.setValue(attributesIndex.get("semsib"), (url.semSibling ? 1 : 0)); inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0)); inst.setValue(attributesIndex.get("domain"), url.uri.getHost()); Set<String> tokens = new HashSet<String>(); tokens.addAll(tokenizer(url.uri.getPath())); tokens.addAll(tokenizer(url.uri.getQuery())); tokens.addAll(tokenizer(url.uri.getFragment())); for (String tok : tokens) { inst.setValue(attributesIndex.get(getAttributeNameOfHash(getHash(tok, hashTrickSize))), 1); } return inst; } else { System.out.println("Input AnthURL for convertion into instance was null."); return null; } }
Example #16
Source File: SentimentAnalyser.java From sentiment-analysis with Apache License 2.0 | 5 votes |
/**Decides upon a "disagreed" document by applying the learned model based on the previously build model.*/ private String clarifyOnModel(String tweet){ String out = ""; // get the text-based representation of the document double[] instanceValues = new double[2]; instanceValues[0] = test.attribute(0).addStringValue(tweet); test.add(new SparseInstance(1.0, instanceValues)); try{ stwv.setInputFormat(test); Instances newData = Filter.useFilter(test, stwv); // re-order attributes so that they are compatible with the training set's ones Instances test_instance = reformatText(newData); // find the polarity of the document based on the previously built model test_instance.setClassIndex(0); double[] preds = multiNB.distributionForInstance(test_instance.get(0)); if (preds[0]>0.5) out = "light positive"; else out = "light negative"; } catch (Exception e){ e.printStackTrace(); } test.remove(0); return out; }
Example #17
Source File: TweetPreprocessor.java From sentiment-analysis with Apache License 2.0 | 5 votes |
/**Initializes the feature-based Instances*/ private void getFeatureInstances(){ ArrayList<Attribute> atts = new ArrayList<Attribute>(2); ArrayList<String> classVal = new ArrayList<String>(); classVal.add("positive"); classVal.add("negative"); atts.add(new Attribute("sentimentClassAttribute",classVal)); atts.add(new Attribute("text",(ArrayList<String>)null)); Instances textRaw = new Instances("TextInstances",atts,0); double[] instanceValue1 = new double[textRaw.numAttributes()]; instanceValue1[1] = textRaw.attribute(1).addStringValue(fp.getProcessed(tweet)); textRaw.add(new SparseInstance(1.0, instanceValue1)); feature_instances = new Instances(textRaw); }
Example #18
Source File: KMeans.java From Java-Data-Analysis with MIT License | 5 votes |
private static Instances load(double[][] data) { ArrayList<Attribute> attributes = new ArrayList<Attribute>(); attributes.add(new Attribute("X")); attributes.add(new Attribute("Y")); Instances dataset = new Instances("Dataset", attributes, M); for (double[] datum : data) { Instance instance = new SparseInstance(2); instance.setValue(0, datum[0]); instance.setValue(1, datum[1]); dataset.add(instance); } return dataset; }
Example #19
Source File: ClusteringTask.java From mzmine3 with GNU General Public License v2.0 | 5 votes |
/** * Creates the weka data set for clustering of samples * * @param rawData Data extracted from selected Raw data files and rows. * @return Weka library data set */ private Instances createSampleWekaDataset(double[][] rawData) { FastVector attributes = new FastVector(); for (int i = 0; i < rawData[0].length; i++) { String varName = "Var" + i; Attribute var = new Attribute(varName); attributes.addElement(var); } if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) { Attribute name = new Attribute("name", (FastVector) null); attributes.addElement(name); } Instances data = new Instances("Dataset", attributes, 0); for (int i = 0; i < rawData.length; i++) { double[] values = new double[data.numAttributes()]; System.arraycopy(rawData[i], 0, values, 0, rawData[0].length); if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) { values[data.numAttributes() - 1] = data.attribute("name").addStringValue(this.selectedRawDataFiles[i].getName()); } Instance inst = new SparseInstance(1.0, values); data.add(inst); } return data; }
Example #20
Source File: BagOfPatterns.java From tsml with GNU General Public License v3.0 | 5 votes |
@Override public double classifyInstance(Instance instance) throws Exception { //convert to BOP form double[] hist = bop.bagToArray(bop.buildBag(instance)); //stuff into Instance Instances newInsts = new Instances(matrix, 1); //copy attribute data newInsts.add(new SparseInstance(1.0, hist)); return knn.classifyInstance(newInsts.firstInstance()); }
Example #21
Source File: BagOfPatterns.java From tsml with GNU General Public License v3.0 | 5 votes |
@Override public double[] distributionForInstance(Instance instance) throws Exception { //convert to BOP form double[] hist = bop.bagToArray(bop.buildBag(instance)); //stuff into Instance Instances newInsts = new Instances(matrix, 1); //copy attribute data newInsts.add(new SparseInstance(1.0, hist)); return knn.distributionForInstance(newInsts.firstInstance()); }
Example #22
Source File: FPGrowth.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Inserts a single instance into the FPTree. * * @param current the instance to insert * @param singletons the singleton item sets * @param tree the tree to insert into * @param minSupport the minimum support threshold */ private void insertInstance(Instance current, ArrayList<BinaryItem> singletons, FPTreeRoot tree, int minSupport) { ArrayList<BinaryItem> transaction = new ArrayList<BinaryItem>(); if (current instanceof SparseInstance) { for (int j = 0; j < current.numValues(); j++) { int attIndex = current.index(j); if (singletons.get(attIndex).getFrequency() >= minSupport) { transaction.add(singletons.get(attIndex)); } } Collections.sort(transaction); tree.addItemSet(transaction, 1); } else { for (int j = 0; j < current.numAttributes(); j++) { if (!current.isMissing(j)) { if (current.attribute(j).numValues() == 1 || current.value(j) == m_positiveIndex - 1) { if (singletons.get(j).getFrequency() >= minSupport) { transaction.add(singletons.get(j)); } } } } Collections.sort(transaction); tree.addItemSet(transaction, 1); } }
Example #23
Source File: PrincipalComponents.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Convert a pc transformed instance back to the original space * * @param inst the instance to convert * @return the processed instance * @throws Exception if something goes wrong */ private Instance convertInstanceToOriginal(Instance inst) throws Exception { double[] newVals = null; if (m_hasClass) { newVals = new double[m_numAttribs+1]; } else { newVals = new double[m_numAttribs]; } if (m_hasClass) { // class is always appended as the last attribute newVals[m_numAttribs] = inst.value(inst.numAttributes() - 1); } for (int i = 0; i < m_eTranspose[0].length; i++) { double tempval = 0.0; for (int j = 1; j < m_eTranspose.length; j++) { tempval += (m_eTranspose[j][i] * inst.value(j - 1)); } newVals[i] = tempval; if (!m_center) { newVals[i] *= m_stdDevs[i]; } newVals[i] += m_means[i]; } if (inst instanceof SparseInstance) { return new SparseInstance(inst.weight(), newVals); } else { return new DenseInstance(inst.weight(), newVals); } }
Example #24
Source File: WekaHierarchicalClustering.java From Java-Data-Analysis with MIT License | 5 votes |
private static Instances load(double[][] data) { ArrayList<Attribute> attributes = new ArrayList<Attribute>(); attributes.add(new Attribute("X")); attributes.add(new Attribute("Y")); Instances dataset = new Instances("Dataset", attributes, M); for (double[] datum : data) { Instance instance = new SparseInstance(2); instance.setValue(0, datum[0]); instance.setValue(1, datum[1]); dataset.add(instance); } return dataset; }
Example #25
Source File: PrincipalComponents.java From tsml with GNU General Public License v3.0 | 4 votes |
/**
 * Transform an instance in original (unormalized) format. Convert back
 * to the original space if requested.
 *
 * The instance is pushed through the same preprocessing pipeline used at
 * training time (replace-missing, nominal-to-binary, optional attribute
 * filter, then standardize or center), projected onto the sorted
 * eigenvectors until the requested variance is covered, and finally either
 * returned in PC space or mapped back via convertInstanceToOriginal.
 *
 * @param instance an instance in the original (unormalized) format
 * @return a transformed instance
 * @throws Exception if instance cant be transformed
 */
public Instance convertInstance(Instance instance) throws Exception {
    if (m_eigenvalues == null) {
        throw new Exception("convertInstance: Principal components not "
            +"built yet");
    }
    double[] newVals = new double[m_outputNumAtts];
    Instance tempInst = (Instance)instance.copy();
    if (!instance.dataset().equalHeaders(m_trainHeader)) {
        throw new Exception("Can't convert instance: header's don't match: "
            +"PrincipalComponents\n"
            + instance.dataset().equalHeadersMsg(m_trainHeader));
    }
    // 1) Replace missing values exactly as during training.
    m_replaceMissingFilter.input(tempInst);
    m_replaceMissingFilter.batchFinished();
    tempInst = m_replaceMissingFilter.output();
    /*if (m_normalize) {
      m_normalizeFilter.input(tempInst);
      m_normalizeFilter.batchFinished();
      tempInst = m_normalizeFilter.output();
    }*/
    // 2) Convert nominal attributes to binary indicators.
    m_nominalToBinFilter.input(tempInst);
    m_nominalToBinFilter.batchFinished();
    tempInst = m_nominalToBinFilter.output();
    // 3) Optional attribute removal applied at training time.
    if (m_attributeFilter != null) {
        m_attributeFilter.input(tempInst);
        m_attributeFilter.batchFinished();
        tempInst = m_attributeFilter.output();
    }
    // 4) Standardize, or center only, matching the training configuration.
    if (!m_center) {
        m_standardizeFilter.input(tempInst);
        m_standardizeFilter.batchFinished();
        tempInst = m_standardizeFilter.output();
    } else {
        m_centerFilter.input(tempInst);
        m_centerFilter.batchFinished();
        tempInst = m_centerFilter.output();
    }
    if (m_hasClass) {
        // Class is always the last output attribute.
        newVals[m_outputNumAtts - 1] = instance.value(instance.classIndex());
    }
    // Project onto eigenvectors in decreasing eigenvalue order, stopping once
    // the cumulative variance reaches the configured coverage threshold.
    double cumulative = 0;
    for (int i = m_numAttribs - 1; i >= 0; i--) {
        double tempval = 0.0;
        for (int j = 0; j < m_numAttribs; j++) {
            tempval += (m_eigenvectors[j][m_sortedEigens[i]] * tempInst.value(j));
        }
        newVals[m_numAttribs - i - 1] = tempval;
        cumulative+=m_eigenvalues[m_sortedEigens[i]];
        if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {
            break;
        }
    }
    // Preserve the sparse/dense representation of the input.
    if (!m_transBackToOriginal) {
        if (instance instanceof SparseInstance) {
            return new SparseInstance(instance.weight(), newVals);
        } else {
            return new DenseInstance(instance.weight(), newVals);
        }
    } else {
        if (instance instanceof SparseInstance) {
            return convertInstanceToOriginal(new SparseInstance(instance.weight(), newVals));
        } else {
            return convertInstanceToOriginal(new DenseInstance(instance.weight(), newVals));
        }
    }
}
Example #26
Source File: SAXVSM.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * If skip = one of <0 ... numInstances-1>, will not include instance at that index into the corpus * Part of leave one out cv, while avoiding unnecessary repeats of the BoP transformation */ private Instances tfxidf(Instances bopData, int skip) { int numClasses = bopData.numClasses(); int numInstances = bopData.numInstances(); int numTerms = bopData.numAttributes()-1; //minus class attribute //initialise class weights double[][] classWeights = new double[numClasses][numTerms]; //build class bags int inst = 0; for (Instance in : bopData) { if (inst++ == skip) //skip 'this' one, for leave-one-out cv continue; int classVal = (int)in.classValue(); for (int j = 0; j < numTerms; ++j) { classWeights[classVal][j] += in.value(j); } } //apply tf x idf for (int i = 0; i < numTerms; ++i) { //for each term double df = 0; //document frequency for (int j = 0; j < numClasses; ++j) //find how many classes (documents) this term appears in if (classWeights[j][i] != 0) ++df; if (df != 0) { //if it appears if (df != numClasses) { //but not in all, apply weighting for (int j = 0; j < numClasses; ++j) if (classWeights[j][i] != 0) classWeights[j][i] = Math.log(1 + classWeights[j][i]) * Math.log(numClasses / df); } else { //appears in all //avoid log calculations //if df == num classes -> idf = log(N/df) = log(1) = 0 for (int j = 0; j < numClasses; ++j) classWeights[j][i] = 0; } } } Instances tfxidfCorpus = new Instances(bopData, numClasses); for (int i = 0; i < numClasses; ++i) tfxidfCorpus.add(new SparseInstance(1.0, classWeights[i])); return tfxidfCorpus; }
Example #27
Source File: CLOPE.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Calculate Delta */ public double DeltaAdd(Instance inst, double r) { //System.out.println("DeltaAdd"); int S_new; int W_new; double profit; double profit_new; double deltaprofit; S_new = 0; W_new = occ.size(); if (inst instanceof SparseInstance) { //System.out.println("DeltaAddSparceInstance"); for (int i = 0; i < inst.numValues(); i++) { S_new++; if ((Integer) this.occ.get(inst.index(i)) == null) { W_new++; } } } else { for (int i = 0; i < inst.numAttributes(); i++) { if (!inst.isMissing(i)) { S_new++; if ((Integer) this.occ.get(i + inst.toString(i)) == null) { W_new++; } } } } S_new += S; if (N == 0) { deltaprofit = S_new / Math.pow(W_new, r); } else { profit = S * N / Math.pow(W, r); profit_new = S_new * (N + 1) / Math.pow(W_new, r); deltaprofit = profit_new - profit; } return deltaprofit; }
Example #28
Source File: CLOPE.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Move instance to best cluster */ public int MoveInstanceToBestCluster(Instance inst) { clusters.get(m_clusterAssignments.get(m_processed_InstanceID)).DeleteInstance(inst); m_clusterAssignments.set(m_processed_InstanceID, -1); double delta; double deltamax; int clustermax = -1; int tempS = 0; int tempW = 0; if (inst instanceof SparseInstance) { for (int i = 0; i < inst.numValues(); i++) { tempS++; tempW++; } } else { for (int i = 0; i < inst.numAttributes(); i++) { if (!inst.isMissing(i)) { tempS++; tempW++; } } } deltamax = tempS / Math.pow(tempW, m_Repulsion); for (int i = 0; i < clusters.size(); i++) { CLOPECluster tempcluster = clusters.get(i); delta = tempcluster.DeltaAdd(inst, m_Repulsion); // System.out.println("delta " + delta); if (delta > deltamax) { deltamax = delta; clustermax = i; } } if (clustermax == -1) { CLOPECluster newcluster = new CLOPECluster(); clusters.add(newcluster); newcluster.AddInstance(inst); return clusters.size() - 1; } clusters.get(clustermax).AddInstance(inst); return clustermax; }
Example #29
Source File: AnthOnlineClassifier.java From anthelion with Apache License 2.0 | 4 votes |
/** * Converts an {@link AnthURL} into an {@link Instance} which can be handled * by the {@link Classifier}. * * @param url * the {@link AnthURL} which should be transformed/converted. * @return the resulting {@link Instance}. */ private Instance convert(AnthURL url) { if (url != null) { try { Instance inst = new SparseInstance(dimension); inst.replaceMissingValues(replaceMissingValues); inst.setDataset(instances); inst.setValue(attributesIndex.get("class"), (url.sem ? "sem" : "nonsem")); inst.setValue(attributesIndex.get("sempar"), (url.semFather ? 1 : 0)); inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0)); inst.setValue(attributesIndex.get("semsib"), (url.semSibling ? 1 : 0)); inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0)); inst.setValue(attributesIndex.get("domain"), url.uri.getHost()); Set<String> tokens = new HashSet<String>(); tokens.addAll(tokenizer(url.uri.getPath())); tokens.addAll(tokenizer(url.uri.getQuery())); tokens.addAll(tokenizer(url.uri.getFragment())); for (String tok : tokens) { inst.setValue(attributesIndex .get(getAttributeNameOfHash(getHash(tok, hashTrickSize))), 1); } return inst; } catch (NullPointerException npe) { System.out .println("Could not convert AnthURL into Instance for classification of URL: " + (url != null ? (url.uri != null ? url.uri .toString() : "URI null") : "AnthURL null.")); return null; } } else { System.out .println("Input AnthURL for convertion into instance was null."); return null; } }
Example #30
Source File: ReduceDimensionFilter.java From anthelion with Apache License 2.0 | 4 votes |
/** * Returns the next instances based on the configuration of this class. */ public Instance nextInstance() { Instance inst = this.inputStream.nextInstance(); Instance newInst = new SparseInstance(hashSize + notHashableAttributes.size()); newInst.setDataset(newInstances); newInst.replaceMissingValues(replacementArray); if (newInstances.size() > 0) newInstances.remove(0); // newInstances.add(0, newInst); for (int i = 0; i < inst.numAttributes(); i++) { if (inst.classIndex() == i) { newInst.setValue( attributesIndex.get(inst.classAttribute().name()), inst.classValue()); } else { // check if attributes should be manipulated if (ignoreAttributes.contains(i)) { inst.setValue(i, 0); } if (makeBinaryAttributes.contains(i) && inst.value(i) > 0) { inst.setValue(i, 1); } // check what should be done with the attributes. if (notHashableAttributes.contains(i)) { newInst.setValue( attributesIndex.get(inst.attribute(i).name()), inst.value(i)); } else { // calculate the hash of the attribute name which is // included in // the vector and set it to 1 if (inst.value(i) > 0) { newInst.setValue(attributesIndex .get(getAttributeNameOfHash(getHash(inst .attribute(i).name(), hashSize))), 1); } } } } // System.out.println(newInst.toString()); return newInst; }