weka.core.Attribute#isNumeric

Source File: ContractRotationForest.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Appends {@code numInstances} randomly generated instances to the dataset.
 * <p>
 * For each new instance, numeric attributes are set to {@code random.nextDouble()}
 * and nominal attributes to a uniformly drawn value index. Attributes of any
 * other type keep the array default of 0. Every instance is created with weight 1.
 *
 * @param dataset the dataset the instances are added to
 * @param numInstances the number of instances to add
 * @param random a random number generator
 */
protected void addRandomInstances(Instances dataset, int numInstances,
                                  Random random) {
  int n = dataset.numAttributes();
  for (int i = 0; i < numInstances; i++) {
    // Allocate a fresh value array for every instance. DenseInstance(weight, values)
    // keeps a reference to the array it is given (see the Weka API docs), so reusing
    // one array across iterations would make every added instance end up with the
    // values written for the last one.
    double[] v = new double[n];
    for (int j = 0; j < n; j++) {
      Attribute att = dataset.attribute(j);
      if (att.isNumeric()) {
        v[j] = random.nextDouble();
      } else if (att.isNominal()) {
        v[j] = random.nextInt(att.numValues());
      }
    }
    dataset.add(new DenseInstance(1, v));
  }
}

Source File: ActiveHNode.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Updates this node with a training instance: first the class distribution
 * (via {@code super.updateDistribution}), then the conditional sufficient
 * statistics of every non-class attribute.
 * <p>
 * Statistics are kept per attribute name and created on first use: Gaussian
 * statistics for numeric attributes, nominal statistics for everything else.
 *
 * @param inst the instance to update the node with
 * @throws Exception if updating the distribution fails
 */
@Override
public void updateNode(Instance inst) throws Exception {
  super.updateDistribution(inst);

  for (int idx = 0; idx < inst.numAttributes(); idx++) {
    if (idx == inst.classIndex()) {
      continue; // the class attribute carries no conditional statistics
    }

    Attribute current = inst.attribute(idx);
    String key = current.name();

    ConditionalSufficientStats attStats = m_nodeStats.get(key);
    if (attStats == null) {
      if (current.isNumeric()) {
        attStats = new GaussianConditionalSufficientStats();
      } else {
        attStats = new NominalConditionalSufficientStats();
      }
      m_nodeStats.put(key, attStats);
    }

    double attValue = inst.value(current);
    String classLabel = inst.classAttribute().value((int) inst.classValue());
    attStats.update(attValue, classLabel, inst.weight());
  }
}

Source File: Test.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Returns the test represented by a string in Prolog notation.
 * <p>
 * Numeric attributes render as {@code "<name> < <split>"}, or
 * {@code "<name> >= <split>"} when the test is negated, with the split value
 * formatted to 3 decimals. For other attributes the value at index
 * {@code (int) m_Split} is looked up: the value {@code "false"} renders as
 * {@code "not(<name>)"}, any other value as the bare attribute name.
 *
 * @return a string representing the test in Prolog notation
 */
public String toPrologString() {
  Attribute att = m_Dataset.attribute(m_AttIndex);
  String attName = att.name();

  if (att.isNumeric()) {
    return attName + " " + (m_Not ? ">= " : "< ") + Utils.doubleToString(m_Split, 3);
  }

  String value = att.value((int) m_Split);
  // Compare by content: '==' only tests object identity, which fails for
  // attribute values that are not the interned literal (e.g. parsed from a file).
  return "false".equals(value) ? "not(" + attName + ")" : attName;
}

Source File: RDG1.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Builds the list of tests that make up a new decision-list rule, one test
 * per attribute that is not flagged as irrelevant.
 * <p>
 * For a numeric attribute the split point is drawn with
 * {@code random.nextDouble()} and the test is negated when the split lies
 * below the example's value. For any other attribute the test uses the
 * example's own value and is never negated.
 *
 * @param random random number generator
 * @param example the instance the tests are derived from
 * @return a list of tests
 * @throws Exception if dataset format not defined
 */
private FastVector generateTestList(Random random, Instance example)
    throws Exception {

  Instances format = getDatasetFormat();
  if (format == null) {
    throw new Exception("Dataset format not defined.");
  }

  FastVector tests = new FastVector(getNumAttributes() - getNumIrrelevant());
  boolean[] skip = getAttList_Irr();

  for (int attIdx = 0; attIdx < getNumAttributes(); attIdx++) {
    if (skip[attIdx]) {
      continue;
    }

    Test generated;
    if (example.attribute(attIdx).isNumeric()) {
      double split = random.nextDouble();
      boolean negate = split < example.value(attIdx);
      generated = new Test(attIdx, split, format, negate);
    } else {
      generated = new Test(attIdx, example.value(attIdx), format, false);
    }
    tests.addElement(generated);
  }

  return tests;
}

Source File: MekaInstancesUtil.java From AILibs with GNU Affero General Public License v3.0

5 votes

/**
 * Converts a WEKA attribute into the corresponding {@link IAttribute}.
 * Numeric attributes become a {@code NumericAttribute}; nominal attributes
 * become an {@code IntBasedCategoricalAttribute} whose domain lists the
 * nominal values in index order.
 *
 * @param att the WEKA attribute to convert
 * @return the converted attribute
 * @throws IllegalArgumentException if the attribute is neither numeric nor nominal
 */
public static IAttribute transformWEKAAttributeToAttributeType(final Attribute att) {
	final String name = att.name();
	if (att.isNumeric()) {
		return new NumericAttribute(name);
	}
	if (!att.isNominal()) {
		throw new IllegalArgumentException("Can only transform numeric or categorical attributes");
	}

	final List<String> categories = new LinkedList<>();
	int valueIdx = 0;
	while (valueIdx < att.numValues()) {
		categories.add(att.value(valueIdx));
		valueIdx++;
	}
	return new IntBasedCategoricalAttribute(name, categories);
}

Source File: WekaUtil.java From AILibs with GNU Affero General Public License v3.0

5 votes

/**
 * Checks whether every attribute returned by {@code getAttributes(instances, false)}
 * is numeric.
 *
 * @param instances the dataset to inspect
 * @return {@code true} if no non-numeric attribute was found (also when there are
 *         no attributes at all), {@code false} otherwise
 */
public static boolean hasOnlyNumericAttributes(final Instances instances) {
	boolean allNumeric = true;
	for (Attribute candidate : getAttributes(instances, false)) {
		if (!candidate.isNumeric()) {
			allNumeric = false;
			break; // one counter-example is enough
		}
	}
	return allNumeric;
}

Source File: WekaInstancesUtil.java From AILibs with GNU Affero General Public License v3.0

5 votes

/**
 * Maps a WEKA attribute onto the matching {@link IAttribute} type.
 * A numeric attribute yields a {@code NumericAttribute}; a nominal attribute
 * yields an {@code IntBasedCategoricalAttribute} whose domain contains the
 * nominal values in index order.
 *
 * @param att the WEKA attribute to transform
 * @return the transformed attribute
 * @throws IllegalArgumentException if the attribute is neither numeric nor nominal
 */
public static IAttribute transformWEKAAttributeToAttributeType(final Attribute att) {
	final String label = att.name();

	if (att.isNumeric()) {
		return new NumericAttribute(label);
	}

	if (att.isNominal()) {
		final List<String> values = new LinkedList<>();
		for (int pos = 0; pos < att.numValues(); pos++) {
			values.add(att.value(pos));
		}
		return new IntBasedCategoricalAttribute(label, values);
	}

	throw new IllegalArgumentException("Can only transform numeric or categorical attributes");
}

Source File: Test.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Gives a string representation of the test in Prolog notation, starting
 * from the comparison symbol.
 * <p>
 * Numeric attributes yield {@code "< split"} or, when negated, {@code ">= split"}.
 * Attributes with exactly two values always yield {@code "= value"}, where a
 * negated test names the other value. All remaining attributes yield
 * {@code "= value"} or, when negated, {@code "!= value"}.
 *
 * @return a string representing the test in Prolog notation
 */
private String testPrologComparisonString() {
  final Attribute att = m_Dataset.attribute(m_AttIndex);

  if (att.isNumeric()) {
    final String operator = m_Not ? ">= " : "< ";
    return operator + Utils.doubleToString(m_Split, 3);
  }

  final int splitIndex = (int) m_Split;
  if (att.numValues() != 2) {
    final String operator = m_Not ? "!= " : "= ";
    return operator + att.value(splitIndex);
  }

  // Two-valued attribute: express negation by naming the opposite value.
  int shownIndex = splitIndex;
  if (m_Not) {
    shownIndex = (splitIndex == 0) ? 1 : 0;
  }
  return "= " + att.value(shownIndex);
}

Source File: Test.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Gives a string representation of the test, starting from the comparison
 * symbol.
 * <p>
 * Numeric attributes give {@code "< split"} ({@code ">= split"} when negated).
 * For an attribute with exactly two values the result is always an equality,
 * and negation is rendered by printing the other value. Every other attribute
 * gives {@code "= value"} ({@code "!= value"} when negated).
 *
 * @return a string representing the test
 */
private String testComparisonString() {
  final Attribute att = m_Dataset.attribute(m_AttIndex);

  if (att.isNumeric()) {
    return (m_Not ? ">= " : "< ") + Utils.doubleToString(m_Split, 3);
  }

  final int chosen = (int) m_Split;
  final boolean binary = att.numValues() == 2;

  if (!binary) {
    return (m_Not ? "!= " : "= ") + att.value(chosen);
  }
  if (m_Not) {
    // negated binary test: name the complementary value instead
    return "= " + att.value(chosen == 0 ? 1 : 0);
  }
  return "= " + att.value(chosen);
}

Source File: BinaryItem.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Constructor.
 * 
 * @param att the attribute that backs this item.
 * @param valueIndex the index of the value for this item.
 * @throws Exception if the backing attribute is not binary or unary.
 */
public BinaryItem(Attribute att, int valueIndex) throws Exception {
  super(att, valueIndex);
  
  if (att.isNumeric() || (att.isNominal() && att.numValues() > 2)) {
    throw new Exception("BinaryItem must be constructed using a nominal attribute" +
    		" with at most 2 values!");
  }
}

Source File: NominalItem.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Creates a nominal item backed by the given attribute.
 * <p>
 * For an attribute with a single value the supplied {@code valueIndex} is
 * ignored and index 0 is used.
 *
 * @param att the attribute that backs the item.
 * @param valueIndex the index of the value for this item.
 * @throws Exception if the attribute is numeric.
 */
public NominalItem(Attribute att, int valueIndex) throws Exception {

  super(att);

  if (att.isNumeric()) {
    throw new Exception("NominalItem must be constructed using a nominal attribute");
  }

  m_attribute = att;
  // unary attribute (? used to indicate absence from a basket) always maps to index 0
  m_valueIndex = (att.numValues() == 1) ? 0 : valueIndex;
}

Source File: Analyzer.java From NLIWOD with GNU Affero General Public License v3.0

5 votes

/**
 * Runs every configured analyzer on the question and collects the results
 * in a single feature vector.
 * <p>
 * {@code PartOfSpeechTags} and {@code Dependencies} analyzers fill several
 * attributes at once and are delegated to {@code analyzePOS} and
 * {@code analyzeDeps}. Every other analyzer contributes one value for its own
 * attribute: a double for numeric attributes, a string for nominal or string
 * attributes. Analyzers whose attribute has another type are ignored.
 *
 * @param q question string
 * @return feature vector for the input question
 */
public Instance analyze(String q) {
	Instance features = new DenseInstance(fvWekaAttributes.size());

	for (IAnalyzer analyzer : analyzers) {
		if (analyzer instanceof PartOfSpeechTags) {
			// special case for PartOfSpeechTags, need to set 36 attributes
			analyzePOS(features, (PartOfSpeechTags) analyzer, q);
		} else if (analyzer instanceof Dependencies) {
			// special case for Dependencies, need to set 18 attributes
			analyzeDeps(features, (Dependencies) analyzer, q);
		} else {
			Attribute target = analyzer.getAttribute();
			if (target.isNumeric()) {
				features.setValue(target, (double) analyzer.analyze(q));
			} else if (target.isNominal() || target.isString()) {
				String text = (String) analyzer.analyze(q);
				features.setValue(target, text);
				// NOTE(review): the dataset reference is cleared after each string value;
				// the reason is not visible here - confirm before changing.
				features.setDataset(null);
			}
		}
	}
	return features;
}

Source File: Ridor.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Build one rule using the growing data.
 * <p>
 * Antecedents are added greedily: in every round the not-yet-used attribute
 * whose antecedent has the highest strictly positive information gain is
 * chosen, appended to {@code m_Antds}, and the growing data is reduced to the
 * instances that antecedent covers. Growing stops when no antecedent yields a
 * positive gain, when the remaining data has zero weight, when the accuracy
 * rate reaches 1.0, or when no unused attribute is left.
 *
 * @param data the growing data used to build the rule
 */
private void grow(Instances data) {
  Instances growData = new Instances(data);

  m_AccuG = computeDefAccu(growData);
  m_CoverG = growData.sumOfWeights();
  /* Compute the default accurate rate of the growing data */
  double defAcRt = m_AccuG / m_CoverG;

  /* Keep the record of which attributes have already been used,
     indexed by Attribute.index(); a new boolean[] starts out all false */
  boolean[] used = new boolean[growData.numAttributes()];
  // NOTE(review): this count covers every attribute of the dataset; if the
  // enumeration below skips the class attribute it can never reach 0 - confirm.
  int numUnused = used.length;

  boolean isContinue = true; // The stopping criterion of this rule

  while (isContinue) {
    double maxInfoGain = 0; // We require that infoGain be positive

    /* Best antecedent of this round and the data it covers */
    Antd oneAntd = null;
    Instances coverData = null;
    Enumeration enumAttr = growData.enumerateAttributes();

    /* Build one condition based on all attributes not used yet */
    while (enumAttr.hasMoreElements()) {
      Attribute att = (Attribute) (enumAttr.nextElement());

      // Look the flag up by the attribute's own index - the same key that is
      // written below - rather than by the position in the enumeration. The
      // two differ whenever the enumeration skips an attribute in the middle.
      if (used[att.index()]) {
        continue;
      }

      Antd antd;
      if (att.isNumeric()) {
        antd = new NumericAntd(att);
      } else {
        antd = new NominalAntd(att);
      }

      /* Compute the best information gain for each attribute,
         it's stored in the antecedent formed by this attribute.
         This procedure returns the data covered by the antecedent */
      Instances coveredData = computeInfoGain(growData, defAcRt, antd);
      if (coveredData != null) {
        double infoGain = antd.getMaxInfoGain();
        if (Utils.gr(infoGain, maxInfoGain)) {
          oneAntd = antd;
          coverData = coveredData;
          maxInfoGain = infoGain;
        }
      }
    }

    if (oneAntd == null) {
      return;
    }

    // Numeric attributes can be used more than once
    if (!oneAntd.getAttr().isNumeric()) {
      used[oneAntd.getAttr().index()] = true;
      numUnused--;
    }

    m_Antds.addElement((Object) oneAntd);
    growData = coverData; // Grow data size is shrinking

    defAcRt = oneAntd.getAccuRate();

    /* Stop if no more data, rule perfect, no more attributes */
    if (Utils.eq(growData.sumOfWeights(), 0.0) || Utils.eq(defAcRt, 1.0) || (numUnused == 0)) {
      isContinue = false;
    }
  }
}

Source File: InputMappedClassifier.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Builds an instance in the model's format from an incoming instance.
 * <p>
 * The attribute mapping is regenerated (and, unless suppressed, a mapping
 * report is printed to standard output) when no input header is known yet,
 * when the incoming header differs from the stored one, or when no mapping
 * exists. Each model attribute is then filled as follows:
 * <ul>
 * <li>status other than {@code OK}, or missing incoming value: missing;</li>
 * <li>numeric model attribute: the incoming value is copied;</li>
 * <li>nominal model attribute: the incoming value index is translated through
 * the nominal value map, becoming missing when there is no match;</li>
 * <li>any other attribute type: left at the array default of 0.</li>
 * </ul>
 *
 * @param incoming the instance to map
 * @return a new instance with the incoming weight, attached to the model header
 * @throws Exception if regenerating the mapping fails
 */
public Instance constructMappedInstance(Instance incoming) throws Exception {

  boolean headerChanged = m_inputHeader == null
    || !m_inputHeader.equalHeaders(incoming.dataset());
  if (headerChanged) {
    m_inputHeader = incoming.dataset();
  }

  if (headerChanged || m_attributeMap == null) {
    m_initialTestStructureKnown = false;
    regenerateMapping();

    if (!m_suppressMappingReport) {
      StringBuffer result = createMappingReport();
      System.out.println(result.toString());
    }
  }

  m_vals = new double[m_modelHeader.numAttributes()];

  for (int i = 0; i < m_modelHeader.numAttributes(); i++) {
    if (m_attributeStatus[i] != OK) {
      m_vals[i] = Utils.missingValue();
      continue;
    }

    // Read the mapped incoming value once; it is needed by every branch below.
    double incomingValue = incoming.value(m_attributeMap[i]);
    if (Utils.isMissingValue(incomingValue)) {
      m_vals[i] = Utils.missingValue();
      continue;
    }

    Attribute modelAtt = m_modelHeader.attribute(i);
    if (modelAtt.isNumeric()) {
      m_vals[i] = incomingValue;
    } else if (modelAtt.isNominal()) {
      int mapVal = m_nominalValueMap[i][(int) incomingValue];

      if (mapVal == NO_MATCH) {
        m_vals[i] = Utils.missingValue();
      } else {
        m_vals[i] = mapVal;
      }
    }
  }

  Instance newInst = new DenseInstance(incoming.weight(), m_vals);
  newInst.setDataset(m_modelHeader);

  return newInst;
}

Source File: InputMappedClassifier.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Creates a two-column text report that lists, for every model attribute,
 * the incoming attribute it is mapped to.
 * <p>
 * The left column is padded to the longest model attribute name plus 12
 * characters (at least 16). Each row shows the model attribute tagged as
 * "(numeric)" or "(nominal)", followed by either the 1-based incoming
 * attribute number with its tag and name, or a "missing" note when there is
 * no match or the types do not match. Every non-numeric attribute is tagged
 * "(nominal)".
 *
 * @return the report text
 */
private StringBuffer createMappingReport() {
  StringBuffer report = new StringBuffer();
  report.append("Attribute mappings:\n\n");

  // Width of the left column: longest model attribute name plus room for the type tag.
  int longestName = 0;
  for (int i = 0; i < m_modelHeader.numAttributes(); i++) {
    longestName = Math.max(longestName, m_modelHeader.attribute(i).name().length());
  }
  int columnWidth = Math.max(longestName + 12, 16);

  String header = getFixedLengthString("Model attributes", ' ', columnWidth);
  String separator = getFixedLengthString("----------------", '-', columnWidth);
  separator = separator + "\t    ----------------\n";
  header = header + "\t    Incoming attributes\n";
  report.append(header);
  report.append(separator);

  for (int i = 0; i < m_modelHeader.numAttributes(); i++) {
    Attribute modelAtt = m_modelHeader.attribute(i);
    String modelTag = modelAtt.isNumeric() ? "numeric)" : "nominal)";
    String left = getFixedLengthString("(" + modelTag + " " + modelAtt.name(), ' ', columnWidth);
    report.append(left + "\t--> ");

    if (m_attributeStatus[i] == NO_MATCH) {
      report.append("- " + "missing (no match)\n");
    } else if (m_attributeStatus[i] == TYPE_MISMATCH) {
      report.append((m_attributeMap[i] + 1) + " " + "missing (type mis-match)\n");
    } else {
      Attribute inAtt = m_inputHeader.attribute(m_attributeMap[i]);
      String inTag = inAtt.isNumeric() ? "numeric)" : "nominal)";
      report.append("" + (m_attributeMap[i] + 1) + " (" + inTag + " " + inAtt.name() + "\n");
    }
  }

  return report;
}

Source File: BFTree.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Splits the data into two subsets and stores the sorted indices and weights
 * for the two successor nodes.
 * <p>
 * For every non-class attribute the sorted instance order is walked once.
 * An instance with a missing value for {@code att} goes to every branch whose
 * proportion in {@code m_Props} is positive, with its weight scaled by that
 * proportion. Otherwise it goes to branch 0 when its value is below
 * {@code splitPoint} (numeric) or when {@code "(value)"} occurs in
 * {@code splitStr} (nominal), and to branch 1 if not. The result arrays are
 * finally trimmed to the number of entries actually written.
 *
 * @param subsetIndices 	sorted indices of instances for each attribute for the two successor nodes
 * @param subsetWeights 	weights of instances for each attribute for the two successor nodes
 * @param att 		attribute the split is based on
 * @param splitPoint 		split point used if att is numeric
 * @param splitStr 		split subset used if att is nominal
 * @param sortedIndices 	sorted indices of the instances to be split
 * @param weights 		weights of the instances to be split
 * @param data 		training data
 * @throws Exception 		if something goes wrong
 */
protected void splitData(int[][][] subsetIndices, double[][][] subsetWeights,
    Attribute att, double splitPoint, String splitStr, int[][] sortedIndices,
    double[][] weights, Instances data) throws Exception {

  // For each attribute
  for (int attIdx = 0; attIdx < data.numAttributes(); attIdx++) {
    if (attIdx == data.classIndex()) {
      continue;
    }

    // filled[side] counts the entries written so far for that branch
    int[] filled = new int[2];
    for (int side = 0; side < 2; side++) {
      subsetIndices[side][attIdx] = new int[sortedIndices[attIdx].length];
      subsetWeights[side][attIdx] = new double[weights[attIdx].length];
    }

    for (int pos = 0; pos < sortedIndices[attIdx].length; pos++) {
      Instance inst = data.instance(sortedIndices[attIdx][pos]);

      if (inst.isMissing(att)) {
        // Split instance up
        for (int side = 0; side < 2; side++) {
          if (m_Props[side] > 0) {
            subsetIndices[side][attIdx][filled[side]] = sortedIndices[attIdx][pos];
            subsetWeights[side][attIdx][filled[side]] = m_Props[side] * weights[attIdx][pos];
            filled[side]++;
          }
        }
        continue;
      }

      int target;
      if (att.isNumeric()) {
        target = (inst.value(att) < splitPoint) ? 0 : 1;
      } else { // nominal attribute
        String token = "(" + att.value((int) inst.value(att.index())) + ")";
        target = splitStr.contains(token) ? 0 : 1;
      }
      subsetIndices[target][attIdx][filled[target]] = sortedIndices[attIdx][pos];
      subsetWeights[target][attIdx][filled[target]] = weights[attIdx][pos];
      filled[target]++;
    }

    // Trim arrays
    for (int side = 0; side < 2; side++) {
      subsetIndices[side][attIdx] =
          java.util.Arrays.copyOf(subsetIndices[side][attIdx], filled[side]);
      subsetWeights[side][attIdx] =
          java.util.Arrays.copyOf(subsetWeights[side][attIdx], filled[side]);
    }
  }
}

Source File: SimpleCart.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Splits the data into two subsets and stores the sorted indices and weights
 * for the two successor nodes.
 * <p>
 * Each non-class attribute's sorted instance order is processed in turn.
 * Instances whose value for {@code att} is missing are copied into every
 * branch with a positive proportion in {@code m_Props}, weighted by that
 * proportion. All other instances land in branch 0 if their value is below
 * {@code splitPoint} (numeric) or if {@code "(value)"} is contained in
 * {@code splitStr} (nominal), else in branch 1. Afterwards the arrays are
 * cut down to the number of entries written.
 *
 * @param subsetIndices 	sorted indices of instances for each attribute
 * 				for the two successor nodes
 * @param subsetWeights 	weights of instances for each attribute for
 * 				the two successor nodes
 * @param att 		attribute the split is based on
 * @param splitPoint 		split point used if att is numeric
 * @param splitStr 		split subset used if att is nominal
 * @param sortedIndices 	sorted indices of the instances to be split
 * @param weights 		weights of the instances to be split
 * @param data 		training data
 * @throws Exception 		if something goes wrong
 */
protected void splitData(int[][][] subsetIndices, double[][][] subsetWeights,
    Attribute att, double splitPoint, String splitStr, int[][] sortedIndices,
    double[][] weights, Instances data) throws Exception {

  // For each attribute
  for (int a = 0; a < data.numAttributes(); a++) {
    if (a == data.classIndex()) {
      continue;
    }

    // written[branch] is the next free slot in that branch's arrays
    int[] written = new int[2];
    for (int branch = 0; branch < 2; branch++) {
      subsetIndices[branch][a] = new int[sortedIndices[a].length];
      subsetWeights[branch][a] = new double[weights[a].length];
    }

    for (int p = 0; p < sortedIndices[a].length; p++) {
      Instance inst = data.instance(sortedIndices[a][p]);

      if (!inst.isMissing(att)) {
        int branch;
        if (att.isNumeric()) {
          branch = (inst.value(att) < splitPoint) ? 0 : 1;
        } else { // nominal attribute
          String wrapped = "(" + att.value((int) inst.value(att.index())) + ")";
          branch = splitStr.contains(wrapped) ? 0 : 1;
        }
        subsetIndices[branch][a][written[branch]] = sortedIndices[a][p];
        subsetWeights[branch][a][written[branch]] = weights[a][p];
        written[branch]++;
      } else {
        // Split instance up
        for (int branch = 0; branch < 2; branch++) {
          if (m_Props[branch] > 0) {
            subsetIndices[branch][a][written[branch]] = sortedIndices[a][p];
            subsetWeights[branch][a][written[branch]] = m_Props[branch] * weights[a][p];
            written[branch]++;
          }
        }
      }
    }

    // Trim arrays
    for (int branch = 0; branch < 2; branch++) {
      subsetIndices[branch][a] =
          java.util.Arrays.copyOf(subsetIndices[branch][a], written[branch]);
      subsetWeights[branch][a] =
          java.util.Arrays.copyOf(subsetWeights[branch][a], written[branch]);
    }
  }
}

Source File: LHSSampler.java From bestconf with Apache License 2.0

4 votes

/**
 * Generates a sample set over the given attributes: several candidate
 * permutation sets are produced and the one with the largest minimum
 * pairwise distance is turned into instances.
 * <p>
 * Assumptions: (1) numeric attributes are continuous and have lower/upper bounds;
 * (2) nominal attributes have permutable domains.
 *
 * @param atts the attributes to sample over
 * @param sampleSetSize the number of instances to generate
 * @param useMid true to use the middle point of a subdomain, false to use a random point within a subdomain
 * @return a dataset named "InitialSetByLHS" holding the generated instances
 */
private static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){

	int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
	double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
	ArrayList<Integer>[] setWithMaxMinDist=null;
	//generate L sets of sampleSetSize points
	for(int i=0; i<L; i++){
		ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
		//compute the minimum distance minDist between any sample pair for each set
		crntMinDist = minDistForSet(setPerm);
		//select the set with the maximum minDist
		if(crntMinDist>maxMinDist){
			setWithMaxMinDist = setPerm;
			maxMinDist = crntMinDist;
		}else if(setWithMaxMinDist==null){
			//fallback: keep the first candidate so that a set is available even when no
			//candidate exceeds the initial threshold of 0; the threshold is left untouched
			setWithMaxMinDist = setPerm;
		}
	}

	//generate and output the set with the maximum minDist as the result

	//first, divide the domain of each attribute into sampleSetSize equal subdomain
	double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
	Iterator<Attribute> itr = atts.iterator();
	Attribute crntAttr;
	double pace;
	for(int i=0;i<bounds.length;i++){
		crntAttr = itr.next();

		if(crntAttr.isNumeric()){
			bounds[i][0] = crntAttr.getLowerNumericBound();
			bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
			pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound())/sampleSetSize;
			for(int j=1;j<sampleSetSize;j++){
				bounds[i][j] = bounds[i][j-1] + pace;
			}
		}else{//crntAttr.isNominal()
			if(crntAttr.numValues()>=sampleSetSize){
				//randomly select among the set
				for(int j=0;j<=sampleSetSize;j++)
					bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values
			}else{
				//first round-robin
				int lastPart = sampleSetSize%crntAttr.numValues();
				for(int j=0;j<sampleSetSize-lastPart;j++)
					bounds[i][j] = j%crntAttr.numValues();
				//then randomly select
				for(int j=sampleSetSize-lastPart;j<=sampleSetSize;j++)
					bounds[i][j] = uniRand.nextInt(crntAttr.numValues());
			}
		}//nominal attribute
	}//get all subdomains

	//second, generate the set according to setWithMaxMinDist
	Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
	for(int i=0;i<sampleSetSize;i++){
		double[] vals = new double[atts.size()];
		for(int j=0;j<vals.length;j++){
			//index of the subdomain assigned to sample i in dimension j
			int cell = setWithMaxMinDist[j].get(i);
			if(atts.get(j).isNumeric()){
				if(useMid){
					vals[j] = (bounds[j][cell]+bounds[j][cell+1])/2;
				}else{
					vals[j] = bounds[j][cell]+
							((bounds[j][cell+1]-bounds[j][cell])*uniRand.nextDouble());
				}
			}else{//isNominal()
				vals[j] = bounds[j][cell];
			}
		}
		data.add(new DenseInstance(1.0, vals));
	}

	//third, return the generated points
	return data;
}

Source File: LHSInitializer.java From bestconf with Apache License 2.0

4 votes

/**
 * Generates a sample set over the given attributes: several candidate
 * permutation sets are produced and the one with the largest minimum
 * pairwise distance is turned into instances.
 * <p>
 * Assumptions: (1) numeric attributes are continuous and have lower/upper bounds;
 * (2) nominal attributes have permutable domains.
 *
 * @param atts the attributes to sample over
 * @param sampleSetSize the number of instances to generate
 * @param useMid true to use the middle point of a subdomain, false to use a random point within a subdomain
 * @return a dataset named "InitialSetByLHS" holding the generated instances
 */
public static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){

	int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
	double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
	ArrayList<Integer>[] setWithMaxMinDist=null;
	//generate L sets of sampleSetSize points
	for(int i=0; i<L; i++){
		ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
		//compute the minimum distance minDist between any sample pair for each set
		crntMinDist = minDistForSet(setPerm);
		//select the set with the maximum minDist
		if(crntMinDist>maxMinDist){
			setWithMaxMinDist = setPerm;
			maxMinDist = crntMinDist;
		}else if(setWithMaxMinDist==null){
			//fallback: keep the first candidate so that a set is available even when no
			//candidate exceeds the initial threshold of 0; the threshold is left untouched
			setWithMaxMinDist = setPerm;
		}
	}

	//generate and output the set with the maximum minDist as the result

	//first, divide the domain of each attribute into sampleSetSize equal subdomain
	double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
	Iterator<Attribute> itr = atts.iterator();
	Attribute crntAttr;
	double pace;
	for(int i=0;i<bounds.length;i++){
		crntAttr = itr.next();

		if(crntAttr.isNumeric()){
			bounds[i][0] = crntAttr.getLowerNumericBound();
			bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
			pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound())/sampleSetSize;
			for(int j=1;j<sampleSetSize;j++){
				bounds[i][j] = bounds[i][j-1] + pace;
			}
		}else{//crntAttr.isNominal()
			if(crntAttr.numValues()>=sampleSetSize){
				//randomly select among the set
				for(int j=0;j<=sampleSetSize;j++)
					bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values
			}else{
				//first round-robin
				int lastPart = sampleSetSize%crntAttr.numValues();
				for(int j=0;j<sampleSetSize-lastPart;j++)
					bounds[i][j] = j%crntAttr.numValues();
				//then randomly select
				for(int j=sampleSetSize-lastPart;j<=sampleSetSize;j++)
					bounds[i][j] = uniRand.nextInt(crntAttr.numValues());
			}
		}//nominal attribute
	}//get all subdomains

	//second, generate the set according to setWithMaxMinDist
	Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
	for(int i=0;i<sampleSetSize;i++){
		double[] vals = new double[atts.size()];
		for(int j=0;j<vals.length;j++){
			//index of the subdomain assigned to sample i in dimension j
			int cell = setWithMaxMinDist[j].get(i);
			if(atts.get(j).isNumeric()){
				if(useMid){
					vals[j] = (bounds[j][cell]+bounds[j][cell+1])/2;
				}else{
					vals[j] = bounds[j][cell]+
							((bounds[j][cell+1]-bounds[j][cell])*uniRand.nextDouble());
				}
			}else{//isNominal()
				vals[j] = bounds[j][cell];
			}
		}
		data.add(new DenseInstance(1.0, vals));
	}

	//third, return the generated points
	return data;
}

Source File: LabelWordVectors.java From AffectiveTweets with GNU General Public License v3.0

2 votes

/**
 * Determines the output format: all attributes of the input format followed
 * by one attribute per numeric or nominal lexicon attribute, named
 * {@code <lexiconName>-<attributeName>}. Lexicon attributes of any other type
 * are not added. The class index of the input format is carried over.
 *
 * @param inputFormat the format of the incoming data
 * @return the format of the outgoing data
 * @throws Exception if the output format cannot be determined
 */
@Override
protected Instances determineOutputFormat(Instances inputFormat)
		throws Exception {

	ArrayList<Attribute> outputAtts = new ArrayList<Attribute>();

	// Start with every attribute of the input format
	int inputCount = 0;
	while (inputCount < inputFormat.numAttributes()) {
		outputAtts.add(inputFormat.attribute(inputCount));
		inputCount++;
	}

	// The dictionaries of the lexicons are initialized only in the first batch
	if (!this.isFirstBatchDone()) {
		this.initializeDicts();
	}

	for (ArffLexiconWordLabeller lexEval : this.lexiconLabs) {
		for (Attribute lexAtt : lexEval.getAttributes()) {
			if (!lexAtt.isNumeric() && !lexAtt.isNominal()) {
				continue; // only numeric and nominal lexicon attributes are carried over
			}

			String outName = lexEval.getLexiconName() + "-" + lexAtt.name();
			if (lexAtt.isNumeric()) {
				outputAtts.add(new Attribute(outName));
			} else {
				// copy the nominal labels in index order
				List<String> labels = new ArrayList<String>();
				for (int v = 0; v < lexAtt.numValues(); v++) {
					labels.add(lexAtt.value(v));
				}
				outputAtts.add(new Attribute(outName, labels));
			}
		}
	}

	Instances result = new Instances(inputFormat.relationName(), outputAtts, 0);

	// set the class index
	result.setClassIndex(inputFormat.classIndex());

	return result;
}

Java Code Examples for weka.core.Attribute#isNumeric()