Java Code Examples for weka.core.Attribute#isNumeric()
The following examples show how to use
weka.core.Attribute#isNumeric().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ContractRotationForest.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Adds randomly generated instances to the dataset.
 *
 * <p>Each numeric attribute receives {@code random.nextDouble()} and each nominal attribute a
 * random value index below {@code att.numValues()}. Attributes of any other type are left
 * at 0. Every instance is added with weight 1.
 *
 * @param dataset the dataset to append to
 * @param numInstances the number of instances to add
 * @param random a random number generator
 */
protected void addRandomInstances(Instances dataset, int numInstances, Random random) {
  int n = dataset.numAttributes();
  for (int i = 0; i < numInstances; i++) {
    // Allocate a fresh value array per instance. DenseInstance is documented to keep a
    // reference to the array it is given, and Instances.add() only shallow-copies the
    // instance, so a single reused buffer would leave every added instance sharing the
    // values written in the last iteration.
    double[] v = new double[n];
    for (int j = 0; j < n; j++) {
      Attribute att = dataset.attribute(j);
      if (att.isNumeric()) {
        v[j] = random.nextDouble();
      } else if (att.isNominal()) {
        v[j] = random.nextInt(att.numValues());
      }
    }
    dataset.add(new DenseInstance(1, v));
  }
}
Example 2
Source File: ActiveHNode.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Updates this node with a training instance.
 *
 * <p>First delegates to {@code super.updateDistribution(inst)}, then feeds the value of every
 * non-class attribute into that attribute's sufficient statistics, creating the statistics
 * object on first use (Gaussian for numeric attributes, nominal otherwise).
 *
 * @param inst the instance to update the node with
 * @throws Exception if a problem occurs
 */
@Override
public void updateNode(Instance inst) throws Exception {
  super.updateDistribution(inst);

  for (int idx = 0; idx < inst.numAttributes(); idx++) {
    Attribute attribute = inst.attribute(idx);
    if (idx == inst.classIndex()) {
      continue; // the class attribute has no conditional statistics
    }

    String key = attribute.name();
    ConditionalSufficientStats estimator = m_nodeStats.get(key);
    if (estimator == null) {
      // statistics are keyed by attribute name and created lazily
      if (attribute.isNumeric()) {
        estimator = new GaussianConditionalSufficientStats();
      } else {
        estimator = new NominalConditionalSufficientStats();
      }
      m_nodeStats.put(key, estimator);
    }

    estimator.update(
        inst.value(attribute),
        inst.classAttribute().value((int) inst.classValue()),
        inst.weight());
  }
}
Example 3
Source File: Test.java From tsml with GNU General Public License v3.0 | 6 votes |
/**
 * Returns the test represented by a string in Prolog notation.
 *
 * <p>For a numeric attribute the result is the attribute name followed by {@code ">= "} (when
 * the test is negated) or {@code "< "} and the split value formatted with 3 decimals. For any
 * other attribute, the value at the split index decides the output: the value
 * {@code "false"} yields {@code not(name)}, every other value yields the bare attribute name.
 *
 * @return a string representing the test in Prolog notation
 */
public String toPrologString() {
  Attribute att = m_Dataset.attribute(m_AttIndex);
  String attName = att.name();
  StringBuilder str = new StringBuilder();

  if (att.isNumeric()) {
    str.append(attName + " ");
    if (m_Not) {
      str.append(">= " + Utils.doubleToString(m_Split, 3));
    } else {
      str.append("< " + Utils.doubleToString(m_Split, 3));
    }
  } else {
    String value = att.value((int) m_Split);
    // Compare by content: reference equality (==) only matches when both strings happen to be
    // the same object, which is not guaranteed for attribute values built at runtime.
    if ("false".equals(value)) {
      str.append("not(" + attName + ")");
    } else {
      str.append(attName);
    }
  }
  return str.toString();
}
Example 4
Source File: RDG1.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Generates a new rule for the decision list and classifies the new example.
 *
 * <p>One test is created for every attribute that is not flagged as irrelevant. Numeric
 * attributes get a random split point, with the test negated when the split lies below the
 * example's value; all other attributes test on the example's own value.
 *
 * @param random random number generator
 * @param example the instance to classify
 * @return a list of tests
 * @throws Exception if dataset format not defined
 */
private FastVector generateTestList(Random random, Instance example) throws Exception {
  Instances header = getDatasetFormat();
  if (header == null) {
    throw new Exception("Dataset format not defined.");
  }

  int relevantCount = getNumAttributes() - getNumIrrelevant();
  FastVector tests = new FastVector(relevantCount);
  boolean[] skip = getAttList_Irr();

  for (int attIdx = 0; attIdx < getNumAttributes(); attIdx++) {
    if (skip[attIdx]) {
      continue;
    }

    Test test;
    if (example.attribute(attIdx).isNumeric()) {
      double split = random.nextDouble();
      boolean negated = split < example.value(attIdx);
      test = new Test(attIdx, split, header, negated);
    } else {
      test = new Test(attIdx, example.value(attIdx), header, false);
    }
    tests.addElement(test);
  }
  return tests;
}
Example 5
Source File: MekaInstancesUtil.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Converts a WEKA attribute into the corresponding {@code IAttribute}.
 *
 * @param att the WEKA attribute to convert
 * @return a {@code NumericAttribute} for numeric input, or an
 *     {@code IntBasedCategoricalAttribute} carrying all nominal values for nominal input
 * @throws IllegalArgumentException if the attribute is neither numeric nor nominal
 */
public static IAttribute transformWEKAAttributeToAttributeType(final Attribute att) {
  final String name = att.name();

  if (att.isNumeric()) {
    return new NumericAttribute(name);
  }
  if (!att.isNominal()) {
    throw new IllegalArgumentException("Can only transform numeric or categorical attributes");
  }

  // collect the nominal values in index order
  List<String> categories = new LinkedList<>();
  for (int pos = 0; pos < att.numValues(); pos++) {
    categories.add(att.value(pos));
  }
  return new IntBasedCategoricalAttribute(name, categories);
}
Example 6
Source File: WekaUtil.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Checks whether every attribute returned by {@code getAttributes(instances, false)} is
 * numeric.
 *
 * @param instances the dataset whose attributes are inspected
 * @return {@code false} as soon as a non-numeric attribute is found, {@code true} otherwise
 */
public static boolean hasOnlyNumericAttributes(final Instances instances) {
  for (Attribute candidate : getAttributes(instances, false)) {
    if (candidate.isNumeric()) {
      continue;
    }
    return false;
  }
  return true;
}
Example 7
Source File: WekaInstancesUtil.java From AILibs with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Maps a WEKA attribute onto the matching {@code IAttribute} implementation.
 *
 * @param att the WEKA attribute to map
 * @return a {@code NumericAttribute} when the attribute is numeric, otherwise an
 *     {@code IntBasedCategoricalAttribute} whose domain lists the nominal values in order
 * @throws IllegalArgumentException if the attribute is neither numeric nor nominal
 */
public static IAttribute transformWEKAAttributeToAttributeType(final Attribute att) {
  String label = att.name();
  IAttribute converted;

  if (att.isNumeric()) {
    converted = new NumericAttribute(label);
  } else if (att.isNominal()) {
    List<String> values = new LinkedList<>();
    int index = 0;
    while (index < att.numValues()) {
      values.add(att.value(index));
      index++;
    }
    converted = new IntBasedCategoricalAttribute(label, values);
  } else {
    throw new IllegalArgumentException("Can only transform numeric or categorical attributes");
  }
  return converted;
}
Example 8
Source File: Test.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Gives a string representation of the test in Prolog notation, starting from the comparison
 * symbol.
 *
 * <p>Numeric attributes produce {@code ">= "} or {@code "< "} followed by the split value.
 * Attributes with exactly two values always produce {@code "= "}; a negated test is written
 * as equality with the other value. All remaining attributes produce {@code "!= "} or
 * {@code "= "} followed by the value at the split index.
 *
 * @return a string representing the test in Prolog notation
 */
private String testPrologComparisonString() {
  Attribute att = m_Dataset.attribute(m_AttIndex);

  if (att.isNumeric()) {
    String operator = m_Not ? ">= " : "< ";
    return operator + Utils.doubleToString(m_Split, 3);
  }

  int splitIdx = (int) m_Split;
  if (att.numValues() != 2) {
    String operator = m_Not ? "!= " : "= ";
    return operator + att.value(splitIdx);
  }

  // two-valued attribute: negation flips to the opposite value index
  int shownIdx = m_Not ? (splitIdx == 0 ? 1 : 0) : splitIdx;
  return "= " + att.value(shownIdx);
}
Example 9
Source File: Test.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Gives a string representation of the test, starting from the comparison symbol.
 *
 * <p>For numeric attributes the symbol is {@code ">= "} (negated) or {@code "< "} followed by
 * the split value with 3 decimals. For two-valued attributes the symbol is always
 * {@code "= "} and a negated test names the other value. Otherwise the symbol is
 * {@code "!= "} (negated) or {@code "= "} followed by the value at the split index.
 *
 * @return a string representing the test
 */
private String testComparisonString() {
  Attribute att = m_Dataset.attribute(m_AttIndex);
  String text;

  if (att.isNumeric()) {
    text = (m_Not ? ">= " : "< ") + Utils.doubleToString(m_Split, 3);
  } else if (att.numValues() == 2) {
    int valueIdx = (int) m_Split;
    if (m_Not) {
      // express the negation as equality with the opposite value
      valueIdx = (valueIdx == 0) ? 1 : 0;
    }
    text = "= " + att.value(valueIdx);
  } else {
    text = (m_Not ? "!= " : "= ") + att.value((int) m_Split);
  }
  return text;
}
Example 10
Source File: BinaryItem.java From tsml with GNU General Public License v3.0 | 5 votes |
/** * Constructor. * * @param att the attribute that backs this item. * @param valueIndex the index of the value for this item. * @throws Exception if the backing attribute is not binary or unary. */ public BinaryItem(Attribute att, int valueIndex) throws Exception { super(att, valueIndex); if (att.isNumeric() || (att.isNominal() && att.numValues() > 2)) { throw new Exception("BinaryItem must be constructed using a nominal attribute" + " with at most 2 values!"); } }
Example 11
Source File: NominalItem.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Creates a nominal item backed by the given attribute.
 *
 * @param att the attribute that backs the item.
 * @param valueIndex the index of the value for this item; ignored (0 is used instead) when the
 *     attribute has exactly one value.
 * @throws Exception if the attribute is numeric.
 */
public NominalItem(Attribute att, int valueIndex) throws Exception {
  super(att);

  if (att.isNumeric()) {
    throw new Exception("NominalItem must be constructed using a nominal attribute");
  }

  m_attribute = att;
  // unary attribute (? used to indicate absence from a basket): only index 0 exists
  m_valueIndex = (m_attribute.numValues() == 1) ? 0 : valueIndex;
}
Example 12
Source File: Analyzer.java From NLIWOD with GNU Affero General Public License v3.0 | 5 votes |
/** * Analyzes the question and extracts all features that were set for this Analyzer. * @param q question string * @return feature vector for the input question */ public Instance analyze(String q) { Instance tmpInstance = new DenseInstance(fvWekaAttributes.size()); for (IAnalyzer analyzer : analyzers) { //special case for PartOfSpeechTags, need to set 36 attributes if(analyzer instanceof PartOfSpeechTags) { analyzePOS(tmpInstance, (PartOfSpeechTags) analyzer, q); continue; } //special case for Dependencies, need to set 18 attributes if(analyzer instanceof Dependencies) { analyzeDeps(tmpInstance, (Dependencies) analyzer, q); continue; } Attribute attribute = analyzer.getAttribute(); if (attribute.isNumeric()) { tmpInstance.setValue(attribute, (double) analyzer.analyze(q)); } else if (attribute.isNominal() || attribute.isString()) { String value = (String) analyzer.analyze(q); tmpInstance.setValue(attribute,value); tmpInstance.setDataset(null); } } return tmpInstance; }
Example 13
Source File: Ridor.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Build one rule using the growing data * * @param data the growing data used to build the rule */ private void grow(Instances data){ Instances growData = new Instances(data); m_AccuG = computeDefAccu(growData); m_CoverG = growData.sumOfWeights(); /* Compute the default accurate rate of the growing data */ double defAcRt= m_AccuG / m_CoverG; /* Keep the record of which attributes have already been used*/ boolean[] used=new boolean [growData.numAttributes()]; for (int k=0; k<used.length; k++) used[k]=false; int numUnused=used.length; double maxInfoGain; boolean isContinue = true; // The stopping criterion of this rule while (isContinue){ maxInfoGain = 0; // We require that infoGain be positive /* Build a list of antecedents */ Antd oneAntd=null; Instances coverData = null; Enumeration enumAttr=growData.enumerateAttributes(); int index=-1; /* Build one condition based on all attributes not used yet*/ while (enumAttr.hasMoreElements()){ Attribute att= (Attribute)(enumAttr.nextElement()); index++; Antd antd =null; if(att.isNumeric()) antd = new NumericAntd(att); else antd = new NominalAntd(att); if(!used[index]){ /* Compute the best information gain for each attribute, it's stored in the antecedent formed by this attribute. 
This procedure returns the data covered by the antecedent*/ Instances coveredData = computeInfoGain(growData, defAcRt, antd); if(coveredData != null){ double infoGain = antd.getMaxInfoGain(); if(Utils.gr(infoGain, maxInfoGain)){ oneAntd=antd; coverData = coveredData; maxInfoGain = infoGain; } } } } if(oneAntd == null) return; //Numeric attributes can be used more than once if(!oneAntd.getAttr().isNumeric()){ used[oneAntd.getAttr().index()]=true; numUnused--; } m_Antds.addElement((Object)oneAntd); growData = coverData;// Grow data size is shrinking defAcRt = oneAntd.getAccuRate(); /* Stop if no more data, rule perfect, no more attributes */ if(Utils.eq(growData.sumOfWeights(), 0.0) || Utils.eq(defAcRt, 1.0) || (numUnused == 0)) isContinue = false; } }
Example 14
Source File: InputMappedClassifier.java From tsml with GNU General Public License v3.0 | 4 votes |
/**
 * Builds an instance in the model's format from an incoming instance.
 *
 * <p>The attribute mapping is regenerated (and, unless suppressed, a mapping report is printed
 * to standard output) when no input header is known yet, when the incoming header differs
 * from the stored one, or when no attribute map exists. Model attributes whose status is not
 * {@code OK}, whose incoming value is missing, or whose nominal value has no match are set to
 * missing. Model attributes that are neither numeric nor nominal keep the array default of 0.
 *
 * @param incoming the instance to map
 * @return a new {@code DenseInstance} with the incoming weight, attached to the model header
 * @throws Exception if a problem occurs
 */
public Instance constructMappedInstance(Instance incoming) throws Exception {
  boolean regenerateMapping = false;

  if (m_inputHeader == null || !m_inputHeader.equalHeaders(incoming.dataset())) {
    // first instance seen, or the incoming structure changed since the last call
    m_inputHeader = incoming.dataset();
    regenerateMapping = true;
    m_initialTestStructureKnown = false;
  } else if (m_attributeMap == null) {
    regenerateMapping = true;
    m_initialTestStructureKnown = false;
  }

  if (regenerateMapping) {
    regenerateMapping();
    m_vals = null;

    if (!m_suppressMappingReport) {
      StringBuffer result = createMappingReport();
      System.out.println(result.toString());
    }
  }

  m_vals = new double[m_modelHeader.numAttributes()];

  for (int i = 0; i < m_modelHeader.numAttributes(); i++) {
    if (m_attributeStatus[i] != OK) {
      m_vals[i] = Utils.missingValue();
      continue;
    }

    Attribute modelAtt = m_modelHeader.attribute(i);
    double incomingValue = incoming.value(m_attributeMap[i]);

    if (Utils.isMissingValue(incomingValue)) {
      m_vals[i] = Utils.missingValue();
      continue;
    }

    if (modelAtt.isNumeric()) {
      m_vals[i] = incomingValue;
    } else if (modelAtt.isNominal()) {
      // translate the incoming value index into the model's value index
      int mapVal = m_nominalValueMap[i][(int) incomingValue];
      if (mapVal == NO_MATCH) {
        m_vals[i] = Utils.missingValue();
      } else {
        m_vals[i] = mapVal;
      }
    }
  }

  Instance newInst = new DenseInstance(incoming.weight(), m_vals);
  newInst.setDataset(m_modelHeader);
  return newInst;
}
Example 15
Source File: InputMappedClassifier.java From tsml with GNU General Public License v3.0 | 4 votes |
/**
 * Builds a textual table that lists, for every model attribute, the incoming attribute it is
 * mapped to, or why it is treated as missing (no match / type mis-match).
 *
 * @return the report text
 */
private StringBuffer createMappingReport() {
  StringBuffer report = new StringBuffer();
  report.append("Attribute mappings:\n\n");

  // column width: longest model attribute name plus 12, but never below 16
  int width = 0;
  for (int i = 0; i < m_modelHeader.numAttributes(); i++) {
    width = Math.max(width, m_modelHeader.attribute(i).name().length());
  }
  width = Math.max(width + 12, 16);

  String header = getFixedLengthString("Model attributes", ' ', width);
  String rule = getFixedLengthString("----------------", '-', width);
  rule += "\t ----------------\n";
  header += "\t Incoming attributes\n";
  report.append(header);
  report.append(rule);

  for (int i = 0; i < m_modelHeader.numAttributes(); i++) {
    Attribute modelAtt = m_modelHeader.attribute(i);
    String modelType = modelAtt.isNumeric() ? "numeric)" : "nominal)";
    String left = getFixedLengthString("(" + modelType + " " + modelAtt.name(), ' ', width);
    report.append(left + "\t--> ");

    if (m_attributeStatus[i] == NO_MATCH) {
      report.append("- " + "missing (no match)\n");
    } else if (m_attributeStatus[i] == TYPE_MISMATCH) {
      // incoming attribute positions are reported 1-based
      report.append((m_attributeMap[i] + 1) + " " + "missing (type mis-match)\n");
    } else {
      Attribute inAtt = m_inputHeader.attribute(m_attributeMap[i]);
      String inType = inAtt.isNumeric() ? "numeric)" : "nominal)";
      report.append("" + (m_attributeMap[i] + 1) + " (" + inType + " " + inAtt.name() + "\n");
    }
  }

  return report;
}
Example 16
Source File: BFTree.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Split data into two subsets and store sorted indices and weights for two * successor nodes. * * @param subsetIndices sorted indecis of instances for each attribute for two successor node * @param subsetWeights weights of instances for each attribute for two successor node * @param att attribute the split based on * @param splitPoint split point the split based on if att is numeric * @param splitStr split subset the split based on if att is nominal * @param sortedIndices sorted indices of the instances to be split * @param weights weights of the instances to bes split * @param data training data * @throws Exception if something goes wrong */ protected void splitData(int[][][] subsetIndices, double[][][] subsetWeights, Attribute att, double splitPoint, String splitStr, int[][] sortedIndices, double[][] weights, Instances data) throws Exception { int j; // For each attribute for (int i = 0; i < data.numAttributes(); i++) { if (i==data.classIndex()) continue; int[] num = new int[2]; for (int k = 0; k < 2; k++) { subsetIndices[k][i] = new int[sortedIndices[i].length]; subsetWeights[k][i] = new double[weights[i].length]; } for (j = 0; j < sortedIndices[i].length; j++) { Instance inst = data.instance(sortedIndices[i][j]); if (inst.isMissing(att)) { // Split instance up for (int k = 0; k < 2; k++) { if (m_Props[k] > 0) { subsetIndices[k][i][num[k]] = sortedIndices[i][j]; subsetWeights[k][i][num[k]] = m_Props[k] * weights[i][j]; num[k]++; } } } else { int subset; if (att.isNumeric()) { subset = (inst.value(att) < splitPoint) ? 
0 : 1; } else { // nominal attribute if (splitStr.indexOf ("(" + att.value((int)inst.value(att.index()))+")")!=-1) { subset = 0; } else subset = 1; } subsetIndices[subset][i][num[subset]] = sortedIndices[i][j]; subsetWeights[subset][i][num[subset]] = weights[i][j]; num[subset]++; } } // Trim arrays for (int k = 0; k < 2; k++) { int[] copy = new int[num[k]]; System.arraycopy(subsetIndices[k][i], 0, copy, 0, num[k]); subsetIndices[k][i] = copy; double[] copyWeights = new double[num[k]]; System.arraycopy(subsetWeights[k][i], 0 ,copyWeights, 0, num[k]); subsetWeights[k][i] = copyWeights; } } }
Example 17
Source File: SimpleCart.java From tsml with GNU General Public License v3.0 | 4 votes |
/** * Split data into two subsets and store sorted indices and weights for two * successor nodes. * * @param subsetIndices sorted indecis of instances for each attribute * for two successor node * @param subsetWeights weights of instances for each attribute for * two successor node * @param att attribute the split based on * @param splitPoint split point the split based on if att is numeric * @param splitStr split subset the split based on if att is nominal * @param sortedIndices sorted indices of the instances to be split * @param weights weights of the instances to bes split * @param data training data * @throws Exception if something goes wrong */ protected void splitData(int[][][] subsetIndices, double[][][] subsetWeights, Attribute att, double splitPoint, String splitStr, int[][] sortedIndices, double[][] weights, Instances data) throws Exception { int j; // For each attribute for (int i = 0; i < data.numAttributes(); i++) { if (i==data.classIndex()) continue; int[] num = new int[2]; for (int k = 0; k < 2; k++) { subsetIndices[k][i] = new int[sortedIndices[i].length]; subsetWeights[k][i] = new double[weights[i].length]; } for (j = 0; j < sortedIndices[i].length; j++) { Instance inst = data.instance(sortedIndices[i][j]); if (inst.isMissing(att)) { // Split instance up for (int k = 0; k < 2; k++) { if (m_Props[k] > 0) { subsetIndices[k][i][num[k]] = sortedIndices[i][j]; subsetWeights[k][i][num[k]] = m_Props[k] * weights[i][j]; num[k]++; } } } else { int subset; if (att.isNumeric()) { subset = (inst.value(att) < splitPoint) ? 
0 : 1; } else { // nominal attribute if (splitStr.indexOf ("(" + att.value((int)inst.value(att.index()))+")")!=-1) { subset = 0; } else subset = 1; } subsetIndices[subset][i][num[subset]] = sortedIndices[i][j]; subsetWeights[subset][i][num[subset]] = weights[i][j]; num[subset]++; } } // Trim arrays for (int k = 0; k < 2; k++) { int[] copy = new int[num[k]]; System.arraycopy(subsetIndices[k][i], 0, copy, 0, num[k]); subsetIndices[k][i] = copy; double[] copyWeights = new double[num[k]]; System.arraycopy(subsetWeights[k][i], 0 ,copyWeights, 0, num[k]); subsetWeights[k][i] = copyWeights; } } }
Example 18
Source File: LHSSampler.java From bestconf with Apache License 2.0 | 4 votes |
/** * Assumptions:(1)Numberic is continuous and has lower/upper bounds; (2) Nominals have domains permutable * * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain */ private static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){ int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set ArrayList<Integer>[] setWithMaxMinDist=null; //generate L sets of sampleSetSize points for(int i=0; i<L; i++){ ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size()); //compute the minimum distance minDist between any sample pair for each set crntMinDist = minDistForSet(setPerm); //select the set with the maximum minDist if(crntMinDist>maxMinDist){ setWithMaxMinDist = setPerm; maxMinDist = crntMinDist; } } //generate and output the set with the maximum minDist as the result //first, divide the domain of each attribute into sampleSetSize equal subdomain double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds Iterator<Attribute> itr = atts.iterator(); Attribute crntAttr; double pace; for(int i=0;i<bounds.length;i++){ crntAttr = itr.next(); if(crntAttr.isNumeric()){ bounds[i][0] = crntAttr.getLowerNumericBound(); bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound(); pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound())/sampleSetSize; for(int j=1;j<sampleSetSize;j++){ bounds[i][j] = bounds[i][j-1] + pace; } }else{//crntAttr.isNominal() if(crntAttr.numValues()>=sampleSetSize){ //randomly select among the set for(int j=0;j<=sampleSetSize;j++) bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values }else{ //first round-robin int lastPart = sampleSetSize%crntAttr.numValues(); for(int j=0;j<sampleSetSize-lastPart;j++) 
bounds[i][j] = j%crntAttr.numValues(); //then randomly select for(int j=sampleSetSize-lastPart;j<=sampleSetSize;j++) bounds[i][j] = uniRand.nextInt(crntAttr.numValues()); } }//nominal attribute }//get all subdomains //second, generate the set according to setWithMaxMinDist Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize); for(int i=0;i<sampleSetSize;i++){ double[] vals = new double[atts.size()]; for(int j=0;j<vals.length;j++){ if(atts.get(j).isNumeric()){ vals[j] = useMid? (bounds[j][setWithMaxMinDist[j].get(i)]+bounds[j][setWithMaxMinDist[j].get(i)+1])/2: bounds[j][setWithMaxMinDist[j].get(i)]+ ( (bounds[j][setWithMaxMinDist[j].get(i)+1]-bounds[j][setWithMaxMinDist[j].get(i)])*uniRand.nextDouble() ); }else{//isNominal() vals[j] = bounds[j][setWithMaxMinDist[j].get(i)]; } } data.add(new DenseInstance(1.0, vals)); } //third, return the generated points return data; }
Example 19
Source File: LHSInitializer.java From bestconf with Apache License 2.0 | 4 votes |
/** * Assumptions:(1)Numberic is continuous and has lower/upper bounds; (2) Nominals have domains permutable * * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain */ public static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){ int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set ArrayList<Integer>[] setWithMaxMinDist=null; //generate L sets of sampleSetSize points for(int i=0; i<L; i++){ ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size()); //compute the minimum distance minDist between any sample pair for each set crntMinDist = minDistForSet(setPerm); //select the set with the maximum minDist if(crntMinDist>maxMinDist){ setWithMaxMinDist = setPerm; maxMinDist = crntMinDist; } } //generate and output the set with the maximum minDist as the result //first, divide the domain of each attribute into sampleSetSize equal subdomain double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds Iterator<Attribute> itr = atts.iterator(); Attribute crntAttr; double pace; for(int i=0;i<bounds.length;i++){ crntAttr = itr.next(); if(crntAttr.isNumeric()){ bounds[i][0] = crntAttr.getLowerNumericBound(); bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound(); pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound())/sampleSetSize; for(int j=1;j<sampleSetSize;j++){ bounds[i][j] = bounds[i][j-1] + pace; } }else{//crntAttr.isNominal() if(crntAttr.numValues()>=sampleSetSize){ //randomly select among the set for(int j=0;j<=sampleSetSize;j++) bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values }else{ //first round-robin int lastPart = sampleSetSize%crntAttr.numValues(); for(int j=0;j<sampleSetSize-lastPart;j++) 
bounds[i][j] = j%crntAttr.numValues(); //then randomly select for(int j=sampleSetSize-lastPart;j<=sampleSetSize;j++) bounds[i][j] = uniRand.nextInt(crntAttr.numValues()); } }//nominal attribute }//get all subdomains //second, generate the set according to setWithMaxMinDist Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize); for(int i=0;i<sampleSetSize;i++){ double[] vals = new double[atts.size()]; for(int j=0;j<vals.length;j++){ if(atts.get(j).isNumeric()){ vals[j] = useMid? (bounds[j][setWithMaxMinDist[j].get(i)]+bounds[j][setWithMaxMinDist[j].get(i)+1])/2: bounds[j][setWithMaxMinDist[j].get(i)]+ ( (bounds[j][setWithMaxMinDist[j].get(i)+1]-bounds[j][setWithMaxMinDist[j].get(i)])*uniRand.nextDouble() ); }else{//isNominal() vals[j] = bounds[j][setWithMaxMinDist[j].get(i)]; } } data.add(new DenseInstance(1.0, vals)); } //third, return the generated points return data; }
Example 20
Source File: LabelWordVectors.java From AffectiveTweets with GNU General Public License v3.0 | 2 votes |
@Override protected Instances determineOutputFormat(Instances inputFormat) throws Exception { ArrayList<Attribute> atts = new ArrayList<Attribute>(); // Adds all attributes of the inputformat for (int i = 0; i < inputFormat.numAttributes(); i++) { atts.add(inputFormat.attribute(i)); } // The dictionaries of the lexicons are intialized only in the first batch if(!this.isFirstBatchDone()) this.initializeDicts(); for(ArffLexiconWordLabeller lexEval:this.lexiconLabs){ for(Attribute att:lexEval.getAttributes()){ if(att.isNumeric()) atts.add(new Attribute(lexEval.getLexiconName()+"-"+att.name())); else if(att.isNominal()){ List<String> attValues=new ArrayList<String>(); for(int i=0;i<att.numValues();i++){ attValues.add(att.value(i)); } atts.add(new Attribute(lexEval.getLexiconName()+"-"+att.name(),attValues)); } } } Instances result = new Instances(inputFormat.relationName(), atts, 0); // set the class index result.setClassIndex(inputFormat.classIndex()); return result; }