Java Code Examples for org.apache.mahout.math.Vector#nonZeroes()
The following examples show how to use org.apache.mahout.math.Vector#nonZeroes(). Each snippet is taken from an open-source project; the source file, project, and license are listed above each example.
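Before the project examples, here is a minimal, self-contained sketch of the idiom they all share. nonZeroes() returns an Iterable<Vector.Element> over only the entries a vector actually stores, so a loop over it costs time proportional to the number of non-zeros rather than the vector's cardinality. The vector contents below are invented for illustration:

import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;

public class NonZeroesDemo {
    public static void main(String[] args) {
        // sparse vector of cardinality 1000 with only two stored entries
        Vector v = new RandomAccessSparseVector(1000);
        v.set(3, 2.5);
        v.set(970, -1.0);

        double sum = 0;
        // the loop visits only the two stored entries, not all 1000 positions
        for (Vector.Element e : v.nonZeroes()) {
            System.out.println(e.index() + " -> " + e.get());
            sum += e.get();
        }
        System.out.println("sum = " + sum); // prints sum = 1.5
    }
}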
Example 1
Source File: LogisticL2DiffFunction.java From laser with Apache License 2.0
public double evaluatePrimalObjective(double[] x) {
    double result = 0.0;
    for (int row = 0; row < this.m; row++) {
        Vector v = this.a[row];
        double ax = 0;
        for (Element e : v.nonZeroes()) {
            // calculate the dot product ai' * x, where ai denotes the i-th row of a
            ax += e.get() * x[e.index()];
        }
        double axb = ax * b[row];
        double thisLoopResult = Math.log(1.0 + Math.exp(-axb));
        result += thisLoopResult;
    }
    result /= m;
    return result;
}
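Restated as a formula: with $a_i$ the $i$-th row of $a$, $b_i$ the corresponding label, and $x$ the weight vector, the method evaluates the averaged logistic loss

\[
f(x) = \frac{1}{m} \sum_{i=1}^{m} \log\bigl(1 + \exp(-b_i\, a_i^{\top} x)\bigr),
\]

and iterating nonZeroes() makes each inner dot product $a_i^{\top} x$ cost only as much as the number of non-zero entries in $a_i$.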
Example 2
Source File: CRFF1Loss.java From pyramid with Apache License 2.0
private double calEmpiricalCountForFeature(int parameterIndex) {
    double empiricalCount = 0.0;
    int classIndex = parameterToClass[parameterIndex];
    int featureIndex = parameterToFeature[parameterIndex];
    if (featureIndex == -1) {
        for (int i = 0; i < dataSet.getNumDataPoints(); i++) {
            if (dataSet.getMultiLabels()[i].matchClass(classIndex)) {
                empiricalCount += 1;
            }
        }
    } else {
        Vector column = dataSet.getColumn(featureIndex);
        MultiLabel[] multiLabels = dataSet.getMultiLabels();
        for (Vector.Element element : column.nonZeroes()) {
            int dataIndex = element.index();
            double featureValue = element.get();
            if (multiLabels[dataIndex].matchClass(classIndex)) {
                empiricalCount += featureValue;
            }
        }
    }
    return empiricalCount;
}
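In formula form: writing $f_{ij}$ for the value of feature $j$ on data point $i$ and $y_i$ for its label set, the empirical count of the (class $k$, feature $j$) parameter is

\[
\sum_{i:\; k \in y_i} f_{ij},
\]

with the bias case ($j = -1$) reducing to the number of data points labeled with class $k$. Restricting the loop to non-zeroes is exact, since zero feature values contribute nothing to the sum. The "predicted count" methods in the examples that follow use the same pattern with the model probability $p_{ik}$ in place of the label indicator.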
Example 3
Source File: CMLCRFElasticNet.java From pyramid with Apache License 2.0
private double calPredictedFeatureCounts(int parameterIndex) {
    double count = 0.0;
    int classIndex = parameterToClass[parameterIndex];
    int featureIndex = parameterToFeature[parameterIndex];
    if (featureIndex == -1) {
        for (int i = 0; i < numData; i++) {
            count += this.classProbMatrix[i][classIndex];
        }
    } else {
        Vector featureColumn = dataSet.getColumn(featureIndex);
        for (Vector.Element element : featureColumn.nonZeroes()) {
            int dataPointIndex = element.index();
            double featureValue = element.get();
            count += this.classProbMatrix[dataPointIndex][classIndex] * featureValue;
        }
    }
    return count;
}
Example 4
Source File: ElasticNetLogisticTrainer.java From pyramid with Apache License 2.0
private double calEmpricalCount(int parameterIndex) {
    int classIndex = logisticRegression.getWeights().getClassIndex(parameterIndex);
    int featureIndex = logisticRegression.getWeights().getFeatureIndex(parameterIndex);
    double count = 0;
    // bias
    if (featureIndex == -1) {
        for (int i = 0; i < dataSet.getNumDataPoints(); i++) {
            count += targets[i][classIndex];
        }
    } else {
        Vector featureColumn = dataSet.getColumn(featureIndex);
        for (Vector.Element element : featureColumn.nonZeroes()) {
            int dataPointIndex = element.index();
            double featureValue = element.get();
            count += featureValue * targets[dataPointIndex][classIndex];
        }
    }
    return count;
}
Example 5
Source File: DataSetUtil.java From pyramid with Apache License 2.0
public static void normalize(DataSet dataSet, double[] normalizationConstants) {
    for (int j = 0; j < dataSet.getNumFeatures(); j++) {
        Vector column = dataSet.getColumn(j);
        List<Integer> indices = new ArrayList<>();
        List<Double> values = new ArrayList<>();
        // collect the non-zero entries first so that setFeatureValue does not
        // interfere with the iteration over the column
        for (Vector.Element nonzero : column.nonZeroes()) {
            indices.add(nonzero.index());
            values.add(nonzero.get());
        }
        for (int i = 0; i < indices.size(); i++) {
            int dataId = indices.get(i);
            double old = values.get(i);
            // if the normalization constant is 0, use 0 as the normalized value
            dataSet.setFeatureValue(dataId, j,
                    SafeDivide.divide(old, normalizationConstants[j], 0.0));
        }
    }
}
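SafeDivide is a pyramid helper whose source is not shown here. Judging only from the call site and the comment above, a plausible sketch of such a guard could look like the following; the signature is inferred, not taken from the pyramid codebase:

// Hypothetical sketch of a SafeDivide-style helper, inferred from the call site;
// the real pyramid implementation may differ.
public final class SafeDivide {
    /** Returns numerator / denominator, or defaultValue when denominator is 0. */
    public static double divide(double numerator, double denominator, double defaultValue) {
        if (denominator == 0.0) {
            return defaultValue;
        }
        return numerator / denominator;
    }
}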
Example 6
Source File: MLLogisticLoss.java From pyramid with Apache License 2.0
private double calPredictedCount(int parameterIndex) {
    int classIndex = mlLogisticRegression.getWeights().getClassIndex(parameterIndex);
    int featureIndex = mlLogisticRegression.getWeights().getFeatureIndex(parameterIndex);
    double count = 0;
    // bias
    if (featureIndex == -1) {
        for (int i = 0; i < dataSet.getNumDataPoints(); i++) {
            count += this.classProbMatrix[i][classIndex];
        }
    } else {
        Vector featureColumn = dataSet.getColumn(featureIndex);
        for (Vector.Element element : featureColumn.nonZeroes()) {
            int dataPointIndex = element.index();
            double featureValue = element.get();
            count += this.classProbMatrix[dataPointIndex][classIndex] * featureValue;
        }
    }
    return count;
}
Example 7
Source File: MLLogisticLoss.java From pyramid with Apache License 2.0
private double calEmpricalCount(int parameterIndex) {
    int classIndex = mlLogisticRegression.getWeights().getClassIndex(parameterIndex);
    MultiLabel[] labels = dataSet.getMultiLabels();
    int featureIndex = mlLogisticRegression.getWeights().getFeatureIndex(parameterIndex);
    double count = 0;
    // bias
    if (featureIndex == -1) {
        for (int i = 0; i < dataSet.getNumDataPoints(); i++) {
            if (labels[i].matchClass(classIndex)) {
                count += 1;
            }
        }
    } else {
        Vector featureColumn = dataSet.getColumn(featureIndex);
        for (Vector.Element element : featureColumn.nonZeroes()) {
            int dataPointIndex = element.index();
            double featureValue = element.get();
            MultiLabel label = labels[dataPointIndex];
            if (label.matchClass(classIndex)) {
                count += featureValue;
            }
        }
    }
    return count;
}
Example 8
Source File: DataSetUtil.java From pyramid with Apache License 2.0
public static Pair<DataSet, double[][]> sampleData(DataSet dataSet, double[][] targetDistribution, List<Integer> indices) {
    DataSet sample;
    int numClasses = targetDistribution[0].length;
    double[][] sampledTargets = new double[indices.size()][numClasses];
    sample = DataSetBuilder.getBuilder()
            .dense(dataSet.isDense())
            .missingValue(dataSet.hasMissingValue())
            .numDataPoints(indices.size())
            .numFeatures(dataSet.getNumFeatures())
            .build();
    for (int i = 0; i < indices.size(); i++) {
        int indexInOld = indices.get(i);
        Vector oldVector = dataSet.getRow(indexInOld);
        double[] targets = targetDistribution[indexInOld];
        // copy label
        sampledTargets[i] = Arrays.copyOf(targets, targets.length);
        // copy row feature values, optimized for sparse vectors
        for (Vector.Element element : oldVector.nonZeroes()) {
            sample.setFeatureValue(i, element.index(), element.get());
        }
    }
    sample.setFeatureList(dataSet.getFeatureList());
    // ignore idTranslator as we may have duplicate extIds
    return new Pair<>(sample, sampledTargets);
}
Example 9
Source File: BlockwiseCD.java From pyramid with Apache License 2.0
private double calHessiansForFeature(int l, int m) {
    double count = 0.0;
    if (m == -1) {
        for (int i = 0; i < numData; i++) {
            count += (Math.pow(this.classProbMatrix[i][l], 2) - this.classProbMatrix[i][l]);
        }
    } else {
        Vector featureColumn = dataSet.getColumn(m);
        for (Vector.Element element : featureColumn.nonZeroes()) {
            int dataPointIndex = element.index();
            double featureValue = element.get();
            count += (Math.pow(this.classProbMatrix[dataPointIndex][l] * featureValue, 2)
                    - this.classProbMatrix[dataPointIndex][l] * Math.pow(featureValue, 2));
        }
    }
    return count;
}
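Reading the loop off as a formula: with $p_{il}$ the entry of classProbMatrix and $f_i$ the feature value, the method returns

\[
\sum_{i} \bigl( p_{il}^2 f_i^2 - p_{il} f_i^2 \bigr) = -\sum_{i} p_{il} (1 - p_{il})\, f_i^2,
\]

the familiar $-p(1-p)x^2$ shape of a logistic second derivative, accumulated only over the data points where feature $m$ is non-zero.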
Example 10
Source File: IMLLogisticLoss.java From pyramid with Apache License 2.0
private double calPredictedCount(int parameterIndex) {
    int classIndex = logisticRegression.getWeights().getClassIndex(parameterIndex);
    int featureIndex = logisticRegression.getWeights().getFeatureIndex(parameterIndex);
    double count = 0;
    // bias
    if (featureIndex == -1) {
        for (int i = 0; i < dataSet.getNumDataPoints(); i++) {
            count += this.classProbMatrix[i][classIndex];
        }
    } else {
        Vector featureColumn = dataSet.getColumn(featureIndex);
        for (Vector.Element element : featureColumn.nonZeroes()) {
            int dataPointIndex = element.index();
            double featureValue = element.get();
            count += this.classProbMatrix[dataPointIndex][classIndex] * featureValue;
        }
    }
    return count;
}
Example 11
Source File: CRFLoss.java From pyramid with Apache License 2.0
private double calGradientForFeature(int parameterIndex) {
    double count = 0.0;
    int classIndex = parameterToClass[parameterIndex];
    int featureIndex = parameterToFeature[parameterIndex];
    if (featureIndex == -1) {
        for (int i = 0; i < dataSet.getNumDataPoints(); i++) {
            count += this.classProbMatrix[i][classIndex];
        }
    } else {
        Vector featureColumn = dataSet.getColumn(featureIndex);
        for (Vector.Element element : featureColumn.nonZeroes()) {
            int dataPointIndex = element.index();
            double featureValue = element.get();
            count += this.classProbMatrix[dataPointIndex][classIndex] * featureValue;
        }
    }
    count -= this.empiricalCounts[parameterIndex];
    // regularize
    if (regularizeAll) {
        count += cmlcrf.getWeights().getWeightForIndex(parameterIndex) / gaussianPriorVariance;
    } else {
        if (featureIndex != -1) {
            count += cmlcrf.getWeights().getWeightForIndex(parameterIndex) / gaussianPriorVariance;
        }
    }
    return count;
}
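Putting the pieces together: the returned gradient entry has the standard CRF form of predicted count minus empirical count plus an L2 (Gaussian prior) term,

\[
\frac{\partial L}{\partial w_k} = \mathbb{E}_{\text{model}}[f_k] - \mathbb{E}_{\text{data}}[f_k] + \frac{w_k}{\sigma^2},
\]

where $\sigma^2$ is gaussianPriorVariance and, when regularizeAll is false, the prior term is skipped for bias parameters (featureIndex == -1).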
Example 12
Source File: DataSetUtil.java From pyramid with Apache License 2.0
/**
 * create a subset with the indices;
 * it's fine to have duplicate indices;
 * idTranslator is not saved in sampleData as we may have duplicate extIds
 * @param dataSet
 * @param indices
 * @return
 */
public static ClfDataSet sampleData(ClfDataSet dataSet, List<Integer> indices) {
    ClfDataSet sample;
    int numClasses = dataSet.getNumClasses();
    boolean missingValue = dataSet.hasMissingValue();
    if (dataSet instanceof DenseClfDataSet) {
        sample = new DenseClfDataSet(indices.size(), dataSet.getNumFeatures(), missingValue, numClasses);
    } else {
        sample = new SparseClfDataSet(indices.size(), dataSet.getNumFeatures(), missingValue, numClasses);
    }
    int[] labels = dataSet.getLabels();
    for (int i = 0; i < indices.size(); i++) {
        int indexInOld = indices.get(i);
        Vector oldVector = dataSet.getRow(indexInOld);
        int label = labels[indexInOld];
        // copy label
        sample.setLabel(i, label);
        // copy row feature values, optimized for sparse vectors
        for (Vector.Element element : oldVector.nonZeroes()) {
            sample.setFeatureValue(i, element.index(), element.get());
        }
    }
    sample.setLabelTranslator(dataSet.getLabelTranslator());
    sample.setFeatureList(dataSet.getFeatureList());
    // ignore idTranslator as we may have duplicate extIds
    return sample;
}
Example 13
Source File: CRFF1Loss.java From pyramid with Apache License 2.0
private double calGradientForFeature(int parameterIndex) {
    double count = 0.0;
    int classIndex = parameterToClass[parameterIndex];
    int featureIndex = parameterToFeature[parameterIndex];
    if (featureIndex == -1) {
        for (int i = 0; i < dataSet.getNumDataPoints(); i++) {
            count += this.classProbMatrix[i][classIndex];
        }
    } else {
        Vector featureColumn = dataSet.getColumn(featureIndex);
        for (Vector.Element element : featureColumn.nonZeroes()) {
            int dataPointIndex = element.index();
            double featureValue = element.get();
            count += this.classProbMatrix[dataPointIndex][classIndex] * featureValue;
        }
    }
    count -= this.empiricalCounts[parameterIndex];
    // regularize
    if (regularizeAll) {
        count += cmlcrf.getWeights().getWeightForIndex(parameterIndex) / gaussianPriorVariance;
    } else {
        if (featureIndex != -1) {
            count += cmlcrf.getWeights().getWeightForIndex(parameterIndex) / gaussianPriorVariance;
        }
    }
    return count;
}
Example 14
Source File: AbstractRobustCBMOptimizer.java From pyramid with Apache License 2.0
private double effectivePositives(int componentIndex, int labelIndex) {
    double sum = 0;
    Vector labelColumn = labelMatrix.getColumn(labelIndex);
    for (Vector.Element element : labelColumn.nonZeroes()) {
        int dataIndex = element.index();
        sum += gammas[dataIndex][componentIndex] * noiseLabelWeights[dataIndex][labelIndex];
    }
    return sum;
}
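In words: since labelMatrix appears to store 0/1 label indicators (the element value itself is never read), iterating the non-zeroes of column labelIndex selects exactly the data points carrying that label, so the method computes the responsibility-weighted positive count

\[
\sum_{i:\; y_{i\ell} \neq 0} \gamma_{ik}\, w_{i\ell},
\]

with $\gamma_{ik}$ the component responsibilities and $w_{i\ell}$ the noise label weights. Example 19 below is the same computation without the noise weights.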
Example 15
Source File: DataSetUtil.java From pyramid with Apache License 2.0
/**
 * make every non-zero feature 1
 * @param dataSet
 */
public static void binarizeFeature(DataSet dataSet) {
    for (int i = 0; i < dataSet.getNumDataPoints(); i++) {
        List<Integer> nonZeros = new ArrayList<>();
        Vector row = dataSet.getRow(i);
        // record the non-zero indices first, then overwrite them with 1
        for (Vector.Element element : row.nonZeroes()) {
            nonZeros.add(element.index());
        }
        for (int j : nonZeros) {
            dataSet.setFeatureValue(i, j, 1);
        }
    }
}
Example 16
Source File: AugmentedLRLoss.java From pyramid with Apache License 2.0
private double calPredictedCountFeatureWeight(int d) {
    Vector featureColumn = dataSet.getColumn(d);
    double sum = 0;
    for (Vector.Element element : featureColumn.nonZeroes()) {
        int dataIndex = element.index();
        double feature = element.get();
        sum += feature * expectedProbs[dataIndex];
    }
    return sum;
}
Example 17
Source File: MultiLabel.java From pyramid with Apache License 2.0
/**
 * @param vector a binary label vector
 */
public MultiLabel(Vector vector) {
    this();
    for (Vector.Element element : vector.nonZeroes()) {
        this.addLabel(element.index());
    }
}
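A hypothetical usage sketch of this constructor; the vector contents are invented for illustration, and only the non-zero positions become labels:

import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;

// binary label vector over 5 classes with classes 1 and 4 switched on
Vector labelVector = new RandomAccessSparseVector(5);
labelVector.set(1, 1.0);
labelVector.set(4, 1.0);

MultiLabel multiLabel = new MultiLabel(labelVector); // label set {1, 4}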
Example 18
Source File: BMTrainer.java From pyramid with Apache License 2.0
private double weightedSum(int clusterIndex, int dimensionIndex) {
    Vector column = dataSet.getColumn(dimensionIndex);
    double sum = 0;
    for (Vector.Element nonzero : column.nonZeroes()) {
        int i = nonzero.index();
        sum += gammas[i][clusterIndex];
    }
    return sum;
}
Example 19
Source File: AbstractRecoverCBMOptimizer.java From pyramid with Apache License 2.0
private double effectivePositives(int componentIndex, int labelIndex) {
    double sum = 0;
    Vector labelColumn = labelMatrix.getColumn(labelIndex);
    for (Vector.Element element : labelColumn.nonZeroes()) {
        int dataIndex = element.index();
        sum += gammas[dataIndex][componentIndex];
    }
    return sum;
}
Example 20
Source File: DataSetUtil.java From pyramid with Apache License 2.0
/**
 * only keep the selected features
 * @param dataSet
 * @param columnsToKeep
 * @return
 */
public static MultiLabelClfDataSet sampleFeatures(MultiLabelClfDataSet dataSet, List<Integer> columnsToKeep) {
    MultiLabelClfDataSet trimmed;
    boolean missingValue = dataSet.hasMissingValue();
    int numClasses = dataSet.getNumClasses();
    // keep density
    if (dataSet.isDense()) {
        trimmed = new DenseMLClfDataSet(dataSet.getNumDataPoints(), columnsToKeep.size(), missingValue, numClasses);
    } else {
        trimmed = new SparseMLClfDataSet(dataSet.getNumDataPoints(), columnsToKeep.size(), missingValue, numClasses);
    }
    for (int j = 0; j < trimmed.getNumFeatures(); j++) {
        int oldColumnIndex = columnsToKeep.get(j);
        Vector vector = dataSet.getColumn(oldColumnIndex);
        for (Vector.Element element : vector.nonZeroes()) {
            int dataPointIndex = element.index();
            double value = element.get();
            trimmed.setFeatureValue(dataPointIndex, j, value);
        }
    }
    // copy labels
    MultiLabel[] multiLabels = dataSet.getMultiLabels();
    for (int i = 0; i < trimmed.getNumDataPoints(); i++) {
        trimmed.addLabels(i, multiLabels[i].getMatchedLabels());
    }
    // just copy settings
    trimmed.setLabelTranslator(dataSet.getLabelTranslator());
    trimmed.setIdTranslator(dataSet.getIdTranslator());
    List<Feature> oldFeatures = dataSet.getFeatureList().getAll();
    List<Feature> newFeatures = columnsToKeep.stream().map(oldFeatures::get).collect(Collectors.toList());
    for (int i = 0; i < newFeatures.size(); i++) {
        newFeatures.get(i).setIndex(i);
    }
    trimmed.setFeatureList(new FeatureList(newFeatures));
    return trimmed;
}