org.apache.mahout.math.Vector Java Examples
The following examples show how to use org.apache.mahout.math.Vector.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Reranker.java From pyramid with Apache License 2.0 | 6 votes |
/**
 * Scores a candidate label set for one instance.
 *
 * <p>The class probabilities predicted for {@code vector} are calibrated, the
 * {@code topK(numCandidate)} list is requested from a {@code DynamicProgramming}
 * built on those marginals, and both are packed into a {@code PredictionCandidate}
 * whose extracted features are fed to the regressor.
 *
 * @param vector     feature vector of the instance
 * @param multiLabel candidate label set to score
 * @return the regressor output, limited to the range [0, 1]
 */
public double prob(Vector vector, MultiLabel multiLabel){
    double[] calibratedMarginals = labelCalibrator.calibratedClassProbs(
            classProbEstimator.predictClassProbs(vector));
    List<Pair<MultiLabel,Double>> sparseJoint =
            new DynamicProgramming(calibratedMarginals).topK(numCandidate);

    PredictionCandidate candidate = new PredictionCandidate();
    candidate.x = vector;
    candidate.labelProbs = calibratedMarginals;
    candidate.multiLabel = multiLabel;
    candidate.sparseJoint = sparseJoint;

    double rawScore = regressor.predict(predictionFeatureExtractor.extractFeatures(candidate));

    // The regressor is unconstrained; pull out-of-range outputs back to the bounds.
    if (rawScore > 1){
        return 1;
    }
    if (rawScore < 0){
        return 0;
    }
    return rawScore;
}
Example #2
Source File: IntervalSplitterTest.java From pyramid with Apache License 2.0 | 6 votes |
/**
 * Manual check of {@code IntervalSplitter.generateIntervals} and
 * {@code IntervalSplitter.compress} on a four-element column whose third
 * entry is NaN. Prints the raw intervals, the compressed intervals, and two
 * hand-computed reference numbers for visual comparison.
 */
static void test9(){
    RegTreeConfig regTreeConfig = new RegTreeConfig().setNumSplitIntervals(2);

    double[] columnValues = {0, 1, Double.NaN, 3};
    Vector vector = new DenseVector(columnValues.length);
    for (int i = 0; i < columnValues.length; i++){
        vector.set(i, columnValues[i]);
    }

    double[] probs = {1, 0.5, 1, 0.6};
    double[] labels = {1, 2, 3, 4};
    Splitter.GlobalStats globalStats = new Splitter.GlobalStats(labels, probs);

    List<Interval> intervals =
            IntervalSplitter.generateIntervals(regTreeConfig, vector, probs, labels, globalStats);
    System.out.println(intervals);
    System.out.println(IntervalSplitter.compress(intervals));

    // Reference values to compare against the printed intervals by eye.
    System.out.println(1.5/(1.5+0.6));
    System.out.println(1+1+3*1.5/(1.5+0.6));
}
Example #3
Source File: AdmmIterationMapper.java From laser with Apache License 2.0 | 6 votes |
/**
 * Builds the mapper context for a split by combining the current input data
 * with the state read from the previous iteration's intermediate output.
 *
 * @param inputSplitData the vectors belonging to this split
 * @param splitId        identifier of the split whose previous context is looked up
 * @return a new context carrying {@code inputSplitData} plus the previous
 *         u/x/z values, rho, lambda, primal objective value and the r/s norms
 * @throws IOException if the previous context cannot be read; the original
 *                     failure is attached as the cause
 */
private AdmmMapperContext assembleMapperContextFromCache(
        Vector[] inputSplitData, String splitId) throws IOException {
    try {
        AdmmMapperContext preContext = readPreviousAdmmMapperContext(
                splitId, previousIntermediateOutputLocationPath, fs, conf);
        return new AdmmMapperContext(splitId, inputSplitData,
                preContext.getUInitial(), preContext.getXInitial(),
                preContext.getZInitial(), preContext.getRho(),
                preContext.getLambdaValue(),
                preContext.getPrimalObjectiveValue(),
                preContext.getRNorm(), preContext.getSNorm());
    } catch (IOException e) {
        String message = "Key not found. Split ID: " + splitId + e.getMessage();
        LOG.info(message);
        // Chain the original exception so its type and stack trace are not lost.
        throw new IOException(message, e);
    }
}
Example #4
Source File: FusedKolmogorovFilterTest.java From pyramid with Apache License 2.0 | 6 votes |
/**
 * Manual check of {@code FusedKolmogorovFilter} on a ten-element column with
 * binary labels. Prints the per-class inputs, the empirical CDFs built from
 * them, and the value returned by {@code maxDistance}.
 */
private static void test1(){
    // Only these positions are set explicitly; the rest keep the vector's initial value.
    int[] positions = {0, 1, 2, 3, 4, 8};
    double[] values = {0.1, 0.2, 0.15, 0.4, 0.7, 0.9};
    Vector vector = new DenseVector(10);
    for (int p = 0; p < positions.length; p++){
        vector.set(positions[p], values[p]);
    }

    int[] labels = {0, 1, 1, 1, 0, 0, 0, 0, 0, 1};

    FusedKolmogorovFilter filter = new FusedKolmogorovFilter();
    filter.setNumBins(10);

    List<List<Double>> inputsEachClass = filter.generateInputsEachClass(vector, labels, 2);
    System.out.println(inputsEachClass);

    List<EmpiricalCDF> empiricalCDFs = filter.generateCDFs(vector, inputsEachClass);
    System.out.println(empiricalCDFs);
    System.out.println(filter.maxDistance(empiricalCDFs));
}
Example #5
Source File: KMeans.java From pyramid with Apache License 2.0 | 6 votes |
/**
 * Assigns data point {@code i} to the center with the smallest distance and
 * marks the point as assigned.
 *
 * @param i     row index of the data point
 * @param print when true, writes the assignment to standard output and, if the
 *              point had been assigned before and its cluster changed, also
 *              names the previous cluster (all numbers printed 1-based)
 */
private void assign(int i, boolean print){
    int oldCluster = assignments[i];
    Vector point = dataSet.getRow(i);

    double[] distances = IntStream.range(0, numComponents)
            .mapToDouble(k -> distance(point, centers[k]))
            .toArray();
    int nearest = ArgMin.argMin(distances);
    assignments[i] = nearest;

    if (print){
        String message = "assign instance "+(i+1)+" to cluster "+(nearest+1);
        boolean reassigned = assigned[i] && (oldCluster != nearest);
        if (reassigned){
            message += ", previously in cluster "+(oldCluster+1);
        }
        System.out.println(message);
    }

    assigned[i] = true;
}
Example #6
Source File: KLLoss.java From pyramid with Apache License 2.0 | 6 votes |
/**
 * Computes the empirical count for one parameter: the sum over data points of
 * the feature value times the target marginal of the parameter's class.
 *
 * @param parameterIndex index into {@code parameterToClass} / {@code parameterToFeature}
 * @return the accumulated count; when the mapped feature index is -1 the
 *         target marginals are summed over all data points without a feature factor
 */
private double calEmpiricalCountForFeature(int parameterIndex) {
    int classIndex = parameterToClass[parameterIndex];
    int featureIndex = parameterToFeature[parameterIndex];
    double total = 0.0;

    // Feature index -1: every data point contributes its marginal directly.
    if (featureIndex == -1){
        int numDataPoints = dataSet.getNumDataPoints();
        for (int row = 0; row < numDataPoints; row++) {
            total += targetMarginals[row][classIndex];
        }
        return total;
    }

    // Regular feature: only the column's non-zero entries contribute.
    for (Vector.Element entry : dataSet.getColumn(featureIndex).nonZeroes()){
        total += entry.get() * targetMarginals[entry.index()][classIndex];
    }
    return total;
}
Example #7
Source File: AdmmMapperContext.java From laser with Apache License 2.0 | 6 votes |
/**
 * Creates a mapper context from explicit values. Every argument is stored in
 * the field of the same name; arrays are kept by reference, not copied.
 *
 * @param splitId              identifier of the input split
 * @param a                    vectors stored in field {@code a}
 * @param b                    values stored in field {@code b}
 * @param uInitial             initial u values
 * @param xInitial             initial x values
 * @param zInitial             initial z values
 * @param rho                  value stored in field {@code rho}
 * @param lambdaValue          value stored in field {@code lambdaValue}
 * @param primalObjectiveValue value stored in field {@code primalObjectiveValue}
 * @param rNorm                value stored in field {@code rNorm}
 * @param sNorm                value stored in field {@code sNorm}
 */
public AdmmMapperContext(String splitId, Vector[] a, double[] b,
                         double[] uInitial, double[] xInitial, double[] zInitial,
                         double rho, double lambdaValue, double primalObjectiveValue,
                         double rNorm, double sNorm) {
    // split identity and data
    this.splitId = splitId;
    this.a = a;
    this.b = b;

    // iterate vectors
    this.uInitial = uInitial;
    this.xInitial = xInitial;
    this.zInitial = zInitial;

    // scalar state
    this.rho = rho;
    this.lambdaValue = lambdaValue;
    this.primalObjectiveValue = primalObjectiveValue;
    this.rNorm = rNorm;
    this.sNorm = sNorm;
}
Example #8
Source File: CRFLoss.java From pyramid with Apache License 2.0 | 6 votes |
/** * gradient of log likelihood * @return */ @Override public Vector getGradient() { if (isGradientCacheValid) { return this.gradient; } if (logger.isDebugEnabled()){ logger.debug("start method getGradient()"); } // O(NdL) updateClassScoreMatrix(); updateAssignmentScoreMatrix(); updateAssignmentProbMatrix(); updateCombProbSums(); updateClassProbMatrix(); updateGradient(); this.isGradientCacheValid = true; if (logger.isDebugEnabled()){ logger.debug("finish method getGradient()"); } return this.gradient; }
Example #9
Source File: Vectors.java From pyramid with Apache License 2.0 | 6 votes |
/**
 * Dot product of two equally sized vectors, dispatched on their density.
 *
 * @param vector1 first operand
 * @param vector2 second operand
 * @return the dot product
 * @throws IllegalArgumentException      if the sizes differ
 * @throws UnsupportedOperationException if both vectors are sparse
 */
public static double dot(Vector vector1, Vector vector2){
    if (vector1.size() != vector2.size()){
        throw new IllegalArgumentException("vector1.size()!=vector2.size()");
    }

    boolean firstDense = vector1.isDense();
    boolean secondDense = vector2.isDense();

    if (firstDense){
        return secondDense
                ? dotDenseDense(vector1, vector2)
                : dotDenseSparse(vector1, vector2);
    }
    if (secondDense){
        // The helper takes the dense operand first, so swap the arguments.
        return dotDenseSparse(vector2, vector1);
    }
    throw new UnsupportedOperationException("sparse dot sparse is not supported");
}
Example #10
Source File: KLLoss.java From pyramid with Apache License 2.0 | 6 votes |
/** * gradient of log likelihood * @return */ @Override public Vector getGradient() { if (isGradientCacheValid) { return this.gradient; } if (logger.isDebugEnabled()){ logger.debug("start method getGradient()"); } // O(NdL) updateClassScoreMatrix(); updateAssignmentScoreMatrix(); updateAssignmentProbMatrix(); updateCombProbSums(); updateClassProbMatrix(); updateGradient(); this.isGradientCacheValid = true; if (logger.isDebugEnabled()){ logger.debug("finish method getGradient()"); } return this.gradient; }
Example #11
Source File: CBMUtilityOptimizer.java From pyramid with Apache License 2.0 | 6 votes |
private void updateGamma(int n) { Vector x = dataSet.getRow(n); BMDistribution bmDistribution = cbm.computeBM(x); // size = combination * components List<double[]> logPosteriors = new ArrayList<>(); for (int c=0;c<combinations.size();c++){ MultiLabel combination = combinations.get(c); double[] pos = bmDistribution.logPosteriorMembership(combination); logPosteriors.add(pos); } double[] sums = new double[cbm.numComponents]; for (int k=0;k<cbm.numComponents;k++){ double sum = 0; for (int c=0;c<combinations.size();c++){ sum += targets[n][c]*logPosteriors.get(c)[k]; } sums[k] = sum; } double[] posterior = MathUtil.softmax(sums); for (int k=0; k<cbm.numComponents; k++) { gammas[n][k] = posterior[k]; gammasT[k][n] = posterior[k]; } }
Example #12
Source File: ElasticNetLogisticTrainer.java From pyramid with Apache License 2.0 | 5 votes |
/**
 * Backtracking line search for sufficient decrease with an elastic-net
 * penalized model.
 * Reference: "An improved glmnet for l1-regularized logistic regression".
 *
 * <p>Starting from step length 1, the step is halved until
 * {@code loss(target) <= loss(start) + c * step * (gradient·direction + penalty(target) - penalty(start))}
 * with {@code c = 1e-4}. The model's weight vector is overwritten with each
 * trial point, so on return it holds the accepted point.
 * NOTE(review): there is no iteration cap; the loop ends only when the
 * condition holds.
 *
 * @param searchDirection direction along which to search
 * @param gradient        gradient used in the sufficient-decrease test
 */
private void lineSearch(Vector searchDirection, Vector gradient){
    final double shrinkage = 0.5;
    final double c = 1e-4;

    Vector start = logisticRegression.getWeights().getAllWeights();
    double penalty = penalty();
    double value = loss(penalty);
    if (logger.isDebugEnabled()){
        logger.debug("start line search");
        logger.debug("initial loss = "+loss());
    }
    double product = gradient.dot(searchDirection);

    double stepLength = 1;
    boolean sufficientDecrease = false;
    while (!sufficientDecrease){
        Vector target = start.plus(searchDirection.times(stepLength));
        logisticRegression.getWeights().setWeightVector(target);

        double targetPenalty = penalty();
        double targetValue = loss(targetPenalty);
        sufficientDecrease =
                targetValue <= value + c*stepLength*(product + targetPenalty - penalty);

        if (sufficientDecrease){
            if (logger.isDebugEnabled()){
                logger.debug("step size = "+stepLength);
                logger.debug("final loss = "+targetValue);
                logger.debug("line search done");
            }
        } else {
            stepLength *= shrinkage;
        }
    }
}
Example #13
Source File: ALSWRFactorizer.java From elasticsearch-taste with Apache License 2.0 | 5 votes |
/**
 * Copies an item's preferences into a sparse vector indexed by user ID.
 *
 * <p>NOTE(review): the long user ID is narrowed to {@code int} to serve as the
 * vector index; IDs outside the int range would be truncated. Confirm that
 * callers guarantee int-sized IDs.
 *
 * @param prefs preferences to copy
 * @return a sequential-access sparse vector of cardinality
 *         {@code Integer.MAX_VALUE} holding one value per preference
 */
protected Vector sparseItemRatingVector(final PreferenceArray prefs) {
    final int expectedEntries = prefs.length();
    final SequentialAccessSparseVector ratings =
            new SequentialAccessSparseVector(Integer.MAX_VALUE, expectedEntries);
    for (final Preference pref : prefs) {
        final int userIndex = (int) pref.getUserID();
        ratings.set(userIndex, pref.getValue());
    }
    return ratings;
}
Example #14
Source File: CBMS.java From pyramid with Apache License 2.0 | 5 votes |
/**
 * Sorts the class marginals in descending order and predicts the labels with
 * the highest probabilities.
 *
 * @param vector feature vector of the instance
 * @param top    number of labels to keep; values larger than the number of
 *               classes keep every class, values of zero or less keep none
 * @return a label set containing the selected classes
 */
public MultiLabel predictByMarginals(Vector vector, int top){
    double[] probs = predictClassProbs(vector);
    int[] sortedIndices = ArgSort.argSortDescending(probs);
    // Never read past the available classes when the caller asks for more.
    int numToKeep = Math.min(top, sortedIndices.length);
    MultiLabel prediction = new MultiLabel();
    for (int i = 0; i < numToKeep; i++){
        prediction.addLabel(sortedIndices[i]);
    }
    return prediction;
}
Example #15
Source File: MLLogisticRegression.java From pyramid with Apache License 2.0 | 5 votes |
double logLikelihood(Vector vector, MultiLabel multiLabel){ double[] classScores = predictClassScores(vector); int numAssignments = assignments.size(); double[] assignmentScores = calAssignmentScores(classScores); // double[] assignmentScores = new double[numAssignments]; // for (int a=0;a<numAssignments;a++){ // MultiLabel assignment = assignments.get(a); // assignmentScores[a] = this.calAssignmentScore(assignment, classScores); // } double logDenominator = MathUtil.logSumExp(assignmentScores); double logNumerator = this.calAssignmentScore(multiLabel, classScores); return logNumerator-logDenominator; }
Example #16
Source File: RegressionTreeTest.java From pyramid with Apache License 2.0 | 5 votes |
/**
 * Manual check: builds a stump with {@code RegressionTree.newStump(10,0.5,-1.2,3)},
 * prints it, then prints its prediction for a 100-element vector whose entry
 * at index 10 is 0.6.
 */
private static void test5(){
    RegressionTree stump = RegressionTree.newStump(10,0.5,-1.2,3);
    System.out.println(stump);

    Vector input = new DenseVector(100);
    input.set(10,0.6);
    double prediction = stump.predict(input);
    System.out.println(prediction);
}
Example #17
Source File: RidgeBinaryLogisticLoss.java From pyramid with Apache License 2.0 | 5 votes |
/**
 * Dot product of a data-set column with another vector.
 *
 * @param columnIndex column to use; index 0 is the bias feature, any other
 *                    index i maps to data-set column {@code i-1}
 * @param vector      the other operand
 * @return for the bias column, the sum of {@code vector}'s entries; otherwise
 *         the dot product of the mapped column with {@code vector}
 */
private double columnDot(int columnIndex, Vector vector){
    boolean isBias = (columnIndex == 0);
    return isBias
            ? vector.zSum()
            : dataSet.getColumn(columnIndex - 1).dot(vector);
}
Example #18
Source File: SupervisedEmbeddingLoss.java From pyramid with Apache License 2.0 | 5 votes |
/**
 * Writes a flat parameter vector into the updated embedding matrix in
 * row-major order: entry {@code i * numFeatures + j} becomes the value of
 * feature {@code j} for data point {@code i}.
 *
 * @param parameters flat vector read at indices 0 .. numData*numFeatures-1
 */
public void setParameters(Vector parameters) {
    int rows = this.updatedEmbeddingMatrix.getNumDataPoints();
    int cols = this.updatedEmbeddingMatrix.getNumFeatures();
    int flatIndex = 0;
    for (int row = 0; row < rows; row++) {
        for (int col = 0; col < cols; col++) {
            this.updatedEmbeddingMatrix.setFeatureValue(row, col, parameters.get(flatIndex));
            flatIndex++;
        }
    }
}
Example #19
Source File: LogisticLoss.java From pyramid with Apache License 2.0 | 5 votes |
/**
 * Gradient of the penalty term: every weight divided by
 * {@code priorGaussianVariance}, with the entries at the bias positions set to
 * zero.
 *
 * @return a vector with one entry per weight, obtained by adding the scaled
 *         weights to a fresh {@code DenseVector}
 */
private Vector penaltyGradient(){
    Vector allWeights = this.logisticRegression.getWeights().getAllWeights();
    Vector scaledWeights = allWeights.divide(priorGaussianVariance);
    Vector result = new DenseVector(allWeights.size()).plus(scaledWeights);
    // Bias terms are not penalized.
    for (int biasPosition : logisticRegression.getWeights().getAllBiasPositions()){
        result.set(biasPosition, 0);
    }
    return result;
}
Example #20
Source File: VectorCardIsoSetCalibrator.java From pyramid with Apache License 2.0 | 5 votes |
public double calibrate(Vector vector){ double uncalibrated = vector.get(scoreIndex); int cardinality = (int)vector.get(cardIndex); //deal with unseen cardinality if (!calibrations.containsKey(cardinality)){ return 0; } return calibrations.get(cardinality).predict(uncalibrated); }
Example #21
Source File: PluginF1.java From pyramid with Apache License 2.0 | 5 votes |
private MultiLabel predictBySampling(Vector vector){ List<MultiLabel> samples = cbm.samples(vector, numSamples); GeneralF1Predictor generalF1Predictor = new GeneralF1Predictor(); generalF1Predictor.setMaxSize(maxSize); return generalF1Predictor.predict(cbm.getNumClasses(), samples); // unique the sample set and apply GFM // List<MultiLabel> uniqueSamples = new ArrayList(new HashSet(samples)); // List<Double> probs = cbm.predictAssignmentProbs(vector, uniqueSamples); // return GeneralF1Predictor.predict(cbm.getNumClasses(), uniqueSamples, probs); }
Example #22
Source File: DataSetUtil.java From pyramid with Apache License 2.0 | 5 votes |
/** * merge to binary dataset * k=positive (1), others = negative(0) * @param dataSet * @param k * @return */ public static ClfDataSet toBinary(MultiLabelClfDataSet dataSet, int k){ int numDataPoints = dataSet.getNumDataPoints(); int numFeatures = dataSet.getNumFeatures(); boolean missingValue = dataSet.hasMissingValue(); ClfDataSet clfDataSet; if (dataSet.isDense()){ clfDataSet = new DenseClfDataSet(numDataPoints,numFeatures,missingValue, 2); } else { clfDataSet = new SparseClfDataSet(numDataPoints,numFeatures,missingValue, 2); } for (int i=0;i<numDataPoints;i++){ //only copy non-zero elements Vector vector = dataSet.getRow(i); for (Vector.Element element: vector.nonZeroes()){ int featureIndex = element.index(); double value = element.get(); clfDataSet.setFeatureValue(i,featureIndex,value); } if (dataSet.getMultiLabels()[i].matchClass(k)){ clfDataSet.setLabel(i,1); } else { clfDataSet.setLabel(i,0); } } List<String> extLabels = new ArrayList<>(); String extLabel = dataSet.getLabelTranslator().toExtLabel(k); extLabels.add("NOT "+extLabel); extLabels.add(extLabel); LabelTranslator labelTranslator = new LabelTranslator(extLabels); clfDataSet.setLabelTranslator(labelTranslator); clfDataSet.setFeatureList(dataSet.getFeatureList()); return clfDataSet; }
Example #23
Source File: IMLLogisticRegression.java From pyramid with Apache License 2.0 | 5 votes |
/**
 * Picks the highest-scoring label set among the known assignments only.
 *
 * @param vector feature vector of the instance
 * @return the first assignment with the strictly greatest score, or
 *         {@code null} if no assignment scores above negative infinity
 *         (including when there are no assignments)
 */
private MultiLabel predictWithConstraints(Vector vector){
    double[] classScores = predictClassScores(vector);

    MultiLabel best = null;
    double bestScore = Double.NEGATIVE_INFINITY;
    for (MultiLabel candidate : this.assignments){
        double candidateScore = this.calAssignmentScore(candidate, classScores);
        if (candidateScore > bestScore){
            best = candidate;
            bestScore = candidateScore;
        }
    }
    return best;
}
Example #24
Source File: L2Boost.java From pyramid with Apache License 2.0 | 5 votes |
/**
 * Score of class {@code k} for an instance.
 *
 * @param vector feature vector of the instance
 * @param k      class index
 * @return 0 for class 0; for any other class, the score of ensemble 0
 */
@Override
public double predictClassScore(Vector vector, int k) {
    return (k == 0) ? 0 : getEnsemble(0).score(vector);
}
Example #25
Source File: IntervalSplitterTest.java From pyramid with Apache License 2.0 | 5 votes |
/**
 * Manual check of {@code IntervalSplitter.generateIntervals} and
 * {@code IntervalSplitter.compress} on a four-element column whose first three
 * entries are NaN. Prints the raw and the compressed intervals.
 */
static void test12(){
    RegTreeConfig regTreeConfig = new RegTreeConfig().setNumSplitIntervals(2);

    Vector vector = new DenseVector(4);
    for (int missing = 0; missing < 3; missing++){
        vector.set(missing, Double.NaN);
    }
    vector.set(3, 3);

    double[] probs = {1, 0.5, 1, 0.6};
    double[] labels = {1, 2, 3, 4};
    Splitter.GlobalStats globalStats = new Splitter.GlobalStats(labels, probs);

    List<Interval> intervals =
            IntervalSplitter.generateIntervals(regTreeConfig, vector, probs, labels, globalStats);
    System.out.println(intervals);
    System.out.println(IntervalSplitter.compress(intervals));
}
Example #26
Source File: LogisticL2DiffFunction.java From laser with Apache License 2.0 | 5 votes |
/**
 * Stores the inputs of the function and derives its dimensions.
 *
 * <p>{@code m} is the number of vectors in {@code a}; {@code n} is the size of
 * the first vector minus one, or 0 when {@code a} is empty. Arrays are kept by
 * reference. Start and end of initialization are logged at info level.
 *
 * @param a   vectors stored in field {@code a}
 * @param b   values stored in field {@code b}
 * @param rho value stored in field {@code rho}
 * @param u   values stored in field {@code u}
 * @param z   values stored in field {@code z}
 */
public LogisticL2DiffFunction(Vector[] a, double[] b, double rho,
                              double[] u, double[] z) {
    LOG.info("Initialize LogisticL2DiffFunction");

    this.a = a;
    this.b = b;
    this.rho = rho;

    this.m = a.length;
    this.n = (this.m > 0) ? this.a[0].size() - 1 : 0;

    this.u = u;
    this.z = z;

    LOG.info("Initialize LogisticL2DiffFunction Finish");
}
Example #27
Source File: RegressionTree.java From pyramid with Apache License 2.0 | 5 votes |
private double predictWithMissingValue(Vector vector){ // use as a simple cache int numNodes = this.numNodes; boolean[] calculated = new boolean[numNodes]; double[] probs = new double[numNodes]; double prediction = 0; for (Node leaf: this.leaves){ double prob = probability(vector,leaf, calculated, probs); prediction += prob*leaf.getValue(); } return prediction; }
Example #28
Source File: FusedKolmogorovFilter.java From pyramid with Apache License 2.0 | 5 votes |
/**
 * Builds one empirical CDF per class. Every CDF uses the same range, namely
 * the minimum and maximum of the whole {@code vector}, and {@code numBins} bins.
 *
 * @param vector          full column; supplies the shared min and max
 * @param inputsEachClass the values belonging to each class
 * @return one {@code EmpiricalCDF} per inner list, in the same order
 */
public List<EmpiricalCDF> generateCDFs(Vector vector, List<List<Double>> inputsEachClass){
    final double globalMin = vector.minValue();
    final double globalMax = vector.maxValue();
    return inputsEachClass.stream()
            .map(classInputs -> new EmpiricalCDF(classInputs, globalMin, globalMax, numBins))
            .collect(Collectors.toList());
}
Example #29
Source File: DataSetUtil.java From pyramid with Apache License 2.0 | 5 votes |
/** * only keep the selected features * @param dataSet * @return */ public static ClfDataSet sampleFeatures(ClfDataSet dataSet, List<Integer> columnsToKeep){ ClfDataSet trimmed ; int numClasses = dataSet.getNumClasses(); boolean missingValue = dataSet.hasMissingValue(); // keep density if (dataSet.isDense()) { trimmed = new DenseClfDataSet(dataSet.getNumDataPoints(), columnsToKeep.size(), missingValue, numClasses); } else{ trimmed = new SparseClfDataSet(dataSet.getNumDataPoints(),columnsToKeep.size(), missingValue, numClasses); } for (int j=0;j<trimmed.getNumFeatures();j++){ int oldColumnIndex = columnsToKeep.get(j); Vector vector = dataSet.getColumn(oldColumnIndex); for (Vector.Element element: vector.nonZeroes()){ int dataPointIndex = element.index(); double value = element.get(); trimmed.setFeatureValue(dataPointIndex,j,value); } } //copy labels int[] labels = dataSet.getLabels(); for (int i=0;i<trimmed.getNumDataPoints();i++){ trimmed.setLabel(i,labels[i]); } trimmed.setLabelTranslator(dataSet.getLabelTranslator()); trimmed.setIdTranslator(dataSet.getIdTranslator()); List<Feature> oldFeatures = dataSet.getFeatureList().getAll(); List<Feature> newFeatures = columnsToKeep.stream().map(oldFeatures::get).collect(Collectors.toList()); for (int i=0;i<newFeatures.size();i++){ newFeatures.get(i).setIndex(i); } trimmed.setFeatureList(new FeatureList(newFeatures)); return trimmed; }
Example #30
Source File: CMLCRF.java From pyramid with Apache License 2.0 | 5 votes |
/**
 * Scores every class for one instance.
 *
 * @param vector feature vector of the instance
 * @return an array of length {@code numClasses} whose entry k is
 *         {@code predictClassScore(vector, k)}
 */
double[] predictClassScores(Vector vector){
    double[] classScores = new double[numClasses];
    int classIndex = 0;
    while (classIndex < numClasses){
        classScores[classIndex] = predictClassScore(vector, classIndex);
        classIndex++;
    }
    return classScores;
}