org.apache.mahout.math.Vector Java Examples

Source File: Reranker.java From pyramid with Apache License 2.0

6 votes

/**
 * Scores a candidate label set for an instance and limits the score to [0, 1].
 *
 * @param vector     feature vector of the instance
 * @param multiLabel candidate label set to be scored
 * @return the regressor output for the candidate; values above 1 become 1 and
 *         values below 0 become 0
 */
public double prob(Vector vector, MultiLabel multiLabel){
    double[] calibratedMarginals = labelCalibrator.calibratedClassProbs(
            classProbEstimator.predictClassProbs(vector));
    List<Pair<MultiLabel,Double>> topCandidates =
            new DynamicProgramming(calibratedMarginals).topK(numCandidate);

    // bundle everything the feature extractor reads
    PredictionCandidate candidate = new PredictionCandidate();
    candidate.x = vector;
    candidate.labelProbs = calibratedMarginals;
    candidate.multiLabel = multiLabel;
    candidate.sparseJoint = topCandidates;

    double rawScore = regressor.predict(predictionFeatureExtractor.extractFeatures(candidate));
    if (rawScore > 1){
        return 1;
    }
    if (rawScore < 0){
        return 0;
    }
    return rawScore;
}

Source File: IntervalSplitterTest.java From pyramid with Apache License 2.0

6 votes

/**
 * Manual check: prints the intervals generated for a 4-element column whose third
 * value is NaN, the compressed intervals, and two hand-computed reference numbers.
 */
static void test9(){
    double[] labels = {1,2,3,4};
    double[] probs = {1,0.5,1,0.6};
    RegTreeConfig config = new RegTreeConfig().setNumSplitIntervals(2);

    Vector column = new DenseVector(4);
    column.set(0,0);
    column.set(1,1);
    column.set(2,Double.NaN);
    column.set(3,3);

    Splitter.GlobalStats stats = new Splitter.GlobalStats(labels,probs);
    List<Interval> generated = IntervalSplitter.generateIntervals(config, column, probs, labels,stats);
    System.out.println(generated);
    System.out.println(IntervalSplitter.compress(generated));
    // reference values to compare against by eye
    System.out.println(1.5/(1.5+0.6));
    System.out.println(1+1+3*1.5/(1.5+0.6));
}

Source File: AdmmIterationMapper.java From laser with Apache License 2.0

6 votes

/**
 * Builds the mapper context for a split from the context stored by the previous
 * iteration.
 *
 * @param inputSplitData the data vectors belonging to this split
 * @param splitId        identifier of the split whose previous context is read
 * @return a new context for this split that carries over the values of the
 *         previous context
 * @throws IOException if the previous context cannot be read; the underlying
 *                     exception is attached as the cause
 */
private AdmmMapperContext assembleMapperContextFromCache(
		Vector[] inputSplitData, String splitId) throws IOException {
	try {
		AdmmMapperContext preContext = readPreviousAdmmMapperContext(
				splitId, previousIntermediateOutputLocationPath, fs, conf);
		return new AdmmMapperContext(splitId, inputSplitData,
				preContext.getUInitial(), preContext.getXInitial(),
				preContext.getZInitial(), preContext.getRho(),
				preContext.getLambdaValue(),
				preContext.getPrimalObjectiveValue(),
				preContext.getRNorm(), preContext.getSNorm());
	} catch (IOException e) {
		LOG.info("Key not found. Split ID: " + splitId + e.getMessage());
		// Chain the original exception so its type and stack trace are not lost.
		throw new IOException("Key not found.  Split ID: " + splitId
				+ e.getMessage(), e);
	}
}

Source File: FusedKolmogorovFilterTest.java From pyramid with Apache License 2.0

6 votes

/**
 * Manual check: prints the per-class inputs, the empirical CDFs and the maximum
 * CDF distance computed by a 10-bin filter on a small hand-made column.
 */
private static void test1(){
    // positions 5, 6, 7 and 9 keep the value a fresh DenseVector starts with
    Vector column = new DenseVector(10);
    column.set(0,0.1);
    column.set(1,0.2);
    column.set(2,0.15);
    column.set(3,0.4);
    column.set(4,0.7);
    column.set(8,0.9);

    int[] classLabels = {0, 1, 1, 1, 0, 0, 0, 0, 0, 1};

    FusedKolmogorovFilter kolmogorovFilter = new FusedKolmogorovFilter();
    kolmogorovFilter.setNumBins(10);

    List<List<Double>> perClassInputs = kolmogorovFilter.generateInputsEachClass(column, classLabels, 2);
    System.out.println(perClassInputs);

    List<EmpiricalCDF> cdfs = kolmogorovFilter.generateCDFs(column,perClassInputs);
    System.out.println(cdfs);
    System.out.println(kolmogorovFilter.maxDistance(cdfs));
}

Source File: KMeans.java From pyramid with Apache License 2.0

6 votes

/**
 * Assigns instance {@code i} to the center with the smallest distance and marks it
 * as assigned.
 *
 * @param i     index of the instance in the data set
 * @param print whether to report the assignment on standard output (1-based numbers)
 */
private void assign(int i, boolean print){
    final int oldCluster = assignments[i];
    final Vector instance = dataSet.getRow(i);

    double[] distToCenters = IntStream.range(0,numComponents)
            .mapToDouble(k->distance(instance, centers[k]))
            .toArray();
    final int newCluster = ArgMin.argMin(distToCenters);
    assignments[i] = newCluster;

    if (print){
        String message = "assign instance "+(i+1)+" to cluster "+(newCluster+1);
        // mention the old cluster only when the instance actually moved
        if (assigned[i] && (oldCluster!=newCluster)){
            message += ", previously in cluster "+(oldCluster+1);
        }
        System.out.println(message);
    }

    assigned[i] = true;
}

Source File: KLLoss.java From pyramid with Apache License 2.0

6 votes

/**
 * Computes the empirical count for one parameter: the target marginals of the
 * parameter's class, weighted by the parameter's feature values.
 *
 * @param parameterIndex index of the parameter
 * @return the accumulated count
 */
private double calEmpiricalCountForFeature(int parameterIndex) {
    final int classIndex = parameterToClass[parameterIndex];
    final int featureIndex = parameterToFeature[parameterIndex];
    double total = 0.0;

    if (featureIndex != -1){
        // only non-zero feature values contribute
        for (Vector.Element entry: dataSet.getColumn(featureIndex).nonZeroes()){
            int dataIndex = entry.index();
            double featureValue = entry.get();
            total += featureValue*targetMarginals[dataIndex][classIndex];
        }
        return total;
    }

    // featureIndex == -1: every data point contributes its marginal unweighted
    // (presumably the bias parameter -- confirm against parameterToFeature)
    for (int i=0; i<dataSet.getNumDataPoints(); i++) {
        total += targetMarginals[i][classIndex];
    }
    return total;
}

Source File: AdmmMapperContext.java From laser with Apache License 2.0

6 votes

/**
 * Creates a context that stores the given values as-is; the arrays are kept by
 * reference, not copied.
 *
 * @param splitId              identifier of the input split
 * @param a                    stored in field {@code a}
 * @param b                    stored in field {@code b}
 * @param uInitial             stored in field {@code uInitial}
 * @param xInitial             stored in field {@code xInitial}
 * @param zInitial             stored in field {@code zInitial}
 * @param rho                  stored in field {@code rho}
 * @param lambdaValue          stored in field {@code lambdaValue}
 * @param primalObjectiveValue stored in field {@code primalObjectiveValue}
 * @param rNorm                stored in field {@code rNorm}
 * @param sNorm                stored in field {@code sNorm}
 */
public AdmmMapperContext(String splitId, Vector[] a, double[] b,
		double[] uInitial, double[] xInitial, double[] zInitial,
		double rho, double lambdaValue, double primalObjectiveValue,
		double rNorm, double sNorm) {
	// split identity and data
	this.splitId = splitId;
	this.a = a;
	this.b = b;

	// starting points
	this.xInitial = xInitial;
	this.uInitial = uInitial;
	this.zInitial = zInitial;

	// scalar settings and statistics
	this.lambdaValue = lambdaValue;
	this.rho = rho;
	this.primalObjectiveValue = primalObjectiveValue;
	this.sNorm = sNorm;
	this.rNorm = rNorm;
}

Source File: CRFLoss.java From pyramid with Apache License 2.0

6 votes

/**
 * Gradient of the log likelihood. The value is cached: it is recomputed only
 * while the cache flag is not set, and the flag is set after a recomputation.
 *
 * @return the (possibly cached) gradient vector
 */
@Override
public Vector getGradient() {
    if (!isGradientCacheValid) {
        if (logger.isDebugEnabled()){
            logger.debug("start method getGradient()");
        }

        // O(NdL); each step relies on the results of the previous ones
        updateClassScoreMatrix();
        updateAssignmentScoreMatrix();
        updateAssignmentProbMatrix();
        updateCombProbSums();
        updateClassProbMatrix();
        updateGradient();
        this.isGradientCacheValid = true;

        if (logger.isDebugEnabled()){
            logger.debug("finish method getGradient()");
        }
    }
    return this.gradient;
}

Source File: Vectors.java From pyramid with Apache License 2.0

6 votes

/**
 * Dot product of two vectors of equal size, dispatching on their density.
 *
 * @param vector1 first operand
 * @param vector2 second operand
 * @return the dot product
 * @throws IllegalArgumentException      if the sizes differ
 * @throws UnsupportedOperationException if both vectors are sparse
 */
public static double dot(Vector vector1, Vector vector2){
    if (vector1.size()!=vector2.size()){
        throw new IllegalArgumentException("vector1.size()!=vector2.size()");
    }

    final boolean firstDense = vector1.isDense();
    final boolean secondDense = vector2.isDense();

    if (firstDense){
        return secondDense
                ? dotDenseDense(vector1,vector2)
                : dotDenseSparse(vector1,vector2);
    }
    if (secondDense){
        // the helper expects the dense operand first
        return dotDenseSparse(vector2,vector1);
    }
    throw new UnsupportedOperationException("sparse dot sparse is not supported");
}

Source File: KLLoss.java From pyramid with Apache License 2.0

6 votes

/**
 * Gradient of the log likelihood, served from the cache when it is valid.
 *
 * @return the gradient vector; recomputed first if the cache flag is not set
 */
@Override
public Vector getGradient() {
    if (!isGradientCacheValid) {
        if (logger.isDebugEnabled()){
            logger.debug("start method getGradient()");
        }

        // O(NdL): refresh every intermediate matrix, then the gradient itself
        updateClassScoreMatrix();
        updateAssignmentScoreMatrix();
        updateAssignmentProbMatrix();
        updateCombProbSums();
        updateClassProbMatrix();
        updateGradient();
        this.isGradientCacheValid = true;

        if (logger.isDebugEnabled()){
            logger.debug("finish method getGradient()");
        }
    }
    return this.gradient;
}

Source File: CBMUtilityOptimizer.java From pyramid with Apache License 2.0

6 votes

/**
 * Recomputes the component weights of data point {@code n}: for every component
 * the log posterior memberships of all combinations are summed, weighted by
 * {@code targets[n]}, and the sums are passed through a softmax.
 *
 * @param n index of the data point; row {@code n} of {@code gammas} and column
 *          {@code n} of {@code gammasT} are overwritten
 */
private void updateGamma(int n) {
    BMDistribution bmDistribution = cbm.computeBM(dataSet.getRow(n));

    // logPosteriors[c][k]: combination c, component k
    final int numCombinations = combinations.size();
    double[][] logPosteriors = new double[numCombinations][];
    for (int c=0;c<numCombinations;c++){
        logPosteriors[c] = bmDistribution.logPosteriorMembership(combinations.get(c));
    }

    double[] weightedSums = new double[cbm.numComponents];
    for (int k=0;k<weightedSums.length;k++){
        double acc = 0;
        for (int c=0;c<numCombinations;c++){
            acc += targets[n][c]*logPosteriors[c][k];
        }
        weightedSums[k] = acc;
    }

    double[] posterior = MathUtil.softmax(weightedSums);
    for (int k=0; k<weightedSums.length; k++) {
        // keep the matrix and its transpose in sync
        gammas[n][k] = posterior[k];
        gammasT[k][n] = posterior[k];
    }
}

Source File: ElasticNetLogisticTrainer.java From pyramid with Apache License 2.0

5 votes

/**
 * A special backtracking line search for sufficient decrease with an elastic-net
 * penalized model.
 * Reference: An improved glmnet for l1-regularized logistic regression.
 *
 * Starting from step length 1, the step is halved until the loss at the trial
 * point satisfies the sufficient-decrease condition. The model weights are set to
 * each trial point, so on return they hold the accepted point.
 *
 * @param searchDirection direction along which the weights are moved
 * @param gradient        gradient used in the sufficient-decrease condition
 */
private void lineSearch(Vector searchDirection, Vector gradient){
    final double initialStepLength = 1;
    final double shrinkage = 0.5;
    final double c = 1e-4;

    Vector start = logisticRegression.getWeights().getAllWeights();
    double startPenalty = penalty();
    double startValue = loss(startPenalty);
    if (logger.isDebugEnabled()){
        logger.debug("start line search");
        logger.debug("initial loss = "+loss());
    }
    double directionalDerivative = gradient.dot(searchDirection);

    for (double stepLength = initialStepLength; ; stepLength *= shrinkage){
        Vector trialPoint = start.plus(searchDirection.times(stepLength));
        logisticRegression.getWeights().setWeightVector(trialPoint);
        double trialPenalty = penalty();
        double trialValue = loss(trialPenalty);

        double allowed = startValue
                + c*stepLength*(directionalDerivative + trialPenalty - startPenalty);
        if (trialValue <= allowed){
            if (logger.isDebugEnabled()){
                logger.debug("step size = "+stepLength);
                logger.debug("final loss = "+trialValue);
                logger.debug("line search done");
            }
            return;
        }
    }
}

Source File: ALSWRFactorizer.java From elasticsearch-taste with Apache License 2.0

5 votes

/**
 * Copies the given preferences into a sparse vector indexed by user ID.
 *
 * @param prefs preferences to copy
 * @return a sparse vector of cardinality {@code Integer.MAX_VALUE} holding each
 *         preference value at the position of its user ID
 */
protected Vector sparseItemRatingVector(final PreferenceArray prefs) {
    final int expectedEntries = prefs.length();
    final SequentialAccessSparseVector itemRatings =
            new SequentialAccessSparseVector(Integer.MAX_VALUE, expectedEntries);
    for (final Preference pref : prefs) {
        // user IDs are narrowed to int to serve as vector indices
        final int userIndex = (int) pref.getUserID();
        itemRatings.set(userIndex, pref.getValue());
    }
    return itemRatings;
}

Source File: CBMS.java From pyramid with Apache License 2.0

5 votes

/**
 * Sorts the marginal class probabilities in descending order and predicts the
 * labels with the highest ones.
 *
 * @param vector feature vector of the instance
 * @param top    number of labels to keep; when it exceeds the number of classes,
 *               all classes are kept, and when it is not positive the prediction
 *               is empty
 * @return a label set containing the {@code top} most probable labels
 */
public MultiLabel predictByMarginals(Vector vector, int top){
    double[] probs = predictClassProbs(vector);
    int[] sortedIndices = ArgSort.argSortDescending(probs);
    // never read past the end of the sorted indices when top is too large
    int numToKeep = Math.min(top, sortedIndices.length);
    MultiLabel prediction = new MultiLabel();
    for (int i=0;i<numToKeep;i++){
        prediction.addLabel(sortedIndices[i]);
    }
    return prediction;
}

Source File: MLLogisticRegression.java From pyramid with Apache License 2.0

5 votes

/**
 * Log likelihood of a label set for an instance: the score of the given
 * assignment minus the log-sum-exp of the scores of all assignments.
 *
 * @param vector     feature vector of the instance
 * @param multiLabel label set whose log likelihood is computed
 * @return the log numerator minus the log denominator
 */
double logLikelihood(Vector vector, MultiLabel multiLabel){
    double[] classScores = predictClassScores(vector);
    double[] assignmentScores = calAssignmentScores(classScores);
    // normalizer over all assignments
    double logDenominator = MathUtil.logSumExp(assignmentScores);
    double logNumerator = this.calAssignmentScore(multiLabel, classScores);
    return logNumerator-logDenominator;
}

Source File: RegressionTreeTest.java From pyramid with Apache License 2.0

5 votes

/**
 * Manual check: prints a stump built by {@code RegressionTree.newStump(10,0.5,-1.2,3)}
 * and its prediction for a vector whose element 10 is 0.6.
 */
private static void test5(){
    RegressionTree stump = RegressionTree.newStump(10,0.5,-1.2,3);
    System.out.println(stump);

    Vector input = new DenseVector(100);
    input.set(10,0.6);
    System.out.println(stump.predict(input));
}

Source File: RidgeBinaryLogisticLoss.java From pyramid with Apache License 2.0

5 votes

/**
 * Dot product of a feature column with another vector.
 *
 * @param columnIndex column to use; the bias feature has index 0, so data set
 *                    column {@code columnIndex-1} is used for every other index
 * @param vector      the other operand
 * @return the sum of {@code vector}'s elements for the bias column, otherwise the
 *         dot product with the data set column
 */
private double columnDot(int columnIndex, Vector vector){
    if (columnIndex != 0){
        Vector featureColumn = dataSet.getColumn(columnIndex-1);
        return featureColumn.dot(vector);
    }
    // bias column: the result is just the sum of the vector's elements
    return vector.zSum();
}

Source File: SupervisedEmbeddingLoss.java From pyramid with Apache License 2.0

5 votes

/**
 * Copies a flat parameter vector into the updated embedding matrix, row by row.
 *
 * @param parameters flat vector; element {@code i * numFeatures + j} becomes the
 *                   value of feature {@code j} of data point {@code i}
 */
public void setParameters(Vector parameters) {
    final int rows = this.updatedEmbeddingMatrix.getNumDataPoints();
    final int cols = this.updatedEmbeddingMatrix.getNumFeatures();

    // running position in the flat vector; equals row * cols + col at every step
    int flatIndex = 0;
    for (int row = 0; row < rows; row++) {
        for (int col = 0; col < cols; col++) {
            this.updatedEmbeddingMatrix.setFeatureValue(row, col, parameters.get(flatIndex));
            flatIndex++;
        }
    }
}

Source File: LogisticLoss.java From pyramid with Apache License 2.0

5 votes

/**
 * Gradient of the penalty term: every weight divided by the prior Gaussian
 * variance, with the entries at the bias positions set to zero.
 *
 * @return a dense vector with one entry per weight
 */
private Vector penaltyGradient(){
    Vector allWeights = this.logisticRegression.getWeights().getAllWeights();
    Vector scaledWeights = allWeights.divide(priorGaussianVariance);

    // adding to a zero dense vector gives a dense result
    Vector penaltyGrad = new DenseVector(allWeights.size()).plus(scaledWeights);

    // biases are not penalized
    for (int biasPosition:logisticRegression.getWeights().getAllBiasPositions()){
        penaltyGrad.set(biasPosition,0);
    }
    return penaltyGrad;
}

Source File: VectorCardIsoSetCalibrator.java From pyramid with Apache License 2.0

5 votes

/**
 * Calibrates the score stored in the vector with the calibrator registered for
 * the cardinality stored in the same vector.
 *
 * @param vector vector holding the uncalibrated score at {@code scoreIndex} and
 *               the cardinality at {@code cardIndex}
 * @return the calibrated score, or 0 when no calibrator exists for the cardinality
 */
public double calibrate(Vector vector){
    final double rawScore = vector.get(scoreIndex);
    final int cardinality = (int)vector.get(cardIndex);

    if (calibrations.containsKey(cardinality)){
        return calibrations.get(cardinality).predict(rawScore);
    }
    // unseen cardinality
    return 0;
}

Source File: PluginF1.java From pyramid with Apache License 2.0

5 votes

/**
 * Predicts a label set by drawing {@code numSamples} samples from the model and
 * running a {@code GeneralF1Predictor} limited to {@code maxSize} on them.
 *
 * @param vector feature vector of the instance
 * @return the prediction of the F1 predictor over the samples
 */
private MultiLabel predictBySampling(Vector vector){
    GeneralF1Predictor f1Predictor = new GeneralF1Predictor();
    f1Predictor.setMaxSize(maxSize);

    // The samples are used as drawn, duplicates included. De-duplicating them and
    // weighting by cbm.predictAssignmentProbs is an alternative that is not used.
    List<MultiLabel> drawn = cbm.samples(vector, numSamples);
    return f1Predictor.predict(cbm.getNumClasses(), drawn);
}

Source File: DataSetUtil.java From pyramid with Apache License 2.0

5 votes

/**
 * Converts a multi-label data set into a binary one for a single class.
 * Data points matching class {@code k} are positive (1), all others negative (0).
 * The result is dense or sparse like the input and shares its feature list.
 *
 * @param dataSet multi-label data set to convert
 * @param k       class that becomes the positive label
 * @return a two-class data set whose external labels are "NOT " + label of
 *         {@code k} (index 0) and the label of {@code k} (index 1)
 */
public static ClfDataSet toBinary(MultiLabelClfDataSet dataSet, int k){
    final int numDataPoints = dataSet.getNumDataPoints();
    final int numFeatures = dataSet.getNumFeatures();
    final boolean missingValue = dataSet.hasMissingValue();

    // keep the density of the input
    ClfDataSet binary = dataSet.isDense()
            ? new DenseClfDataSet(numDataPoints,numFeatures,missingValue, 2)
            : new SparseClfDataSet(numDataPoints,numFeatures,missingValue, 2);

    for (int row=0;row<numDataPoints;row++){
        // only non-zero elements need copying
        for (Vector.Element entry: dataSet.getRow(row).nonZeroes()){
            int featureIndex = entry.index();
            double value = entry.get();
            binary.setFeatureValue(row,featureIndex,value);
        }
        int binaryLabel = dataSet.getMultiLabels()[row].matchClass(k) ? 1 : 0;
        binary.setLabel(row,binaryLabel);
    }

    String positiveName = dataSet.getLabelTranslator().toExtLabel(k);
    List<String> labelNames = new ArrayList<>();
    labelNames.add("NOT "+positiveName);
    labelNames.add(positiveName);
    binary.setLabelTranslator(new LabelTranslator(labelNames));
    binary.setFeatureList(dataSet.getFeatureList());

    return binary;
}

Source File: IMLLogisticRegression.java From pyramid with Apache License 2.0

5 votes

/**
 * Predicts the best-scoring label set, considering only the stored assignments.
 *
 * @param vector feature vector of the instance
 * @return the assignment with the highest score (the first one on ties), or
 *         {@code null} if no assignment scores above negative infinity
 */
private MultiLabel predictWithConstraints(Vector vector){
    double[] classScores = predictClassScores(vector);

    MultiLabel best = null;
    double bestScore = Double.NEGATIVE_INFINITY;
    for (MultiLabel candidate: this.assignments){
        double candidateScore = this.calAssignmentScore(candidate,classScores);
        if (!(candidateScore > bestScore)){
            continue;
        }
        bestScore = candidateScore;
        best = candidate;
    }
    return best;
}

Source File: L2Boost.java From pyramid with Apache License 2.0

5 votes

/**
 * Score of class {@code k}: class 0 always scores 0, every other class gets the
 * score of ensemble 0.
 *
 * @param vector feature vector of the instance
 * @param k      class index
 * @return the class score
 */
@Override
public double predictClassScore(Vector vector, int k) {
    if (k != 0){
        return getEnsemble(0).score(vector);
    }
    return 0;
}

Source File: IntervalSplitterTest.java From pyramid with Apache License 2.0

5 votes

/**
 * Manual check: prints the intervals generated for a 4-element column whose first
 * three values are NaN, followed by their compressed form.
 */
static void test12(){
    double[] labels = {1,2,3,4};
    double[] probs = {1,0.5,1,0.6};
    RegTreeConfig config = new RegTreeConfig().setNumSplitIntervals(2);

    Vector column = new DenseVector(4);
    column.set(0,Double.NaN);
    column.set(1,Double.NaN);
    column.set(2,Double.NaN);
    column.set(3,3);

    Splitter.GlobalStats stats = new Splitter.GlobalStats(labels,probs);
    List<Interval> generated = IntervalSplitter.generateIntervals(config, column, probs, labels,stats);
    System.out.println(generated);
    System.out.println(IntervalSplitter.compress(generated));
}

Source File: LogisticL2DiffFunction.java From laser with Apache License 2.0

5 votes

/**
 * Stores the given values and derives the dimensions from {@code a}.
 *
 * @param a   row vectors; {@code m} is set to the number of rows
 * @param b   stored in field {@code b}
 * @param rho stored in field {@code rho}
 * @param u   stored in field {@code u}
 * @param z   stored in field {@code z}
 */
public LogisticL2DiffFunction(Vector[] a, double[] b, double rho,
		double[] u, double[] z) {
	LOG.info("Initialize LogisticL2DiffFunction");
	this.a = a;
	this.b = b;
	this.u = u;
	this.z = z;
	this.rho = rho;

	this.m = a.length;
	// n is the size of the first row minus one, or 0 when there are no rows
	this.n = (this.m > 0) ? this.a[0].size() - 1 : 0;
	LOG.info("Initialize LogisticL2DiffFunction Finish");
}

Source File: RegressionTree.java From pyramid with Apache License 2.0

5 votes

/**
 * Prediction computed as the sum over all leaves of the leaf value multiplied by
 * the probability obtained for that leaf.
 *
 * @param vector feature vector of the instance
 * @return the probability-weighted sum of the leaf values
 */
private double predictWithMissingValue(Vector vector){
    // the two arrays, one slot per node, are shared by all leaves as a simple cache
    final int nodeCount = this.numNodes;
    final boolean[] calculated = new boolean[nodeCount];
    final double[] nodeProbs = new double[nodeCount];

    double weightedSum = 0;
    for (Node leaf: this.leaves){
        weightedSum += probability(vector,leaf, calculated, nodeProbs)*leaf.getValue();
    }
    return weightedSum;
}

Source File: FusedKolmogorovFilter.java From pyramid with Apache License 2.0

5 votes

/**
 * Builds one empirical CDF per class. All CDFs always use the global minimum and
 * maximum of the vector, together with the configured number of bins.
 *
 * @param vector          values from which the global min and max are taken
 * @param inputsEachClass the input values of each class
 * @return one CDF per entry of {@code inputsEachClass}, in the same order
 */
public List<EmpiricalCDF> generateCDFs(Vector vector, List<List<Double>> inputsEachClass){
    final double globalMin = vector.minValue();
    final double globalMax = vector.maxValue();
    return inputsEachClass.stream()
            .map(classInputs -> new EmpiricalCDF(classInputs,globalMin,globalMax,numBins))
            .collect(Collectors.toList());
}

Source File: DataSetUtil.java From pyramid with Apache License 2.0

5 votes

/**
 * Creates a data set that keeps only the selected feature columns. Labels, the
 * label translator and the id translator are carried over, and the result is dense
 * or sparse like the input.
 *
 * NOTE(review): the kept Feature objects come from the input's feature list and
 * are re-indexed through setIndex, so the input's features may be modified too --
 * confirm whether getAll() returns copies.
 *
 * @param dataSet       data set to take the columns from
 * @param columnsToKeep indices of the columns to keep; the j-th entry becomes
 *                      column j of the result
 * @return the trimmed data set
 */
public static ClfDataSet sampleFeatures(ClfDataSet dataSet, List<Integer> columnsToKeep){
    final int numClasses = dataSet.getNumClasses();
    final boolean missingValue = dataSet.hasMissingValue();

    // keep density
    ClfDataSet trimmed = dataSet.isDense()
            ? new DenseClfDataSet(dataSet.getNumDataPoints(), columnsToKeep.size(), missingValue, numClasses)
            : new SparseClfDataSet(dataSet.getNumDataPoints(),columnsToKeep.size(), missingValue, numClasses);

    // copy the non-zero entries of every kept column into its new position
    for (int newColumn=0;newColumn<trimmed.getNumFeatures();newColumn++){
        int oldColumn = columnsToKeep.get(newColumn);
        for (Vector.Element entry: dataSet.getColumn(oldColumn).nonZeroes()){
            int dataPointIndex = entry.index();
            double value = entry.get();
            trimmed.setFeatureValue(dataPointIndex,newColumn,value);
        }
    }

    //copy labels
    int[] oldLabels = dataSet.getLabels();
    for (int row=0;row<trimmed.getNumDataPoints();row++){
        trimmed.setLabel(row,oldLabels[row]);
    }

    trimmed.setLabelTranslator(dataSet.getLabelTranslator());
    trimmed.setIdTranslator(dataSet.getIdTranslator());

    // re-index the kept features so that they match their new column positions
    List<Feature> allFeatures = dataSet.getFeatureList().getAll();
    List<Feature> keptFeatures = columnsToKeep.stream()
            .map(allFeatures::get)
            .collect(Collectors.toList());
    int position = 0;
    for (Feature kept: keptFeatures){
        kept.setIndex(position);
        position++;
    }
    trimmed.setFeatureList(new FeatureList(keptFeatures));
    return trimmed;
}

Source File: CMLCRF.java From pyramid with Apache License 2.0

5 votes

/**
 * Computes the score of every class for an instance.
 *
 * @param vector feature vector of the instance
 * @return an array of length {@code numClasses} whose k-th entry is the score of
 *         class k
 */
double[] predictClassScores(Vector vector){
    double[] classScores = new double[numClasses];
    int classIndex = 0;
    while (classIndex < numClasses){
        classScores[classIndex] = predictClassScore(vector, classIndex);
        classIndex++;
    }
    return classScores;
}

org.apache.mahout.math.Vector Java Examples