gnu.trove.TIntDoubleHashMap Java Examples

Source File: CosineSimilarityFunction.java From jatecs with GNU General Public License v3.0

6 votes

/**
 * Computes the cosine similarity of two documents over the given features.
 *
 * <p>For every feature ID produced by {@code features}, the weights are read
 * from both maps with {@code get(featID)} and accumulated into the dot product
 * and the two squared norms.
 *
 * @param doc1     feature-ID to weight map of the first document
 * @param doc2     feature-ID to weight map of the second document
 * @param features iterator over the feature IDs to consider; it is rewound
 *                 with {@code begin()} before use
 * @return the cosine similarity, capped at 1; 0 when either document has a
 *         zero norm over the given features
 */
public double compute(TIntDoubleHashMap doc1, TIntDoubleHashMap doc2, IIntIterator features) {
    double dotProduct = 0;
    double squaredNorm1 = 0;
    double squaredNorm2 = 0;

    features.begin();
    while (features.hasNext()) {
        int featID = features.next();

        double weight1 = doc1.get(featID);
        double weight2 = doc2.get(featID);

        dotProduct += weight1 * weight2;
        squaredNorm1 += weight1 * weight1;
        squaredNorm2 += weight2 * weight2;
    }

    double denominator = Math.sqrt(squaredNorm1) * Math.sqrt(squaredNorm2);
    if (denominator == 0) {
        // A zero vector would make the division below yield NaN (0/0);
        // treat such a document as having no similarity to anything.
        return 0;
    }

    double similarity = dotProduct / denominator;
    // Rounding errors can push the ratio marginally above 1.
    return similarity > 1 ? 1.0 : similarity;
}

Source File: ConfidenceBased.java From jatecs with GNU General Public License v3.0

6 votes

/**
 * Builds a table that maps every document ID in {@code [0, testSize)} to the
 * sum of {@code probability(|score - border|, category)} over the document's
 * classification entries whose category passes both {@code categoriesFilter}
 * and the document's own entry in {@code docCategoriesFilter}.
 *
 * @return the newly built table, one entry per document
 */
public TIntDoubleHashMap getTable() {
    int initialCapacity = (int) (testSize + testSize * 0.25);
    TIntDoubleHashMap rank = new TIntDoubleHashMap(initialCapacity, (float) 0.75);
    for (int docId = 0; docId < testSize; docId++) {
        Set<Entry<Short, ClassifierRangeWithScore>> entries = classification.getDocumentScoresAsSet(docId);
        double total = 0.0;
        for (Entry<Short, ClassifierRangeWithScore> entry : entries) {
            Short category = entry.getKey();
            // Skip categories rejected by either filter.
            if (!categoriesFilter.contains(category) || !docCategoriesFilter[docId].contains(category)) {
                continue;
            }
            ClassifierRangeWithScore value = entry.getValue();
            total += probability(Math.abs(value.score - value.border), category);
        }
        rank.put(docId, total);
    }
    return rank;
}

Source File: Incremental.java From jatecs with GNU General Public License v3.0

6 votes

/**
 * Creates the ranker, allocates the macro/micro rank tables and "already
 * seen" sets, and pre-computes {@code probabilities[docId][category index]}
 * for every classification entry whose category is in {@code categoriesFilter}.
 *
 * <p>NOTE(review): the {@code gain} parameter is not used in this constructor;
 * only {@code firstRankGain} is forwarded to the superclass — confirm this is
 * intended.
 */
public Incremental(int trainSize, ClassificationScoreDB classification, TIntHashSet categoriesFilter,
                   EstimationType estimation, ContingencyTableSet evaluation, IGain gain, IGain firstRankGain, double[] probabilitySlope, double[] prevalencies) {
    super(trainSize, classification, categoriesFilter, estimation, evaluation, firstRankGain, probabilitySlope, prevalencies);

    // All four containers share the same initial capacity and load factor.
    final int initialCapacity = (int) (testSize + testSize * 0.25);
    final float loadFactor = (float) 0.75;
    macroRankTable = new TIntDoubleHashMap(initialCapacity, loadFactor);
    microRankTable = new TIntDoubleHashMap(initialCapacity, loadFactor);
    macroAlreadySeen = new TIntHashSet(initialCapacity, loadFactor);
    microAlreadySeen = new TIntHashSet(initialCapacity, loadFactor);

    probabilities = new double[testSize][numOfCategories];
    for (int docId = 0; docId < testSize; docId++) {
        Set<Entry<Short, ClassifierRangeWithScore>> entries = classification.getDocumentScoresAsSet(docId);
        for (Entry<Short, ClassifierRangeWithScore> entry : entries) {
            Short category = entry.getKey();
            if (!categoriesFilter.contains(category)) {
                continue;
            }
            ClassifierRangeWithScore value = entry.getValue();
            probabilities[docId][catMap.get(category)] = probability(Math.abs(value.score - value.border), category);
        }
    }
}

Source File: Ranker.java From jatecs with GNU General Public License v3.0

6 votes

/**
 * Returns the keys of {@code table} ordered by the natural ordering of
 * {@code ComparablePair} built from each (key, value) entry.
 *
 * @param table the table whose keys are to be ranked
 * @return the keys, in the order produced by sorting the pairs
 */
public TIntArrayList get(TIntDoubleHashMap table) {
    final ArrayList<ComparablePair> pairs = new ArrayList<ComparablePair>(table.size());

    // Collect every entry; returning true keeps the traversal going.
    table.forEachEntry(new TIntDoubleProcedure() {
        @Override
        public boolean execute(int key, double value) {
            pairs.add(new ComparablePair(key, value));
            return true;
        }
    });

    Collections.sort(pairs);

    TIntArrayList ranking = new TIntArrayList(pairs.size());
    for (ComparablePair pair : pairs) {
        ranking.add(pair.getFirst());
    }
    return ranking;
}

Source File: BaseSimilarityFunction.java From jatecs with GNU General Public License v3.0

6 votes

/**
 * Computes the similarity of two documents of the same index.
 *
 * <p>Both documents are expanded into feature-ID to weight maps covering every
 * feature of the index, then handed to
 * {@code compute(TIntDoubleHashMap, TIntDoubleHashMap, IIntIterator)}.
 *
 * @param doc1  ID of the first document
 * @param doc2  ID of the second document
 * @param index index providing the features and the weights
 * @return the value returned by the map-based {@code compute}
 */
public double compute(int doc1, int doc2, IIndex index) {
    int featureCount = index.getFeatureDB().getFeaturesCount();
    TIntDoubleHashMap weights1 = new TIntDoubleHashMap(featureCount);
    TIntDoubleHashMap weights2 = new TIntDoubleHashMap(featureCount);

    IIntIterator features = index.getFeatureDB().getFeatures();
    while (features.hasNext()) {
        int featID = features.next();
        double w1 = index.getWeightingDB().getDocumentFeatureWeight(doc1, featID);
        double w2 = index.getWeightingDB().getDocumentFeatureWeight(doc2, featID);
        weights1.put(featID, w1);
        weights2.put(featID, w2);
    }

    // The iterator was consumed above; reset it before passing it on.
    features.begin();
    return compute(weights1, weights2, features);
}

Source File: BaseSimilarityFunction.java From jatecs with GNU General Public License v3.0

6 votes

/**
 * Computes the similarity of two documents that live in two indexes.
 *
 * <p>The feature IDs (and the initial capacity of both maps) are taken from
 * {@code idx1} only; weights for {@code doc2} are looked up in {@code idx2}
 * using those same feature IDs.
 *
 * @param doc1 ID of the first document
 * @param idx1 index of the first document; also the source of the feature IDs
 * @param doc2 ID of the second document
 * @param idx2 index of the second document
 * @return the value returned by the map-based {@code compute}
 */
public double compute(int doc1, IIndex idx1, int doc2, IIndex idx2) {
    int featureCount = idx1.getFeatureDB().getFeaturesCount();
    TIntDoubleHashMap weights1 = new TIntDoubleHashMap(featureCount);
    TIntDoubleHashMap weights2 = new TIntDoubleHashMap(featureCount);

    IIntIterator features = idx1.getFeatureDB().getFeatures();
    while (features.hasNext()) {
        int featID = features.next();
        double w1 = idx1.getWeightingDB().getDocumentFeatureWeight(doc1, featID);
        double w2 = idx2.getWeightingDB().getDocumentFeatureWeight(doc2, featID);
        weights1.put(featID, w1);
        weights2.put(featID, w2);
    }

    // The iterator was consumed above; reset it before passing it on.
    features.begin();
    return compute(weights1, weights2, features);
}

Source File: TroveWeightingDB.java From jatecs with GNU General Public License v3.0

5 votes

/**
 * Returns the weight of a feature in a document.
 *
 * @param document the document ID
 * @param feature  the feature ID
 * @return 0.0 when the content DB reports that the document does not contain
 *         the feature; otherwise the stored weight, or 1.0 when no weight is
 *         stored for that document/feature pair
 */
public double getDocumentFeatureWeight(int document, int feature) {
    if (!_contentDB.hasDocumentFeature(document, feature)) {
        return 0.0;
    }
    if (document >= _documentsWeights.size()) {
        // No weight map exists for this document.
        return 1.0;
    }
    TIntDoubleHashMap weights = _documentsWeights.get(document);
    return weights.containsKey(feature) ? weights.get(feature) : 1.0;
}

Source File: DotProduct.java From jatecs with GNU General Public License v3.0

5 votes

/**
 * Computes the dot product of two documents by walking the entries of
 * {@code doc1} and multiplying each value by {@code doc2.get(key)}.
 *
 * @param doc1 feature-ID to weight map whose entries are iterated
 * @param doc2 feature-ID to weight map that is looked up by key
 * @return the accumulated sum of products
 */
public double compute(TIntDoubleHashMap doc1, TIntDoubleHashMap doc2) {
    double sum = 0;
    for (TIntDoubleIterator entry = doc1.iterator(); entry.hasNext(); ) {
        entry.advance();
        sum += entry.value() * doc2.get(entry.key());
    }
    return sum;
}

Source File: BestAutomaticNegativesChooser.java From jatecs with GNU General Public License v3.0

5 votes

/**
 * Expands a document into a map holding, for every feature of the index, the
 * weight reported by the index's weighting DB for that document.
 *
 * @param docID the document ID
 * @param index the index providing features and weights
 * @return a new feature-ID to weight map
 */
protected TIntDoubleHashMap getDocumentAsMap(int docID, IIndex index) {
    int featureCount = index.getFeatureDB().getFeaturesCount();
    TIntDoubleHashMap weights = new TIntDoubleHashMap(featureCount);

    for (IIntIterator features = index.getFeatureDB().getFeatures(); features.hasNext(); ) {
        int featID = features.next();
        double weight = index.getWeightingDB().getDocumentFeatureWeight(docID, featID);
        weights.put(featID, weight);
    }
    return weights;
}

Source File: SparseVector.java From jatecs with GNU General Public License v3.0

5 votes

/**
 * Copy constructor: creates a vector with its own dimension/value map holding
 * the same entries as {@code other}, and the same {@code _k}.
 *
 * @param other the vector to copy
 */
public SparseVector(SparseVector other) {
	_dim_value = new TIntDoubleHashMap(other.size());
	for (int dim : other._dim_value.keys()) {
		double value = other._dim_value.get(dim);
		_dim_value.put(dim, value);
	}
	_k = other._k;
}

Source File: ConfidenceBasedOracle.java From jatecs with GNU General Public License v3.0

5 votes

/**
 * Takes the superclass table and, for every document ID in
 * {@code [0, testSize)} whose value is not 0.0, calls
 * {@code adjustValue(docId, Math.random())} on it.
 *
 * @return the table obtained from the superclass, after adjustment
 */
@Override
public TIntDoubleHashMap getTable() {
    TIntDoubleHashMap rank = super.getTable();
    int docId = 0;
    while (docId < testSize) {
        boolean hasScore = rank.get(docId) != 0.0;
        if (hasScore) {
            rank.adjustValue(docId, Math.random());
        }
        docId++;
    }
    return rank;
}

Source File: Random.java From jatecs with GNU General Public License v3.0

5 votes

/**
 * Builds a table that assigns a fresh {@code Math.random()} value to every
 * document ID in {@code [0, testSize)}.
 *
 * @return the newly built table
 */
public TIntDoubleHashMap getTable() {
    TIntDoubleHashMap randomRank = new TIntDoubleHashMap(testSize);
    int docId = 0;
    while (docId < testSize) {
        randomRank.put(docId, Math.random());
        docId++;
    }
    return randomRank;
}

Source File: ConfidenceBased.java From jatecs with GNU General Public License v3.0

5 votes

/**
 * Ranks the categories of a document (restricted to {@code categoriesFilter})
 * by {@code probability(|score - border|, category)} using {@code Ranker},
 * and returns the first {@code topK} categories of that ranking as a set.
 *
 * <p>NOTE(review): {@code toNativeArray(0, topK)} presumably fails when fewer
 * than {@code topK} categories pass the filter — confirm against callers.
 *
 * @param docId the document ID
 * @param topK  how many leading entries of the ranking to keep
 * @return the set of selected category IDs
 */
private TIntHashSet filterByTopProbabilities(int docId, int topK) {
    int initialCapacity = (int) (testSize + testSize * 0.25);
    TIntDoubleHashMap topProbRank = new TIntDoubleHashMap(initialCapacity, (float) 0.75);

    Set<Entry<Short, ClassifierRangeWithScore>> entries = classification.getDocumentScoresAsSet(docId);
    for (Entry<Short, ClassifierRangeWithScore> entry : entries) {
        Short category = entry.getKey();
        if (!categoriesFilter.contains(category)) {
            continue;
        }
        ClassifierRangeWithScore value = entry.getValue();
        topProbRank.put(category, probability(Math.abs(value.score - value.border), category));
    }

    int[] topCategories = new Ranker().get(topProbRank).toNativeArray(0, topK);
    return new TIntHashSet(topCategories);
}

Source File: UtilityBased.java From jatecs with GNU General Public License v3.0

5 votes

/**
 * Builds a table that maps every document ID in {@code [0, testSize)} to the
 * sum of {@code utilities[docId][catMap.get(catId)]} over the categories of
 * {@code categoriesFilter} that are also in the document's own entry of
 * {@code docCategoriesFilter}.
 *
 * @param utilities utility values, indexed by document ID and then by the
 *                  index {@code catMap} assigns to the category
 * @return the newly built table, one entry per document
 */
public TIntDoubleHashMap getTable(double[][] utilities) {
    int initialCapacity = (int) (testSize + testSize * 0.25);
    TIntDoubleHashMap rank = new TIntDoubleHashMap(initialCapacity, (float) 0.75);

    for (int docId = 0; docId < testSize; docId++) {
        double total = 0.0;
        TIntIterator categories = categoriesFilter.iterator();
        while (categories.hasNext()) {
            int catId = categories.next();
            if (!docCategoriesFilter[docId].contains(catId)) {
                continue;
            }
            total += utilities[docId][catMap.get(catId)];
        }
        rank.put(docId, total);
    }
    return rank;
}

Source File: Ranker.java From jatecs with GNU General Public License v3.0

5 votes

/**
 * Returns the key associated with the largest value of the table.
 *
 * <p>A value only replaces the current maximum when it is strictly greater, so
 * the first entry met (in the table's iteration order) wins among equal
 * maxima.
 *
 * @param table the table to scan
 * @return the key of the largest value, or {@code Integer.MIN_VALUE} when no
 *         value is greater than {@code Double.NEGATIVE_INFINITY} (in
 *         particular, when the table is empty)
 */
public static int getMax(TIntDoubleHashMap table) {
    // The sentinel used to be written "-Integer.MIN_VALUE", which overflows
    // back to Integer.MIN_VALUE; spell out the value actually returned.
    int maxKey = Integer.MIN_VALUE;
    double maxValue = Double.NEGATIVE_INFINITY;
    TIntDoubleIterator it = table.iterator();
    while (it.hasNext()) {
        it.advance();
        if (it.value() > maxValue) {
            maxValue = it.value();
            maxKey = it.key();
        }
    }
    return maxKey;
}

Source File: BaseSimilarityFunction.java From jatecs with GNU General Public License v3.0

5 votes

/**
 * Computes the similarity between a document given as a weight map and a
 * document of the index given by ID.
 *
 * <p>The second document is expanded into a map covering every feature of the
 * index before delegating to
 * {@code compute(TIntDoubleHashMap, TIntDoubleHashMap, IIntIterator)}.
 *
 * @param doc1  feature-ID to weight map of the first document
 * @param doc2  ID of the second document
 * @param index index providing the features and the weights of {@code doc2}
 * @return the value returned by the map-based {@code compute}
 */
public double compute(TIntDoubleHashMap doc1, int doc2, IIndex index) {
    int featureCount = index.getFeatureDB().getFeaturesCount();
    TIntDoubleHashMap secondDoc = new TIntDoubleHashMap(featureCount);

    IIntIterator features = index.getFeatureDB().getFeatures();
    while (features.hasNext()) {
        int featID = features.next();
        double weight = index.getWeightingDB().getDocumentFeatureWeight(doc2, featID);
        secondDoc.put(featID, weight);
    }

    // The iterator was consumed above; reset it before passing it on.
    features.begin();
    return compute(doc1, secondDoc, features);
}

Source File: TroveWeightingDB.java From jatecs with GNU General Public License v3.0

5 votes

/**
 * Creates a weighting DB named "generic" on top of the given content DB, with
 * one empty weight map per document currently reported by the content DB's
 * document DB.
 *
 * @param contentDB the content DB this weighting DB refers to
 */
public TroveWeightingDB(IContentDB contentDB) {
    super();
    _contentDB = contentDB;

    final int documentCount = contentDB.getDocumentDB().getDocumentsCount();
    _documentsWeights = new Vector<TIntDoubleHashMap>(documentCount);
    int created = 0;
    while (created < documentCount) {
        _documentsWeights.add(new TIntDoubleHashMap());
        created++;
    }

    _name = "generic";
}

Source File: TroveWeightingDB.java From jatecs with GNU General Public License v3.0

5 votes

/**
 * Creates a copy of this weighting DB bound to the given content DB. The name
 * is copied and every per-document weight map is cloned, in order.
 *
 * @param contentDB the content DB the copy will refer to
 * @return the new weighting DB
 */
public IWeightingDB cloneDB(IContentDB contentDB) {
    TroveWeightingDB copy = new TroveWeightingDB(contentDB);
    copy._name = new String(_name);

    // Replace the maps created by the constructor with clones of ours.
    final int documentCount = _documentsWeights.size();
    copy._documentsWeights = new Vector<TIntDoubleHashMap>(documentCount);
    int docId = 0;
    while (docId < _documentsWeights.size()) {
        TIntDoubleHashMap cloned = (TIntDoubleHashMap) _documentsWeights.get(docId).clone();
        copy._documentsWeights.add(cloned);
        ++docId;
    }

    return copy;
}

Source File: TroveWeightingDBBuilder.java From jatecs with GNU General Public License v3.0

5 votes

/**
 * Stores the weight of a feature in a document. Nothing happens when the
 * content DB reports that the document does not contain the feature.
 *
 * @param document the document ID
 * @param feature  the feature ID
 * @param weight   the weight to store
 */
public void setDocumentFeatureWeight(int document, int feature,
                                     double weight) {
    if (!_weightingDB.getContentDB().hasDocumentFeature(document, feature)) {
        return;
    }
    // Grow the list of weight maps until it has a slot for this document.
    for (; document >= _weightingDB._documentsWeights.size(); ) {
        _weightingDB._documentsWeights.add(new TIntDoubleHashMap());
    }
    _weightingDB._documentsWeights.get(document).put(feature, weight);
}

Source File: Clustering.java From jatecs with GNU General Public License v3.0

5 votes

/**
 * Computes the centroid of a set of documents: for each feature met in any of
 * the documents, the sum of its weights over the documents divided by the
 * number of documents iterated.
 *
 * @param docs  iterator over the document IDs; rewound with {@code begin()}
 *              before use
 * @param index index providing document features and weights
 * @return a new feature-ID to averaged-weight map (empty when {@code docs}
 *         yields no document)
 */
public static TIntDoubleHashMap computeDocumentCentroid(IIntIterator docs,
                                                        IIndex index) {
    int featureCount = index.getFeatureDB().getFeaturesCount();
    TIntDoubleHashMap centroid = new TIntDoubleHashMap(featureCount);

    int docCount = 0;
    for (docs.begin(); docs.hasNext(); docCount++) {
        int docID = docs.next();
        IIntIterator docFeatures = index.getContentDB().getDocumentFeatures(docID);
        while (docFeatures.hasNext()) {
            int featID = docFeatures.next();
            double accumulated = centroid.get(featID);
            double weight = index.getWeightingDB().getDocumentFeatureWeight(docID, featID);
            centroid.put(featID, accumulated + weight);
        }
    }

    // Turn the per-feature sums into averages.
    for (int key : centroid.keys()) {
        centroid.put(key, centroid.get(key) / (double) docCount);
    }

    return centroid;
}

Source File: EuclideanDistance.java From jatecs with GNU General Public License v3.0

5 votes

/**
 * Computes the Euclidean distance between two documents over the given
 * features: the square root of the sum of squared weight differences.
 *
 * @param doc1     feature-ID to weight map of the first document
 * @param doc2     feature-ID to weight map of the second document
 * @param features iterator over the feature IDs to consider; rewound with
 *                 {@code begin()} before use
 * @return the Euclidean distance
 */
public double compute(TIntDoubleHashMap doc1, TIntDoubleHashMap doc2, IIntIterator features) {
    double squaredSum = 0;
    for (features.begin(); features.hasNext(); ) {
        int featID = features.next();
        double difference = doc1.get(featID) - doc2.get(featID);
        squaredSum += Math.pow(difference, 2);
    }
    return Math.sqrt(squaredSum);
}

Source File: EuclideanSquareDistance.java From jatecs with GNU General Public License v3.0

5 votes

/**
 * Computes the squared Euclidean distance between two documents over the
 * given features: the sum of squared weight differences, without the final
 * square root.
 *
 * @param doc1     feature-ID to weight map of the first document
 * @param doc2     feature-ID to weight map of the second document
 * @param features iterator over the feature IDs to consider; rewound with
 *                 {@code begin()} before use
 * @return the squared Euclidean distance
 */
public double compute(TIntDoubleHashMap doc1, TIntDoubleHashMap doc2, IIntIterator features) {
	double squaredSum = 0;
	for (features.begin(); features.hasNext(); ) {
		int featID = features.next();
		double difference = doc1.get(featID) - doc2.get(featID);
		squaredSum += Math.pow(difference, 2);
	}
	return squaredSum;
}

Source File: Random.java From jatecs with GNU General Public License v3.0

4 votes

/**
 * Returns the table for the macro variant by delegating to
 * {@code getTable()}; no separate macro-specific table is computed here.
 *
 * @return whatever {@code getTable()} returns
 */
@Override
public TIntDoubleHashMap getMacroTable() {
    return getTable();
}

Source File: RocchioDataManager.java From jatecs with GNU General Public License v3.0

4 votes

/**
 * Reads a Rocchio classifier from the given storage manager.
 *
 * <p>The resource {@code validCategories.db} under {@code modelName} supplies
 * the number of categories. For every category ID {@code c} in
 * {@code [0, numCats)}, {@code c.db} holds a feature count followed by
 * (feature ID, weight) pairs, and {@code c_range.db} holds the border,
 * maximum and minimum of the category's range, in that order.
 *
 * <p>Every stream opened here is closed before returning, including when a
 * read fails.
 *
 * @param storageManager the open storage manager to read from
 * @param modelName      the non-empty name of the model
 * @return the classifier populated with vectors and ranges
 * @throws NullPointerException     if {@code storageManager} is null
 * @throws IllegalArgumentException if {@code modelName} is null or empty
 * @throws IllegalStateException    if the storage manager is not open
 * @throws RuntimeException         wrapping any failure while reading
 */
@Override
public IClassifier read(IStorageManager storageManager, String modelName) {
    if (storageManager == null) {
        throw new NullPointerException("The storage manager is 'null'");
    }
    if (modelName == null || modelName.isEmpty()) {
        throw new IllegalArgumentException("The model name is invalid");
    }
    if (!storageManager.isOpen()) {
        throw new IllegalStateException("The storage manager is not open");
    }

    RocchioClassifier c = new RocchioClassifier();

    try {
        String vc = modelName + storageManager.getPathSeparator()
                + "validCategories.db";
        int numCats;
        DataInputStream validIn = new DataInputStream(
                new BufferedInputStream(
                        storageManager.getInputStreamForResource(vc), 4096));
        try {
            numCats = validIn.readInt();
        } finally {
            // Only the category count is needed; release the stream at once.
            validIn.close();
        }

        RocchioClassifierCustomizer cust = (RocchioClassifierCustomizer) c
                .getRuntimeCustomizer();
        cust._ranges = new Hashtable<Short, ClassifierRange>(numCats);
        c.vectors = new TIntDoubleHashMap[numCats];

        for (short catID = 0; catID < numCats; catID++) {
            // Category vector: feature count, then (feature ID, weight) pairs.
            String fname = modelName + storageManager.getPathSeparator()
                    + catID + ".db";
            c.vectors[catID] = new TIntDoubleHashMap();
            DataInputStream is = new DataInputStream(
                    new BufferedInputStream(storageManager
                            .getInputStreamForResource(fname), 4096));
            try {
                int numFeatures = is.readInt();
                for (int i = 0; i < numFeatures; i++) {
                    int featID = is.readInt();
                    double w = is.readDouble();

                    c.vectors[catID].put(featID, w);
                }
            } finally {
                is.close();
            }

            // Category range: border, maximum, minimum.
            fname = modelName + storageManager.getPathSeparator() + catID
                    + "_range.db";
            ClassifierRange cr = new ClassifierRange();
            is = new DataInputStream(new BufferedInputStream(
                    storageManager.getInputStreamForResource(fname), 4096));
            try {
                cr.border = is.readDouble();
                cr.maximum = is.readDouble();
                cr.minimum = is.readDouble();
            } finally {
                is.close();
            }

            cust._ranges.put(catID, cr);
        }

        return c;
    } catch (Exception e) {
        throw new RuntimeException("Reading classifier data", e);
    }
}

Source File: RocchioDataManager.java From jatecs with GNU General Public License v3.0

4 votes

/**
 * Reads a Rocchio classifier from a directory on the file system.
 *
 * <p>The file {@code validCategories.db} in {@code modelDir} supplies the
 * number of categories. For every category ID {@code c} in
 * {@code [0, numCats)}, {@code c.db} holds a feature count followed by
 * (feature ID, weight) pairs, and {@code c_range.db} holds the border,
 * maximum and minimum of the category's range, in that order.
 *
 * <p>Every stream opened here is closed before returning, including when a
 * read fails.
 *
 * @param modelDir the directory containing the model files
 * @return the classifier populated with vectors and ranges
 * @throws IOException if a file cannot be opened or read
 */
public IClassifier read(String modelDir) throws IOException {
    RocchioClassifier c = new RocchioClassifier();

    String vc = modelDir + Os.pathSeparator() + "validCategories.db";
    int numCats;
    DataInputStream validIn = new DataInputStream(new BufferedInputStream(
            new FileInputStream(vc), 4096));
    try {
        numCats = validIn.readInt();
    } finally {
        // Only the category count is needed; release the file at once.
        validIn.close();
    }

    RocchioClassifierCustomizer cust = (RocchioClassifierCustomizer) c
            .getRuntimeCustomizer();
    cust._ranges = new Hashtable<Short, ClassifierRange>(numCats);
    c.vectors = new TIntDoubleHashMap[numCats];

    for (short catID = 0; catID < numCats; catID++) {
        // Category vector: feature count, then (feature ID, weight) pairs.
        String fname = modelDir + Os.pathSeparator() + catID + ".db";
        c.vectors[catID] = new TIntDoubleHashMap();
        DataInputStream is = new DataInputStream(new BufferedInputStream(
                new FileInputStream(fname), 4096));
        try {
            int numFeatures = is.readInt();
            for (int i = 0; i < numFeatures; i++) {
                int featID = is.readInt();
                double w = is.readDouble();

                c.vectors[catID].put(featID, w);
            }
        } finally {
            is.close();
        }

        // Category range: border, maximum, minimum.
        fname = modelDir + Os.pathSeparator() + catID + "_range.db";
        ClassifierRange cr = new ClassifierRange();
        is = new DataInputStream(new BufferedInputStream(
                new FileInputStream(fname), 4096));
        try {
            cr.border = is.readDouble();
            cr.maximum = is.readDouble();
            cr.minimum = is.readDouble();
        } finally {
            is.close();
        }

        cust._ranges.put(catID, cr);
    }

    return c;
}

Source File: TroveWeightingDB.java From jatecs with GNU General Public License v3.0

4 votes

/**
 * Removes the given features from every document's weight map and renumbers
 * the feature IDs that remain.
 *
 * <p>For each document the map is unpacked into two parallel lists (feature
 * IDs and weights), the lists are walked together with
 * {@code removedFeatures}, and the map is then cleared and refilled from the
 * lists. Entries whose feature ID equals a removed ID are dropped; each kept
 * ID is decreased by {@code shift}, a running count of removed IDs consumed
 * so far.
 *
 * <p>NOTE(review): the walk compares {@code feat} and {@code rem} like a merge
 * of two ascending sequences. The feature list is filled in the map's
 * iteration order — confirm that this order (and that of
 * {@code removedFeatures}) is ascending.
 *
 * @param removedFeatures iterator over the feature IDs to remove;
 *                        {@code begin()} is called on it after each document
 *                        (presumably to rewind it), but not before the first
 *                        one — TODO confirm callers pass it positioned at the
 *                        start
 */
public void removeFeatures(IIntIterator removedFeatures) {
    for (int i = 0; i < _documentsWeights.size(); ++i) {
        TIntDoubleHashMap weigs = _documentsWeights.get(i);
        // Unpack the map into parallel lists: feats[k] pairs with weigths[k].
        TIntArrayList feats = new TIntArrayList(weigs.size());
        TDoubleArrayList weigths = new TDoubleArrayList(weigs.size());
        TIntDoubleIterator wit = weigs.iterator();
        while (wit.hasNext()) {
            wit.advance();
            feats.add(wit.key());
            weigths.add(wit.value());
        }
        // j: current position in feats; shift: amount subtracted from kept IDs.
        int j = 0;
        int shift = 0;
        int feat;
        int rem;
        if (j < feats.size() && removedFeatures.hasNext()) {
            feat = feats.getQuick(j);
            rem = removedFeatures.next();

            while (true) {
                if (feat == rem) {
                    // This feature is removed: drop it from both lists
                    // (presumably remove(j) removes by position — confirm
                    // against the Trove version in use); j now designates
                    // the following element.
                    feats.remove(j);
                    weigths.remove(j);
                    if (j < feats.size() && removedFeatures.hasNext()) {
                        feat = feats.getQuick(j);
                        rem = removedFeatures.next();
                        ++shift;
                    } else
                        break;
                } else if (feat > rem) {
                    // The removed ID is below the current feature: move on to
                    // the next removed ID and count the one just passed.
                    if (removedFeatures.hasNext()) {
                        rem = removedFeatures.next();
                        ++shift;
                    } else
                        break;
                } else {
                    // feat < rem: the feature is kept, renumbered by the
                    // count accumulated so far.
                    feats.setQuick(j, feat - shift);
                    ++j;
                    if (j < feats.size())
                        feat = feats.getQuick(j);
                    else
                        break;
                }
            }
            // Count the removed ID held in rem when the loop ended; it was
            // not counted inside the loop.
            ++shift;
        }
        // Renumber whatever was not visited by the loop above.
        while (j < feats.size()) {
            feats.setQuick(j, feats.getQuick(j) - shift);
            ++j;
        }

        // Rebuild the map from the filtered, renumbered lists.
        weigs.clear();
        for (j = 0; j < feats.size(); ++j)
            weigs.put(feats.getQuick(j), weigths.getQuick(j));

        // Reset the iterator so the next document sees all removed IDs again.
        removedFeatures.begin();
    }
}

Source File: ALpoolRank.java From jatecs with GNU General Public License v3.0

4 votes

/**
 * Returns the ranking map held by this object. The map itself is returned,
 * not a copy, so changes made by the caller are visible here.
 *
 * @return the {@code rankingMap} field
 */
public TIntDoubleHashMap getRanking() {
    return rankingMap;
}

Source File: ALpoolRank.java From jatecs with GNU General Public License v3.0

4 votes

public ALpoolRank(ClassificationScoreDB confidenceUnlabelled, IIndex trainingSet) {
    super(confidenceUnlabelled, trainingSet);
    unlabelledSize = confidenceUnlabelled.getDocumentCount();
    rankingMap = new TIntDoubleHashMap(
            (int) (unlabelledSize + unlabelledSize * 0.25), (float) 0.75);
}

Source File: SparseVector.java From jatecs with GNU General Public License v3.0

4 votes

/**
 * Creates a vector with no stored entries.
 *
 * @param k the value stored in {@code _k}
 */
public SparseVector(int k) {
	this._k = k;
	this._dim_value = new TIntDoubleHashMap();
}

Source File: ALAdaptive.java From jatecs with GNU General Public License v3.0

4 votes

/**
 * Returns the table for the micro variant by delegating to
 * {@code getMacroTable()}; the same table serves both variants here.
 *
 * @return whatever {@code getMacroTable()} returns
 */
@Override
public TIntDoubleHashMap getMicroTable() {
    return getMacroTable();
}

gnu.trove.TIntDoubleHashMap Java Examples