gnu.trove.TIntDoubleHashMap Java Examples

The following examples show how to use gnu.trove.TIntDoubleHashMap. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CosineSimilarityFunction.java    From jatecs with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Computes the cosine similarity between two weighted documents, restricted
 * to the features produced by {@code features}.
 *
 * <p>The iterator is rewound with {@code begin()} before use. When either
 * document has a zero norm over the visited features the division is
 * {@code 0.0 / 0.0} and the method returns {@code NaN}.
 *
 * @param doc1     feature-to-weight map of the first document
 * @param doc2     feature-to-weight map of the second document
 * @param features iterator over the feature ids to take into account
 * @return the similarity, capped at 1
 */
public double compute(TIntDoubleHashMap doc1, TIntDoubleHashMap doc2, IIntIterator features) {
    double dotProduct = 0;
    double squaredNorm1 = 0;
    double squaredNorm2 = 0;

    for (features.begin(); features.hasNext(); ) {
        int featID = features.next();
        double w1 = doc1.get(featID);
        double w2 = doc2.get(featID);

        dotProduct += (w1 * w2);
        squaredNorm1 += (w1 * w1);
        squaredNorm2 += (w2 * w2);
    }

    double cosine = dotProduct / (Math.sqrt(squaredNorm1) * Math.sqrt(squaredNorm2));

    // Rounding may push the ratio slightly above 1; NaN fails the comparison
    // and is therefore returned unchanged.
    return cosine > 1 ? 1 : cosine;
}
 
Example #2
Source File: ConfidenceBased.java    From jatecs with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Builds the ranking table that maps every test document id to the sum of
 * the {@code probability(...)} values of its accepted categories.
 *
 * <p>A category is accepted when it is contained both in
 * {@code categoriesFilter} and in the per-document filter
 * {@code docCategoriesFilter[docId]}.
 *
 * @return a new map with one entry per document id in {@code [0, testSize)}
 */
public TIntDoubleHashMap getTable() {
    int initialCapacity = (int) (testSize + testSize * 0.25);
    TIntDoubleHashMap rank = new TIntDoubleHashMap(initialCapacity, (float) 0.75);
    for (int docId = 0; docId < testSize; docId++) {
        Set<Entry<Short, ClassifierRangeWithScore>> entries = classification.getDocumentScoresAsSet(docId);
        double total = 0.0;
        for (Entry<Short, ClassifierRangeWithScore> entry : entries) {
            Short category = entry.getKey();
            if (!categoriesFilter.contains(category) || !docCategoriesFilter[docId].contains(category)) {
                continue;
            }
            ClassifierRangeWithScore range = entry.getValue();
            // The distance of the score from the decision border drives the probability.
            total += probability(Math.abs(range.score - range.border), category);
        }
        rank.put(docId, total);
    }
    return rank;
}
 
Example #3
Source File: Incremental.java    From jatecs with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Creates the incremental ranker: allocates the macro/micro rank tables and
 * "already seen" sets, then pre-computes the probability matrix
 * {@code probabilities[docId][category index]} for the filtered categories.
 *
 * <p>NOTE(review): the {@code gain} parameter is not used in this
 * constructor; only {@code firstRankGain} is forwarded to the superclass.
 * Confirm that this is intended.
 */
public Incremental(int trainSize, ClassificationScoreDB classification, TIntHashSet categoriesFilter,
                   EstimationType estimation, ContingencyTableSet evaluation, IGain gain, IGain firstRankGain, double[] probabilitySlope, double[] prevalencies) {
    super(trainSize, classification, categoriesFilter, estimation, evaluation, firstRankGain, probabilitySlope, prevalencies);

    final int initialCapacity = (int) (testSize + testSize * 0.25);
    final float loadFactor = (float) 0.75;
    macroRankTable = new TIntDoubleHashMap(initialCapacity, loadFactor);
    microRankTable = new TIntDoubleHashMap(initialCapacity, loadFactor);
    macroAlreadySeen = new TIntHashSet(initialCapacity, loadFactor);
    microAlreadySeen = new TIntHashSet(initialCapacity, loadFactor);

    probabilities = new double[testSize][numOfCategories];
    for (int docId = 0; docId < testSize; docId++) {
        Set<Entry<Short, ClassifierRangeWithScore>> entries = classification.getDocumentScoresAsSet(docId);
        for (Entry<Short, ClassifierRangeWithScore> entry : entries) {
            Short category = entry.getKey();
            if (!categoriesFilter.contains(category)) {
                continue;
            }
            ClassifierRangeWithScore range = entry.getValue();
            // catMap translates the category id into its column of the matrix.
            probabilities[docId][catMap.get(category)] = probability(Math.abs(range.score - range.border), category);
        }
    }
}
 
Example #4
Source File: Ranker.java    From jatecs with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Returns the keys of {@code table} ordered by the natural ordering of
 * {@code ComparablePair} built from each (key, value) entry.
 *
 * @param table the key-to-score map to rank
 * @return the keys of {@code table} in sorted pair order
 */
public TIntArrayList get(TIntDoubleHashMap table) {
    final ArrayList<ComparablePair> pairs = new ArrayList<ComparablePair>(table.size());

    // Collect every entry; returning true keeps the traversal going.
    table.forEachEntry(new TIntDoubleProcedure() {
        @Override
        public boolean execute(int key, double value) {
            pairs.add(new ComparablePair(key, value));
            return true;
        }
    });

    Collections.sort(pairs);

    TIntArrayList result = new TIntArrayList(pairs.size());
    for (ComparablePair pair : pairs) {
        result.add(pair.getFirst());
    }
    return result;
}
 
Example #5
Source File: BaseSimilarityFunction.java    From jatecs with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Computes the similarity of two documents of the same index by
 * materialising their weights for every feature of the index and delegating
 * to {@code compute(TIntDoubleHashMap, TIntDoubleHashMap, IIntIterator)}.
 *
 * @param doc1  id of the first document
 * @param doc2  id of the second document
 * @param index index providing the features and the weights
 * @return the value returned by the map-based {@code compute} overload
 */
public double compute(int doc1, int doc2, IIndex index) {
    TIntDoubleHashMap weights1 = new TIntDoubleHashMap(index.getFeatureDB().getFeaturesCount());
    TIntDoubleHashMap weights2 = new TIntDoubleHashMap(index.getFeatureDB().getFeaturesCount());

    IIntIterator features = index.getFeatureDB().getFeatures();
    while (features.hasNext()) {
        int featID = features.next();
        double w1 = index.getWeightingDB().getDocumentFeatureWeight(doc1, featID);
        weights1.put(featID, w1);
        double w2 = index.getWeightingDB().getDocumentFeatureWeight(doc2, featID);
        weights2.put(featID, w2);
    }

    // Rewind so the delegate can walk the same features again.
    features.begin();
    return compute(weights1, weights2, features);
}
 
Example #6
Source File: BaseSimilarityFunction.java    From jatecs with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Computes the similarity of two documents that live in different indexes.
 *
 * <p>Only the features of {@code idx1} are visited; both weight maps are
 * sized from the feature count of {@code idx1}, and weights of {@code doc2}
 * are looked up in {@code idx2} using the feature ids of {@code idx1}.
 *
 * @param doc1 id of the first document, resolved against {@code idx1}
 * @param idx1 index of the first document and source of the feature ids
 * @param doc2 id of the second document, resolved against {@code idx2}
 * @param idx2 index of the second document
 * @return the value returned by the map-based {@code compute} overload
 */
public double compute(int doc1, IIndex idx1, int doc2, IIndex idx2) {
    TIntDoubleHashMap weights1 = new TIntDoubleHashMap(idx1.getFeatureDB().getFeaturesCount());
    TIntDoubleHashMap weights2 = new TIntDoubleHashMap(idx1.getFeatureDB().getFeaturesCount());

    IIntIterator features = idx1.getFeatureDB().getFeatures();
    while (features.hasNext()) {
        int featID = features.next();
        double w1 = idx1.getWeightingDB().getDocumentFeatureWeight(doc1, featID);
        weights1.put(featID, w1);
        double w2 = idx2.getWeightingDB().getDocumentFeatureWeight(doc2, featID);
        weights2.put(featID, w2);
    }

    // Rewind so the delegate can walk the same features again.
    features.begin();
    return compute(weights1, weights2, features);
}
 
Example #7
Source File: TroveWeightingDB.java    From jatecs with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns the weight of {@code feature} in {@code document}.
 *
 * @param document the document id
 * @param feature  the feature id
 * @return {@code 0.0} when the content DB reports that the document does not
 *         contain the feature; otherwise the stored weight, or {@code 1.0}
 *         when no weight map or no entry exists for it
 */
public double getDocumentFeatureWeight(int document, int feature) {
    if (!_contentDB.hasDocumentFeature(document, feature)) {
        return 0.0;
    }
    if (document >= _documentsWeights.size()) {
        // No weight map was ever created for this document.
        return 1.0;
    }
    TIntDoubleHashMap weights = _documentsWeights.get(document);
    return weights.containsKey(feature) ? weights.get(feature) : 1.0;
}
 
Example #8
Source File: DotProduct.java    From jatecs with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Computes the dot product of two weighted documents by visiting every
 * entry of {@code doc1} and multiplying its value by the value that
 * {@code doc2.get} returns for the same key.
 *
 * @param doc1 the document whose entries are iterated
 * @param doc2 the document that is probed by key
 * @return the accumulated sum of products
 */
public double compute(TIntDoubleHashMap doc1, TIntDoubleHashMap doc2) {
    double sum = 0;
    for (TIntDoubleIterator entry = doc1.iterator(); entry.hasNext(); ) {
        entry.advance();
        sum += entry.value() * doc2.get(entry.key());
    }
    return sum;
}
 
Example #9
Source File: BestAutomaticNegativesChooser.java    From jatecs with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Materialises a document as a feature-to-weight map containing one entry
 * for every feature returned by the index's feature DB.
 *
 * @param docID the document id
 * @param index the index providing features and weights
 * @return a new map from feature id to the weight of that feature in the document
 */
protected TIntDoubleHashMap getDocumentAsMap(int docID, IIndex index) {
    int featureCount = index.getFeatureDB().getFeaturesCount();
    TIntDoubleHashMap weights = new TIntDoubleHashMap(featureCount);

    for (IIntIterator features = index.getFeatureDB().getFeatures(); features.hasNext(); ) {
        int featID = features.next();
        double weight = index.getWeightingDB().getDocumentFeatureWeight(docID, featID);
        weights.put(featID, weight);
    }

    return weights;
}
 
Example #10
Source File: SparseVector.java    From jatecs with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Copy constructor: creates a vector with its own dimension-to-value map
 * holding the same entries as {@code other}, and the same {@code _k}.
 *
 * @param other the vector to copy
 */
public SparseVector(SparseVector other) {
	_dim_value = new TIntDoubleHashMap(other.size());
	TIntDoubleHashMap source = other._dim_value;
	for (int dim : source.keys()) {
		_dim_value.put(dim, source.get(dim));
	}
	_k = other._k;
}
 
Example #11
Source File: ConfidenceBasedOracle.java    From jatecs with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns the superclass ranking table after calling {@code adjustValue}
 * with a fresh {@code Math.random()} amount on every document whose score
 * is not exactly {@code 0.0}.
 *
 * @return the table obtained from {@code super.getTable()}, modified in place
 */
@Override
public TIntDoubleHashMap getTable() {
    TIntDoubleHashMap rank = super.getTable();
    int docId = 0;
    while (docId < testSize) {
        boolean scored = rank.get(docId) != 0.0;
        if (scored) {
            rank.adjustValue(docId, Math.random());
        }
        docId++;
    }
    return rank;
}
 
Example #12
Source File: Random.java    From jatecs with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds a ranking table that assigns an independent {@code Math.random()}
 * score to every document id in {@code [0, testSize)}.
 *
 * @return a new map from document id to random score
 */
public TIntDoubleHashMap getTable() {
    TIntDoubleHashMap rank = new TIntDoubleHashMap(testSize);
    int docId = 0;
    while (docId < testSize) {
        rank.put(docId, Math.random());
        docId++;
    }
    return rank;
}
 
Example #13
Source File: ConfidenceBased.java    From jatecs with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Selects {@code topK} categories of a document according to the order
 * produced by {@code Ranker} over the per-category probabilities.
 *
 * <p>Only categories contained in {@code categoriesFilter} are ranked. The
 * first {@code topK} keys of the ranking are taken via
 * {@code toNativeArray(0, topK)}.
 *
 * @param docId the document whose category scores are ranked
 * @param topK  number of leading ranked categories to keep
 * @return the set of selected category ids
 */
private TIntHashSet filterByTopProbabilities(int docId, int topK) {
    int initialCapacity = (int) (testSize + testSize * 0.25);
    TIntDoubleHashMap topProbRank = new TIntDoubleHashMap(initialCapacity, (float) 0.75);

    Set<Entry<Short, ClassifierRangeWithScore>> entries = classification.getDocumentScoresAsSet(docId);
    for (Entry<Short, ClassifierRangeWithScore> entry : entries) {
        Short category = entry.getKey();
        if (!categoriesFilter.contains(category)) {
            continue;
        }
        ClassifierRangeWithScore range = entry.getValue();
        topProbRank.put(category, probability(Math.abs(range.score - range.border), category));
    }

    return new TIntHashSet(new Ranker().get(topProbRank).toNativeArray(0, topK));
}
 
Example #14
Source File: UtilityBased.java    From jatecs with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds a ranking table that maps every test document id to the sum of its
 * utilities over the accepted categories.
 *
 * <p>A category from {@code categoriesFilter} is accepted for a document
 * when {@code docCategoriesFilter[docId]} also contains it; {@code catMap}
 * translates the category id into the column of {@code utilities}.
 *
 * @param utilities utility matrix indexed as {@code [docId][category column]}
 * @return a new map with one entry per document id in {@code [0, testSize)}
 */
public TIntDoubleHashMap getTable(double[][] utilities) {
    int initialCapacity = (int) (testSize + testSize * 0.25);
    TIntDoubleHashMap rank = new TIntDoubleHashMap(initialCapacity, (float) 0.75);

    for (int docId = 0; docId < testSize; docId++) {
        double total = 0.0;
        TIntIterator categories = categoriesFilter.iterator();
        while (categories.hasNext()) {
            int catId = categories.next();
            if (!docCategoriesFilter[docId].contains(catId)) {
                continue;
            }
            total += utilities[docId][catMap.get(catId)];
        }
        rank.put(docId, total);
    }
    return rank;
}
 
Example #15
Source File: Ranker.java    From jatecs with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns the key associated with the largest value in {@code table}.
 *
 * <p>Values are compared with a strict {@code >}, so on ties the entry met
 * first in iteration order wins. Entries that never compare greater than
 * the running maximum (the initial {@code Double.NEGATIVE_INFINITY}
 * included) are not selected.
 *
 * @param table the key-to-value map to scan
 * @return the key of the maximum value, or {@code Integer.MIN_VALUE} when no
 *         entry was selected (for instance when the table is empty)
 */
static public int getMax(TIntDoubleHashMap table) {
    // The original sentinel was written as -Integer.MIN_VALUE, which overflows
    // back to Integer.MIN_VALUE; state the actual value explicitly.
    int maxKey = Integer.MIN_VALUE;
    double maxValue = Double.NEGATIVE_INFINITY;
    TIntDoubleIterator it = table.iterator();
    while (it.hasNext()) {
        it.advance();
        if (it.value() > maxValue) {
            maxValue = it.value();
            maxKey = it.key();
        }
    }
    return maxKey;
}
 
Example #16
Source File: BaseSimilarityFunction.java    From jatecs with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Computes the similarity between an already materialised document and a
 * document of {@code index}, by building the weight map of {@code doc2}
 * over all features of the index and delegating to the map-based overload.
 *
 * @param doc1  feature-to-weight map of the first document
 * @param doc2  id of the second document in {@code index}
 * @param index index providing the features and the weights of {@code doc2}
 * @return the value returned by the map-based {@code compute} overload
 */
public double compute(TIntDoubleHashMap doc1, int doc2, IIndex index) {
    int featureCount = index.getFeatureDB().getFeaturesCount();
    TIntDoubleHashMap weights2 = new TIntDoubleHashMap(featureCount);

    IIntIterator features = index.getFeatureDB().getFeatures();
    while (features.hasNext()) {
        int featID = features.next();
        double weight = index.getWeightingDB().getDocumentFeatureWeight(doc2, featID);
        weights2.put(featID, weight);
    }

    // Rewind so the delegate can walk the same features again.
    features.begin();
    return compute(doc1, weights2, features);
}
 
Example #17
Source File: TroveWeightingDB.java    From jatecs with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates a weighting DB bound to {@code contentDB}, with one empty weight
 * map per document currently counted by the content DB's document DB, and
 * the name {@code "generic"}.
 *
 * @param contentDB the content DB this weighting DB refers to
 */
public TroveWeightingDB(IContentDB contentDB) {
    super();
    _contentDB = contentDB;

    final int documentCount = contentDB.getDocumentDB().getDocumentsCount();
    _documentsWeights = new Vector<TIntDoubleHashMap>(documentCount);
    int created = 0;
    while (created < documentCount) {
        _documentsWeights.add(new TIntDoubleHashMap());
        created++;
    }

    _name = "generic";
}
 
Example #18
Source File: TroveWeightingDB.java    From jatecs with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates a copy of this weighting DB bound to {@code contentDB}.
 *
 * <p>The name is copied into a new {@code String}, and every per-document
 * weight map is duplicated with {@code clone()}, replacing the maps the
 * constructor created.
 *
 * @param contentDB the content DB the copy refers to
 * @return the new weighting DB
 */
public IWeightingDB cloneDB(IContentDB contentDB) {
    TroveWeightingDB copy = new TroveWeightingDB(contentDB);
    copy._name = new String(_name);

    Vector<TIntDoubleHashMap> clonedWeights = new Vector<TIntDoubleHashMap>(_documentsWeights.size());
    copy._documentsWeights = clonedWeights;
    for (TIntDoubleHashMap weights : _documentsWeights) {
        clonedWeights.add((TIntDoubleHashMap) weights.clone());
    }

    return copy;
}
 
Example #19
Source File: TroveWeightingDBBuilder.java    From jatecs with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Stores the weight of {@code feature} in {@code document}.
 *
 * <p>The call is ignored when the content DB reports that the document does
 * not contain the feature. The list of per-document weight maps is extended
 * with empty maps until it has a slot for {@code document}.
 *
 * @param document the document id
 * @param feature  the feature id
 * @param weight   the weight to store
 */
public void setDocumentFeatureWeight(int document, int feature,
                                     double weight) {
    if (!_weightingDB.getContentDB().hasDocumentFeature(document, feature)) {
        return;
    }
    while (_weightingDB._documentsWeights.size() <= document) {
        _weightingDB._documentsWeights.add(new TIntDoubleHashMap());
    }
    TIntDoubleHashMap weights = _weightingDB._documentsWeights.get(document);
    weights.put(feature, weight);
}
 
Example #20
Source File: Clustering.java    From jatecs with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Computes the centroid of a set of documents: for every feature occurring
 * in at least one of them, the sum of its weights divided by the number of
 * documents visited.
 *
 * <p>{@code docs} is rewound with {@code begin()} before being consumed.
 *
 * @param docs  iterator over the ids of the documents to average
 * @param index index providing document features and weights
 * @return a new map from feature id to averaged weight
 */
public static TIntDoubleHashMap computeDocumentCentroid(IIntIterator docs,
                                                        IIndex index) {
    int featureCount = index.getFeatureDB().getFeaturesCount();
    TIntDoubleHashMap centroid = new TIntDoubleHashMap(featureCount);

    // First pass: accumulate the per-feature weight sums.
    int numDoc = 0;
    for (docs.begin(); docs.hasNext(); numDoc++) {
        int docID = docs.next();
        IIntIterator feats = index.getContentDB().getDocumentFeatures(docID);
        while (feats.hasNext()) {
            int featID = feats.next();
            double accumulated = centroid.get(featID);
            double weight = index.getWeightingDB().getDocumentFeatureWeight(docID, featID);
            centroid.put(featID, accumulated + weight);
        }
    }

    // Second pass: turn the sums into averages.
    for (int featID : centroid.keys()) {
        centroid.put(featID, centroid.get(featID) / (double) numDoc);
    }

    return centroid;
}
 
Example #21
Source File: EuclideanDistance.java    From jatecs with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Computes the Euclidean distance between two weighted documents over the
 * given features.
 *
 * <p>The iterator is rewound with {@code begin()} before use.
 *
 * @param doc1     feature-to-weight map of the first document
 * @param doc2     feature-to-weight map of the second document
 * @param features iterator over the feature ids to take into account
 * @return the square root of the summed squared weight differences
 */
public double compute(TIntDoubleHashMap doc1, TIntDoubleHashMap doc2, IIntIterator features) {
    double sumOfSquares = 0;

    for (features.begin(); features.hasNext(); ) {
        int featID = features.next();
        double difference = doc1.get(featID) - doc2.get(featID);
        sumOfSquares += Math.pow(difference, 2);
    }

    return Math.sqrt(sumOfSquares);
}
 
Example #22
Source File: EuclideanSquareDistance.java    From jatecs with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Computes the squared Euclidean distance between two weighted documents
 * over the given features (no square root is taken).
 *
 * <p>The iterator is rewound with {@code begin()} before use.
 *
 * @param doc1     feature-to-weight map of the first document
 * @param doc2     feature-to-weight map of the second document
 * @param features iterator over the feature ids to take into account
 * @return the sum of the squared weight differences
 */
public double compute(TIntDoubleHashMap doc1, TIntDoubleHashMap doc2, IIntIterator features) {
	double sumOfSquares = 0;

	for (features.begin(); features.hasNext(); ) {
		int featID = features.next();
		double difference = doc1.get(featID) - doc2.get(featID);
		sumOfSquares += Math.pow(difference, 2);
	}

	return sumOfSquares;
}
 
Example #23
Source File: Random.java    From jatecs with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Returns the macro ranking table, which for this strategy is simply the
 * table produced by {@code getTable()}.
 *
 * @return the result of {@code getTable()}
 */
@Override
public TIntDoubleHashMap getMacroTable() {
    TIntDoubleHashMap table = getTable();
    return table;
}
 
Example #24
Source File: RocchioDataManager.java    From jatecs with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Reads a Rocchio classifier stored under {@code modelName} in the given
 * storage manager.
 *
 * <p>The resource {@code validCategories.db} supplies the number of
 * categories. For every category id {@code i} the resource {@code i.db}
 * holds the feature count followed by (feature id, weight) pairs, and
 * {@code i_range.db} holds the border, maximum and minimum of the
 * classifier range, in that order.
 *
 * <p>Every stream is closed in a {@code finally} block so that no resource
 * stays open when a read fails.
 *
 * @param storageManager the open storage manager to read from
 * @param modelName      the name of the stored model
 * @return the classifier populated with vectors and ranges
 * @throws NullPointerException     if {@code storageManager} is {@code null}
 * @throws IllegalArgumentException if {@code modelName} is {@code null} or empty
 * @throws IllegalStateException    if the storage manager is not open
 * @throws RuntimeException         wrapping any exception raised while reading
 */
@Override
public IClassifier read(IStorageManager storageManager, String modelName) {
    if (storageManager == null)
        throw new NullPointerException("The storage manager is 'null'");
    if (modelName == null || modelName.isEmpty())
        throw new IllegalArgumentException("The model name is invalid");
    if (!storageManager.isOpen())
        throw new IllegalStateException("The storage manager is not open");

    RocchioClassifier c = new RocchioClassifier();

    try {
        String vc = modelName + storageManager.getPathSeparator()
                + "validCategories.db";
        int numCats;
        DataInputStream valid_os = new DataInputStream(
                new BufferedInputStream(
                        storageManager.getInputStreamForResource(vc), 4096));
        try {
            numCats = valid_os.readInt();
        } finally {
            // Only the category count is needed from this resource.
            valid_os.close();
        }

        RocchioClassifierCustomizer cust = (RocchioClassifierCustomizer) c
                .getRuntimeCustomizer();
        cust._ranges = new Hashtable<Short, ClassifierRange>(numCats);
        c.vectors = new TIntDoubleHashMap[numCats];

        for (short catID = 0; catID < numCats; catID++) {
            // Category vector: feature count followed by (id, weight) pairs.
            String fname = modelName + storageManager.getPathSeparator()
                    + catID + ".db";
            DataInputStream is = new DataInputStream(
                    new BufferedInputStream(storageManager
                            .getInputStreamForResource(fname), 4096));
            try {
                c.vectors[catID] = new TIntDoubleHashMap();

                int numFeatures = is.readInt();
                for (int i = 0; i < numFeatures; i++) {
                    int featID = is.readInt();
                    double w = is.readDouble();

                    c.vectors[catID].put(featID, w);
                }
            } finally {
                is.close();
            }

            // Category range: border, maximum, minimum.
            fname = modelName + storageManager.getPathSeparator() + catID
                    + "_range.db";
            is = new DataInputStream(new BufferedInputStream(
                    storageManager.getInputStreamForResource(fname), 4096));
            ClassifierRange cr = new ClassifierRange();
            try {
                cr.border = is.readDouble();
                cr.maximum = is.readDouble();
                cr.minimum = is.readDouble();
            } finally {
                is.close();
            }

            cust._ranges.put(catID, cr);
        }

        return c;
    } catch (Exception e) {
        throw new RuntimeException("Reading classifier data", e);
    }
}
 
Example #25
Source File: RocchioDataManager.java    From jatecs with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Reads a Rocchio classifier from the files stored in {@code modelDir}.
 *
 * <p>The file {@code validCategories.db} supplies the number of categories.
 * For every category id {@code i} the file {@code i.db} holds the feature
 * count followed by (feature id, weight) pairs, and {@code i_range.db}
 * holds the border, maximum and minimum of the classifier range, in that
 * order.
 *
 * <p>Every stream is closed in a {@code finally} block so that no file
 * handle stays open when a read fails.
 *
 * @param modelDir the directory containing the model files
 * @return the classifier populated with vectors and ranges
 * @throws IOException if a file cannot be opened or read
 */
public IClassifier read(String modelDir) throws IOException {
    RocchioClassifier c = new RocchioClassifier();

    String vc = modelDir + Os.pathSeparator() + "validCategories.db";
    int numCats;
    DataInputStream valid_os = new DataInputStream(new BufferedInputStream(
            new FileInputStream(vc), 4096));
    try {
        numCats = valid_os.readInt();
    } finally {
        // Only the category count is needed from this file.
        valid_os.close();
    }

    RocchioClassifierCustomizer cust = (RocchioClassifierCustomizer) c
            .getRuntimeCustomizer();
    cust._ranges = new Hashtable<Short, ClassifierRange>(numCats);
    c.vectors = new TIntDoubleHashMap[numCats];

    for (short catID = 0; catID < numCats; catID++) {
        // Category vector: feature count followed by (id, weight) pairs.
        String fname = modelDir + Os.pathSeparator() + catID + ".db";
        DataInputStream is = new DataInputStream(new BufferedInputStream(
                new FileInputStream(fname), 4096));
        try {
            c.vectors[catID] = new TIntDoubleHashMap();

            int numFeatures = is.readInt();
            for (int i = 0; i < numFeatures; i++) {
                int featID = is.readInt();
                double w = is.readDouble();

                c.vectors[catID].put(featID, w);
            }
        } finally {
            is.close();
        }

        // Category range: border, maximum, minimum.
        fname = modelDir + Os.pathSeparator() + catID + "_range.db";
        is = new DataInputStream(new BufferedInputStream(
                new FileInputStream(fname), 4096));
        ClassifierRange cr = new ClassifierRange();
        try {
            cr.border = is.readDouble();
            cr.maximum = is.readDouble();
            cr.minimum = is.readDouble();
        } finally {
            is.close();
        }

        cust._ranges.put(catID, cr);
    }

    return c;
}
 
Example #26
Source File: TroveWeightingDB.java    From jatecs with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Removes the given features from every per-document weight map and
 * renumbers the remaining feature ids by subtracting a running shift.
 *
 * <p>For each document the entries are copied into two parallel lists, the
 * lists are walked together with {@code removedFeatures}, and the map is
 * then cleared and rebuilt from the lists. {@code removedFeatures} is
 * rewound with {@code begin()} after each document; it is not rewound
 * before the first one.
 *
 * <p>NOTE(review): the walk compares ids with {@code ==} and {@code >} in a
 * merge-like fashion, which looks like it expects both the copied keys and
 * {@code removedFeatures} to be in ascending order. The keys come from the
 * hash map iterator, whose order is not established anywhere in this
 * method. Confirm against the callers and the map implementation.
 *
 * @param removedFeatures iterator over the feature ids to remove
 */
public void removeFeatures(IIntIterator removedFeatures) {
    for (int i = 0; i < _documentsWeights.size(); ++i) {
        TIntDoubleHashMap weigs = _documentsWeights.get(i);
        // Snapshot the map as parallel key/value lists, in iterator order.
        TIntArrayList feats = new TIntArrayList(weigs.size());
        TDoubleArrayList weigths = new TDoubleArrayList(weigs.size());
        TIntDoubleIterator wit = weigs.iterator();
        while (wit.hasNext()) {
            wit.advance();
            feats.add(wit.key());
            weigths.add(wit.value());
        }
        // j: position in the lists; shift: amount subtracted from kept ids;
        // feat / rem: current document feature and current removed feature.
        int j = 0;
        int shift = 0;
        int feat;
        int rem;
        if (j < feats.size() && removedFeatures.hasNext()) {
            feat = feats.getQuick(j);
            rem = removedFeatures.next();

            while (true) {
                if (feat == rem) {
                    // The current feature is one of the removed ones: drop it
                    // together with its weight (j now points at the next entry).
                    feats.remove(j);
                    weigths.remove(j);
                    if (j < feats.size() && removedFeatures.hasNext()) {
                        feat = feats.getQuick(j);
                        rem = removedFeatures.next();
                        ++shift;
                    } else
                        break;
                } else if (feat > rem) {
                    // The removed id lies before the current feature: move on
                    // to the next removed id and count it in the shift.
                    if (removedFeatures.hasNext()) {
                        rem = removedFeatures.next();
                        ++shift;
                    } else
                        break;
                } else {
                    // The current feature is kept: renumber it by the shift
                    // accumulated so far and advance in the lists.
                    feats.setQuick(j, feat - shift);
                    ++j;
                    if (j < feats.size())
                        feat = feats.getQuick(j);
                    else
                        break;
                }
            }
            // Every exit from the loop above is followed by one more increment.
            ++shift;
        }
        // Renumber whatever entries were not reached by the walk above.
        while (j < feats.size()) {
            feats.setQuick(j, feats.getQuick(j) - shift);
            ++j;
        }

        // Rebuild the map from the filtered, renumbered lists.
        weigs.clear();
        for (j = 0; j < feats.size(); ++j)
            weigs.put(feats.getQuick(j), weigths.getQuick(j));

        // Rewind the removed-feature iterator for the next document.
        removedFeatures.begin();
    }
}
 
Example #27
Source File: ALpoolRank.java    From jatecs with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Returns the ranking map held by this instance.
 *
 * <p>The internal map itself is returned, not a copy.
 *
 * @return the {@code rankingMap} field
 */
public TIntDoubleHashMap getRanking() {
    TIntDoubleHashMap ranking = rankingMap;
    return ranking;
}
 
Example #28
Source File: ALpoolRank.java    From jatecs with GNU General Public License v3.0 4 votes vote down vote up
public ALpoolRank(ClassificationScoreDB confidenceUnlabelled, IIndex trainingSet) {
    super(confidenceUnlabelled, trainingSet);
    unlabelledSize = confidenceUnlabelled.getDocumentCount();
    rankingMap = new TIntDoubleHashMap(
            (int) (unlabelledSize + unlabelledSize * 0.25), (float) 0.75);
}
 
Example #29
Source File: SparseVector.java    From jatecs with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Creates an empty sparse vector.
 *
 * @param k value stored in the {@code _k} field
 */
public SparseVector(int k) {
	this._k = k;
	this._dim_value = new TIntDoubleHashMap();
}
 
Example #30
Source File: ALAdaptive.java    From jatecs with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Returns the micro ranking table, which for this strategy is the same
 * table that {@code getMacroTable()} produces.
 *
 * @return the result of {@code getMacroTable()}
 */
@Override
public TIntDoubleHashMap getMicroTable() {
    TIntDoubleHashMap table = getMacroTable();
    return table;
}