gnu.trove.map.hash.TIntFloatHashMap Java Examples

Source File: FuzzySetSimJoin.java From JedAIToolkit with Apache License 2.0

6 votes

/**
 * Finds the matches for a single query set by running the four pipeline
 * stages in order: signature generation, check filter, nearest-neighbour
 * filter and verification.
 *
 * @param querySet     the query set to find matches for
 * @param collection   the collection that is searched
 * @param simThreshold similarity threshold handed to every stage
 * @param idx          index handed to signature generation and the check filter
 * @return the map produced by {@code verifyCandidates}
 */
private TIntFloatHashMap search(int[][] querySet, int[][][] collection, float simThreshold,
        TIntObjectMap<TIntList>[] idx) {

    // Stage 1: signature generation.
    final TIntSet[] signature = computeUnflattenedSignature(querySet, simThreshold, idx);

    // Stage 2: candidate selection and check filter.
    final TIntObjectMap<TIntFloatMap> afterCheckFilter =
            applyCheckFilter(querySet, collection, signature, idx, simThreshold);

    // Stage 3: nearest-neighbour filter.
    final TIntSet afterNnFilter =
            applyNNFilter(querySet, collection, afterCheckFilter, simThreshold);

    // Stage 4: verification; its result is the result of the search.
    return verifyCandidates(querySet, collection, afterNnFilter, simThreshold);
}

Source File: FuzzySetSimJoin.java From JedAIToolkit with Apache License 2.0

6 votes

/**
 * Computes the join between two already transformed and indexed collections.
 * Every set of {@code collection1} is searched against {@code collection2};
 * each match is stored under the key {@code "<i>_<j>"}, where {@code i} is the
 * position of the query set in {@code collection1} and {@code j} is the key
 * reported by {@code search}.
 *
 * @param collection1  the collection whose sets are used as queries
 * @param collection2  the collection that is indexed and searched
 * @param simThreshold similarity threshold forwarded to {@code search}
 * @return map from {@code "i_j"} pair keys to the value reported for that pair
 */
HashMap<String, Float> join(int[][][] collection1, int[][][] collection2, float simThreshold) {
    // Build the inverted index over the second collection once, up front.
    final TIntObjectMap<TIntList>[] idx = buildSetInvertedIndex(collection2, tokenDict.size());

    final HashMap<String, Float> matchingPairs = new HashMap<>();

    // Run one search per query set and flatten the results into pair keys.
    int queryPos = 0;
    for (int[][] querySet : collection1) {
        final TIntFloatHashMap found = search(querySet, collection2, simThreshold, idx);
        final int[] matchedIds = found.keys();
        for (int k = 0; k < matchedIds.length; k++) {
            final int matchedId = matchedIds[k];
            matchingPairs.put(queryPos + "_" + matchedId, found.get(matchedId));
        }
        queryPos++;
    }

    return matchingPairs;
}

Source File: KMeansWordCluster.java From fnlp with GNU Lesser General Public License v3.0

6 votes

/**
 * Computes a distance value between the sparse vector {@code sv} and the
 * center stored for class {@code n}.
 *
 * The result starts at {@code baseDistance / count^2} and is then corrected
 * once per entry of {@code sv}; entries of the center that do not occur in
 * {@code sv} are never visited.
 * NOTE(review): presumably {@code baseDistance} is the squared norm of the
 * un-averaged center, which would make the result a squared Euclidean
 * distance to the averaged center - confirm against the caller.
 *
 * @param n            class id used to look up the center and its count
 * @param sv           sparse vector to measure
 * @param baseDistance precomputed base term for class {@code n}
 * @return the accumulated distance value
 */
private float distanceEuclidean(int n, HashSparseVector sv, float baseDistance) {
    final int count = classCount.get(n);
    final TIntFloatHashMap centerData = classCenter.get(n).data;

    float dist = baseDistance / (count * count);
    for (TIntFloatIterator cursor = sv.data.iterator(); cursor.hasNext();) {
        cursor.advance();
        final int key = cursor.key();
        if (centerData.containsKey(key)) {
            // Replace the center-only term by the squared difference.
            final float scaled = centerData.get(key) / count;
            dist -= scaled * scaled;
            dist += (cursor.value() - scaled) * (cursor.value() - scaled);
        } else {
            // Dimension is absent from the center: its full square counts.
            dist += cursor.value() * cursor.value();
        }
    }
    return dist;
}

Source File: KMeansWordCluster.java From fnlp with GNU Lesser General Public License v3.0

6 votes

/**
 * Updates the cached base distance of class {@code classid} with the
 * contribution of {@code vector} and writes it back to {@code baseDistList}.
 *
 * @param classid class whose cached base distance is updated
 * @param vector  sparse vector whose entries drive the update
 */
private void updateBaseDist(int classid, HashSparseVector vector) {
    final TIntFloatHashMap centerData = classCenter.get(classid).data;
    float updated = baseDistList.get(classid);

    for (TIntFloatIterator cursor = vector.data.iterator(); cursor.hasNext();) {
        cursor.advance();
        if (centerData.containsKey(cursor.key())) {
            // Swap the old squared center value for the squared difference.
            final float current = centerData.get(cursor.key());
            updated -= current * current;
            updated += (cursor.value() - current) * (cursor.value() - current);
        } else {
            // Key is new to the center: add the plain square.
            updated += cursor.value() * cursor.value();
        }
    }

    baseDistList.set(classid, updated);
}

Source File: SemSigUtilsTest.java From ADW with GNU General Public License v3.0

6 votes

@Test
public void testGetSortedIndices()
{
    // Keys 0..3 with values 1, 10, 5, 2.
    TIntFloatMap vector = new TIntFloatHashMap();
    vector.put(0, 1f);
    vector.put(1, 10f);
    vector.put(2, 5f);
    vector.put(3, 2f);

    // Keys are expected ordered by decreasing value.
    int[] expectedOrder = { 1, 2, 3, 0 };

    int[] sorted = SemSigUtils.getSortedIndices(vector);
    assertEquals(4, sorted.length);
    for (int pos = 0; pos < expectedOrder.length; pos++)
    {
        assertEquals(expectedOrder[pos], sorted[pos]);
    }
}

Source File: WordCluster.java From fnlp with GNU Lesser General Public License v3.0

6 votes

/**
 * Looks up the weight stored for the unordered pair {@code (c1, c2)}.
 * {@code wcc} is addressed by the smaller id first and the larger id second.
 *
 * @param c1 first id
 * @param c2 second id
 * @return the stored weight, or 0 when {@code wcc} has no row for the smaller id
 */
private float getweight(int c1, int c2) {
	final int smaller = Math.min(c1, c2);
	final int larger = Math.max(c1, c2);
	final TIntFloatHashMap row = wcc.get(smaller);
	return (row == null) ? 0 : row.get(larger);
}

Source File: SemSigUtils.java From ADW with GNU General Public License v3.0

6 votes

/**
 * Returns a copy of {@code vector} in which every value has been divided by
 * the sum of all values, so that the values sum to 1.0. The input map is not
 * modified.
 *
 * @param vector the vector to normalize
 * @return a new map with the same keys and the rescaled values
 */
public static TIntFloatMap normalizeVector(TIntFloatMap vector)
{
	// First pass: sum of all values.
	float sum = 0;
	for (TFloatIterator values = vector.valueCollection().iterator(); values.hasNext();)
	{
		sum += values.next();
	}

	// Second pass: copy every entry, scaled by the sum.
	TIntFloatMap result = new TIntFloatHashMap(vector.size());
	for (TIntFloatIterator entries = vector.iterator(); entries.hasNext();)
	{
		entries.advance();
		result.put(entries.key(), entries.value() / sum);
	}
	return result;
}

Source File: MyArrays.java From fnlp with GNU Lesser General Public License v3.0

6 votes

/**
 * Splits the keys of a sparse vector into a leading group and a remainder,
 * based on their cumulative share of the total energy (sum of squared values).
 *
 * Keys are taken in the order returned by {@code sort(data)}. The split point
 * is the first position at which the running share exceeds {@code thres};
 * the key at that position belongs to the second group.
 *
 * @param data  sparse vector
 * @param thres threshold on the cumulative energy share
 * @return a two-row array: row 0 holds the keys before the split point,
 *         row 1 holds the keys from the split point onwards
 */
public static int[][] getTop(TIntFloatHashMap data, float thres) {
	final int[] order = sort(data);
	final float[] energy = new float[order.length];

	// Squared values and their total, accumulated from the last key to the first.
	float total = 0;
	for (int k = order.length - 1; k >= 0; k--) {
		energy[k] = (float) Math.pow(data.get(order[k]), 2);
		total += energy[k];
	}

	// Advance until the cumulative share passes the threshold.
	int cut = 0;
	float ratio = 0;
	while (cut < order.length) {
		ratio += energy[cut] / total;
		if (ratio > thres) {
			break;
		}
		cut++;
	}

	return new int[][] {
		Arrays.copyOfRange(order, 0, cut),
		Arrays.copyOfRange(order, cut, order.length)
	};
}

Source File: MyCollection.java From fnlp with GNU Lesser General Public License v3.0

6 votes

/**
 * Returns the keys of {@code tmap} ordered by the absolute value of their
 * entries. The ordering itself is delegated to the {@code sort} overload that
 * takes a map (the original documentation states largest first).
 *
 * @param tmap sparse vector whose keys are to be ordered
 * @return the keys in sorted order
 */
public static int[] sort(TIntFloatHashMap tmap) {
	// Box every entry, replacing the value by its absolute value.
	HashMap<Integer, Float> absValues = new HashMap<Integer, Float>();
	for (TIntFloatIterator cursor = tmap.iterator(); cursor.hasNext();) {
		cursor.advance();
		absValues.put(cursor.key(), Math.abs(cursor.value()));
	}

	// Let the map-based overload do the ordering, then unbox the keys.
	List<Entry> ordered = sort(absValues);
	int[] idx = new int[ordered.size()];
	int pos = 0;
	for (Entry entry : ordered) {
		idx[pos++] = (Integer) entry.getKey();
	}
	return idx;
}

Source File: MyHashSparseArrays.java From fnlp with GNU Lesser General Public License v3.0

6 votes

/**
 * Partitions the keys of a sparse vector by cumulative energy share, where
 * energy is the squared value of an entry.
 *
 * The keys are walked in the order given by {@code sort(data)}; the walk
 * stops at the first key whose inclusion pushes the running share above
 * {@code thres}. That key and everything after it form the second group.
 *
 * @param data  sparse vector
 * @param thres threshold on the cumulative energy share
 * @return {@code int[2][]}: index 0 holds the keys before the stop position,
 *         index 1 holds the remaining keys
 */
public static int[][] getTop(TIntFloatHashMap data, float thres) {
	final int[] sortedKeys = sort(data);
	final int n = sortedKeys.length;
	final float[] squared = new float[n];

	// Fill the squares back to front, summing them in that same order.
	float total = 0;
	int k = n;
	while (k-- > 0) {
		squared[k] = (float) Math.pow(data.get(sortedKeys[k]), 2);
		total += squared[k];
	}

	// Locate the first position where the running share exceeds the threshold.
	float share = 0;
	int split = 0;
	for (; split < n; split++) {
		share += squared[split] / total;
		if (share > thres) {
			break;
		}
	}

	final int[][] parts = new int[2][];
	parts[0] = Arrays.copyOfRange(sortedKeys, 0, split);
	parts[1] = Arrays.copyOfRange(sortedKeys, split, n);
	return parts;
}

Source File: SemSigUtilsTest.java From ADW with GNU General Public License v3.0

5 votes

@Test
public void testTruncateVectorNormalized()
{
    // Keys 0..3 with values 1, 10, 5, 2.
    TIntFloatMap vector = new TIntFloatHashMap();
    vector.put(0, 1f);
    vector.put(1, 10f);
    vector.put(2, 5f);
    vector.put(3, 2f);

    // Keep two entries and normalize them.
    final int keep = 2;
    TIntFloatMap truncated = SemSigUtils.truncateVector(vector, false, keep, true);

    // The kept values 10 and 5 are expected rescaled by their sum, 15.
    assertEquals(2, truncated.size());
    assertEquals(10f / 15f, truncated.get(1), 0.1f);
    assertEquals(5f / 15f, truncated.get(2), 0.1f);
}

Source File: SemSigUtilsTest.java From ADW with GNU General Public License v3.0

5 votes

@Test
public void testTruncateVector()
{
    // Keys 0..3 with values 1, 10, 5, 2.
    TIntFloatMap vector = new TIntFloatHashMap();
    vector.put(0, 1f);
    vector.put(1, 10f);
    vector.put(2, 5f);
    vector.put(3, 2f);

    // Keep two entries without normalizing.
    final int keep = 2;
    TIntFloatMap truncated = SemSigUtils.truncateVector(vector, false, keep, false);

    // The kept entries are expected to carry their original values.
    assertEquals(2, truncated.size());
    assertEquals(10f, truncated.get(1), 0.1f);
    assertEquals(5f, truncated.get(2), 0.1f);
}

Source File: WordCluster.java From fnlp with GNU Lesser General Public License v3.0

5 votes

/**
 * Looks up the value stored in {@code pcc} for the ordered pair
 * {@code (c1, c2)}.
 *
 * @param c1 key of the outer map
 * @param c2 key of the inner map
 * @return the stored value, or 0 when {@code pcc} has no row for {@code c1}
 */
private float getProb(int c1, int c2) {
	final TIntFloatHashMap row = pcc.get(c1);
	return (row == null) ? 0f : row.get(c2);
}

Source File: WordCluster.java From fnlp with GNU Lesser General Public License v3.0

5 votes

/**
 * Converts the raw values in {@code wordProb} and {@code pcc} into
 * probabilities in a single pass (done once up front to save time) by
 * dividing each value by {@code totalword}. A {@code Cluster} is registered
 * for every non-negative key of {@code wordProb}.
 */
private void statisticProb() {
	System.out.println("统计概率");

	// Per-word values: rescale in place and create one cluster per word.
	for (TIntFloatIterator wordIt = wordProb.iterator(); wordIt.hasNext();) {
		wordIt.advance();
		final float prob = wordIt.value()/totalword;
		wordIt.setValue(prob);
		final int id = wordIt.key();
		// Negative keys are still rescaled above but get no cluster.
		if (id >= 0) {
			clusters.put(id, new Cluster(id, prob, alpahbet.lookupString(id)));
		}
	}

	// Pair values: rescale every inner map in place.
	for (TIntObjectIterator<TIntFloatHashMap> rowIt = pcc.iterator(); rowIt.hasNext();) {
		rowIt.advance();
		final TIntFloatHashMap row = rowIt.value();
		for (TIntFloatIterator cellIt = row.iterator(); cellIt.hasNext();) {
			cellIt.advance();
			cellIt.setValue(cellIt.value()/totalword);
		}
	}

}

Source File: MyArrays.java From fnlp with GNU Lesser General Public License v3.0

4 votes

/**
 * Removes the low-energy entries of a sparse vector in place.
 * The keys are split with {@code getTop(data, v)}; the second group is passed
 * to {@code setZero} and the first group is returned.
 *
 * @param data sparse vector to trim (modified)
 * @param v    threshold forwarded to {@code getTop}
 * @return the keys of the first group reported by {@code getTop}
 */
public static int[] trim(TIntFloatHashMap data, float v) {
	final int[][] split = getTop(data, v);
	final int[] kept = split[0];
	final int[] dropped = split[1];
	setZero(data, dropped);
	return kept;
}

Source File: MyHashSparseArrays.java From fnlp with GNU Lesser General Public License v3.0

4 votes

/**
 * Drops the entries whose energy falls below the threshold split.
 * {@code getTop(data, v)} partitions the keys into two groups; the second
 * group is handed to {@code setZero}, the first group is the result.
 *
 * @param data sparse vector to trim (modified)
 * @param v    threshold forwarded to {@code getTop}
 * @return the keys in the first group of the partition
 */
public static int[] trim(TIntFloatHashMap data, float v) {
	final int[][] groups = getTop(data, v);
	// groups[1] holds the keys to discard, groups[0] the keys to report.
	setZero(data, groups[1]);
	final int[] retained = groups[0];
	return retained;
}

Source File: SemSigProcess.java From ADW with GNU General Public License v3.0

4 votes

/**
 * Reads a semantic signature from a compressed, tab-separated file.
 * Assumes that the SemSigs are already sorted and normalized.
 *
 * Each data line is {@code id<TAB>probability}. A line with only an id reuses
 * the probability of the most recent line that carried one (0 if there was
 * none yet). Lines starting with {@code "!!"} are skipped and do not count
 * towards {@code size}.
 *
 * If the file does not exist, a signature with only its offset set is
 * returned. If reading or parsing fails part-way, the stack trace is printed
 * and the entries read so far are kept.
 *
 * @param path             path of the signature file; the offset is derived from it
 * @param size             maximum number of entries to read; 0 or any value above
 *                         {@code MAX_VECTOR_SIZE} means {@code MAX_VECTOR_SIZE}
 * @param warnings         whether to log a message when the file does not exist
 * @param normalizationLKB not used by this method
 * @return the signature read from {@code path}
 */
public SemSig getCustomSemSigFromCompressed(String path, int size, boolean warnings, LKB normalizationLKB)
{
	if(size == 0 || size > MAX_VECTOR_SIZE) 
		size = MAX_VECTOR_SIZE;
	
	SemSig vector = new SemSig();
	String offset = GeneralUtils.getOffsetFromPath(path);
	vector.setOffset(offset);
	
	TIntFloatMap map = new TIntFloatHashMap(size); 
	
	if(!new File(path).exists())
	{
		if (warnings)
			log.info("[WARNING: "+path+ " does not exist]");
		
		return vector;
	}
	
	// try-with-resources closes the reader on every exit path, including a
	// parse failure inside the loop (previously the reader leaked in that case).
	try (BufferedReader br = new BufferedReader(new FileReader(path)))
	{
		float prob;
		float lastProb = 0.0f;
		int lineCounter = 1;
		String line;
		
		// readLine() returning null is the reliable end-of-file signal;
		// ready() only reports whether a read would block.
		while((line = br.readLine()) != null)
		{
			if(line.startsWith("!!")) continue;
			
			String[] lineSplit = line.split("\t");
			
			//keeping the IDs
			int off = Integer.parseInt(lineSplit[0]);
			
			if(lineSplit.length == 1)
			{
				// No probability on this line: reuse the last one seen.
				prob = lastProb;
			}
			else
			{
				prob = Float.parseFloat(lineSplit[1]);
				lastProb = prob;
			}
			
			map.put(off, prob);
			
			if(lineCounter++ >= size)
				break;
		}
	}
	catch(Exception e)
	{
		e.printStackTrace();
	}

	// Only a caller-requested size smaller than the maximum needs truncation.
	if(size != MAX_VECTOR_SIZE)
		map = SemSigUtils.truncateVector(map, true, size, true);

	vector.setVector(map);
	
	return vector;
}

Source File: WeightedOverlapTest.java From ADW with GNU General Public License v3.0

4 votes

@Test
public void testGetSortedIndices()
{
    WeightedOverlap overlap = new WeightedOverlap();

    // Reference vector: keys 1..6, each mapped to its own value.
    TIntFloatMap reference = new TIntFloatHashMap();
    for (int key = 1; key <= 6; key++)
    {
        reference.put(key, (float) key);
    }

    // Exact copy of the reference.
    TIntFloatMap identical = new TIntFloatHashMap();
    identical.putAll(reference);

    // Half-overlapping key set.
    // NOTE(review): this map is populated but never compared below - confirm
    // whether the second comparison was meant to use it.
    TIntFloatMap halfOverlap = new TIntFloatHashMap();
    halfOverlap.put(4, 4f);
    halfOverlap.put(5, 5f);
    halfOverlap.put(6, 6f);
    halfOverlap.put(7, 1f);
    halfOverlap.put(8, 2f);
    halfOverlap.put(9, 3f);

    // Same keys as the reference, different values.
    TIntFloatMap reordered = new TIntFloatHashMap();
    reordered.put(1, 6f);
    reordered.put(4, 5f);
    reordered.put(2, 4f);
    reordered.put(5, 3f);
    reordered.put(3, 2f);
    reordered.put(6, 1f);

    // No key in common with the reference.
    TIntFloatMap disjoint = new TIntFloatHashMap();
    disjoint.put(7, 6f);
    disjoint.put(8, 5f);

    double sameScore = overlap.compare(reference, identical, true);
    double sameScoreAgain = overlap.compare(reference, identical, true);
    double reorderedScore = overlap.compare(reference, reordered, true);
    double disjointScore = overlap.compare(reference, disjoint, true);

    assertEquals(1, sameScore, 0.01);
    assertEquals(1, sameScoreAgain, 0.01);
    assertEquals(0.725, reorderedScore, 0.01);
    assertEquals(0, disjointScore, 0.0001);
}

Source File: HashSparseVector.java From fnlp with GNU Lesser General Public License v3.0

4 votes

/**
 * Copy constructor: gives the new vector its own map, built from the map of
 * {@code v}.
 *
 * @param v the vector to copy
 */
public HashSparseVector(HashSparseVector v) {
	this.data = new TIntFloatHashMap(v.data);
}

Source File: MyHashSparseArrays.java From fnlp with GNU Lesser General Public License v3.0

3 votes

/**
 * Zeroes the elements at the given indices. In this sparse representation
 * that means removing the corresponding entries from the map.
 *
 * @param data
 *            sparse vector to modify
 * @param idx
 *            keys whose entries are removed
 */
public static void setZero(TIntFloatHashMap data, int[] idx) {
	for (int key : idx) {
		if (!data.containsKey(key)) {
			continue;
		}
		data.remove(key);
	}
}

Source File: MyArrays.java From fnlp with GNU Lesser General Public License v3.0

3 votes

/**
 * Sets the elements at the given indices to zero by deleting their entries
 * from the sparse map. Indices without an entry are skipped.
 *
 * @param data
 *            sparse vector to modify
 * @param idx
 *            keys to clear
 */
public static void setZero(TIntFloatHashMap data, int[] idx) {
	int pos = 0;
	while (pos < idx.length) {
		final int key = idx[pos++];
		if (data.containsKey(key)) {
			data.remove(key);
		}
	}
}

Source File: MyHashSparseArrays.java From fnlp with GNU Lesser General Public License v3.0

votes

/**
 * Orders the keys of a sparse vector by the absolute value of their entries
 * (largest first, per the original documentation) and returns the keys in
 * that order. The work is delegated to {@code MyCollection.sort}.
 *
 * @param data
 *            sparse vector to order
 * @return the keys in sorted order
 */
public static int[] sort(TIntFloatHashMap data) {
	final int[] orderedKeys = MyCollection.sort(data);
	return orderedKeys;
}

Source File: MyArrays.java From fnlp with GNU Lesser General Public License v3.0

votes

/**
 * Returns the keys of a sparse vector, ordered by absolute value of the
 * entries (largest first, per the original documentation). This is a thin
 * wrapper around {@code MyCollection.sort}.
 *
 * @param data
 *            sparse vector to order
 * @return the keys in sorted order
 */
public static int[] sort(TIntFloatHashMap data) {
	// All of the ordering logic lives in MyCollection.
	final int[] result = MyCollection.sort(data);
	return result;
}

gnu.trove.map.hash.TIntFloatHashMap Java Examples