gnu.trove.map.hash.TIntFloatHashMap Java Examples
The following examples show how to use
gnu.trove.map.hash.TIntFloatHashMap.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FuzzySetSimJoin.java From JedAIToolkit with Apache License 2.0 | 6 votes |
/**
 * Finds the matches for one query set by running the four pipeline stages
 * in order: signature generation, check filter, nearest-neighbor filter and
 * verification.
 *
 * @param querySet     the query set to match
 * @param collection   the collection that candidates are drawn from
 * @param simThreshold similarity threshold forwarded to every stage
 * @param idx          index structure forwarded to the signature and check-filter stages
 * @return whatever {@code verifyCandidates} reports for the candidates that survive the filters
 */
private TIntFloatHashMap search(int[][] querySet, int[][][] collection, float simThreshold,
        TIntObjectMap<TIntList>[] idx) {
    // Stage 1: signature generation.
    final TIntSet[] signature = computeUnflattenedSignature(querySet, simThreshold, idx);

    // Stage 2: candidate selection and check filter.
    final TIntObjectMap<TIntFloatMap> afterCheckFilter =
            applyCheckFilter(querySet, collection, signature, idx, simThreshold);

    // Stage 3: nearest-neighbor filter.
    final TIntSet afterNNFilter =
            applyNNFilter(querySet, collection, afterCheckFilter, simThreshold);

    // Stage 4: verification; its result is the result of the search.
    return verifyCandidates(querySet, collection, afterNNFilter, simThreshold);
}
Example #2
Source File: FuzzySetSimJoin.java From JedAIToolkit with Apache License 2.0 | 6 votes |
/**
 * Computes the join between two already transformed and indexed collections.
 *
 * @param collection1  collection whose sets are used as queries, one per position {@code i}
 * @param collection2  collection that is indexed and searched
 * @param simThreshold similarity threshold forwarded to {@code search}
 * @return map from the string {@code i + "_" + j} to the value {@code search}
 *         reported for key {@code j} when querying with set {@code i}
 */
HashMap<String, Float> join(int[][][] collection1, int[][][] collection2, float simThreshold) {
    // Index the second collection once; every query reuses it.
    TIntObjectMap<TIntList>[] idx = buildSetInvertedIndex(collection2, tokenDict.size());

    final HashMap<String, Float> matchingPairs = new HashMap<>();

    int queryId = 0;
    for (int[][] querySet : collection1) {
        TIntFloatHashMap matches = search(querySet, collection2, simThreshold, idx);
        int[] matchedIds = matches.keys();
        for (int k = 0; k < matchedIds.length; k++) {
            int j = matchedIds[k];
            matchingPairs.put(queryId + "_" + j, matches.get(j));
        }
        queryId++;
    }
    return matchingPairs;
}
Example #3
Source File: KMeansWordCluster.java From fnlp with GNU Lesser General Public License v3.0 | 6 votes |
/**
 * Computes a distance value between the sparse vector {@code sv} and the
 * center stored for class {@code n}.
 *
 * The result starts at {@code baseDistance / (count * count)} and is then
 * corrected for every entry of {@code sv}; entries that exist only in the
 * center are not visited here.
 *
 * @param n            class index used to look up the center and its count
 * @param sv           vector to measure
 * @param baseDistance precomputed base term, divided by the squared class count
 * @return the accumulated distance value
 */
private float distanceEuclidean(int n, HashSparseVector sv, float baseDistance) {
    final HashSparseVector center = classCenter.get(n);
    final int count = classCount.get(n);
    final TIntFloatHashMap centerData = center.data;

    float dist = baseDistance / (count * count);
    for (TIntFloatIterator it = sv.data.iterator(); it.hasNext();) {
        it.advance();
        final int key = it.key();
        final float value = it.value();
        if (centerData.containsKey(key)) {
            // Stored center values are divided by count before use.
            final float temp = centerData.get(key) / count;
            dist -= temp * temp;
            dist += (value - temp) * (value - temp);
        } else {
            dist += value * value;
        }
    }
    return dist;
}
Example #4
Source File: KMeansWordCluster.java From fnlp with GNU Lesser General Public License v3.0 | 6 votes |
/**
 * Updates the base-distance entry of class {@code classid} using the entries
 * of {@code vector} and writes the result back into {@code baseDistList}.
 *
 * @param classid index of the class whose base distance is updated
 * @param vector  vector whose entries drive the update
 */
private void updateBaseDist(int classid, HashSparseVector vector) {
    float base = baseDistList.get(classid);
    final TIntFloatHashMap center = classCenter.get(classid).data;

    for (TIntFloatIterator it = vector.data.iterator(); it.hasNext();) {
        it.advance();
        final int key = it.key();
        final float value = it.value();
        if (center.containsKey(key)) {
            // Replace the old squared center term with the squared difference.
            final float temp = center.get(key);
            base -= temp * temp;
            base += (value - temp) * (value - temp);
        } else {
            base += value * value;
        }
    }
    baseDistList.set(classid, base);
}
Example #5
Source File: SemSigUtilsTest.java From ADW with GNU General Public License v3.0 | 6 votes |
/**
 * Checks that {@code SemSigUtils.getSortedIndices} returns the four keys
 * in the order 1, 2, 3, 0 for the values 1, 10, 5, 2 (i.e. by descending value).
 */
@Test
public void testGetSortedIndices() {
    TIntFloatMap m = new TIntFloatHashMap();
    m.put(0, 1f);
    m.put(1, 10f);
    m.put(2, 5f);
    m.put(3, 2f);

    final int[] expected = {1, 2, 3, 0};
    final int[] sorted = SemSigUtils.getSortedIndices(m);

    assertEquals(expected.length, sorted.length);
    for (int i = 0; i < expected.length; i++) {
        assertEquals(expected[i], sorted[i]);
    }
}
Example #6
Source File: WordCluster.java From fnlp with GNU Lesser General Public License v3.0 | 6 votes |
/**
 * Looks up the weight stored for the pair ({@code c1}, {@code c2}).
 * The pair is order-independent: the smaller id selects the row in
 * {@code wcc} and the larger id is the key inside that row.
 *
 * @param c1 first id
 * @param c2 second id
 * @return 0 when no row exists for the smaller id, otherwise the row's
 *         value for the larger id
 */
private float getweight(int c1, int c2) {
    final int min = Math.min(c1, c2);
    final int max = Math.max(c1, c2);
    final TIntFloatHashMap row = wcc.get(min);
    return (row == null) ? 0f : row.get(max);
}
Example #7
Source File: SemSigUtils.java From ADW with GNU General Public License v3.0 | 6 votes |
/**
 * Normalizes the values of a vector so that they sum to 1.0.
 * The input is left untouched; a new map is returned.
 *
 * NOTE(review): there is no guard for a zero total, so a non-empty vector
 * whose values sum to 0 is divided by zero in float arithmetic.
 *
 * @param vector the vector to normalize
 * @return a new map holding each original value divided by the sum of all values
 */
public static TIntFloatMap normalizeVector(TIntFloatMap vector) {
    // Pass 1: sum of all values.
    float total = 0;
    for (TFloatIterator values = vector.valueCollection().iterator(); values.hasNext();) {
        total += values.next();
    }

    // Pass 2: scaled copy.
    final TIntFloatMap normalized = new TIntFloatHashMap(vector.size());
    for (TIntFloatIterator entries = vector.iterator(); entries.hasNext();) {
        entries.advance();
        normalized.put(entries.key(), entries.value() / total);
    }
    return normalized;
}
Example #8
Source File: MyArrays.java From fnlp with GNU Lesser General Public License v3.0 | 6 votes |
/**
 * Splits the keys of a sparse vector by cumulative energy (squared value).
 *
 * The keys are ordered by {@code sort(data)}; the cumulative share of the
 * total squared value is accumulated in that order, and the split point is
 * the first position at which the share exceeds {@code thres}.
 *
 * @param data  sparse vector
 * @param thres threshold on the cumulative energy share
 * @return a two-row array: row 0 holds the ordered keys before the split
 *         point, row 1 holds the keys from the split point onward
 */
public static int[][] getTop(TIntFloatHashMap data, float thres) {
    final int[] idx = sort(data);
    final int n = idx.length;

    // Squared values are summed from the last position to the first.
    final float[] energy = new float[n];
    float total = 0;
    for (int k = n - 1; k >= 0; k--) {
        energy[k] = (float) Math.pow(data.get(idx[k]), 2);
        total += energy[k];
    }

    int split = 0;
    float ratio = 0;
    while (split < n) {
        ratio += energy[split] / total;
        if (ratio > thres) {
            break;
        }
        split++;
    }

    return new int[][] {
        Arrays.copyOfRange(idx, 0, split),
        Arrays.copyOfRange(idx, split, n)
    };
}
Example #9
Source File: MyCollection.java From fnlp with GNU Lesser General Public License v3.0 | 6 votes |
/**
 * Returns the keys of {@code tmap} ordered by the absolute value of their
 * entries. The ordering itself is delegated to {@code sort(Map)}; the
 * original comment describes it as largest to smallest.
 *
 * @param tmap map whose keys are to be ordered
 * @return the keys, in the order produced by {@code sort(Map)}
 */
public static int[] sort(TIntFloatHashMap tmap) {
    // Box every entry, keeping only the magnitude of the value.
    HashMap<Integer, Float> absValues = new HashMap<Integer, Float>();
    for (TIntFloatIterator it = tmap.iterator(); it.hasNext();) {
        it.advance();
        absValues.put(it.key(), Math.abs(it.value()));
    }

    List<Entry> ordered = sort(absValues);
    int[] idx = new int[ordered.size()];
    int pos = 0;
    for (Entry entry : ordered) {
        idx[pos++] = (Integer) entry.getKey();
    }
    return idx;
}
Example #10
Source File: MyHashSparseArrays.java From fnlp with GNU Lesser General Public License v3.0 | 6 votes |
/**
 * Partitions the keys of a sparse vector at the point where the cumulative
 * share of squared values first exceeds {@code thres}.
 *
 * Keys are taken in the order returned by {@code sort(data)}.
 *
 * @param data  sparse vector
 * @param thres threshold on the cumulative share of the total squared value
 * @return {@code result[0]}: ordered keys before the split point;
 *         {@code result[1]}: the remaining keys, starting at the split point
 */
public static int[][] getTop(TIntFloatHashMap data, float thres) {
    final int[] order = sort(data);
    final float[] squared = new float[order.length];

    // Accumulate from the back of the array toward the front.
    float total = 0;
    int pos = order.length;
    while (pos-- > 0) {
        squared[pos] = (float) Math.pow(data.get(order[pos]), 2);
        total += squared[pos];
    }

    float ratio = 0;
    int cut = 0;
    for (; cut < order.length; cut++) {
        ratio += squared[cut] / total;
        if (ratio > thres) {
            break;
        }
    }

    final int[][] result = new int[2][];
    result[0] = Arrays.copyOfRange(order, 0, cut);
    result[1] = Arrays.copyOfRange(order, cut, order.length);
    return result;
}
Example #11
Source File: SemSigUtilsTest.java From ADW with GNU General Public License v3.0 | 5 votes |
/**
 * Calls {@code SemSigUtils.truncateVector(m, false, 2, true)} on a
 * four-entry vector and expects two entries back: keys 1 and 2, with
 * values 10/15 and 5/15 (tolerance 0.1).
 */
@Test
public void testTruncateVectorNormalized() {
    final float[] values = {1f, 10f, 5f, 2f};
    TIntFloatMap m = new TIntFloatHashMap();
    for (int key = 0; key < values.length; key++) {
        m.put(key, values[key]);
    }

    TIntFloatMap truncated = SemSigUtils.truncateVector(m, false, 2, true);

    assertEquals(2, truncated.size());
    // 15 is the sum of the two expected survivors, 10 and 5.
    assertEquals(10f / 15f, truncated.get(1), 0.1f);
    assertEquals(5f / 15f, truncated.get(2), 0.1f);
}
Example #12
Source File: SemSigUtilsTest.java From ADW with GNU General Public License v3.0 | 5 votes |
/**
 * Calls {@code SemSigUtils.truncateVector(m, false, 2, false)} on a
 * four-entry vector and expects two entries back: keys 1 and 2, with
 * their original values 10 and 5 (tolerance 0.1).
 */
@Test
public void testTruncateVector() {
    final float[] values = {1f, 10f, 5f, 2f};
    TIntFloatMap m = new TIntFloatHashMap();
    for (int key = 0; key < values.length; key++) {
        m.put(key, values[key]);
    }

    TIntFloatMap truncated = SemSigUtils.truncateVector(m, false, 2, false);

    assertEquals(2, truncated.size());
    assertEquals(10f, truncated.get(1), 0.1f);
    assertEquals(5f, truncated.get(2), 0.1f);
}
Example #13
Source File: WordCluster.java From fnlp with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Looks up the value stored in {@code pcc} for the pair ({@code c1}, {@code c2}).
 *
 * @param c1 key of the row in {@code pcc}
 * @param c2 key inside that row
 * @return 0 when {@code pcc} has no row for {@code c1}, otherwise the row's
 *         value for {@code c2}
 */
private float getProb(int c1, int c2) {
    final TIntFloatHashMap map = pcc.get(c1);
    if (map == null) {
        return 0f;
    }
    // Reuse the row that was just null-checked instead of looking it up again.
    return map.get(c2);
}
Example #14
Source File: WordCluster.java From fnlp with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Computes all probabilities in a single pass to save time.
 *
 * Every value in {@code wordProb} and in each inner map of {@code pcc} is
 * divided by {@code totalword} in place. For each non-negative key of
 * {@code wordProb} a {@code Cluster} is also created and stored in
 * {@code clusters}.
 */
private void statisticProb() {
    System.out.println("统计概率");

    for (TIntFloatIterator words = wordProb.iterator(); words.hasNext();) {
        words.advance();
        final float v = words.value() / totalword;
        words.setValue(v);
        final int key = words.key();
        // Negative keys are rescaled but get no cluster.
        if (key >= 0) {
            clusters.put(key, new Cluster(key, v, alpahbet.lookupString(key)));
        }
    }

    for (TIntObjectIterator<TIntFloatHashMap> rows = pcc.iterator(); rows.hasNext();) {
        rows.advance();
        final TIntFloatHashMap row = rows.value();
        for (TIntFloatIterator cells = row.iterator(); cells.hasNext();) {
            cells.advance();
            cells.setValue(cells.value() / totalword);
        }
    }
}
Example #15
Source File: MyArrays.java From fnlp with GNU Lesser General Public License v3.0 | 4 votes |
/**
 * Removes the entries whose energy falls below a threshold.
 *
 * The keys are split with {@code getTop(data, v)}; the second group is
 * handed to {@code setZero} and the first group is returned.
 *
 * @param data sparse vector, modified in place
 * @param v    threshold forwarded to {@code getTop}
 * @return the first key group returned by {@code getTop}
 */
public static int[] trim(TIntFloatHashMap data, float v) {
    final int[][] groups = getTop(data, v);
    final int[] kept = groups[0];
    final int[] dropped = groups[1];
    setZero(data, dropped);
    return kept;
}
Example #16
Source File: MyHashSparseArrays.java From fnlp with GNU Lesser General Public License v3.0 | 4 votes |
/**
 * Drops the low-energy entries of a sparse vector.
 *
 * {@code getTop(data, v)} partitions the keys into two groups; the second
 * group is passed to {@code setZero} and the first group is the result.
 *
 * @param data sparse vector, modified in place
 * @param v    threshold forwarded to {@code getTop}
 * @return the first key group produced by {@code getTop}
 */
public static int[] trim(TIntFloatHashMap data, float v) {
    final int[][] partition = getTop(data, v);
    final int[] retained = partition[0];
    setZero(data, partition[1]);
    return retained;
}
Example #17
Source File: SemSigProcess.java From ADW with GNU General Public License v3.0 | 4 votes |
/** * Assumes that the SemSigs are already sorted and normalized * @param path * @param size * @param warnings * @param normalizationLKB * @return */ public SemSig getCustomSemSigFromCompressed(String path, int size, boolean warnings, LKB normalizationLKB) { if(size == 0 || size > MAX_VECTOR_SIZE) size = MAX_VECTOR_SIZE; SemSig vector = new SemSig(); String offset = GeneralUtils.getOffsetFromPath(path); vector.setOffset(offset); TIntFloatMap map = new TIntFloatHashMap(size); if(!new File(path).exists()) { if (warnings) log.info("[WARNING: "+path+ " does not exist]"); return vector; } try { BufferedReader br = new BufferedReader(new FileReader(path)); float prob; float lastProb = 0.0f; int lineCounter = 1; while(br.ready()) { String line = br.readLine(); if(line.startsWith("!!")) continue; String[] lineSplit = line.split("\t"); //keeping the IDs //String off = IDtoOffsetMap.get(lineSplit[0]); int off = Integer.parseInt(lineSplit[0]); if(lineSplit.length == 1) { prob = lastProb; } else { prob = Float.parseFloat(lineSplit[1]); lastProb = prob; } map.put(off, prob); if(lineCounter++ >= size) break; } br.close(); } catch(Exception e) { e.printStackTrace(); } if(size != MAX_VECTOR_SIZE) map = SemSigUtils.truncateVector(map, true, size, true); vector.setVector(map); return vector; }
Example #18
Source File: WeightedOverlapTest.java From ADW with GNU General Public License v3.0 | 4 votes |
/**
 * Exercises {@code WeightedOverlap.compare} on small hand-built vectors and
 * checks the resulting scores.
 *
 * NOTE(review): {@code map3} is populated but never passed to
 * {@code compare}; {@code score2} repeats the map1/map2 comparison of
 * {@code score1}. Confirm whether map1/map3 was intended.
 */
@Test
public void testGetSortedIndices() {
    WeightedOverlap WO = new WeightedOverlap();

    TIntFloatMap map1 = new TIntFloatHashMap();
    map1.put(1, 1f);
    map1.put(2, 2f);
    map1.put(3, 3f);
    map1.put(4, 4f);
    map1.put(5, 5f);
    map1.put(6, 6f);

    // map2 holds the same entries as map1.
    TIntFloatMap map2 = new TIntFloatHashMap();
    map2.putAll(map1);

    TIntFloatMap map3 = new TIntFloatHashMap();
    map3.put(4, 4f);
    map3.put(5, 5f);
    map3.put(6, 6f);
    map3.put(7, 1f);
    map3.put(8, 2f);
    map3.put(9, 3f);

    // Same keys as map1, different values.
    TIntFloatMap map4 = new TIntFloatHashMap();
    map4.put(1, 6f);
    map4.put(4, 5f);
    map4.put(2, 4f);
    map4.put(5, 3f);
    map4.put(3, 2f);
    map4.put(6, 1f);

    // No key in common with map1.
    TIntFloatMap map5 = new TIntFloatHashMap();
    map5.put(7, 6f);
    map5.put(8, 5f);

    double score1 = WO.compare(map1, map2, true);
    double score2 = WO.compare(map1, map2, true);
    double score3 = WO.compare(map1, map4, true);
    double score4 = WO.compare(map1, map5, true);

    assertEquals(1, score1, 0.01);
    assertEquals(1, score2, 0.01);
    assertEquals(0.725, score3, 0.01);
    assertEquals(0, score4, 0.0001);
}
Example #19
Source File: HashSparseVector.java From fnlp with GNU Lesser General Public License v3.0 | 4 votes |
/**
 * Creates a vector whose {@code data} is a new {@code TIntFloatHashMap}
 * constructed from the {@code data} of {@code v}.
 *
 * @param v the vector to take the entries from
 */
public HashSparseVector(HashSparseVector v) {
    this.data = new TIntFloatHashMap(v.data);
}
Example #20
Source File: MyHashSparseArrays.java From fnlp with GNU Lesser General Public License v3.0 | 3 votes |
/**
 * Zeroes the entries at the given indices. For this sparse representation
 * that means removing the key from the map rather than storing a 0.
 *
 * @param data the sparse array, modified in place
 * @param idx  the indices to clear; indices not present in {@code data} are ignored
 */
public static void setZero(TIntFloatHashMap data, int[] idx) {
    for (int key : idx) {
        if (!data.containsKey(key)) {
            continue;
        }
        data.remove(key);
    }
}
Example #21
Source File: MyArrays.java From fnlp with GNU Lesser General Public License v3.0 | 3 votes |
/**
 * Clears the entries at the given indices by removing their keys from the
 * map; no explicit 0 value is stored.
 *
 * @param data the sparse array, modified in place
 * @param idx  the indices to clear; indices that are absent are skipped
 */
public static void setZero(TIntFloatHashMap data, int[] idx) {
    int pos = 0;
    while (pos < idx.length) {
        final int key = idx[pos++];
        if (data.containsKey(key)) {
            data.remove(key);
        }
    }
}
Example #22
Source File: MyHashSparseArrays.java From fnlp with GNU Lesser General Public License v3.0 | votes |
/**
 * Sorts the entries by absolute value, largest first, and returns the
 * original indices in the resulting order. Delegates to
 * {@code MyCollection.sort}.
 *
 * @param data the array to sort
 * @return the original indices, as returned by {@code MyCollection.sort}
 */
public static int[] sort(TIntFloatHashMap data) {
    final int[] order = MyCollection.sort(data);
    return order;
}
Example #23
Source File: MyArrays.java From fnlp with GNU Lesser General Public License v3.0 | votes |
/**
 * Orders the entries by absolute value in descending order and returns the
 * original indices in that order. This is a thin wrapper around
 * {@code MyCollection.sort}.
 *
 * @param data the array to sort
 * @return the original indices, as produced by {@code MyCollection.sort}
 */
public static int[] sort(TIntFloatHashMap data) {
    final int[] indices = MyCollection.sort(data);
    return indices;
}