Java Code Examples for gnu.trove.map.hash.TIntObjectHashMap#iterator()
The following examples show how to use
gnu.trove.map.hash.TIntObjectHashMap#iterator().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: EntityLookup.java From ambiverse-nlu with Apache License 2.0 | 6 votes |
/**
 * Keeps only those entity candidates that carry at least one of the given types.
 *
 * @param entities       Entities to filter
 * @param filteringTypes Set of types to filter the entities against; when {@code null},
 *                       no filtering happens
 * @return {@code entities} itself when {@code filteringTypes} is {@code null}, otherwise a
 *         new {@code Entities} containing every candidate with at least one matching type
 * @throws EntityLinkingDataAccessException propagated from the data-access calls
 */
private Entities filterEntitiesByType(Entities entities, Set<Type> filteringTypes) throws EntityLinkingDataAccessException {
  if (filteringTypes == null) {
    return entities;
  }

  Entities matching = new Entities();
  TIntObjectHashMap<Set<Type>> typesByEntityId = DataAccess.getTypes(entities);
  TIntObjectIterator<Set<Type>> cursor = typesByEntityId.iterator();
  while (cursor.hasNext()) {
    cursor.advance();

    // One matching type is enough; stop scanning this entity's types as soon as we see it.
    boolean hasWantedType = false;
    for (Type candidate : cursor.value()) {
      if (filteringTypes.contains(candidate)) {
        hasWantedType = true;
        break;
      }
    }

    if (hasWantedType) {
      matching.add(entities.getEntityById(cursor.key()));
    }
  }
  return matching;
}
Example 2
Source File: InlinkOverlapEntityEntitySimilarity.java From ambiverse-nlu with Apache License 2.0 | 6 votes |
/**
 * Populates {@code entity2vector} with one {@code RoaringBitmap} per entity, where each
 * bitmap holds the ids returned by {@code DataAccess.getInlinkNeighbors} for that entity.
 * When {@code entities} is empty, only a debug message is logged and nothing is initialized.
 *
 * @param entities entities whose inlink neighbors are loaded
 * @throws Exception propagated from the data-access call
 */
private void setupEntities(Entities entities) throws Exception {
  if (entities.size() == 0) {
    logger.debug("Skipping initialization of InlinkEntityEntitySimilarity for " + entities.size() + " entities");
    return;
  }

  logger.debug("Initializing InlinkEntityEntitySimilarity for " + entities.size() + " entities");

  entity2vector = new TIntObjectHashMap<>();
  TIntObjectHashMap<int[]> inlinksByEntity = DataAccess.getInlinkNeighbors(entities);
  TIntObjectIterator<int[]> cursor = inlinksByEntity.iterator();
  while (cursor.hasNext()) {
    cursor.advance();
    int entityId = cursor.key();

    // Copy the int[] of inlink ids into a bitmap.
    RoaringBitmap inlinkBits = new RoaringBitmap();
    for (int inlinkId : cursor.value()) {
      inlinkBits.add(inlinkId);
    }
    entity2vector.put(entityId, inlinkBits);
  }

  logger.debug("Done initializing InlinkEntityEntitySimilarity");
}
Example 3
Source File: MilneWittenEntityEntitySimilarity.java From ambiverse-nlu with Apache License 2.0 | 6 votes |
private void setupEntities(Entities entities) throws Exception { logger.debug("Initializing MilneWittenEntityEntitySimilarity for " + entities.size() + " entities"); collectionSize = DataAccess.getCollectionSize(); TIntObjectHashMap<int[]> entityInlinks = DataAccess.getInlinkNeighbors(entities); // inlinks are assumed to be pre-sorted. entity2vector = new TIntObjectHashMap<>(); for (TIntObjectIterator<int[]> itr = entityInlinks.iterator(); itr.hasNext(); ) { itr.advance(); int entity = itr.key(); int[] inLinks = itr.value(); RoaringBitmap bs = new RoaringBitmap(); for (int l : inLinks) { bs.add(l); } entity2vector.put(entity, bs); } logger.debug("Done initializing MilneWittenEntityEntitySimilarity for " + entities.size() + " entities"); }
Example 4
Source File: YagoEntityKeyphraseCooccurrenceDataProviderIterator.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
/**
 * Creates the iterator over the given per-entity keyphrase counts and positions it on the
 * first entity. When the map is empty, {@code currentEntityKeyphrasesIterator} is set to
 * {@code null}.
 *
 * @param superdocKeyphraseCounts outer map whose values are the inner int-to-int count maps
 */
public YagoEntityKeyphraseCooccurrenceDataProviderIterator(TIntObjectHashMap<TIntIntHashMap> superdocKeyphraseCounts) {
  entitiesIterator = superdocKeyphraseCounts.iterator();

  if (!entitiesIterator.hasNext()) {
    // Nothing to iterate over: there is no inner iterator to expose.
    currentEntityKeyphrasesIterator = null;
    return;
  }

  // Trove iterators must be advanced before key()/value() can be read.
  entitiesIterator.advance();
  currentEntityKeyphrasesIterator = entitiesIterator.value().iterator();
}
Example 5
Source File: DataAccess.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
/**
 * Looks up the knowledge-base entities for the given internal ids and wraps each
 * (KB entity, internal id) pair in an {@code Entity}.
 *
 * @param internalIds internal entity ids to resolve
 * @return an {@code Entities} collection with one {@code Entity} per entry returned by the lookup
 * @throws EntityLinkingDataAccessException propagated from the data-access call
 */
public static Entities getAidaEntitiesForInternalIds(int[] internalIds) throws EntityLinkingDataAccessException {
  TIntObjectHashMap<KBIdentifiedEntity> kbEntitiesById = DataAccess.getKnowlegebaseEntitiesForInternalIds(internalIds);
  Entities result = new Entities();

  TIntObjectIterator<KBIdentifiedEntity> cursor = kbEntitiesById.iterator();
  while (cursor.hasNext()) {
    cursor.advance();
    KBIdentifiedEntity kbEntity = cursor.value();
    int internalId = cursor.key();
    result.add(new Entity(kbEntity, internalId));
  }
  return result;
}
Example 6
Source File: DataAccess.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
/** * Computes all entity occurrence probabilities based on their incoming links. * * @return Map of Entity->Probability. * @throws EntityLinkingDataAccessException */ public static TIntDoubleHashMap getAllEntityProbabilities() throws EntityLinkingDataAccessException { TIntObjectHashMap<int[]> entityInlinks = getAllInlinks(); TIntDoubleHashMap entityProbabilities = new TIntDoubleHashMap(entityInlinks.size(), 0.5f); // Get the total number of links. long totalLinkCount = 0; TIntObjectIterator<int[]> itr = entityInlinks.iterator(); while (itr.hasNext()) { itr.advance(); totalLinkCount += itr.value().length; } // Derive probabilities from counts. itr = entityInlinks.iterator(); while (itr.hasNext()) { itr.advance(); double probability = (double) itr.value().length / (double) totalLinkCount; entityProbabilities.put(itr.key(), probability); } return entityProbabilities; }
Example 7
Source File: DataAccessSQLCache.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
/**
 * Copies every (entity id, keyphrase list) entry into the cache registered under
 * {@code querySignature}, creating and registering that cache first when none exists yet.
 *
 * @param querySignature   key selecting the per-query cache in {@code entityKeyphrasesCaches}
 * @param entityKeyphrases entries to add to the cache
 */
private synchronized void addToEntityKeyphrasesCache(String querySignature, TIntObjectHashMap<List<EntityKeyphraseData>> entityKeyphrases) {
  CachingHashMap<Integer, List<EntityKeyphraseData>> cacheForQuery = entityKeyphrasesCaches.get(querySignature);
  if (cacheForQuery == null) {
    // The constructor argument comes from the ENTITIES_CACHE_SIZE configuration entry.
    cacheForQuery = new CachingHashMap<Integer, List<EntityKeyphraseData>>(
        EntityLinkingConfig.getAsInt(EntityLinkingConfig.ENTITIES_CACHE_SIZE));
    entityKeyphrasesCaches.put(querySignature, cacheForQuery);
  }

  TIntObjectIterator<List<EntityKeyphraseData>> cursor = entityKeyphrases.iterator();
  while (cursor.hasNext()) {
    cursor.advance();
    cacheForQuery.put(cursor.key(), cursor.value());
  }
}
Example 8
Source File: DataAccessSQLCache.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
/**
 * Copies every (entity id, keyword list) entry into the cache registered under
 * {@code querySignature}, creating and registering that cache first when none exists yet.
 *
 * @param querySignature key selecting the per-query cache in {@code entityKeywordsCaches}
 * @param entityKeywords entries to add to the cache
 */
private synchronized void addToEntityKeywordsCache(String querySignature, TIntObjectHashMap<List<EntityKeywordsData>> entityKeywords) {
  CachingHashMap<Integer, List<EntityKeywordsData>> cacheForQuery = entityKeywordsCaches.get(querySignature);
  if (cacheForQuery == null) {
    // The constructor argument comes from the ENTITIES_CACHE_SIZE configuration entry.
    cacheForQuery = new CachingHashMap<Integer, List<EntityKeywordsData>>(
        EntityLinkingConfig.getAsInt(EntityLinkingConfig.ENTITIES_CACHE_SIZE));
    entityKeywordsCaches.put(querySignature, cacheForQuery);
  }

  TIntObjectIterator<List<EntityKeywordsData>> cursor = entityKeywords.iterator();
  while (cursor.hasNext()) {
    cursor.advance();
    List<EntityKeywordsData> keywords = cursor.value();
    cacheForQuery.put(cursor.key(), keywords);
  }
}
Example 9
Source File: DataAccessKeyValueStore.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
@Override //TODO: language not used public Map<String, int[]> getCategoryIdsForMentions(Set<String> mentions, Language language, boolean isNamedEntity) throws EntityLinkingDataAccessException { Map<String, Entities> entities = getEntitiesForMentions(mentions, 1.0, 0, isNamedEntity); Map<String, Set<Integer>> mentionCategories = new HashMap<>(entities.size()); for (Entry<String, Entities> entry:entities.entrySet()) { Set<Integer> types = mentionCategories.get(entry.getKey()); if (types == null) { types = new HashSet<>(); } List<Integer> entityIds = new ArrayList<>(); for (Entity entity:entry.getValue()) { entityIds.add(entity.getId()); } TIntObjectHashMap<int[]> categories = getCategoryIdsForEntitiesIds(ArrayUtils.toPrimitive(entityIds.toArray(new Integer[0]))); TIntObjectIterator<int[]> it = categories.iterator(); while (it.hasNext()) { types.addAll(Arrays.asList(ArrayUtils.toObject(it.value()))); } } Map<String, int[]> ret = new HashMap<>(); for (String key:mentionCategories.keySet()) { Integer[] temp = mentionCategories.get(key).toArray(new Integer[0]); ret.put(key, ArrayUtils.toPrimitive(temp)); } return ret; }
Example 10
Source File: WikiCorpusTask.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
/**
 * Builds the inverse of the inlink relation: for every entity id, the set of entity ids
 * it appears as an inlink neighbor of.
 *
 * <p>The result is cached as JSON in a file named
 * {@code <AidaDbIdentifierLight>_<ENTITY_OUTLINK_CACHE_JSON>} in the working directory.
 * When that file exists and parses to a non-null map, its content is returned without
 * recomputation. Otherwise the map is computed from
 * {@code DataAccess.getInlinkNeighbors(DataAccess.getAllEntities())} and, if non-empty,
 * written to the cache file.
 *
 * @return map from an inlink entity id to the ids of the entities it is an inlink of
 * @throws EntityLinkingDataAccessException propagated from the data-access calls
 * @throws IOException if the cache file cannot be read, closed or written
 * @throws SQLException declared by the original signature
 */
private static Map<Integer, Set<Integer>> computeEntityOutlinks() throws EntityLinkingDataAccessException, IOException, SQLException {
  Map<Integer, Set<Integer>> inLinkOutLinkMap = new HashMap<>();
  String file_name = EntityLinkingManager.getAidaDbIdentifierLight() + "_" + ENTITY_OUTLINK_CACHE_JSON;

  if (Files.exists(Paths.get(file_name))) {
    logger.info("Loading " + file_name + " from cache");
    Map<Integer, Set<Integer>> cache;
    // try-with-resources: the reader (and the FileReader it wraps) was previously never
    // closed, leaking a file handle on every cache load.
    try (JsonReader reader = new JsonReader(new FileReader(file_name))) {
      cache = new GsonBuilder().enableComplexMapKeySerialization().create()
          .fromJson(reader, new TypeToken<Map<Integer, Set<Integer>>>() { }.getType());
    }
    if (cache != null) {
      inLinkOutLinkMap.putAll(cache);
      return inLinkOutLinkMap;
    }
  }

  logger.info("Computing " + ENTITY_OUTLINK_CACHE_JSON);
  TIntObjectHashMap<int[]> inlinkNeighbors = DataAccess.getInlinkNeighbors(DataAccess.getAllEntities());
  TIntObjectIterator<int[]> iterator = inlinkNeighbors.iterator();
  while (iterator.hasNext()) {
    iterator.advance();
    int outEntity = iterator.key();
    // Invert the relation: every inlink of outEntity has outEntity as one of its outlinks.
    for (int inEntity : iterator.value()) {
      inLinkOutLinkMap.computeIfAbsent(inEntity, unused -> new HashSet<>()).add(outEntity);
    }
  }

  if (!inLinkOutLinkMap.isEmpty()) {
    Gson gson = new Gson();
    Files.write(Paths.get(file_name), gson.toJson(inLinkOutLinkMap).getBytes());
  }
  return inLinkOutLinkMap;
}
Example 11
Source File: DataAccessCassandraIntegrationTest.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
/**
 * Checks that the union of all types of the candidates for the mention "Merkel"
 * contains the YAGO3 politician type.
 */
@Test
public void testGetTypes() throws EntityLinkingDataAccessException {
  Entities merkelCandidates = DataAccess.getEntitiesForMention("Merkel", 1.0, 0, true);
  TIntObjectHashMap<Set<Type>> typesByEntity = DataAccess.getTypes(merkelCandidates);
  Type politician = new Type("YAGO3", "<wordnet_politician_110450303>");

  // Merge the type sets of all candidates into one set.
  Set<Type> unionOfTypes = new HashSet<>();
  TIntObjectIterator<Set<Type>> cursor = typesByEntity.iterator();
  while (cursor.hasNext()) {
    cursor.advance();
    unionOfTypes.addAll(cursor.value());
  }

  assertTrue(unionOfTypes.contains(politician));
}
Example 12
Source File: WordCluster.java From fnlp with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Renders the clusters as text, one cluster per line.
 *
 * <p>Every index of {@code alpahbet} is grouped under the value {@code getHead(i)} returns
 * for it. Groups with fewer than two members are skipped. Each remaining group is printed
 * as {@code wordProb.get(head)}, a space, then every member string followed by a space,
 * and a trailing newline.
 *
 * @return the textual listing; empty when no group has at least two members
 */
@Override
public String toString(){
	StringBuilder sb = new StringBuilder();

	// Group the strings by their head index.
	TIntObjectHashMap<TLinkedHashSet<String>> sets = new TIntObjectHashMap<TLinkedHashSet<String>>();
	for(int i=0;i<alpahbet.size();i++){
		int head = getHead(i);
		TLinkedHashSet<String> members = sets.get(head);
		if(members==null){
			// Parameterized instead of the raw type, which was an unchecked assignment.
			members = new TLinkedHashSet<String>();
			sets.put(head, members);
		}
		members.add(alpahbet.lookupString(i));
	}

	TIntObjectIterator<TLinkedHashSet<String>> it = sets.iterator();
	while(it.hasNext()){
		it.advance();
		TLinkedHashSet<String> members = it.value();
		if(members.size()<2)
			continue;
		sb.append(wordProb.get(it.key()));
		sb.append(" ");
		for(String word : members){
			sb.append(word);
			sb.append(" ");
		}
		sb.append("\n");
	}
	return sb.toString();
}
Example 13
Source File: WikiCorpusTask.java From ambiverse-nlu with Apache License 2.0 | 4 votes |
private void computeNonprocessedEntityMentionLabels(Set<Entity> processingEntities, HashMap<Integer, Map<String, MentionObject>> entityMentionLabelsMap) throws EntityLinkingDataAccessException, InterruptedException { TIntObjectHashMap<int[]> typesIdsForEntitiesIds = DataAccess.getTypesIdsForEntitiesIds(processingEntities .stream() .mapToInt(Entity::getId) .toArray()); TIntObjectHashMap<List<MentionObject>> mentionsForEntities = DataAccess.getMentionsForEntities(new Entities(processingEntities)); TIntObjectIterator<List<MentionObject>> entityMentionsIterator = mentionsForEntities.iterator(); while (entityMentionsIterator.hasNext()) { Map<String, MentionObject> entityResult = new HashMap<>(); entityMentionsIterator.advance(); int eid = entityMentionsIterator.key(); int[] typeIDs = typesIdsForEntitiesIds.get(eid); NerType.Label nerTypeForTypeIds = NerType.getNerTypeForTypeIds(typeIDs); for (MentionObject mentionObject : entityMentionsIterator.value()) { if (Thread.interrupted()) { throw new InterruptedException(); } String entityMention = mentionObject.getMention(); NerType.Label nerTypeForTypeIds_ = nerTypeForTypeIds; // getting rid of the junk from the aida database if (stopwords.contains(entityMention.toLowerCase()) || entityMention.contains("<SPAN") || entityMention.contains("=") || entityMention.contains("<!--") || entityMention.contains("(") && entityMention.contains(")") || isDate(entityMention.trim()) || entityMention.matches("[.,\\/#!$%\\^&\\*;:{}=\\-_`~()]") || entityMention.endsWith("'S") ) { continue; } // i.e. 
United States is a location, not an organization if (knownCountries.contains(entityMention.toLowerCase())) { nerTypeForTypeIds_ = NerType.Label.LOCATION; } if (languagesList.contains(entityMention.toLowerCase())) { nerTypeForTypeIds_ = NerType.Label.MISC; } MentionObject copy = mentionObject.copy(); copy.setLabel(nerTypeForTypeIds_); entityResult.put(entityMention, copy); } entityMentionLabelsMapCache.put(eid, entityResult); entityMentionLabelsMap.put(eid, entityResult); } }