gnu.trove.iterator.TIntObjectIterator Java Examples

The following examples show how to use gnu.trove.iterator.TIntObjectIterator. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: EntityLookup.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Keeps only the entity candidates that carry at least one of the given types.
 *
 * @param entities Entities to filter
 * @param filteringTypes Set of types to filter the entities against; when {@code null}
 *     no filtering takes place and {@code entities} itself is returned
 * @return a new collection with the matching entities, or the unchanged input when
 *     {@code filteringTypes} is {@code null}
 * @throws EntityLinkingDataAccessException declared for the type lookup through DataAccess
 */
private Entities filterEntitiesByType(Entities entities, Set<Type> filteringTypes) throws EntityLinkingDataAccessException {
  if (filteringTypes == null) {
    return entities;
  }

  Entities matching = new Entities();
  TIntObjectHashMap<Set<Type>> typesByEntityId = DataAccess.getTypes(entities);
  TIntObjectIterator<Set<Type>> cursor = typesByEntityId.iterator();
  while (cursor.hasNext()) {
    // advance() positions the iterator on the next entry; key()/value() read that entry.
    cursor.advance();
    int entityId = cursor.key();

    // One matching type is enough, so stop scanning at the first hit.
    boolean hasWantedType = false;
    for (Type candidate : cursor.value()) {
      if (filteringTypes.contains(candidate)) {
        hasWantedType = true;
        break;
      }
    }

    if (hasWantedType) {
      matching.add(entities.getEntityById(entityId));
    }
  }
  return matching;
}
 
Example #2
Source File: MilneWittenEntityEntitySimilarity.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the collection size and builds, for every entity that has inlink data,
 * a bitmap of its inlink ids stored in {@code entity2vector}.
 *
 * @param entities entities whose inlink neighbors are fetched
 * @throws Exception propagated from the data access calls
 */
private void setupEntities(Entities entities) throws Exception {
  logger.debug("Initializing MilneWittenEntityEntitySimilarity for " + entities.size() + " entities");

  collectionSize = DataAccess.getCollectionSize();
  TIntObjectHashMap<int[]> inlinksByEntity = DataAccess.getInlinkNeighbors(entities);

  // inlinks are assumed to be pre-sorted.
  entity2vector = new TIntObjectHashMap<>();

  TIntObjectIterator<int[]> cursor = inlinksByEntity.iterator();
  while (cursor.hasNext()) {
    cursor.advance();
    int entityId = cursor.key();

    // Copy the inlink ids of this entity into a bitmap.
    RoaringBitmap inlinkBits = new RoaringBitmap();
    for (int inlink : cursor.value()) {
      inlinkBits.add(inlink);
    }
    entity2vector.put(entityId, inlinkBits);
  }

  logger.debug("Done initializing MilneWittenEntityEntitySimilarity for " + entities.size() + " entities");
}
 
Example #3
Source File: InlinkOverlapEntityEntitySimilarity.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Builds, for every entity that has inlink data, a bitmap of its inlink ids and
 * stores it in {@code entity2vector}. Does nothing (apart from logging) when
 * {@code entities} is empty.
 *
 * @param entities entities whose inlink neighbors are fetched
 * @throws Exception propagated from the data access call
 */
private void setupEntities(Entities entities) throws Exception {
  if (entities.size() == 0) {
    logger.debug("Skipping initialization of InlinkEntityEntitySimilarity for " + entities.size() + " entities");
    return;
  }

  logger.debug("Initializing InlinkEntityEntitySimilarity for " + entities.size() + " entities");

  entity2vector = new TIntObjectHashMap<>();
  TIntObjectHashMap<int[]> inlinksByEntity = DataAccess.getInlinkNeighbors(entities);

  TIntObjectIterator<int[]> cursor = inlinksByEntity.iterator();
  while (cursor.hasNext()) {
    cursor.advance();
    int entityId = cursor.key();

    // Copy the inlink ids of this entity into a bitmap.
    RoaringBitmap inlinkBits = new RoaringBitmap();
    for (int inlink : cursor.value()) {
      inlinkBits.add(inlink);
    }
    entity2vector.put(entityId, inlinkBits);
  }

  logger.debug("Done initializing InlinkEntityEntitySimilarity");
}
 
Example #4
Source File: DataAccess.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Computes all entity occurrence probabilities based on their incoming links.
 *
 * <p>The probability of an entity is its number of inlinks divided by the total
 * number of inlinks over all entities. When the total is zero (no entity has any
 * inlink), every entity gets probability {@code 0.0} instead of {@code NaN}.
 *
 * @return Map of Entity->Probability.
 * @throws EntityLinkingDataAccessException declared for the inlink lookup
 */
public static TIntDoubleHashMap getAllEntityProbabilities() throws EntityLinkingDataAccessException {
  TIntObjectHashMap<int[]> entityInlinks = getAllInlinks();

  TIntDoubleHashMap entityProbabilities = new TIntDoubleHashMap(entityInlinks.size(), 0.5f);

  // Get the total number of links.
  long totalLinkCount = 0;
  for (TIntObjectIterator<int[]> itr = entityInlinks.iterator(); itr.hasNext(); ) {
    itr.advance();
    totalLinkCount += itr.value().length;
  }

  // Derive probabilities from counts.
  for (TIntObjectIterator<int[]> itr = entityInlinks.iterator(); itr.hasNext(); ) {
    itr.advance();
    // Guard the division: 0 / 0.0 would otherwise store NaN for every entity.
    double probability = totalLinkCount == 0
        ? 0.0
        : (double) itr.value().length / (double) totalLinkCount;
    entityProbabilities.put(itr.key(), probability);
  }

  return entityProbabilities;
}
 
Example #5
Source File: DataAccessEntitiesCacheTarget.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code data_} to the cache file as a gzip-compressed binary stream:
 * first the number of entries, then for each entry its int key followed by the
 * DB id of its value.
 *
 * <p>The streams are opened in a try-with-resources statement so the file handle
 * is released even when a write fails part-way.
 *
 * @throws IOException if the cache file cannot be opened or written
 */
@Override
protected void cacheToDisk() throws IOException {
  File cacheFile = getCacheFile();
  // The file stream is declared separately so it is closed even if wrapping it fails.
  try (FileOutputStream fileOut = new FileOutputStream(cacheFile);
      DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new GZIPOutputStream(fileOut)))) {
    out.writeInt(data_.size());
    for (TIntObjectIterator<EntityType> itr = data_.iterator(); itr.hasNext(); ) {
      itr.advance();
      out.writeInt(itr.key());
      out.writeInt(itr.value().getDBId());
    }
    out.flush();
  }
}
 
Example #6
Source File: DataAccessSQLCache.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Copies all entity-to-keyphrases entries into the cache registered for the given
 * query signature, creating that cache first if none exists yet.
 *
 * @param querySignature key selecting the per-query cache
 * @param entityKeyphrases entries (entity id to keyphrase data) to add to the cache
 */
private synchronized void addToEntityKeyphrasesCache(String querySignature, TIntObjectHashMap<List<EntityKeyphraseData>> entityKeyphrases) {
  CachingHashMap<Integer, List<EntityKeyphraseData>> cacheForQuery = entityKeyphrasesCaches.get(querySignature);
  if (cacheForQuery == null) {
    // First use of this signature: create a cache sized from the configuration.
    cacheForQuery = new CachingHashMap<Integer, List<EntityKeyphraseData>>(
        EntityLinkingConfig.getAsInt(EntityLinkingConfig.ENTITIES_CACHE_SIZE));
    entityKeyphrasesCaches.put(querySignature, cacheForQuery);
  }

  TIntObjectIterator<List<EntityKeyphraseData>> cursor = entityKeyphrases.iterator();
  while (cursor.hasNext()) {
    cursor.advance();
    cacheForQuery.put(cursor.key(), cursor.value());
  }
}
 
Example #7
Source File: DataAccessSQLCache.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Copies all entity-to-keywords entries into the cache registered for the given
 * query signature, creating that cache first if none exists yet.
 *
 * @param querySignature key selecting the per-query cache
 * @param entityKeywords entries (entity id to keyword data) to add to the cache
 */
private synchronized void addToEntityKeywordsCache(String querySignature, TIntObjectHashMap<List<EntityKeywordsData>> entityKeywords) {
  CachingHashMap<Integer, List<EntityKeywordsData>> cacheForQuery = entityKeywordsCaches.get(querySignature);
  if (cacheForQuery == null) {
    // First use of this signature: create a cache sized from the configuration.
    cacheForQuery = new CachingHashMap<Integer, List<EntityKeywordsData>>(
        EntityLinkingConfig.getAsInt(EntityLinkingConfig.ENTITIES_CACHE_SIZE));
    entityKeywordsCaches.put(querySignature, cacheForQuery);
  }

  TIntObjectIterator<List<EntityKeywordsData>> cursor = entityKeywords.iterator();
  while (cursor.hasNext()) {
    cursor.advance();
    cacheForQuery.put(cursor.key(), cursor.value());
  }
}
 
Example #8
Source File: DataAccessKeyValueStore.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Collects, for every mention, the distinct category ids of all entities that the
 * mention can refer to.
 *
 * <p>Two defects of the earlier version are fixed here: the category iterator is
 * now advanced before its value is read, and the collected ids are actually stored
 * in the result (previously the returned map was always empty).
 *
 * @param mentions mentions to look up
 * @param language currently unused
 * @param isNamedEntity passed through to the entity lookup
 * @return map from mention to the distinct category ids of its candidate entities;
 *     a mention without any category maps to an empty array
 * @throws EntityLinkingDataAccessException declared for the underlying lookups
 */
@Override //TODO: language not used
public Map<String, int[]> getCategoryIdsForMentions(Set<String> mentions, Language language, boolean isNamedEntity) throws EntityLinkingDataAccessException {
  Map<String, Entities> entities = getEntitiesForMentions(mentions, 1.0, 0, isNamedEntity);

  Map<String, int[]> ret = new HashMap<>(entities.size());

  for (Entry<String, Entities> entry : entities.entrySet()) {
    List<Integer> entityIds = new ArrayList<>();
    for (Entity entity : entry.getValue()) {
      entityIds.add(entity.getId());
    }
    TIntObjectHashMap<int[]> categories = getCategoryIdsForEntitiesIds(ArrayUtils.toPrimitive(entityIds.toArray(new Integer[0])));

    // A set removes duplicates when several entities share a category.
    Set<Integer> types = new HashSet<>();
    for (TIntObjectIterator<int[]> it = categories.iterator(); it.hasNext(); ) {
      // advance() must be called before value(); without it the loop never progresses.
      it.advance();
      for (int categoryId : it.value()) {
        types.add(categoryId);
      }
    }

    ret.put(entry.getKey(), ArrayUtils.toPrimitive(types.toArray(new Integer[0])));
  }

  return ret;
}
 
Example #9
Source File: DataAccess.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the knowledge-base entities for the given internal ids and wraps each
 * one, together with its internal id, in an {@code Entity}.
 *
 * @param internalIds internal ids to resolve
 * @return one {@code Entity} per entry returned by the lookup
 * @throws EntityLinkingDataAccessException declared for the lookup
 */
public static Entities getAidaEntitiesForInternalIds(int[] internalIds) throws EntityLinkingDataAccessException {
  TIntObjectHashMap<KBIdentifiedEntity> kbEntitiesById = DataAccess.getKnowlegebaseEntitiesForInternalIds(internalIds);
  Entities result = new Entities();

  TIntObjectIterator<KBIdentifiedEntity> cursor = kbEntitiesById.iterator();
  while (cursor.hasNext()) {
    cursor.advance();
    KBIdentifiedEntity kbEntity = cursor.value();
    int internalId = cursor.key();
    result.add(new Entity(kbEntity, internalId));
  }
  return result;
}
 
Example #10
Source File: ComponentGridWidget.java    From GregTech with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Removes the widget from {@code originByWidget} and deletes every slot entry in
 * {@code widgetBySlotIndex} whose value is that same widget instance.
 *
 * @param widget widget to forget; compared by reference against the slot values
 */
private void clearWidgetFromMap(Widget widget) {
    this.originByWidget.remove(widget);
    for (TIntObjectIterator<Widget> cursor = this.widgetBySlotIndex.iterator(); cursor.hasNext(); ) {
        cursor.advance();
        if (cursor.value() != widget) {
            continue;
        }
        // Removal goes through the iterator, so the map is not modified behind its back.
        cursor.remove();
    }
}
 
Example #11
Source File: WikiCorpusTask.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Returns, for every entity, the set of entities it links to.
 *
 * <p>If a JSON cache file for the current database identifier exists and can be
 * parsed to a non-null map, that content is returned. Otherwise the map is derived
 * by inverting the inlink neighbors of all entities and, when non-empty, written
 * to the cache file.
 *
 * <p>The cache file reader is opened in a try-with-resources statement so that it
 * is closed after parsing (it was previously never closed).
 *
 * @return map from an entity id to the ids of the entities it has outlinks to
 * @throws EntityLinkingDataAccessException declared for the data access calls
 * @throws IOException if the cache file cannot be read or written
 * @throws SQLException declared by the original signature
 */
private static Map<Integer, Set<Integer>> computeEntityOutlinks() throws EntityLinkingDataAccessException, IOException, SQLException {
	Map<Integer, Set<Integer>> inLinkOutLinkMap = new HashMap<>();
	String file_name = EntityLinkingManager.getAidaDbIdentifierLight() + "_" + ENTITY_OUTLINK_CACHE_JSON;
	if (Files.exists(Paths.get(file_name))) {
		logger.info("Loading " + file_name + " from cache");
		Map<Integer, Set<Integer>> cache;
		try (FileReader fileReader = new FileReader(file_name);
				JsonReader jsonReader = new JsonReader(fileReader)) {
			cache = new GsonBuilder().enableComplexMapKeySerialization().create()
					.fromJson(jsonReader,
							new TypeToken<Map<Integer, Set<Integer>>>() {
							}.getType());
		}

		// An empty cache file parses to null; fall through and recompute in that case.
		if (cache != null) {
			inLinkOutLinkMap.putAll(cache);
			return inLinkOutLinkMap;
		}
	}
	logger.info("Computing " + ENTITY_OUTLINK_CACHE_JSON);
	TIntObjectHashMap<int[]> inlinkNeighbors = DataAccess.getInlinkNeighbors(DataAccess.getAllEntities());
	TIntObjectIterator<int[]> iterator = inlinkNeighbors.iterator();

	while (iterator.hasNext()) {
		iterator.advance();
		int outEntity = iterator.key();
		// Invert the relation: each inlink source gains this entity as an outlink target.
		for (int inEntity : iterator.value()) {
			inLinkOutLinkMap.computeIfAbsent(inEntity, unused -> new HashSet<>()).add(outEntity);
		}
	}

	if (!inLinkOutLinkMap.isEmpty()) {
		Gson gson = new Gson();
		Files.write(Paths.get(file_name), gson.toJson(inLinkOutLinkMap).getBytes());
	}

	return inLinkOutLinkMap;
}
 
Example #12
Source File: DataAccessCassandraIntegrationTest.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the types of the candidate entities for the mention "Merkel"
 * include the YAGO3 politician type.
 */
@Test public void testGetTypes() throws EntityLinkingDataAccessException {
  Entities candidates = DataAccess.getEntitiesForMention("Merkel", 1.0, 0, true);
  TIntObjectHashMap<Set<Type>> typesByEntity = DataAccess.getTypes(candidates);
  Type politician = new Type("YAGO3", "<wordnet_politician_110450303>");

  // Pool the types of all candidates; the test does not care which entity has the type.
  Set<Type> pooledTypes = new HashSet<>();
  TIntObjectIterator<Set<Type>> cursor = typesByEntity.iterator();
  while (cursor.hasNext()) {
    cursor.advance();
    pooledTypes.addAll(cursor.value());
  }

  assertTrue(pooledTypes.contains(politician));
}
 
Example #13
Source File: EquivalenceClasses.java    From metanome-algorithms with Apache License 2.0 5 votes vote down vote up
/**
 * Renders all equivalence classes as text: a header line, one line per entry
 * containing its key and the value's string form, and a closing line identical
 * to the header.
 *
 * @return the multi-line textual representation
 */
@Override
public String toString() {
	StringBuilder text = new StringBuilder();
	text.append("EquivalenceClasses\n");

	TIntObjectIterator<EquivalenceClass> cursor = this.iterator();
	while (cursor.hasNext()) {
		cursor.advance();
		text.append(String.format("ec(%d(\t", Integer.valueOf(cursor.key())))
			.append(String.format("{%s}\n", cursor.value().toString()));
	}

	return text.append("EquivalenceClasses\n").toString();
}
 
Example #14
Source File: WordCluster.java    From fnlp with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Computes the probabilities in a single pass, to save time.
 *
 * <p>Every value in {@code wordProb} and in the nested maps of {@code pcc} is
 * divided by {@code totalword}; for each non-negative key of {@code wordProb}
 * a {@code Cluster} is created and registered in {@code clusters}.
 */
private void statisticProb() {
	System.out.println("统计概率");
	for (TIntFloatIterator wordIt = wordProb.iterator(); wordIt.hasNext(); ) {
		wordIt.advance();
		float prob = wordIt.value()/totalword;
		wordIt.setValue(prob);
		int wordId = wordIt.key();
		// Entries with a negative key are normalized but get no cluster.
		if (wordId >= 0) {
			Cluster cluster = new Cluster(wordId, prob, alpahbet.lookupString(wordId));
			clusters.put(wordId, cluster);
		}
	}

	for (TIntObjectIterator<TIntFloatHashMap> outerIt = pcc.iterator(); outerIt.hasNext(); ) {
		outerIt.advance();
		TIntFloatIterator innerIt = outerIt.value().iterator();
		while (innerIt.hasNext()) {
			innerIt.advance();
			innerIt.setValue(innerIt.value()/totalword);
		}
	}

}
 
Example #15
Source File: WordCluster.java    From fnlp with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Lists the word groups that contain at least two words, one group per line.
 *
 * <p>Words are grouped by the value {@code getHead} returns for their index.
 * Each output line starts with the {@code wordProb} value stored under the group
 * key, followed by the words of the group, all separated by single spaces.
 *
 * @return the textual listing of all groups with two or more words
 */
public String toString(){
	TIntObjectHashMap<TLinkedHashSet<String>> groups = new TIntObjectHashMap<TLinkedHashSet<String>>();

	// Group every word of the alphabet under its head.
	int wordCount = 0;
	while (wordCount < alpahbet.size()) {
		int head = getHead(wordCount);
		TLinkedHashSet<String> members = groups.get(head);
		if (members == null) {
			members = new TLinkedHashSet<String>();
			groups.put(head, members);
		}
		members.add(alpahbet.lookupString(wordCount));
		wordCount++;
	}

	StringBuilder out = new StringBuilder();
	for (TIntObjectIterator<TLinkedHashSet<String>> cursor = groups.iterator(); cursor.hasNext(); ) {
		cursor.advance();
		TLinkedHashSet<String> members = cursor.value();
		// Groups with a single word are not reported.
		if (members.size() >= 2) {
			out.append(wordProb.get(cursor.key())).append(" ");
			for (String word : members) {
				out.append(word).append(" ");
			}
			out.append("\n");
		}
	}

	return out.toString();

}
 
Example #16
Source File: WikiCorpusTask.java    From ambiverse-nlu with Apache License 2.0 4 votes vote down vote up
/**
 * Determines a NER label for every usable mention of the given entities and
 * records the result per entity id in both {@code entityMentionLabelsMapCache}
 * and the supplied {@code entityMentionLabelsMap}.
 *
 * @param processingEntities entities whose mentions are labelled
 * @param entityMentionLabelsMap output map; receives one entry (mention text to
 *     labelled mention copy) per entity id returned by the mention lookup
 * @throws EntityLinkingDataAccessException declared for the data access calls
 * @throws InterruptedException if the current thread is found interrupted while
 *     mentions are being processed
 */
private void computeNonprocessedEntityMentionLabels(Set<Entity> processingEntities,
														HashMap<Integer, Map<String, MentionObject>> entityMentionLabelsMap)
			throws EntityLinkingDataAccessException, InterruptedException {
		int[] entityIds = processingEntities
				.stream()
				.mapToInt(Entity::getId)
				.toArray();
		TIntObjectHashMap<int[]> typeIdsByEntity = DataAccess.getTypesIdsForEntitiesIds(entityIds);

		TIntObjectHashMap<List<MentionObject>> mentionsByEntity = DataAccess.getMentionsForEntities(new Entities(processingEntities));

		for (TIntObjectIterator<List<MentionObject>> cursor = mentionsByEntity.iterator(); cursor.hasNext(); ) {
			cursor.advance();
			Map<String, MentionObject> labelledMentions = new HashMap<>();

			int eid = cursor.key();
			// Label derived from the entity's type ids; used unless a rule below overrides it.
			NerType.Label entityLabel = NerType.getNerTypeForTypeIds(typeIdsByEntity.get(eid));

			for (MentionObject mention : cursor.value()) {
				if (Thread.interrupted()) {
					throw new InterruptedException();
				}
				String surface = mention.getMention();
				String lowered = surface.toLowerCase();

				// getting rid of the junk from the aida database
				boolean junk = stopwords.contains(lowered)
						|| surface.contains("<SPAN")
						|| surface.contains("=")
						|| surface.contains("<!--")
						|| (surface.contains("(") && surface.contains(")"))
						|| isDate(surface.trim())
						// a mention that is exactly one punctuation character
						|| surface.matches("[.,\\/#!$%\\^&\\*;:{}=\\-_`~()]")
						|| surface.endsWith("'S");
				if (junk) {
					continue;
				}

				NerType.Label label = entityLabel;
				// i.e. United States is a location, not an organization
				if (knownCountries.contains(lowered)) {
					label = NerType.Label.LOCATION;
				}
				// Checked second, so a language name wins over a country name.
				if (languagesList.contains(lowered)) {
					label = NerType.Label.MISC;
				}

				MentionObject labelled = mention.copy();
				labelled.setLabel(label);
				labelledMentions.put(surface, labelled);
			}
			entityMentionLabelsMapCache.put(eid, labelledMentions);
			entityMentionLabelsMap.put(eid, labelledMentions);
		}
	}
 
Example #17
Source File: DataWatcherSerializer.java    From Carbon-2 with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Serializes the given data watcher entries into a byte array.
 *
 * <p>Each entry is written as one header byte (type id shifted left by 5, combined
 * with the low 5 bits of the entry key) followed by the value in a type-specific
 * encoding. After all entries a single byte {@code 127} is written.
 *
 * @param objects entries to encode, keyed by their index
 * @return the serialized bytes
 */
public static byte[] encodeData(TIntObjectMap<DataWatcherObject> objects) {
	PacketDataSerializer out = new PacketDataSerializer(Unpooled.buffer());
	for (TIntObjectIterator<DataWatcherObject> cursor = objects.iterator(); cursor.hasNext(); ) {
		cursor.advance();
		DataWatcherObject entry = cursor.value();

		// Header byte: type id in the upper 3 bits, entry key in the lower 5 bits.
		int typeBits = entry.type.getId() << 5;
		int indexBits = cursor.key() & 0x1F;
		out.writeByte((typeBits | indexBits) & 0xFF);

		// Types without a case below contribute only their header byte.
		switch (entry.type) {
			case BYTE:
				out.writeByte((byte) entry.value);
				break;
			case SHORT:
				out.writeShort((short) entry.value);
				break;
			case INT:
				out.writeInt((int) entry.value);
				break;
			case FLOAT:
				out.writeFloat((float) entry.value);
				break;
			case STRING:
				PacketDataSerializerHelper.writeString(out, (String) entry.value);
				break;
			case ITEMSTACK:
				PacketDataSerializerHelper.writeItemStack(out, (ItemStack) entry.value);
				break;
			case VECTOR3I: {
				BlockPosition position = (BlockPosition) entry.value;
				out.writeInt(position.getX());
				out.writeInt(position.getY());
				out.writeInt(position.getZ());
				break;
			}
			case VECTOR3F: {
				Vector3f rotation = (Vector3f) entry.value;
				out.writeFloat(rotation.getX());
				out.writeFloat(rotation.getY());
				out.writeFloat(rotation.getZ());
				break;
			}
		}
	}
	out.writeByte(127);
	return Utils.toArray(out);
}
 
Example #18
Source File: DataWatcherSerializer.java    From Carbon-2 with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Serializes the given data watcher entries into a byte array.
 *
 * <p>Each entry is written as one header byte (type id shifted left by 5, combined
 * with the low 5 bits of the entry key) followed by the value in a type-specific
 * encoding. After all entries a single byte {@code 127} is written.
 *
 * @param objects entries to encode, keyed by their index
 * @return the serialized bytes
 */
public static byte[] encodeData(TIntObjectMap<DataWatcherObject> objects) {
	PacketDataSerializer out = new PacketDataSerializer(Unpooled.buffer());
	for (TIntObjectIterator<DataWatcherObject> cursor = objects.iterator(); cursor.hasNext(); ) {
		cursor.advance();
		DataWatcherObject entry = cursor.value();

		// Header byte: type id in the upper 3 bits, entry key in the lower 5 bits.
		int typeBits = entry.type.getId() << 5;
		int indexBits = cursor.key() & 0x1F;
		out.writeByte((typeBits | indexBits) & 0xFF);

		// Types without a case below contribute only their header byte.
		switch (entry.type) {
			case BYTE:
				out.writeByte((byte) entry.value);
				break;
			case SHORT:
				out.writeShort((short) entry.value);
				break;
			case INT:
				out.writeInt((int) entry.value);
				break;
			case FLOAT:
				out.writeFloat((float) entry.value);
				break;
			case STRING:
				PacketDataSerializerHelper.writeString(out, (String) entry.value);
				break;
			case ITEMSTACK:
				PacketDataSerializerHelper.writeItemStack(out, (ItemStack) entry.value);
				break;
			case VECTOR3I: {
				BlockPosition position = (BlockPosition) entry.value;
				out.writeInt(position.getX());
				out.writeInt(position.getY());
				out.writeInt(position.getZ());
				break;
			}
			case VECTOR3F: {
				Vector3f rotation = (Vector3f) entry.value;
				out.writeFloat(rotation.getX());
				out.writeFloat(rotation.getY());
				out.writeFloat(rotation.getZ());
				break;
			}
		}
	}
	out.writeByte(127);
	return Utils.toArray(out);
}