gnu.trove.set.TIntSet#iterator

Source File: AbstractAttributeClustering.java From JedAIToolkit with Apache License 2.0

6 votes

/**
 * Raises the recorded maximum similarities using comparisons between
 * {@code attributeId} and its co-occurring attributes, all taken from
 * {@code DATASET_1}.
 *
 * @param attributeId id of the attribute being compared against its neighbors
 * @param coOccurringAttrs ids of the attributes co-occurring with {@code attributeId}
 */
private void executeDirtyErComparisons(int attributeId, TIntSet coOccurringAttrs) {
    final TIntIterator neighbors = coOccurringAttrs.iterator();
    while (neighbors.hasNext()) {
        final int otherId = neighbors.next();
        // Only neighbors with a strictly larger id are compared: this skips
        // attributeId itself and avoids repeating a comparison.
        if (attributeId < otherId) {
            final float score = attributeModels[DATASET_1][attributeId]
                    .getSimilarity(attributeModels[DATASET_1][otherId]);

            // Both endpoints keep track of the best similarity seen so far.
            if (score > globalMaxSimilarities[attributeId]) {
                globalMaxSimilarities[attributeId] = score;
            }
            if (score > globalMaxSimilarities[otherId]) {
                globalMaxSimilarities[otherId] = score;
            }
        }
    }
}

Source File: UnitUtil.java From ambiverse-nlu with Apache License 2.0

5 votes

/**
 * Builds a new id-to-word map restricted to the given token ids.
 *
 * @param usedTokens ids of the tokens to keep
 * @param idsWords lookup from token id to word
 * @return a new map holding, for every id in {@code usedTokens}, the value
 *         that {@code idsWords.get} returns for that id
 */
private static TIntObjectHashMap<String> getUsedWords(TIntSet usedTokens, TIntObjectHashMap<String> idsWords) {
  TIntObjectHashMap<String> selected = new TIntObjectHashMap<>();
  TIntIterator tokenIds = usedTokens.iterator();
  while (tokenIds.hasNext()) {
    int tokenId = tokenIds.next();
    String word = idsWords.get(tokenId);
    selected.put(tokenId, word);
  }
  return selected;
}

Source File: DataAccess.java From ambiverse-nlu with Apache License 2.0

5 votes

/**
 * Looks up the count of every given keyword through {@code DataAccessCache}
 * and returns the results keyed by keyword id. The lookup is timed with
 * {@code RunningTimer} under the label {@code "DataAccess:KWDocFreq"}.
 *
 * @param keywords ids of the keywords to look up
 * @return a new map from keyword id to the count reported by the cache
 * @throws EntityLinkingDataAccessException declared by this method; presumably
 *         raised by the cache lookup (not visible here)
 */
public static TIntIntHashMap getKeywordDocumentFrequencies(TIntSet keywords) throws EntityLinkingDataAccessException {
  logger.debug("Get keyword-document frequencies.");
  Integer runId = RunningTimer.recordStartTime("DataAccess:KWDocFreq");

  // Size the map up front from the number of keywords and the default load factor.
  int initialCapacity = (int) (keywords.size() / Constants.DEFAULT_LOAD_FACTOR);
  TIntIntHashMap frequencies = new TIntIntHashMap(initialCapacity);

  TIntIterator ids = keywords.iterator();
  while (ids.hasNext()) {
    int id = ids.next();
    frequencies.put(id, DataAccessCache.singleton().getKeywordCount(id));
  }

  RunningTimer.recordEndTime("DataAccess:KWDocFreq", runId);
  return frequencies;
}

Source File: DataAccess.java From ambiverse-nlu with Apache License 2.0

5 votes

/**
 * Looks up the count of every given unit id for the given unit type through
 * {@code DataAccessCache} and returns the results keyed by unit id.
 *
 * <p>NOTE(review): the timer label is {@code "DataAccess:KWDocFreq"}, the same
 * label the keyword variant uses; confirm whether a separate label was intended.
 *
 * @param keywords ids of the units to look up
 * @param unitType unit type passed through to the cache lookup
 * @return a new map from unit id to the count reported by the cache
 * @throws EntityLinkingDataAccessException declared by this method; presumably
 *         raised by the cache lookup (not visible here)
 */
public static TIntIntHashMap getUnitDocumentFrequencies(TIntSet keywords, UnitType unitType) throws EntityLinkingDataAccessException {
  logger.debug("Get Unit-document frequencies.");
  Integer runId = RunningTimer.recordStartTime("DataAccess:KWDocFreq");

  // Size the map up front from the number of ids and the default load factor.
  int initialCapacity = (int) (keywords.size() / Constants.DEFAULT_LOAD_FACTOR);
  TIntIntHashMap frequencies = new TIntIntHashMap(initialCapacity);

  TIntIterator ids = keywords.iterator();
  while (ids.hasNext()) {
    int id = ids.next();
    frequencies.put(id, DataAccessCache.singleton().getUnitCount(id, unitType));
  }

  RunningTimer.recordEndTime("DataAccess:KWDocFreq", runId);
  return frequencies;
}

Source File: GraphConfidenceEstimator.java From ambiverse-nlu with Apache License 2.0

5 votes

/**
 * Creates a perturbed copy of {@code solution} in which a random subset of
 * mentions is re-assigned to a different entity, or to the placeholder
 * {@code -1} when no alternative candidate remains.
 *
 * <p>NOTE(review): flip candidates are taken from the parameter {@code g},
 * whereas candidate entities are read from the field {@code g_}; confirm that
 * both refer to the same graph.
 *
 * @param g graph handed to {@code getFlipCandidates}
 * @param solution current mention-to-entity assignment; the original comment
 *        states it has at least 2 mentions, the other case being handled in
 *        {@code estimate()}
 * @param mentionFlipPercentage fraction of {@code solution.size()} used to
 *        bound the number of flips
 * @return a configuration carrying the flipped mentions, the flipped mapping
 *         and the set of node ids present in that mapping
 */
private Configuration getRandomConfiguration(Graph g, Map<Integer, Integer> solution, float mentionFlipPercentage) {
  Configuration config = new Configuration();

  // Bound for the number of flips: the rounded share of the solution size,
  // but never below 1. The number actually requested is drawn below that
  // bound and is likewise at least 1.
  int flipBound = Math.max(1, Math.round(solution.size() * mentionFlipPercentage));
  int flipCount = Math.max(1, random_.nextInt(flipBound));

  TIntSet candidates = getFlipCandidates(g, solution);
  TIntSet chosenMentions = getRandomElements(candidates, flipCount);
  config.flippedMentions_ = chosenMentions;

  Map<Integer, Integer> perturbed = new HashMap<Integer, Integer>(solution);
  TIntIterator mentions = chosenMentions.iterator();
  while (mentions.hasNext()) {
    int mentionId = mentions.next();
    TIntDoubleHashMap alternatives = new TIntDoubleHashMap(getConnectedEntitiesWithScores(g_, mentionId));
    // The entity of the current solution must not be picked again.
    alternatives.remove(solution.get(mentionId));

    Integer replacement;
    if (alternatives.size() > 0) {
      TIntDoubleHashMap probabilities = CollectionUtils.normalizeValuesToSum(alternatives);
      replacement = getRandomEntity(mentionId, probabilities, random_);
    } else {
      // Placeholder resembling a missing entity (will not contribute to
      // coherence at all).
      replacement = -1;
    }
    perturbed.put(mentionId, replacement);
  }
  config.mapping_ = perturbed;

  // Store active nodes in graph for faster lookup.
  TIntHashSet activeNodes = new TIntHashSet();
  for (Map.Entry<Integer, Integer> assignment : perturbed.entrySet()) {
    activeNodes.add(assignment.getKey());
    activeNodes.add(assignment.getValue());
  }
  config.presentGraphNodes_ = activeNodes;

  return config;
}

Source File: AbstractAttributeClustering.java From JedAIToolkit with Apache License 2.0

5 votes

/**
 * Adds an edge between {@code attributeId} (from {@code DATASET_1}) and each
 * co-occurring attribute of {@code DATASET_2} whose similarity exceeds
 * {@code a} times the recorded maximum similarity of either endpoint.
 *
 * @param attributeId id of the {@code DATASET_1} attribute
 * @param coOccurringAttrs ids of the co-occurring {@code DATASET_2} attributes
 * @param similarityGraph graph that receives the edges
 */
private void connectCleanCleanErComparisons(int attributeId, TIntSet coOccurringAttrs, UndirectedGraph similarityGraph) {
    final TIntIterator neighbors = coOccurringAttrs.iterator();
    while (neighbors.hasNext()) {
        final int otherId = neighbors.next();
        // Neighbor ids are shifted by attributesDelimiter when used as an
        // index into globalMaxSimilarities and as a graph node.
        final int shiftedOtherId = otherId + attributesDelimiter;
        final float score = attributeModels[DATASET_1][attributeId]
                .getSimilarity(attributeModels[DATASET_2][otherId]);

        final boolean beatsOwnThreshold = a * globalMaxSimilarities[attributeId] < score;
        if (beatsOwnThreshold || a * globalMaxSimilarities[shiftedOtherId] < score) {
            similarityGraph.addEdge(attributeId, shiftedOtherId);
        }
    }
}

Source File: AbstractAttributeClustering.java From JedAIToolkit with Apache License 2.0

5 votes

/**
 * Adds an edge between {@code attributeId} and each co-occurring attribute
 * with a larger id whose similarity exceeds {@code a} times the recorded
 * maximum similarity of either endpoint. All models come from
 * {@code DATASET_1}.
 *
 * @param attributeId id of the attribute being connected
 * @param coOccurringAttrs ids of the attributes co-occurring with {@code attributeId}
 * @param similarityGraph graph that receives the edges
 */
private void connectDirtyErComparisons(int attributeId, TIntSet coOccurringAttrs, UndirectedGraph similarityGraph) {
    final TIntIterator neighbors = coOccurringAttrs.iterator();
    while (neighbors.hasNext()) {
        final int otherId = neighbors.next();
        // Only neighbors with a strictly larger id are compared: this skips
        // attributeId itself and avoids repeating a comparison.
        if (attributeId < otherId) {
            final float score = attributeModels[DATASET_1][attributeId]
                    .getSimilarity(attributeModels[DATASET_1][otherId]);

            final boolean beatsOwnThreshold = a * globalMaxSimilarities[attributeId] < score;
            if (beatsOwnThreshold || a * globalMaxSimilarities[otherId] < score) {
                similarityGraph.addEdge(attributeId, otherId);
            }
        }
    }
}

Source File: AbstractAttributeClustering.java From JedAIToolkit with Apache License 2.0

5 votes

/**
 * Raises the recorded maximum similarities using comparisons between
 * {@code attributeId} (from {@code DATASET_1}) and its co-occurring
 * attributes of {@code DATASET_2}.
 *
 * @param attributeId id of the {@code DATASET_1} attribute
 * @param coOccurringAttrs ids of the co-occurring {@code DATASET_2} attributes
 */
private void executeCleanCleanErComparisons(int attributeId, TIntSet coOccurringAttrs) {
    final TIntIterator neighbors = coOccurringAttrs.iterator();
    while (neighbors.hasNext()) {
        final int otherId = neighbors.next();
        // Neighbor ids are shifted by attributesDelimiter when used as an
        // index into globalMaxSimilarities.
        final int shiftedOtherId = otherId + attributesDelimiter;
        final float score = attributeModels[DATASET_1][attributeId]
                .getSimilarity(attributeModels[DATASET_2][otherId]);

        // Both endpoints keep track of the best similarity seen so far.
        if (score > globalMaxSimilarities[attributeId]) {
            globalMaxSimilarities[attributeId] = score;
        }
        if (score > globalMaxSimilarities[shiftedOtherId]) {
            globalMaxSimilarities[shiftedOtherId] = score;
        }
    }
}

Java Code Examples for gnu.trove.set.TIntSet#iterator()