Java Code Examples for gnu.trove.set.TIntSet#addAll()

The following examples show how to use gnu.trove.set.TIntSet#addAll(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AbstractAttributeClustering.java    From JedAIToolkit with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the attribute comparisons for every attribute id below the loop limit.
 *
 * <p>The limit is {@code attributesDelimiter} when that value is positive and
 * {@code noOfAttributes} otherwise. For each attribute, the ids found in the
 * inverted index under any of its signatures are gathered into one set, which
 * is then handed to the Clean-Clean ER or Dirty ER comparison routine.
 * {@code globalMaxSimilarities} is re-allocated before the comparisons start.
 */
protected void compareAttributes() {
    globalMaxSimilarities = new float[noOfAttributes];

    // A single set is recycled across attributes; it is emptied at the top of each pass.
    final TIntSet candidateAttrs = new TIntHashSet();
    final int attributeLimit = attributesDelimiter > 0 ? attributesDelimiter : noOfAttributes;
    for (int attributeId = 0; attributeId < attributeLimit; attributeId++) {
        candidateAttrs.clear();

        for (String signature : attributeModels[DATASET_1][attributeId].getSignatures()) {
            final TIntList matchingAttrs = invertedIndex.get(signature);
            // Signatures that are missing from the inverted index contribute nothing.
            if (matchingAttrs != null) {
                candidateAttrs.addAll(matchingAttrs);
            }
        }

        if (attributesDelimiter > 0) {
            // Clean-Clean ER
            executeCleanCleanErComparisons(attributeId, candidateAttrs);
        } else {
            // Dirty ER
            executeDirtyErComparisons(attributeId, candidateAttrs);
        }
    }
}
 
Example 2
Source File: ExtendedSortedNeighborhoodBlocking.java    From JedAIToolkit with Apache License 2.0 6 votes vote down vote up
/**
 * Builds unilateral blocks by sliding a window of {@code windowSize}
 * consecutive keys over the lexicographically sorted keys of
 * {@code invertedIndexD1}.
 *
 * <p>For every window position the entity ids of all keys in the window are
 * merged; a block is added only when the merged set holds more than one id.
 */
@Override
protected void parseIndex() {
    final String[] orderedKeys = invertedIndexD1.keySet().toArray(new String[0]);
    Arrays.sort(orderedKeys);

    // Last index at which a full window still fits; negative when there are
    // fewer keys than the window size, in which case no block is produced.
    final int lastWindowStart = orderedKeys.length - windowSize;
    for (int start = 0; start <= lastWindowStart; start++) {
        final TIntSet windowEntities = new TIntHashSet();
        final int end = start + windowSize;
        for (int pos = start; pos < end; pos++) {
            windowEntities.addAll(invertedIndexD1.get(orderedKeys[pos]));
        }

        if (windowEntities.size() > 1) {
            blocks.add(new UnilateralBlock(windowEntities.toArray()));
        }
    }
}
 
Example 3
Source File: UnitUtil.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Adds the n-gram units of {@code tokens} to {@code unitSet}.
 *
 * <p>For {@code ngram > 1} two padded units are added first: one whose first
 * slot is 0 followed by the first {@code ngram - 1} tokens, and one holding
 * the last {@code ngram - 1} tokens followed by a 0 in the last slot. After
 * that, every window of {@code ngram} consecutive tokens is added.
 *
 * <p>Nothing is added when {@code tokens} is {@code null} or contains fewer
 * than {@code ngram - 1} elements. Previously the latter case raised an
 * {@code IndexOutOfBoundsException} from {@code System.arraycopy} while the
 * padded units were being built.
 *
 * @param ngram      size of each unit
 * @param tokens     token ids to build units from; may be {@code null}
 * @param usedTokens if not {@code null}, receives all of {@code tokens} once
 *                   units have been generated
 * @param unitSet    receives the generated units
 */
public static void getNgram(int ngram, int[] tokens, TIntSet usedTokens, Set<TIntArrayList> unitSet) {
  if (tokens == null) {
    return;
  }
  int[] unitArray = new int[ngram];
  int unitSizeMinusOne = ngram - 1;
  if (tokens.length < unitSizeMinusOne) {
    // Too few tokens to fill even a padded unit; copying below would run
    // past the end of tokens, so there is nothing to generate.
    return;
  }
  if (ngram > 1) {
    unitArray[0] = 0;
    // we skip the first element of the first unit because it should stay 0
    System.arraycopy(tokens, 0, unitArray, 1, unitSizeMinusOne);
    unitSet.add(new TIntArrayList(unitArray));

    unitArray[ngram - 1] = 0;
    // we skip the last element of the last unit because it should stay 0
    System.arraycopy(tokens, tokens.length - unitSizeMinusOne, unitArray, 0, unitSizeMinusOne);
    unitSet.add(new TIntArrayList(unitArray));
  }
  // Full windows; the loop body is skipped when tokens.length == ngram - 1.
  for (int i = 0; i < tokens.length - unitSizeMinusOne; i++) {
    System.arraycopy(tokens, i, unitArray, 0, ngram);
    unitSet.add(new TIntArrayList(unitArray));
  }
  if (usedTokens != null) {
    usedTokens.addAll(tokens);
  }
}
 
Example 4
Source File: AbstractAttributeClustering.java    From JedAIToolkit with Apache License 2.0 5 votes vote down vote up
/**
 * Connects attributes in a similarity graph and clusters them by the graph's
 * connected components.
 *
 * <p>For each attribute id below the loop limit ({@code attributesDelimiter}
 * when positive, otherwise {@code noOfAttributes}), the ids found in the
 * inverted index under any of its signatures are gathered and passed, with the
 * graph, to the Clean-Clean ER or Dirty ER connection routine.
 *
 * @return one cluster set (for {@code DATASET_1}) when
 *         {@code attributesDelimiter} is negative, otherwise two cluster sets
 *         (for {@code DATASET_1} and {@code DATASET_2}, in that order)
 */
protected AttributeClusters[] clusterAttributes() {
    final UndirectedGraph graph = new UndirectedGraph(noOfAttributes);

    // A single set is recycled across attributes; it is emptied at the top of each pass.
    final TIntSet candidateAttrs = new TIntHashSet();
    final int attributeLimit = attributesDelimiter > 0 ? attributesDelimiter : noOfAttributes;
    for (int attributeId = 0; attributeId < attributeLimit; attributeId++) {
        candidateAttrs.clear();

        for (String signature : attributeModels[DATASET_1][attributeId].getSignatures()) {
            final TIntList matchingAttrs = invertedIndex.get(signature);
            // Signatures that are missing from the inverted index contribute nothing.
            if (matchingAttrs != null) {
                candidateAttrs.addAll(matchingAttrs);
            }
        }

        if (attributesDelimiter > 0) {
            // Clean-Clean ER
            connectCleanCleanErComparisons(attributeId, candidateAttrs, graph);
        } else {
            // Dirty ER
            connectDirtyErComparisons(attributeId, candidateAttrs, graph);
        }
    }

    final ConnectedComponents components = new ConnectedComponents(graph);
    // NOTE(review): the loop above treats attributesDelimiter == 0 as Dirty ER,
    // whereas this test treats it as Clean-Clean ER - confirm 0 cannot occur.
    if (attributesDelimiter < 0) {
        // Dirty ER
        return new AttributeClusters[]{clusterAttributes(DATASET_1, components)};
    }

    // Clean-Clean ER
    return new AttributeClusters[]{
        clusterAttributes(DATASET_1, components),
        clusterAttributes(DATASET_2, components)
    };
}
 
Example 5
Source File: FuzzySetSimJoin.java    From JedAIToolkit with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the Jaccard similarity of two int arrays viewed as sets, i.e. the
 * number of distinct values present in both divided by the number of distinct
 * values present in either.
 *
 * @param r first collection of values; duplicates are ignored
 * @param s second collection of values; duplicates are ignored
 * @return the intersection size over the union size; {@code NaN} when both
 *         arrays are empty, since the division is then 0 / 0
 */
private static float jaccard(int[] r, int[] s) {
    final TIntSet left = new TIntHashSet(r);
    final TIntSet right = new TIntHashSet(s);

    // Start from a copy of the left set and keep only what the right set also has.
    final TIntSet common = new TIntHashSet(left);
    common.retainAll(right);

    // Start from another copy of the left set and pour the right set into it.
    final TIntSet combined = new TIntHashSet(left);
    combined.addAll(right);

    final float sharedCount = common.size();
    final float totalCount = combined.size();
    return sharedCount / totalCount;
}
 
Example 6
Source File: ExtendedSortedNeighborhoodBlocking.java    From JedAIToolkit with Apache License 2.0 5 votes vote down vote up
/**
 * Builds bilateral blocks by sliding a window of {@code windowSize}
 * consecutive keys over the lexicographically sorted union of the keys of
 * {@code invertedIndexD1} and {@code invertedIndexD2}.
 *
 * <p>For every window position the entity ids of each index are merged
 * separately; a block is added only when both merged sets are non-empty.
 */
@Override
protected void parseIndices() {
    final Set<String> allKeys = new HashSet<>(invertedIndexD1.keySet());
    allKeys.addAll(invertedIndexD2.keySet());
    final String[] orderedKeys = allKeys.toArray(new String[0]);
    Arrays.sort(orderedKeys);

    // Last index at which a full window still fits; negative when there are
    // fewer keys than the window size, in which case no block is produced.
    final int lastWindowStart = orderedKeys.length - windowSize;
    for (int start = 0; start <= lastWindowStart; start++) {
        final TIntSet firstSideIds = new TIntHashSet();
        final TIntSet secondSideIds = new TIntHashSet();
        for (int offset = 0; offset < windowSize; offset++) {
            final String key = orderedKeys[start + offset];

            // A key may be present in only one of the two indices.
            final TIntList fromFirst = invertedIndexD1.get(key);
            if (fromFirst != null) {
                firstSideIds.addAll(fromFirst);
            }

            final TIntList fromSecond = invertedIndexD2.get(key);
            if (fromSecond != null) {
                secondSideIds.addAll(fromSecond);
            }
        }

        if (firstSideIds.isEmpty() || secondSideIds.isEmpty()) {
            continue;
        }
        blocks.add(new BilateralBlock(firstSideIds.toArray(), secondSideIds.toArray()));
    }
}
 
Example 7
Source File: CocktailParty.java    From ambiverse-nlu with Apache License 2.0 4 votes vote down vote up
/**
 * Determines which dangling mention nodes and entity nodes should be removed
 * from the graph. Dangling mentions influence the minimum weighted degree but
 * can never be improved.
 *
 * <p>A mention node is selected when it has at least one successor and
 * {@code g.isLocalMention} reports true for it. For every selected mention the
 * solution is set to the candidate returned by
 * {@code getBestLocalCandidateAndScore}, and the mention is reported to the
 * graph tracer. An entity node is selected when every one of its successors is
 * a selected mention (which also holds for an entity with no successors).
 *
 * <p>This method only collects the ids; it does not itself modify the graph.
 *
 * @param g        Graph whose mention and entity nodes are inspected.
 * @param solution Solution will be updated, setting the correct entity using
 *                local similarity for dangling mentions.
 * @return Node ids of nodes to remove.
 */
private TIntSet removeUnconnectedMentionEntityPairs(Graph g, Map<ResultMention, List<ResultEntity>> solution) {
  TIntSet danglingMentions = new TIntHashSet();
  for (int mentionId : g.getMentionNodesIds().values()) {
    GraphNode node = g.getNode(mentionId);
    Mention mention = (Mention) node.getNodeData();
    TIntDoubleHashMap candidates = node.getSuccessors();
    // Mentions without any candidate are left alone; of the rest, only local mentions are handled.
    if (candidates.size() != 0 && g.isLocalMention(mentionId)) {
      logger.debug("local mention removed: " + mentionId + " " + mention);
      danglingMentions.add(mentionId);
      GraphTracer.gTracer.addMentionToDangling(g.getName(), mention.getMention(), mention.getCharOffset());
      // Fall back to the best local candidate for this mention.
      Pair<Integer, Double> best = getBestLocalCandidateAndScore(candidates);
      int bestEntity = best.getKey();
      double score = best.getValue();
      updateSolution(solution, g, mention, bestEntity, score);
    }
  }

  // Entities whose neighbors are all among the removed mentions go as well.
  TIntSet orphanedEntities = new TIntHashSet();
  for (int entityId : g.getEntityNodesIds().values()) {
    TIntDoubleHashMap neighbors = g.getNode(entityId).getSuccessors();
    int removedNeighbors = 0;
    TIntDoubleIterator cursor = neighbors.iterator();
    while (cursor.hasNext()) {
      cursor.advance();
      if (danglingMentions.contains(cursor.key())) {
        removedNeighbors++;
      }
    }
    if (neighbors.size() == removedNeighbors) {
      orphanedEntities.add(entityId);
    }
  }

  // Merge mentions and entity candidates into a single result set.
  TIntSet result = new TIntHashSet(danglingMentions.size() + orphanedEntities.size());
  result.addAll(danglingMentions);
  result.addAll(orphanedEntities);
  return result;
}