Java Code Examples for gnu.trove.set.TIntSet#size()

The following examples show how to use gnu.trove.set.TIntSet#size(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SparseArrayOfInts.java    From morpheus-core with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the distinct int values of this array in order of first occurrence.
 *
 * <p>Elements are read by index from 0 up to {@code length()}. Scanning stops early once
 * the number of distinct values seen reaches {@code limit}; that check runs only after a
 * new value has been added.</p>
 *
 * @param limit the number of distinct values at which scanning stops; when it equals
 *              {@code Integer.MAX_VALUE} the working collections start at capacity 100
 * @return the array produced by the builder holding the distinct values
 */
@Override
public final Array<Integer> distinct(int limit) {
    final int initialSize;
    if (limit == Integer.MAX_VALUE) {
        // "No limit" requested: start small rather than pre-sizing to the maximum.
        initialSize = 100;
    } else {
        initialSize = limit;
    }
    final TIntSet seen = new TIntHashSet(initialSize);
    final ArrayBuilder<Integer> distinctValues = ArrayBuilder.of(initialSize, Integer.class);
    int index = 0;
    while (index < length()) {
        final int candidate = getInt(index);
        index++;
        if (!seen.add(candidate)) {
            // Already encountered earlier in the scan.
            continue;
        }
        distinctValues.addInt(candidate);
        if (seen.size() >= limit) {
            break;
        }
    }
    return distinctValues.toArray();
}
 
Example 2
Source File: SparseArrayWithIntCoding.java    From morpheus-core with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the distinct values of this array in order of first occurrence.
 *
 * <p>Distinctness is decided on the int code returned by {@code getInt(i)}; for every code
 * seen for the first time, the value returned by {@code getValue(i)} is appended to the
 * result. Scanning stops once the number of distinct codes reaches {@code limit}; that
 * check runs only after a new code has been added.</p>
 *
 * @param limit the number of distinct codes at which scanning stops; when it equals
 *              {@code Integer.MAX_VALUE} the working collections start at capacity 100
 * @return the array produced by the builder holding the distinct values
 */
@Override
public Array<T> distinct(int limit) {
    final int initialSize;
    if (limit == Integer.MAX_VALUE) {
        initialSize = 100;
    } else {
        initialSize = limit;
    }
    final TIntSet seenCodes = new TIntHashSet(initialSize);
    final ArrayBuilder<T> distinctValues = ArrayBuilder.of(initialSize, type());
    for (int index = 0; index < length(); index++) {
        if (!seenCodes.add(getInt(index))) {
            // Code already encountered earlier in the scan.
            continue;
        }
        distinctValues.add(getValue(index));
        if (seenCodes.size() >= limit) {
            break;
        }
    }
    return distinctValues.toArray();
}
 
Example 3
Source File: MappedArrayOfInts.java    From morpheus-core with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the distinct int values held by this array, in the order they are first seen.
 *
 * <p>The scan walks indices 0 to {@code length() - 1} and ends early as soon as the count
 * of distinct values is at least {@code limit}. The limit is evaluated only right after a
 * previously unseen value has been recorded.</p>
 *
 * @param limit the distinct-value count that ends the scan; {@code Integer.MAX_VALUE}
 *              causes the working collections to be created with capacity 100
 * @return the array built from the distinct values
 */
@Override
public final Array<Integer> distinct(int limit) {
    final int sizeHint = (limit == Integer.MAX_VALUE) ? 100 : limit;
    final TIntSet unique = new TIntHashSet(sizeHint);
    final ArrayBuilder<Integer> output = ArrayBuilder.of(sizeHint, Integer.class);
    boolean limitReached = false;
    for (int pos = 0; !limitReached && pos < length(); pos++) {
        final int current = getInt(pos);
        if (unique.add(current)) {
            output.addInt(current);
            // Checked only after an insertion, exactly when the distinct count changes.
            limitReached = unique.size() >= limit;
        }
    }
    return output.toArray();
}
 
Example 4
Source File: MappedArrayWithIntCoding.java    From morpheus-core with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the distinct values held by this array, in the order they are first seen.
 *
 * <p>Each element's int code ({@code getInt(i)}) is used to detect duplicates; the value
 * itself ({@code getValue(i)}) is appended the first time its code appears. The scan ends
 * early as soon as the count of distinct codes is at least {@code limit}, which is
 * evaluated only right after a previously unseen code has been recorded.</p>
 *
 * @param limit the distinct-code count that ends the scan; {@code Integer.MAX_VALUE}
 *              causes the working collections to be created with capacity 100
 * @return the array built from the distinct values
 */
@Override
public Array<T> distinct(int limit) {
    final int sizeHint = (limit == Integer.MAX_VALUE) ? 100 : limit;
    final TIntSet uniqueCodes = new TIntHashSet(sizeHint);
    final ArrayBuilder<T> output = ArrayBuilder.of(sizeHint, type());
    boolean limitReached = false;
    for (int pos = 0; !limitReached && pos < length(); pos++) {
        final int currentCode = getInt(pos);
        if (uniqueCodes.add(currentCode)) {
            final T current = getValue(pos);
            output.add(current);
            // Checked only after an insertion, exactly when the distinct count changes.
            limitReached = uniqueCodes.size() >= limit;
        }
    }
    return output.toArray();
}
 
Example 5
Source File: DenseArrayOfInts.java    From morpheus-core with Apache License 2.0 6 votes vote down vote up
/**
 * Gathers the distinct int values of this array, preserving first-occurrence order.
 *
 * <p>Values are read sequentially from index 0. Once a newly added value brings the number
 * of distinct values to {@code limit} or more, the scan stops.</p>
 *
 * @param limit the distinct-value count at which to stop; if it is
 *              {@code Integer.MAX_VALUE}, a starting capacity of 100 is used for the
 *              set and the builder instead of the limit itself
 * @return the array produced from the gathered values
 */
@Override
public final Array<Integer> distinct(int limit) {
    int startCapacity = limit;
    if (startCapacity == Integer.MAX_VALUE) {
        startCapacity = 100;
    }
    final TIntSet known = new TIntHashSet(startCapacity);
    final ArrayBuilder<Integer> collected = ArrayBuilder.of(startCapacity, Integer.class);
    int cursor = 0;
    while (cursor < length()) {
        final int element = getInt(cursor++);
        final boolean firstOccurrence = known.add(element);
        if (firstOccurrence) {
            collected.addInt(element);
            if (known.size() >= limit) {
                break;
            }
        }
    }
    return collected.toArray();
}
 
Example 6
Source File: DenseArrayWithIntCoding.java    From morpheus-core with Apache License 2.0 6 votes vote down vote up
/**
 * Gathers the distinct values of this array, preserving first-occurrence order.
 *
 * <p>Duplicates are detected through the int code returned by {@code getInt(i)}; the
 * corresponding {@code getValue(i)} is stored when a code is seen for the first time. Once
 * a newly added code brings the number of distinct codes to {@code limit} or more, the
 * scan stops.</p>
 *
 * @param limit the distinct-code count at which to stop; if it is
 *              {@code Integer.MAX_VALUE}, a starting capacity of 100 is used for the
 *              set and the builder instead of the limit itself
 * @return the array produced from the gathered values
 */
@Override
public Array<T> distinct(int limit) {
    int startCapacity = limit;
    if (startCapacity == Integer.MAX_VALUE) {
        startCapacity = 100;
    }
    final TIntSet knownCodes = new TIntHashSet(startCapacity);
    final ArrayBuilder<T> collected = ArrayBuilder.of(startCapacity, type());
    int cursor = 0;
    while (cursor < length()) {
        final int position = cursor++;
        final boolean firstOccurrence = knownCodes.add(getInt(position));
        if (firstOccurrence) {
            collected.add(getValue(position));
            if (knownCodes.size() >= limit) {
                break;
            }
        }
    }
    return collected.toArray();
}
 
Example 7
Source File: AbstractMetablocking.java    From JedAIToolkit with Apache License 2.0 6 votes vote down vote up
/**
 * Recomputes {@code comparisonsPerEntity} and {@code distinctComparisons}.
 *
 * <p>For every entity that belongs to at least one block, the distinct neighbor ids found
 * across all of its blocks are counted. When {@code cleanCleanER} is false the count is
 * reduced by one (presumably to discount the entity itself; confirm against
 * {@code getNeighborEntities}). The per-entity counts are summed and the sum is halved at
 * the end.</p>
 */
protected void setStatistics() {
    distinctComparisons = 0;
    comparisonsPerEntity = new float[noOfEntities];
    // One set reused for all entities; cleared before each use.
    final TIntSet neighbors = new TIntHashSet();
    for (int entityId = 0; entityId < noOfEntities; entityId++) {
        final int[] blockIds = entityIndex.getEntityBlocks(entityId, 0);
        if (blockIds.length == 0) {
            // Entity appears in no block: its count stays at the array default of 0.
            continue;
        }
        neighbors.clear();
        for (int blockId : blockIds) {
            for (int neighborId : getNeighborEntities(blockId, entityId)) {
                neighbors.add(neighborId);
            }
        }
        float comparisons = neighbors.size();
        if (!cleanCleanER) {
            comparisons--;
        }
        comparisonsPerEntity[entityId] = comparisons;
        distinctComparisons += comparisons;
    }
    distinctComparisons /= 2;
}
 
Example 8
Source File: SortedNeighborhoodBlocking.java    From JedAIToolkit with Apache License 2.0 6 votes vote down vote up
/**
 * Builds blocks by sliding a fixed-size window over the sorted entity ids.
 *
 * <p>The keys of {@code invertedIndexD1} are sorted, turned into an entity-id sequence by
 * {@code getSortedEntities}, and every window of {@code windowSize} consecutive positions
 * is reduced to its distinct ids. A window yields a {@code UnilateralBlock} only when it
 * holds more than one distinct id.</p>
 */
@Override
protected void parseIndex() {
    final String[] orderedKeys = invertedIndexD1.keySet().toArray(new String[0]);
    Arrays.sort(orderedKeys);

    final int[] orderedEntityIds = getSortedEntities(orderedKeys);

    // Last start position at which a full window still fits into the id sequence.
    final int lastWindowStart = orderedEntityIds.length - windowSize;
    for (int start = 0; start <= lastWindowStart; start++) {
        final TIntSet windowIds = new TIntHashSet();
        final int end = start + windowSize;
        for (int pos = start; pos < end; pos++) {
            windowIds.add(orderedEntityIds[pos]);
        }

        if (windowIds.size() > 1) {
            blocks.add(new UnilateralBlock(windowIds.toArray()));
        }
    }
}
 
Example 9
Source File: ExtendedSortedNeighborhoodBlocking.java    From JedAIToolkit with Apache License 2.0 6 votes vote down vote up
/**
 * Builds blocks by sliding a fixed-size window over the sorted blocking keys.
 *
 * <p>The keys of {@code invertedIndexD1} are sorted; for every window of
 * {@code windowSize} consecutive keys, the entity ids indexed under those keys are merged
 * into one set. A window yields a {@code UnilateralBlock} only when the merged set holds
 * more than one id.</p>
 */
@Override
protected void parseIndex() {
    final String[] orderedKeys = invertedIndexD1.keySet().toArray(new String[0]);
    Arrays.sort(orderedKeys);

    // Last start position at which a full window still fits into the key sequence.
    final int lastWindowStart = orderedKeys.length - windowSize;
    for (int start = 0; start <= lastWindowStart; start++) {
        final TIntSet windowIds = new TIntHashSet();
        final int end = start + windowSize;
        for (int pos = start; pos < end; pos++) {
            windowIds.addAll(invertedIndexD1.get(orderedKeys[pos]));
        }

        if (windowIds.size() > 1) {
            blocks.add(new UnilateralBlock(windowIds.toArray()));
        }
    }
}
 
Example 10
Source File: DataAccess.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the count of every given keyword id through {@code DataAccessCache}.
 *
 * <p>The call is bracketed by {@code RunningTimer} start/end records under the key
 * {@code "DataAccess:KWDocFreq"}.</p>
 *
 * @param keywords ids of the keywords to look up
 * @return a map from each keyword id to the count returned by
 *         {@code DataAccessCache.singleton().getKeywordCount}
 * @throws EntityLinkingDataAccessException declared by the signature; presumably raised by
 *         the cache lookup (confirm against {@code DataAccessCache})
 */
public static TIntIntHashMap getKeywordDocumentFrequencies(TIntSet keywords) throws EntityLinkingDataAccessException {
  logger.debug("Get keyword-document frequencies.");
  Integer timerId = RunningTimer.recordStartTime("DataAccess:KWDocFreq");
  // Pre-size the map from the input size and the default load factor.
  int initialCapacity = (int) (keywords.size() / Constants.DEFAULT_LOAD_FACTOR);
  TIntIntHashMap frequencies = new TIntIntHashMap(initialCapacity);
  TIntIterator cursor = keywords.iterator();
  while (cursor.hasNext()) {
    int id = cursor.next();
    int frequency = DataAccessCache.singleton().getKeywordCount(id);
    frequencies.put(id, frequency);
  }
  RunningTimer.recordEndTime("DataAccess:KWDocFreq", timerId);
  return frequencies;
}
 
Example 11
Source File: DataAccess.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the count of every given unit id, for one unit type, through
 * {@code DataAccessCache}.
 *
 * <p>The call is bracketed by {@code RunningTimer} start/end records.</p>
 *
 * @param keywords ids of the units to look up
 * @param unitType the unit type passed to every cache lookup
 * @return a map from each id to the count returned by
 *         {@code DataAccessCache.singleton().getUnitCount}
 * @throws EntityLinkingDataAccessException declared by the signature; presumably raised by
 *         the cache lookup (confirm against {@code DataAccessCache})
 */
public static TIntIntHashMap getUnitDocumentFrequencies(TIntSet keywords, UnitType unitType) throws EntityLinkingDataAccessException {
  logger.debug("Get Unit-document frequencies.");
  // NOTE(review): the timer key is the same one the keyword variant uses; confirm that
  // sharing it is intended.
  Integer timerId = RunningTimer.recordStartTime("DataAccess:KWDocFreq");
  // Pre-size the map from the input size and the default load factor.
  int initialCapacity = (int) (keywords.size() / Constants.DEFAULT_LOAD_FACTOR);
  TIntIntHashMap frequencies = new TIntIntHashMap(initialCapacity);
  TIntIterator cursor = keywords.iterator();
  while (cursor.hasNext()) {
    int id = cursor.next();
    int frequency = DataAccessCache.singleton().getUnitCount(id, unitType);
    frequencies.put(id, frequency);
  }
  RunningTimer.recordEndTime("DataAccess:KWDocFreq", timerId);
  return frequencies;
}
 
Example 12
Source File: FuzzySetSimJoin.java    From JedAIToolkit with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the Jaccard similarity of two int arrays treated as sets:
 * {@code |r ∩ s| / |r ∪ s|}. Duplicate elements within an array count once.
 *
 * <p>Only two hash sets are allocated. The union size is derived by inclusion–exclusion,
 * {@code |r| + |s| - |r ∩ s|}, instead of materialising the union.</p>
 *
 * @param r the first collection of ids
 * @param s the second collection of ids
 * @return the similarity; {@code NaN} when both arrays are empty, because the division
 *         is 0/0 in float arithmetic
 */
private static float jaccard(int[] r, int[] s) {
    // Starts as the distinct elements of r and is narrowed to the intersection below.
    final TIntSet common = new TIntHashSet(r);
    final int distinctR = common.size();

    final TIntSet distinctOfS = new TIntHashSet(s);
    final int distinctS = distinctOfS.size();

    common.retainAll(distinctOfS);
    final int intersectionSize = common.size();

    // Subtract first so the intermediate value never exceeds the real union size.
    final int unionSize = distinctR + (distinctS - intersectionSize);
    return ((float) intersectionSize) / ((float) unionSize);
}
 
Example 13
Source File: SDRCategoryEncoder.java    From htm.java with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Draws {@code sampleLength} distinct values via {@code random.nextInt(populationSize)}
 * and returns them in ascending order.
 *
 * <p>Values are drawn repeatedly until enough distinct ones have been collected. Because
 * at most {@code populationSize} distinct values can ever be drawn, a larger
 * {@code sampleLength} would make that loop spin forever; such requests are rejected up
 * front.</p>
 *
 * @param populationSize exclusive upper bound passed to {@code random.nextInt}
 * @param sampleLength   number of distinct values to draw; a value of zero or less yields
 *                       an empty array
 * @return the sorted sample
 * @throws IllegalArgumentException if {@code sampleLength} is positive and greater than
 *                                  {@code populationSize}
 */
private int[] getSortedSample(final int populationSize, final int sampleLength) {
    // Math.max keeps non-positive sample lengths on the original "empty result" path.
    if (sampleLength > Math.max(populationSize, 0)) {
        throw new IllegalArgumentException(
                "sampleLength (" + sampleLength + ") must not exceed populationSize ("
                        + populationSize + ")");
    }
    TIntSet resultSet = new TIntHashSet();
    while (resultSet.size() < sampleLength) {
        resultSet.add(random.nextInt(populationSize));
    }
    int[] result = resultSet.toArray();
    Arrays.sort(result);
    return result;
}
 
Example 14
Source File: CocktailParty.java    From ambiverse-nlu with Apache License 2.0 4 votes vote down vote up
/**
 * Determines which dangling mention nodes, and which entity nodes connected only to
 * them, should be removed from the graph. Such mentions would influence the minimum
 * weighted degree but can never be improved, so their solution is set to the entity
 * with the best local candidate score.
 *
 * <p>This method does not modify the graph itself; it returns the ids to remove. As
 * side effects it logs and traces each dangling mention and updates {@code solution}.</p>
 *
 * @param g        Graph whose mention and entity nodes are inspected.
 * @param solution Solution will be updated, setting the correct entity using
 *                local similarity for dangling mentions.
 * @return Node ids of nodes to remove.
 */
private TIntSet removeUnconnectedMentionEntityPairs(Graph g, Map<ResultMention, List<ResultEntity>> solution) {
  TIntSet danglingMentions = new TIntHashSet();
  for (int mentionId : g.getMentionNodesIds().values()) {
    GraphNode node = g.getNode(mentionId);
    Mention mention = (Mention) node.getNodeData();
    TIntDoubleHashMap candidates = node.getSuccessors();
    // Mentions without any candidates are skipped, as are mentions the graph does not
    // flag as local (presumably "local" means none of the candidates has a coherence
    // edge; confirm against Graph.isLocalMention).
    if (candidates.size() == 0 || !g.isLocalMention(mentionId)) {
      continue;
    }
    logger.debug("local mention removed: " + mentionId + " " + mention);
    danglingMentions.add(mentionId);
    GraphTracer.gTracer.addMentionToDangling(g.getName(), mention.getMention(), mention.getCharOffset());
    // Fall back to the best local candidate for this mention.
    Pair<Integer, Double> best = getBestLocalCandidateAndScore(candidates);
    int bestEntityId = best.getKey();
    double bestScore = best.getValue();
    updateSolution(solution, g, mention, bestEntityId, bestScore);
  }

  // Entities whose successors are all dangling mentions go as well.
  TIntSet orphanedEntities = new TIntHashSet();
  for (int entityId : g.getEntityNodesIds().values()) {
    TIntDoubleHashMap neighbors = g.getNode(entityId).getSuccessors();
    // Count down from the total; zero left means every successor is being removed.
    int keptNeighbors = neighbors.size();
    TIntDoubleIterator cursor = neighbors.iterator();
    while (cursor.hasNext()) {
      cursor.advance();
      if (danglingMentions.contains(cursor.key())) {
        --keptNeighbors;
      }
    }
    if (keptNeighbors == 0) {
      orphanedEntities.add(entityId);
    }
  }

  // Combine mention and entity ids into the single result set.
  TIntSet nodesToRemove = new TIntHashSet(danglingMentions.size() + orphanedEntities.size());
  nodesToRemove.addAll(danglingMentions);
  nodesToRemove.addAll(orphanedEntities);
  return nodesToRemove;
}