com.carrotsearch.hppc.IntOpenHashSet Java Examples

Source File: GeneralizationHierarchy.java From arx with Apache License 2.0

6 votes

/**
 * Collects the distinct values stored at the given level.
 *
 * Reads {@code map[k][level]} for every row {@code k} of {@code map},
 * de-duplicates the values with a hash set and copies them into a new array.
 * The result follows the slot order of the set's backing arrays.
 *
 * @param level the index read from every row of {@code map}
 * @return a new array containing each distinct value exactly once
 */
public int[] getDistinctValues(final int level) {

    // De-duplicate the values of the requested level
    final IntOpenHashSet distinct = new IntOpenHashSet();
    for (int row = 0; row < map.length; row++) {
        distinct.add(map[row][level]);
    }

    // Copy the occupied slots of the set's backing arrays into a dense array
    final int[] distinctValues = new int[distinct.size()];
    int next = 0;
    for (int slot = 0; slot < distinct.allocated.length; slot++) {
        if (!distinct.allocated[slot]) {
            continue;
        }
        distinctValues[next++] = distinct.keys[slot];
    }
    return distinctValues;
}

Source File: BitSetBasedBooleanDocumentFrequencyDeterminer.java From Palmetto with GNU Affero General Public License v3.0

6 votes

/**
 * Builds one bit set per given hash set. Every key of the merged set is
 * assigned a consecutive bit position (in slot order of the merged set); the
 * bit at that position is set in bit set {@code j} if {@code hashSets[j]}
 * contains the key.
 *
 * @param hashSets
 *            the individual sets, one per resulting bit set
 * @param mergedHashSet
 *            the set whose keys define the bit positions
 * @return an array of bit sets with the same length as {@code hashSets}
 */
private BitSet[] createBitSets(IntOpenHashSet hashSets[],
        IntOpenHashSet mergedHashSet) {
    final int setCount = hashSets.length;
    final int mergedSize = mergedHashSet.size();
    final BitSet[] result = new BitSet[setCount];
    for (int s = 0; s < setCount; ++s) {
        result[s] = new BitSet(mergedSize);
    }

    final int[] mergedKeys = mergedHashSet.keys;
    final boolean[] occupied = mergedHashSet.allocated;
    int bitIndex = 0;
    for (int slot = 0; slot < mergedKeys.length; slot++) {
        if (!occupied[slot]) {
            // empty slot of the hash table, carries no key
            continue;
        }
        final int key = mergedKeys[slot];
        for (int s = 0; s < setCount; ++s) {
            if (hashSets[s].contains(key)) {
                result[s].set(bitIndex);
            }
        }
        ++bitIndex;
    }

    return result;
}

Source File: BitSetBasedBooleanDocumentFrequencyDeterminer.java From Palmetto with GNU Affero General Public License v3.0

6 votes

/**
 * Determines the counted subsets for the given word sets.
 *
 * An empty document set is registered for every distinct word, the corpus
 * adapter is asked to fill these sets, and one {@link CountedSubsets} object
 * is created per definition from the bit sets of the word set with the same
 * index.
 *
 * @param wordsets
 *            the word sets; {@code wordsets[i]} is used together with
 *            {@code definitions[i]}
 * @param definitions
 *            the segmentation definitions, one per word set
 * @return one {@link CountedSubsets} object per definition
 */
public CountedSubsets[] determineCounts(String[][] wordsets,
        SegmentationDefinition[] definitions) {
    ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping = new ObjectObjectOpenHashMap<String, IntOpenHashSet>();
    // register an empty document set for every distinct word
    for (String[] wordset : wordsets) {
        for (String word : wordset) {
            if (!wordDocMapping.containsKey(word)) {
                wordDocMapping.put(word, new IntOpenHashSet());
            }
        }
    }

    corpusAdapter.getDocumentsWithWordsAsSet(wordDocMapping);

    CountedSubsets[] result = new CountedSubsets[definitions.length];
    for (int d = 0; d < definitions.length; ++d) {
        SegmentationDefinition definition = definitions[d];
        result[d] = new CountedSubsets(definition.segments,
                definition.conditions,
                createCounts(createBitSets(wordDocMapping, wordsets[d])));
    }
    return result;
}

Source File: LuceneCorpusAdapter.java From Palmetto with GNU Affero General Public License v3.0

6 votes

/**
 * Adds the ids of all documents containing the given word to the given set.
 * Every element of {@code reader} is queried for the term; the ids it
 * reports are offset by the {@code docBase} of the context with the same
 * index. An {@link IOException} is logged and not propagated.
 */
@Override
public void getDocumentsWithWordAsSet(String word, IntOpenHashSet documents) {
    final Term term = new Term(fieldName, word);
    try {
        for (int segment = 0; segment < reader.length; segment++) {
            final DocsEnum postings = reader[segment].termDocsEnum(term);
            final int docBase = contexts[segment].docBase;
            if (postings == null) {
                // no enumeration was returned for this reader
                continue;
            }
            while (postings.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                documents.add(docBase + postings.docID());
            }
        }
    } catch (IOException e) {
        LOGGER.error("Error while requesting documents for word \"" + word + "\".", e);
    }
}

Source File: LuceneCorpusAdapter.java From Palmetto with GNU Affero General Public License v3.0

5 votes

/**
 * Fills the document set of every word in the given mapping by delegating to
 * {@code getDocumentsWithWordAsSet} for each occupied slot of the map's
 * backing arrays.
 *
 * @param wordDocMapping
 *            the mapping from words to the sets receiving the document ids
 */
public void getDocumentsWithWordsAsSet(ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping) {
    final Object[] words = (Object[]) wordDocMapping.keys;
    final Object[] documentSets = (Object[]) wordDocMapping.values;
    final int slotCount = wordDocMapping.allocated.length;
    for (int slot = 0; slot < slotCount; ++slot) {
        if (!wordDocMapping.allocated[slot]) {
            continue;
        }
        final String word = (String) words[slot];
        final IntOpenHashSet documents = (IntOpenHashSet) documentSets[slot];
        getDocumentsWithWordAsSet(word, documents);
    }
}

Source File: AbstractBooleanDocumentSupportingAdapterBasedTest.java From Palmetto with GNU Affero General Public License v3.0

5 votes

/**
 * Fills the document set of every word in the given mapping. Each key is
 * parsed as an integer and used as index into {@code wordDocuments}; the
 * entry found there is added to the set stored for that key.
 */
@Override
public void getDocumentsWithWordsAsSet(ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping) {
    final Object[] words = (Object[]) wordDocMapping.keys;
    final Object[] documentSets = (Object[]) wordDocMapping.values;
    final int slotCount = wordDocMapping.allocated.length;
    for (int slot = 0; slot < slotCount; ++slot) {
        if (!wordDocMapping.allocated[slot]) {
            continue;
        }
        final IntOpenHashSet documents = (IntOpenHashSet) documentSets[slot];
        documents.add(wordDocuments[Integer.parseInt((String) words[slot])]);
    }
}

Source File: AbstractSegmentatorTest.java From Palmetto with GNU Affero General Public License v3.0

5 votes

/**
 * Asserts that the given definition matches the expected mapping of segments
 * to conditions: the number of segments must be equal, every segment of the
 * definition must be a key of the mapping, and the conditions of every
 * segment must have the expected count and be contained in the expected set.
 *
 * @param definition
 *            the definition under test
 * @param segmentToConditionMapping
 *            the expected conditions for every expected segment
 */
private void compare(SegmentationDefinition definition,
                     IntObjectOpenHashMap<IntOpenHashSet> segmentToConditionMapping) {
    IntOpenHashSet conditionSet;

    Assert.assertEquals(segmentToConditionMapping.size(),
            definition.segments.length);
    for (int i = 0; i < definition.segments.length; i++) {
        Assert.assertTrue("got unexpected segment "
                + definition.segments[i], segmentToConditionMapping
                .containsKey(definition.segments[i]));
        conditionSet = segmentToConditionMapping
                .get(definition.segments[i]);
        // The size check runs once per segment and outside the inner loop,
        // so that a segment for which the definition has no conditions at
        // all is still compared against the expected number of conditions.
        Assert.assertEquals(
                "expected " + conditionSet.size() + " conditions "
                        + conditionSet.toString()
                        + " for segment ["
                        + definition.segments[i] + "] but got " + definition.conditions[i].length + " "
                        + Arrays.toString(definition.conditions[i]) + ".",
                conditionSet.size(),
                definition.conditions[i].length);
        for (int j = 0; j < definition.conditions[i].length; ++j) {
            Assert.assertTrue("got unexpected condition "
                    + definition.conditions[i][j] + " for segment "
                    + definition.segments[i],
                    conditionSet.contains(definition.conditions[i][j]));
        }
    }
}

Source File: AbstractSegmentatorTest.java From Palmetto with GNU Affero General Public License v3.0

5 votes

/**
 * Checks that the given segmentator creates the expected definition for a
 * word set of the given size. The expected segments and conditions are
 * converted with {@code createSets} and compared with the created definition.
 * The needed counts filled by {@code createSets} are not compared.
 *
 * @param wordsetSize
 *            the size of the word set passed to the segmentator
 * @param subsetCreator
 *            the segmentator under test
 * @param expectedSegments
 *            the expected segments
 * @param expectedConditions
 *            the expected conditions
 */
public void testSubsetCreator(int wordsetSize, Segmentator subsetCreator,
        int expectedSegments[], int expectedConditions[][]) {
    BitSet neededCounts = new BitSet();
    IntObjectOpenHashMap<IntOpenHashSet> expectedMapping = new IntObjectOpenHashMap<IntOpenHashSet>();
    createSets(expectedSegments, expectedConditions, expectedMapping,
            neededCounts);

    SegmentationDefinition created = subsetCreator.getSubsetDefinition(wordsetSize);
    compare(created, expectedMapping);
}

Source File: BitSetBasedBooleanDocumentFrequencyDeterminer.java From Palmetto with GNU Affero General Public License v3.0

5 votes

/**
 * Creates the bit sets for the given word set. The document set of every
 * word is looked up in the given mapping, all of them are merged into one
 * set, and both are handed to the overload working on hash sets.
 *
 * @param wordDocMapping
 *            the mapping from words to their document sets
 * @param wordset
 *            the words for which bit sets are created
 * @return one bit set per word of the word set
 */
private BitSet[] createBitSets(
        ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping,
        String[] wordset) {
    final IntOpenHashSet[] documentsPerWord = new IntOpenHashSet[wordset.length];
    final IntOpenHashSet allDocuments = new IntOpenHashSet();
    int position = 0;
    for (String word : wordset) {
        IntOpenHashSet documents = wordDocMapping.get(word);
        documentsPerWord[position++] = documents;
        allDocuments.addAll(documents);
    }
    return createBitSets(documentsPerWord, allDocuments);
}

Source File: PalmettoApplication.java From Palmetto with GNU Affero General Public License v3.0

5 votes

/**
 * Handles a request for the documents containing the given words.
 *
 * The parameter is split with {@code WORD_SEPARATOR}. For every resulting
 * word the response contains the number of found documents followed by their
 * ids, all written as ints through an {@link IntBuffer} view of the response
 * buffer. If the adapter is not a {@link BooleanDocumentSupportingAdapter},
 * an empty response with status {@code NOT_IMPLEMENTED} is returned.
 *
 * @param words
 *            the requested words, joined by the word separator
 * @return the encoded document ids or an empty {@code NOT_IMPLEMENTED} response
 */
@RequestMapping(value = "df")
public ResponseEntity<byte[]> requestDocFreq(@RequestParam(value = "words") String words) {
    if (!(luceneAdapter instanceof BooleanDocumentSupportingAdapter)) {
        return new ResponseEntity<>(HttpStatus.NOT_IMPLEMENTED);
    }
    final BooleanDocumentSupportingAdapter adapter = (BooleanDocumentSupportingAdapter) luceneAdapter;

    final String[] requestedWords = words.split(WORD_SEPARATOR);
    final IntBuffer[] idsPerWord = new IntBuffer[requestedWords.length];
    // a single set is reused for all words and cleared before every lookup
    final IntOpenHashSet documentIds = new IntOpenHashSet();
    int totalBytes = 0;
    for (int w = 0; w < requestedWords.length; ++w) {
        documentIds.clear();
        adapter.getDocumentsWithWordAsSet(requestedWords[w], documentIds);
        final int hits = documentIds.size();
        // four bytes per id plus four bytes for the preceding count
        totalBytes += (4 * hits) + 4;
        final IntBuffer ids = IntBuffer.allocate(hits);
        if (hits > 0) {
            for (int slot = 0; slot < documentIds.keys.length; ++slot) {
                if (documentIds.allocated[slot]) {
                    ids.put(documentIds.keys[slot]);
                }
            }
        }
        idsPerWord[w] = ids;
    }

    final ByteBuffer response = ByteBuffer.allocate(totalBytes);
    final IntBuffer intView = response.asIntBuffer();
    for (IntBuffer ids : idsPerWord) {
        intView.put(ids.capacity());
        intView.put(ids.array());
    }
    return new ResponseEntity<byte[]>(response.array(), HttpStatus.OK);
}

Source File: Connect.java From jopenfst with MIT License

5 votes

/**
 * Recursive depth-first traversal along the arcs of the given state. The id
 * of the given state and the ids of all states reached through
 * {@code getNextState()} are added to the given set; states whose id is
 * already in the set are not visited again.
 *
 * @param start the state at which the traversal starts
 * @param accessible the set collecting the ids of the visited states
 */
private static void dfsForward(MutableState start, IntOpenHashSet accessible) {
  accessible.add(start.getId());
  for (MutableArc outgoing : start.getArcs()) {
    MutableState target = outgoing.getNextState();
    boolean alreadyVisited = accessible.contains(target.getId());
    if (alreadyVisited) {
      continue;
    }
    dfsForward(target, accessible);
  }
}

Source File: Connect.java From jopenfst with MIT License

5 votes

/**
 * Recursive depth-first traversal along the incoming states of the given
 * state. The id of the given state and the ids of all states reached through
 * {@code getIncomingStates()} are added to the given set; states whose id is
 * already in the set are not visited again.
 *
 * @param state the state at which the traversal starts
 * @param coaccessible the set collecting the ids of the visited states
 */
private static void dfsBackward(MutableState state, IntOpenHashSet coaccessible) {
  coaccessible.add(state.getId());
  for (MutableState predecessor : state.getIncomingStates()) {
    boolean alreadyVisited = coaccessible.contains(predecessor.getId());
    if (alreadyVisited) {
      continue;
    }
    dfsBackward(predecessor, coaccessible);
  }
}

Source File: ClassifiedClassNode.java From gerbil with GNU Affero General Public License v3.0

4 votes

/**
 * Returns the set of class ids held by this object. The stored reference
 * itself is returned, not a copy.
 *
 * @return the stored set of class ids
 */
public IntOpenHashSet getClassIds() {
    return this.classIds;
}

Source File: NShortestPaths.java From jopenfst with MIT License

4 votes

/**
 * Computes the shortest distance from every state to the final states by
 * running a queue-based relaxation over the reversed fst, starting at the
 * start state of the reversed fst.
 *
 * See: M. Mohri, "Semiring Framework and Algorithms for Shortest-Distance Problems", Journal of Automata, Languages
 * and Combinatorics, 7(3), pp. 321-350, 2002.
 *
 * @param fst the fst for which the shortest distances are calculated
 * @return an array of distances indexed by the state ids of the reversed fst
 */
private static double[] shortestDistance(Fst fst) {

  Fst reversed = Reverse.reverse(fst);
  Semiring semiring = reversed.getSemiring();

  // d holds the current distance estimate of a state, r the weight that was
  // added to a state since it was last taken from the queue
  int stateCount = reversed.getStateCount();
  double[] d = new double[stateCount];
  double[] r = new double[stateCount];
  for (int s = 0; s < stateCount; s++) {
    d[s] = semiring.zero();
    r[s] = semiring.zero();
  }

  State start = reversed.getStartState();
  int startId = start.getId();

  IntObjectOpenHashMap<State> stateMap = new IntObjectOpenHashMap<>();
  Deque<Integer> queue = new LinkedList<>();
  IntOpenHashSet enqueuedStateIds = new IntOpenHashSet();

  queue.addLast(startId);
  stateMap.put(startId, start);
  d[startId] = semiring.one();
  r[startId] = semiring.one();

  while (!queue.isEmpty()) {
    int currentId = queue.removeFirst();
    enqueuedStateIds.remove(currentId);
    State current = stateMap.get(currentId);
    int currentIndex = current.getId();
    double pending = r[currentIndex];
    r[currentIndex] = semiring.zero();

    int arcCount = current.getArcCount();
    for (int a = 0; a < arcCount; a++) {
      Arc arc = current.getArc(a);
      State target = arc.getNextState();
      int targetId = target.getId();
      double contribution = semiring.times(pending, arc.getWeight());
      double before = d[targetId];
      double after = semiring.plus(before, contribution);
      if (before == after) {
        // the estimate of the target did not change, nothing to propagate
        continue;
      }
      d[targetId] = after;
      r[targetId] = semiring.plus(r[targetId], contribution);
      if (!enqueuedStateIds.contains(targetId)) {
        queue.addLast(targetId);
        enqueuedStateIds.add(targetId);
        stateMap.put(targetId, target);
      }
    }
  }
  return d;
}

Source File: AbstractBooleanDocumentSupportingAdapterBasedTest.java From Palmetto with GNU Affero General Public License v3.0

4 votes

/**
 * Parses the given word as an integer, uses it as index into
 * {@code wordDocuments} and adds the entry found there to the given set.
 */
@Override
public void getDocumentsWithWordAsSet(String word, IntOpenHashSet documents) {
    final int wordIndex = Integer.parseInt(word);
    documents.add(wordDocuments[wordIndex]);
}

Source File: ClassifiedClassNode.java From gerbil with GNU Affero General Public License v3.0

4 votes

/**
 * Replaces the set of class ids held by this object.
 *
 * @param classIds
 *            the set to store; the reference is kept as-is, no copy is made
 */
public void setClassIds(IntOpenHashSet classIds) {
    this.classIds = classIds;
}

Source File: BooleanDocumentSupportingAdapter.java From Palmetto with GNU Affero General Public License v3.0

2 votes

/**
 * Determines the documents containing the given word. The ids of the found
 * documents are inserted into the given set.
 *
 * @param word
 *            the word that should be searched for
 * @param documents
 *            the set into which the ids of the found documents are inserted
 */
public void getDocumentsWithWordAsSet(String word, IntOpenHashSet documents);

Source File: BooleanDocumentSupportingAdapter.java From Palmetto with GNU Affero General Public License v3.0

2 votes

/**
 * Determines the documents containing the words that are used as keys in the
 * given map. The resulting sets contain the ids of the documents and are
 * inserted into the map.
 *
 * @param wordDocMapping
 *            a mapping of words to document id sets in which the results
 *            are stored
 */
public void getDocumentsWithWordsAsSet(ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping);

com.carrotsearch.hppc.IntOpenHashSet Java Examples