com.carrotsearch.hppc.IntObjectOpenHashMap Java Examples
The following examples show how to use
com.carrotsearch.hppc.IntObjectOpenHashMap.
You can vote up the examples you like or vote down the ones you don't,
and follow the links above each example to go to the original project or source file. You can also check out related API usage in the sidebar.
Example #1
Source File: AbstractSymbolTable.java From jopenfst with MIT License | 5 votes |
protected AbstractSymbolTable(SymbolTable copyFrom) { this.symbolToId = new ObjectIntOpenHashMap<>(copyFrom.size()); this.idToSymbol = new IntObjectOpenHashMap<>(copyFrom.size()); for (ObjectIntCursor<String> cursor : copyFrom) { symbolToId.put(cursor.key, cursor.value); idToSymbol.put(cursor.value, cursor.key); } }
Example #2
Source File: AbstractVectorBasedCalculation.java From Palmetto with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Calculates one similarity value for every (segment, condition) pair of the
 * given subset vectors.
 *
 * <p>Vectors are cached by their integer subset id. The cache is seeded with
 * the given base vectors (vector {@code i} is stored under the key
 * {@code 1 << i}); every other id is built on demand with
 * {@code createVector} and cached for later reuse.
 *
 * @param subsetVectors the segments, their conditions and the base vectors
 * @return the similarity values, ordered segment by segment and, within a
 *         segment, in the order of its conditions
 */
@Override
public double[] calculateConfirmationValues(SubsetVectors subsetVectors) {
    final int segmentCount = subsetVectors.segments.length;

    // The result holds one slot per (segment, condition) pair.
    int pairCount = 0;
    for (int s = 0; s < segmentCount; ++s) {
        pairCount += subsetVectors.conditions[s].length;
    }
    final double[] result = new double[pairCount];

    final IntObjectOpenHashMap<double[]> cache = new IntObjectOpenHashMap<double[]>();
    for (int v = 0; v < subsetVectors.vectors.length; ++v) {
        cache.put(1 << v, subsetVectors.vectors[v]);
    }

    int next = 0;
    for (int s = 0; s < segmentCount; ++s) {
        final int segmentId = subsetVectors.segments[s];
        final double[] segmentVector;
        // lget() returns the value found by the containsKey() call directly before it.
        if (cache.containsKey(segmentId)) {
            segmentVector = cache.lget();
        } else {
            segmentVector = createVector(segmentId, subsetVectors.vectors);
            cache.put(segmentId, segmentVector);
        }

        for (int c = 0; c < subsetVectors.conditions[s].length; ++c) {
            final int conditionId = subsetVectors.conditions[s][c];
            final double[] conditionVector;
            if (cache.containsKey(conditionId)) {
                conditionVector = cache.lget();
            } else {
                conditionVector = createVector(conditionId, subsetVectors.vectors);
                cache.put(conditionId, conditionVector);
            }
            result[next] = calculateSimilarity(segmentVector, conditionVector);
            ++next;
        }
    }
    return result;
}
Example #3
Source File: CentroidConfirmationMeasure.java From Palmetto with GNU Affero General Public License v3.0 | 5 votes |
@Override public double[] calculateConfirmationValues(SubsetVectors subsetVectors) { double values[] = new double[subsetVectors.segments.length]; double centroid[] = new double[subsetVectors.vectors[0].length]; IntObjectOpenHashMap<double[]> vectorCache = new IntObjectOpenHashMap<double[]>(); for (int i = 0; i < subsetVectors.vectors.length; ++i) { vectorCache.put(1 << i, subsetVectors.vectors[i]); for (int j = 0; j < centroid.length; j++) { centroid[j] += subsetVectors.vectors[i][j]; } } // for (int j = 0; j < centroid.length; j++) { // centroid[j] /= subsetVectors.vectors.length; // } double segmentVector[]; for (int i = 0; i < subsetVectors.segments.length; ++i) { if (vectorCache.containsKey(subsetVectors.segments[i])) { segmentVector = vectorCache.lget(); } else { segmentVector = createVector(subsetVectors.segments[i], subsetVectors.vectors); vectorCache.put(subsetVectors.segments[i], segmentVector); } values[i] = calculateSimilarity(segmentVector, centroid); } return values; }
Example #4
Source File: WindowSupportingLuceneCorpusAdapter.java From Palmetto with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Collects the positions of all given words by querying the index once per word.
 *
 * @param words      the words to look up; a word's index in this array is passed
 *                   on as its word id
 * @param docLengths map handed to {@code requestDocumentsWithWord} for every word
 * @return a newly created map that {@code requestDocumentsWithWord} has filled
 */
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    final IntObjectOpenHashMap<IntArrayList[]> result = new IntObjectOpenHashMap<IntArrayList[]>();
    final int wordCount = words.length;
    int wordId = 0;
    while (wordId < wordCount) {
        requestDocumentsWithWord(words[wordId], result, docLengths, wordId, wordCount);
        ++wordId;
    }
    return result;
}
Example #5
Source File: AbstractWindowBasedFrequencyDeterminer.java From Palmetto with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Determines the counts for all subsets of the given word set.
 *
 * <p>The word positions are requested from the corpus adapter and every
 * returned document is handed to {@code addCountsFromDocument} together with
 * its length.
 *
 * @param wordset the words to count
 * @return an array with {@code 1 << wordset.length} counters
 */
protected int[] determineCounts(String wordset[]) {
    final int[] counts = new int[1 << wordset.length];
    final IntIntOpenHashMap docLengths = new IntIntOpenHashMap();
    final IntObjectOpenHashMap<IntArrayList[]> positionsInDocs =
            corpusAdapter.requestWordPositionsInDocuments(wordset, docLengths);

    // Walk the map's internal slot arrays directly; only allocated slots hold an entry.
    for (int slot = 0; slot < positionsInDocs.keys.length; ++slot) {
        if (!positionsInDocs.allocated[slot]) {
            continue;
        }
        final IntArrayList[] positions = (IntArrayList[]) ((Object[]) positionsInDocs.values)[slot];
        addCountsFromDocument(positions, counts, docLengths.get(positionsInDocs.keys[slot]));
    }
    return counts;
}
Example #6
Source File: AbstractSegmentatorTest.java From Palmetto with GNU Affero General Public License v3.0 | 5 votes |
public void testSubsetCreator(int wordsetSize, Segmentator subsetCreator, int expectedSegments[], int expectedConditions[][]) { IntObjectOpenHashMap<IntOpenHashSet> segmentToConditionMapping = new IntObjectOpenHashMap<IntOpenHashSet>(); BitSet neededCounts = new BitSet(); createSets(expectedSegments, expectedConditions, segmentToConditionMapping, neededCounts); SegmentationDefinition definition = subsetCreator.getSubsetDefinition(wordsetSize); // compare(definition, segmentToConditionMapping, neededCounts); compare(definition, segmentToConditionMapping); }
Example #7
Source File: AbstractSegmentatorTest.java From Palmetto with GNU Affero General Public License v3.0 | 5 votes |
private void compare(SegmentationDefinition definition, IntObjectOpenHashMap<IntOpenHashSet> segmentToConditionMapping) { IntOpenHashSet conditionSet; Assert.assertEquals(segmentToConditionMapping.size(), definition.segments.length); for (int i = 0; i < definition.segments.length; i++) { Assert.assertTrue("got unexpected segment " + definition.segments[i], segmentToConditionMapping .containsKey(definition.segments[i])); conditionSet = segmentToConditionMapping .get(definition.segments[i]); for (int j = 0; j < definition.conditions[i].length; ++j) { Assert.assertEquals( "expected " + conditionSet.size() + " conditions " + conditionSet.toString() + " for segment [" + definition.segments[i] + "] but got " + definition.conditions[i].length + " " + Arrays.toString(definition.conditions[i]) + ".", conditionSet.size(), definition.conditions[i].length); Assert.assertTrue("got unexpected condition " + definition.conditions[i][j] + " for segment " + definition.segments[i], conditionSet.contains(definition.conditions[i][j])); } } // Assert.assertEquals(neededCounts, definition.neededCounts); }
Example #8
Source File: BooleanSlidingWindowProbabilitySupplierTest.java From Palmetto with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Test stub that reports the enclosing test's {@code positions} as the content
 * of one single document with the id 0.
 *
 * @param words      not used by this stub
 * @param docLengths receives the enclosing test's {@code docLength} under key 0
 * @return a map with one entry (key 0) holding one position list per entry of
 *         {@code positions}; the list is {@code null} where the entry is
 *         {@code null} or empty
 */
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    IntObjectOpenHashMap<IntArrayList[]> positionsInDocuments = new IntObjectOpenHashMap<IntArrayList[]>();
    IntArrayList[] positionsInDocument = new IntArrayList[positions.length];
    for (int i = 0; i < positionsInDocument.length; ++i) {
        // A null entry is handled like an empty one (no position list is created)
        // instead of failing with a NullPointerException.
        if ((positions[i] != null) && (positions[i].length > 0)) {
            positionsInDocument[i] = new IntArrayList();
            positionsInDocument[i].add(positions[i]);
        }
    }
    positionsInDocuments.put(0, positionsInDocument);
    docLengths.put(0, docLength);
    return positionsInDocuments;
}
Example #9
Source File: ContextWindowFrequencyDeterminerCountingTest.java From Palmetto with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Test stub that reports the enclosing test's {@code positions} as the content
 * of one single document with the id 0.
 *
 * @param words      not used by this stub
 * @param docLengths receives the enclosing test's {@code docLength} under key 0
 * @return a map with one entry (key 0) holding one position list per entry of
 *         {@code positions}; the list is {@code null} where the entry is
 *         {@code null} or empty
 */
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    final IntArrayList[] singleDocument = new IntArrayList[positions.length];
    for (int word = 0; word < singleDocument.length; ++word) {
        // Entries without any position keep a null list.
        if ((positions[word] == null) || (positions[word].length <= 0)) {
            continue;
        }
        singleDocument[word] = new IntArrayList();
        singleDocument[word].add(positions[word]);
    }

    final IntObjectOpenHashMap<IntArrayList[]> result = new IntObjectOpenHashMap<IntArrayList[]>();
    result.put(0, singleDocument);
    docLengths.put(0, docLength);
    return result;
}
Example #10
Source File: BooleanSlidingWindowFrequencyDeterminerCountingTest.java From Palmetto with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Test stub: wraps the enclosing test's {@code positions} into the result
 * format, as the only document (id 0) of the corpus.
 *
 * @param words      not used by this stub
 * @param docLengths receives the enclosing test's {@code docLength} under key 0
 * @return a map with one entry (key 0); its array holds a position list for
 *         every non-null, non-empty entry of {@code positions} and
 *         {@code null} for all other entries
 */
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    final IntObjectOpenHashMap<IntArrayList[]> byDocument = new IntObjectOpenHashMap<IntArrayList[]>();
    final IntArrayList[] listsOfDocument = new IntArrayList[positions.length];

    int index = 0;
    while (index < listsOfDocument.length) {
        final boolean hasPositions = (positions[index] != null) && (positions[index].length > 0);
        if (hasPositions) {
            listsOfDocument[index] = new IntArrayList();
            listsOfDocument[index].add(positions[index]);
        }
        ++index;
    }

    byDocument.put(0, listsOfDocument);
    docLengths.put(0, docLength);
    return byDocument;
}
Example #11
Source File: NShortestPaths.java From jopenfst with MIT License | 4 votes |
/**
 * Computes, for every state, the shortest distance to the final state by
 * running a queue-based relaxation from the start state of the reversed fst.
 *
 * <p>See: M. Mohri, "Semiring Framework and Algorithms for Shortest-Distance
 * Problems", Journal of Automata, Languages and Combinatorics, 7(3),
 * pp. 321-350, 2002.
 *
 * @param fst the fst to calculate the shortest distances for
 * @return the array containing the shortest distances, indexed by state id
 */
private static double[] shortestDistance(Fst fst) {
  final Fst reversed = Reverse.reverse(fst);
  final int stateCount = reversed.getStateCount();

  // d[q]: current distance estimate of state q (this is the result).
  // r[q]: weight added to d[q] since q was last taken from the queue.
  final double[] d = new double[stateCount];
  final double[] r = new double[stateCount];
  final Semiring semiring = reversed.getSemiring();
  for (int q = 0; q < d.length; q++) {
    d[q] = semiring.zero();
    r[q] = semiring.zero();
  }

  final IntObjectOpenHashMap<State> statesById = new IntObjectOpenHashMap<>();
  final Deque<Integer> pending = new LinkedList<>();
  final IntOpenHashSet pendingIds = new IntOpenHashSet();

  final State start = reversed.getStartState();
  final int startId = start.getId();
  pending.addLast(startId);
  statesById.put(startId, start);
  d[startId] = semiring.one();
  r[startId] = semiring.one();

  while (!pending.isEmpty()) {
    final int currentId = pending.removeFirst();
    pendingIds.remove(currentId);

    final State current = statesById.get(currentId);
    final int currentIndex = current.getId();
    final double carried = r[currentIndex];
    r[currentIndex] = semiring.zero();

    for (int a = 0; a < current.getArcCount(); a++) {
      final Arc arc = current.getArc(a);
      final State target = arc.getNextState();
      final int targetId = target.getId();

      final double before = d[targetId];
      final double after = semiring.plus(before, semiring.times(carried, arc.getWeight()));
      if (before == after) {
        // The arc does not change the estimate of its target; nothing to propagate.
        continue;
      }

      d[targetId] = after;
      r[targetId] = semiring.plus(r[targetId], semiring.times(carried, arc.getWeight()));

      if (pendingIds.contains(targetId)) {
        continue;
      }
      pending.addLast(targetId);
      pendingIds.add(targetId);
      statesById.put(targetId, target);
    }
  }
  return d;
}
Example #12
Source File: AbstractSymbolTable.java From jopenfst with MIT License | 4 votes |
/**
 * Creates an empty symbol table: both lookup maps (id to symbol and symbol to
 * id) start out empty with their default capacity.
 */
protected AbstractSymbolTable() {
  this.idToSymbol = new IntObjectOpenHashMap<>();
  this.symbolToId = new ObjectIntOpenHashMap<>();
}
Example #13
Source File: WindowSupportingLuceneCorpusAdapter.java From Palmetto with GNU Affero General Public License v3.0 | 4 votes |
protected void requestDocumentsWithWord(String word, IntObjectOpenHashMap<IntArrayList[]> positionsInDocs, IntIntOpenHashMap docLengths, int wordId, int numberOfWords) { DocsAndPositionsEnum docPosEnum = null; Term term = new Term(fieldName, word); int localDocId, globalDocId, baseDocId; IntArrayList positions[]; try { for (int i = 0; i < reader.length; i++) { docPosEnum = reader[i].termPositionsEnum(term); baseDocId = contexts[i].docBase; if (docPosEnum != null) { while (docPosEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) { localDocId = docPosEnum.docID(); globalDocId = localDocId + baseDocId; // if this is the first word and we found a new document if (!positionsInDocs.containsKey(globalDocId)) { positions = new IntArrayList[numberOfWords]; positionsInDocs.put(globalDocId, positions); } else { positions = positionsInDocs.get(globalDocId); } if (positions[wordId] == null) { positions[wordId] = new IntArrayList(); } // Go through the positions inside this document for (int p = 0; p < docPosEnum.freq(); ++p) { positions[wordId].add(docPosEnum.nextPosition()); } if (!docLengths.containsKey(globalDocId)) { // Get the length of the document docLengths.put(globalDocId, reader[i].document(localDocId).getField(docLengthFieldName) .numericValue().intValue()); } } } } } catch (IOException e) { LOGGER.error("Error while requesting documents for word \"" + word + "\".", e); } }
Example #14
Source File: PositionStoringLuceneIndexCreatorTest.java From Palmetto with GNU Affero General Public License v3.0 | 4 votes |
@Test public void test() throws CorruptIndexException, IOException { File indexDir = new File( FileUtils.getTempDirectoryPath() + File.separator + "temp_index" + Long.toString(System.nanoTime())); Assert.assertTrue(indexDir.mkdir()); Iterator<IndexableDocument> docIterator = Arrays.asList(DOCUMENTS).iterator(); // create the index PositionStoringLuceneIndexCreator creator = new PositionStoringLuceneIndexCreator( Palmetto.DEFAULT_TEXT_INDEX_FIELD_NAME, Palmetto.DEFAULT_DOCUMENT_LENGTH_INDEX_FIELD_NAME); Assert.assertTrue(creator.createIndex(indexDir, docIterator)); LuceneIndexHistogramCreator hCreator = new LuceneIndexHistogramCreator( Palmetto.DEFAULT_DOCUMENT_LENGTH_INDEX_FIELD_NAME); hCreator.createLuceneIndexHistogram(indexDir.getAbsolutePath()); // test the created index // create an adapter WindowSupportingLuceneCorpusAdapter adapter = null; try { adapter = WindowSupportingLuceneCorpusAdapter.create(indexDir.getAbsolutePath(), Palmetto.DEFAULT_TEXT_INDEX_FIELD_NAME, Palmetto.DEFAULT_DOCUMENT_LENGTH_INDEX_FIELD_NAME); // query the test words IntIntOpenHashMap docLengths = new IntIntOpenHashMap(); IntObjectOpenHashMap<IntArrayList[]> wordPositions = adapter.requestWordPositionsInDocuments(TEST_WORDS, docLengths); // compare the result with the expected counts int positionInDoc; IntArrayList[] positionsInDocs; for (int i = 0; i < EXPECTED_WORD_POSITIONS.length; ++i) { positionsInDocs = wordPositions.get(i); for (int j = 0; j < positionsInDocs.length; ++j) { if (EXPECTED_WORD_POSITIONS[i][j] < 0) { Assert.assertNull("Expected null because the word \"" + TEST_WORDS[j] + "\" shouldn't be found inside document " + i + ". 
But got a position list instead.", positionsInDocs[j]); } else { Assert.assertEquals(1, positionsInDocs[j].elementsCount); positionInDoc = positionsInDocs[j].buffer[0]; Assert.assertEquals("Expected the word \"" + TEST_WORDS[j] + "\" in document " + i + " at position " + EXPECTED_WORD_POSITIONS[i][j] + " but got position " + positionInDoc + " form the index.", EXPECTED_WORD_POSITIONS[i][j], positionInDoc); } } } // test the window based counting BooleanSlidingWindowFrequencyDeterminer determiner = new BooleanSlidingWindowFrequencyDeterminer(adapter, WINDOW_SIZE); CountedSubsets subsets = determiner.determineCounts(new String[][] { TEST_WORDS }, new SegmentationDefinition[] { new SegmentationDefinition(new int[0], new int[0][0], null) })[0]; Assert.assertArrayEquals(EXPECTED_COUNTS, subsets.counts); } finally { if (adapter != null) { adapter.close(); } } }
Example #15
Source File: BooleanSlidingWindowFrequencyDeterminerSumCreationTest.java From Palmetto with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Stub implementation that does not provide any word positions.
 *
 * @param words      ignored
 * @param docLengths ignored; nothing is inserted
 * @return always {@code null}
 */
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    // NOTE(review): presumably the enclosing test never reads word positions - confirm.
    return null;
}
Example #16
Source File: WindowSupportingAdapter.java From Palmetto with GNU Affero General Public License v3.0 | 2 votes |
/**
 * Returns the positions of the given words inside the corpus.
 *
 * @param words
 *            the words for which the positions inside the documents should
 *            be determined
 * @param docLengths
 *            empty int-to-int map into which the document lengths and counts
 *            are inserted (presumably keyed by document id - confirm against
 *            the implementations)
 * @return the positions of the given words inside the corpus (presumably
 *         keyed by document id, with one position list per word in the
 *         order of {@code words} - confirm against the implementations)
 */
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String words[],
        IntIntOpenHashMap docLengths);