com.carrotsearch.hppc.IntObjectOpenHashMap Java Examples

Source File: AbstractSymbolTable.java From jopenfst with MIT License

5 votes

protected AbstractSymbolTable(SymbolTable copyFrom) {

    this.symbolToId = new ObjectIntOpenHashMap<>(copyFrom.size());
    this.idToSymbol = new IntObjectOpenHashMap<>(copyFrom.size());
    for (ObjectIntCursor<String> cursor : copyFrom) {
      symbolToId.put(cursor.key, cursor.value);
      idToSymbol.put(cursor.value, cursor.key);
    }
  }

Source File: AbstractVectorBasedCalculation.java From Palmetto with GNU Affero General Public License v3.0

5 votes

/**
 * Calculates one confirmation value for every (segment, condition) pair of the given subset vectors.
 * The values are written segment by segment, in the order of the conditions of each segment.
 *
 * @param subsetVectors the segments, their conditions and the vectors of the single words
 * @return the similarities of all (segment, condition) pairs
 */
@Override
public double[] calculateConfirmationValues(SubsetVectors subsetVectors) {
    // The result needs one slot per (segment, condition) pair.
    int numberOfPairs = 0;
    for (int s = 0; s < subsetVectors.segments.length; ++s) {
        numberOfPairs += subsetVectors.conditions[s].length;
    }
    double[] results = new double[numberOfPairs];

    // Seed the cache with the given vectors; vector w is stored under the key 1 << w.
    IntObjectOpenHashMap<double[]> cache = new IntObjectOpenHashMap<double[]>();
    for (int w = 0; w < subsetVectors.vectors.length; ++w) {
        cache.put(1 << w, subsetVectors.vectors[w]);
    }

    int next = 0;
    for (int s = 0; s < subsetVectors.segments.length; ++s) {
        int segment = subsetVectors.segments[s];
        double[] segmentVector;
        if (!cache.containsKey(segment)) {
            segmentVector = createVector(segment, subsetVectors.vectors);
            cache.put(segment, segmentVector);
        } else {
            // lget() hands back the value located by the containsKey() call above
            segmentVector = cache.lget();
        }

        for (int c = 0; c < subsetVectors.conditions[s].length; ++c) {
            int condition = subsetVectors.conditions[s][c];
            double[] conditionVector;
            if (!cache.containsKey(condition)) {
                conditionVector = createVector(condition, subsetVectors.vectors);
                cache.put(condition, conditionVector);
            } else {
                conditionVector = cache.lget();
            }
            results[next++] = calculateSimilarity(segmentVector, conditionVector);
        }
    }
    return results;
}

Source File: CentroidConfirmationMeasure.java From Palmetto with GNU Affero General Public License v3.0

5 votes

/**
 * Calculates one confirmation value per segment: the similarity between the vector of the segment
 * and the sum of all given vectors.
 *
 * @param subsetVectors the segments and the vectors of the single words
 * @return the similarity of every segment vector to the summed vector
 */
@Override
public double[] calculateConfirmationValues(SubsetVectors subsetVectors) {
    double[] results = new double[subsetVectors.segments.length];

    // The "centroid" is the plain element-wise sum of the vectors; the division by the number of
    // vectors is disabled in this implementation.
    double[] centroid = new double[subsetVectors.vectors[0].length];
    IntObjectOpenHashMap<double[]> cache = new IntObjectOpenHashMap<double[]>();
    for (int w = 0; w < subsetVectors.vectors.length; ++w) {
        double[] wordVector = subsetVectors.vectors[w];
        // vector w is cached under the key 1 << w
        cache.put(1 << w, wordVector);
        for (int d = 0; d < centroid.length; d++) {
            centroid[d] += wordVector[d];
        }
    }

    for (int s = 0; s < subsetVectors.segments.length; ++s) {
        int segment = subsetVectors.segments[s];
        double[] segmentVector;
        if (!cache.containsKey(segment)) {
            segmentVector = createVector(segment, subsetVectors.vectors);
            cache.put(segment, segmentVector);
        } else {
            // lget() hands back the value located by the containsKey() call above
            segmentVector = cache.lget();
        }
        results[s] = calculateSimilarity(segmentVector, centroid);
    }
    return results;
}

Source File: WindowSupportingLuceneCorpusAdapter.java From Palmetto with GNU Affero General Public License v3.0

5 votes

/**
 * Collects the positions of all given words by querying the index once per word.
 *
 * @param words the words to look up; the index of a word in this array is used as its word id
 * @param docLengths map that is handed on to the per-word lookup together with the result map
 * @return the map filled by the per-word lookups
 */
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    final IntObjectOpenHashMap<IntArrayList[]> result = new IntObjectOpenHashMap<IntArrayList[]>();
    int wordId = 0;
    while (wordId < words.length) {
        requestDocumentsWithWord(words[wordId], result, docLengths, wordId, words.length);
        ++wordId;
    }
    return result;
}

Source File: AbstractWindowBasedFrequencyDeterminer.java From Palmetto with GNU Affero General Public License v3.0

5 votes

/**
 * Determines the counts for the given word set by requesting the word positions from the corpus
 * adapter and accumulating the counts of every returned document.
 *
 * @param wordset the words for which the counts are determined
 * @return an array with 2^(number of words) count slots
 */
protected int[] determineCounts(String wordset[]) {
    final int[] counts = new int[1 << wordset.length];
    final IntIntOpenHashMap docLengths = new IntIntOpenHashMap();
    final IntObjectOpenHashMap<IntArrayList[]> positionsInDocs = corpusAdapter
            .requestWordPositionsInDocuments(wordset, docLengths);

    // Walk the raw slots of the hash map; only slots flagged as allocated hold an entry.
    for (int slot = 0; slot < positionsInDocs.keys.length; ++slot) {
        if (!positionsInDocs.allocated[slot]) {
            continue;
        }
        final int docId = positionsInDocs.keys[slot];
        final IntArrayList[] positions = (IntArrayList[]) ((Object[]) positionsInDocs.values)[slot];
        addCountsFromDocument(positions, counts, docLengths.get(docId));
    }
    return counts;
}

Source File: AbstractSegmentatorTest.java From Palmetto with GNU Affero General Public License v3.0

5 votes

/**
 * Checks that the given segmentator creates the expected segments and conditions for a word set of
 * the given size.
 *
 * @param wordsetSize the size of the word set handed to the segmentator
 * @param subsetCreator the segmentator under test
 * @param expectedSegments the expected segments
 * @param expectedConditions the expected conditions of the expected segments
 */
public void testSubsetCreator(int wordsetSize, Segmentator subsetCreator,
            int expectedSegments[], int expectedConditions[][]) {
        // Turn the expected arrays into the lookup structure used for the comparison.
        final IntObjectOpenHashMap<IntOpenHashSet> expectedMapping = new IntObjectOpenHashMap<IntOpenHashSet>();
        final BitSet expectedCounts = new BitSet();
        createSets(expectedSegments, expectedConditions, expectedMapping, expectedCounts);

        // expectedCounts is passed to createSets but is not part of the comparison at the moment.
        final SegmentationDefinition actual = subsetCreator.getSubsetDefinition(wordsetSize);
        compare(actual, expectedMapping);
    }

Source File: AbstractSegmentatorTest.java From Palmetto with GNU Affero General Public License v3.0

5 votes

/**
 * Asserts that the given definition matches the expected mapping: the number of segments is equal,
 * every segment is expected, every segment has the expected number of conditions and every
 * condition of a segment is contained in the expected condition set of that segment.
 *
 * @param definition the segmentation definition created by the segmentator under test
 * @param segmentToConditionMapping the expected condition sets, keyed by segment
 */
private void compare(SegmentationDefinition definition,
                     IntObjectOpenHashMap<IntOpenHashSet> segmentToConditionMapping) {
    IntOpenHashSet conditionSet;

    Assert.assertEquals(segmentToConditionMapping.size(),
            definition.segments.length);
    for (int i = 0; i < definition.segments.length; i++) {
        Assert.assertTrue("got unexpected segment "
                + definition.segments[i], segmentToConditionMapping
                .containsKey(definition.segments[i]));
        conditionSet = segmentToConditionMapping
                .get(definition.segments[i]);
        // The size check has to run once per segment and outside of the condition loop. Inside the
        // loop it would be skipped for a segment without any conditions, so missing conditions
        // would go unnoticed.
        Assert.assertEquals(
                "expected " + conditionSet.size() + " conditions "
                        + conditionSet.toString()
                        + " for segment ["
                        + definition.segments[i] + "] but got " + definition.conditions[i].length + " "
                        + Arrays.toString(definition.conditions[i]) + ".",
                conditionSet.size(),
                definition.conditions[i].length);
        for (int j = 0; j < definition.conditions[i].length; ++j) {
            Assert.assertTrue("got unexpected condition "
                    + definition.conditions[i][j] + " for segment "
                    + definition.segments[i],
                    conditionSet.contains(definition.conditions[i][j]));
        }
    }
    // The needed counts of the definition are not compared here.
}

Source File: BooleanSlidingWindowProbabilitySupplierTest.java From Palmetto with GNU Affero General Public License v3.0

5 votes

/**
 * Test stub: reports a single document with the id 0 that contains the word positions stored in
 * the {@code positions} field of the test.
 *
 * @param words not used by this stub
 * @param docLengths receives the length of document 0
 * @return a map with exactly one entry (document 0) holding one position list per word; the entry
 *         of a word without positions stays {@code null}
 */
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    IntObjectOpenHashMap<IntArrayList[]> positionsInDocuments = new IntObjectOpenHashMap<IntArrayList[]>();
    IntArrayList[] positionsInDocument = new IntArrayList[positions.length];
    for (int i = 0; i < positionsInDocument.length; ++i) {
        // A missing (null) position array is treated like an empty one instead of failing with a
        // NullPointerException.
        if ((positions[i] != null) && (positions[i].length > 0)) {
            positionsInDocument[i] = new IntArrayList();
            positionsInDocument[i].add(positions[i]);
        }
    }
    positionsInDocuments.put(0, positionsInDocument);
    docLengths.put(0, docLength);
    return positionsInDocuments;
}

Source File: ContextWindowFrequencyDeterminerCountingTest.java From Palmetto with GNU Affero General Public License v3.0

5 votes

/**
 * Test stub: reports a single document with the id 0 that contains the word positions stored in
 * the {@code positions} field of the test.
 *
 * @param words not used by this stub
 * @param docLengths receives the length of document 0
 * @return a map with exactly one entry (document 0); words without positions keep a {@code null}
 *         entry
 */
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    IntObjectOpenHashMap<IntArrayList[]> result = new IntObjectOpenHashMap<IntArrayList[]>();
    IntArrayList[] positionsOfDocument = new IntArrayList[positions.length];
    for (int wordId = 0; wordId < positionsOfDocument.length; ++wordId) {
        // skip words that have no positions at all
        if ((positions[wordId] == null) || (positions[wordId].length <= 0)) {
            continue;
        }
        IntArrayList positionList = new IntArrayList();
        positionList.add(positions[wordId]);
        positionsOfDocument[wordId] = positionList;
    }
    result.put(0, positionsOfDocument);
    docLengths.put(0, docLength);
    return result;
}

Source File: BooleanSlidingWindowFrequencyDeterminerCountingTest.java From Palmetto with GNU Affero General Public License v3.0

5 votes

/**
 * Test stub: answers every request with one document (id 0) built from the {@code positions} and
 * {@code docLength} fields of the test.
 *
 * @param words not used by this stub
 * @param docLengths receives the length of document 0
 * @return a map with exactly one entry (document 0); words without positions keep a {@code null}
 *         entry
 */
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    final IntObjectOpenHashMap<IntArrayList[]> singleDocument = new IntObjectOpenHashMap<IntArrayList[]>();
    final IntArrayList[] perWord = new IntArrayList[positions.length];
    int w = 0;
    while (w < perWord.length) {
        final boolean hasPositions = (positions[w] != null) && (positions[w].length > 0);
        if (hasPositions) {
            perWord[w] = new IntArrayList();
            perWord[w].add(positions[w]);
        }
        ++w;
    }
    singleDocument.put(0, perWord);
    docLengths.put(0, docLength);
    return singleDocument;
}

Source File: NShortestPaths.java From jopenfst with MIT License

4 votes

/**
 * Calculates the shortest distances from each state to the final.
 *
 * The distances are computed on the reversed fst with a queue based relaxation, using the semiring
 * of the reversed fst. The returned array is indexed by the state ids of the reversed fst.
 *
 * See: M. Mohri, "Semiring Framework and Algorithms for Shortest-Distance Problems", Journal of Automata, Languages
 * and Combinatorics, 7(3), pp. 321-350, 2002.
 *
 * @param fst the fst to calculate the shortest distances
 * @return the array containing the shortest distances
 */
private static double[] shortestDistance(Fst fst) {

  final Fst reversed = Reverse.reverse(fst);
  final int stateCount = reversed.getStateCount();

  // d holds the current distance estimates, r the weight added to a state since it was last
  // taken from the queue.
  final double[] d = new double[stateCount];
  final double[] r = new double[stateCount];

  final Semiring semiring = reversed.getSemiring();

  for (int s = 0; s < d.length; s++) {
    d[s] = semiring.zero();
    r[s] = semiring.zero();
  }

  final IntObjectOpenHashMap<State> statesById = new IntObjectOpenHashMap<>();
  final Deque<Integer> pending = new LinkedList<>();
  final IntOpenHashSet pendingIds = new IntOpenHashSet();

  final State start = reversed.getStartState();
  final int startId = start.getId();
  pending.addLast(startId);
  statesById.put(startId, start);

  d[startId] = semiring.one();
  r[startId] = semiring.one();

  while (!pending.isEmpty()) {
    final int dequeuedId = pending.removeFirst();
    pendingIds.remove(dequeuedId);
    final State current = statesById.get(dequeuedId);
    final int currentId = current.getId();
    final double rCurrent = r[currentId];
    r[currentId] = semiring.zero();

    for (int a = 0; a < current.getArcCount(); a++) {
      final Arc arc = current.getArc(a);
      final State target = arc.getNextState();
      final int targetId = target.getId();
      final double dOld = d[targetId];
      final double dNew = semiring.plus(dOld, semiring.times(rCurrent, arc.getWeight()));
      if (dOld == dNew) {
        // the arc does not change the distance of its target, nothing to propagate
        continue;
      }
      d[targetId] = dNew;
      r[targetId] = semiring.plus(r[targetId], semiring.times(rCurrent, arc.getWeight()));
      // a state is queued at most once at a time
      if (!pendingIds.contains(targetId)) {
        pending.addLast(targetId);
        pendingIds.add(targetId);
        statesById.put(targetId, target);
      }
    }
  }
  return d;
}

Source File: AbstractSymbolTable.java From jopenfst with MIT License

4 votes

/**
 * Creates a symbol table without any entries; both lookup maps start empty.
 */
protected AbstractSymbolTable() {
  this.idToSymbol = new IntObjectOpenHashMap<>();
  this.symbolToId = new ObjectIntOpenHashMap<>();
}

Source File: WindowSupportingLuceneCorpusAdapter.java From Palmetto with GNU Affero General Public License v3.0

4 votes

/**
 * Looks up the given word in all index readers and adds its positions to the given result map.
 * For every document containing the word, the position list at index {@code wordId} of the
 * document's array is extended and the document length is stored if it is not known yet.
 * An {@link IOException} is logged and ends the lookup; it is not rethrown.
 *
 * @param word the word to look up
 * @param positionsInDocs result map from the global document id to one position list per word
 * @param docLengths result map from the global document id to the document length
 * @param wordId the index of the word inside the per-document arrays
 * @param numberOfWords the size of a newly created per-document array
 */
protected void requestDocumentsWithWord(String word, IntObjectOpenHashMap<IntArrayList[]> positionsInDocs,
        IntIntOpenHashMap docLengths, int wordId, int numberOfWords) {
    final Term term = new Term(fieldName, word);
    try {
        for (int r = 0; r < reader.length; r++) {
            final DocsAndPositionsEnum postings = reader[r].termPositionsEnum(term);
            final int docBase = contexts[r].docBase;
            if (postings == null) {
                // no postings for this term in this reader
                continue;
            }
            while (postings.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                final int localDocId = postings.docID();
                final int globalDocId = localDocId + docBase;

                // Fetch the array of this document or create it if the document is new.
                final IntArrayList[] positions;
                if (positionsInDocs.containsKey(globalDocId)) {
                    positions = positionsInDocs.get(globalDocId);
                } else {
                    positions = new IntArrayList[numberOfWords];
                    positionsInDocs.put(globalDocId, positions);
                }
                if (positions[wordId] == null) {
                    positions[wordId] = new IntArrayList();
                }

                // Copy all positions of the word inside this document.
                for (int p = 0; p < postings.freq(); ++p) {
                    positions[wordId].add(postings.nextPosition());
                }

                // The document length is read from the stored field only once per document.
                if (!docLengths.containsKey(globalDocId)) {
                    docLengths.put(globalDocId, reader[r].document(localDocId).getField(docLengthFieldName)
                            .numericValue().intValue());
                }
            }
        }
    } catch (IOException e) {
        LOGGER.error("Error while requesting documents for word \"" + word + "\".", e);
    }
}

Source File: PositionStoringLuceneIndexCreatorTest.java From Palmetto with GNU Affero General Public License v3.0

4 votes

/**
 * Creates a position storing Lucene index and its histogram in a fresh temporary directory, reads
 * the index through a {@link WindowSupportingLuceneCorpusAdapter} and checks (1) the word positions
 * returned for the test words and (2) the counts determined with a boolean sliding window.
 *
 * NOTE(review): the temporary index directory is not removed at the end of the test.
 */
@Test
public void test() throws CorruptIndexException, IOException {
    // The nano time suffix gives every run its own index directory.
    File indexDir = new File(
            FileUtils.getTempDirectoryPath() + File.separator + "temp_index" + Long.toString(System.nanoTime()));
    Assert.assertTrue(indexDir.mkdir());
    Iterator<IndexableDocument> docIterator = Arrays.asList(DOCUMENTS).iterator();
    // create the index
    PositionStoringLuceneIndexCreator creator = new PositionStoringLuceneIndexCreator(
            Palmetto.DEFAULT_TEXT_INDEX_FIELD_NAME, Palmetto.DEFAULT_DOCUMENT_LENGTH_INDEX_FIELD_NAME);
    Assert.assertTrue(creator.createIndex(indexDir, docIterator));
    // create the histogram of the document lengths for the index
    LuceneIndexHistogramCreator hCreator = new LuceneIndexHistogramCreator(
            Palmetto.DEFAULT_DOCUMENT_LENGTH_INDEX_FIELD_NAME);
    hCreator.createLuceneIndexHistogram(indexDir.getAbsolutePath());

    // test the created index
    // create an adapter
    WindowSupportingLuceneCorpusAdapter adapter = null;
    try {
        adapter = WindowSupportingLuceneCorpusAdapter.create(indexDir.getAbsolutePath(),
                Palmetto.DEFAULT_TEXT_INDEX_FIELD_NAME, Palmetto.DEFAULT_DOCUMENT_LENGTH_INDEX_FIELD_NAME);
        // query the test words
        IntIntOpenHashMap docLengths = new IntIntOpenHashMap();
        IntObjectOpenHashMap<IntArrayList[]> wordPositions = adapter.requestWordPositionsInDocuments(TEST_WORDS,
                docLengths);
        // compare the result with the expected counts
        int positionInDoc;
        IntArrayList[] positionsInDocs;
        // i is used as document id, j as index of the word in TEST_WORDS
        for (int i = 0; i < EXPECTED_WORD_POSITIONS.length; ++i) {
            positionsInDocs = wordPositions.get(i);
            for (int j = 0; j < positionsInDocs.length; ++j) {
                // a negative expected position marks a word that is not part of the document
                if (EXPECTED_WORD_POSITIONS[i][j] < 0) {
                    Assert.assertNull("Expected null because the word \"" + TEST_WORDS[j]
                            + "\" shouldn't be found inside document " + i + ". But got a position list instead.",
                            positionsInDocs[j]);
                } else {
                    // every expected word has to occur exactly once in the document
                    Assert.assertEquals(1, positionsInDocs[j].elementsCount);
                    positionInDoc = positionsInDocs[j].buffer[0];
                    Assert.assertEquals("Expected the word \"" + TEST_WORDS[j] + "\" in document " + i
                            + " at position " + EXPECTED_WORD_POSITIONS[i][j] + " but got position " + positionInDoc
                            + " form the index.", EXPECTED_WORD_POSITIONS[i][j], positionInDoc);
                }
            }
        }

        // test the window based counting
        BooleanSlidingWindowFrequencyDeterminer determiner = new BooleanSlidingWindowFrequencyDeterminer(adapter,
                WINDOW_SIZE);
        CountedSubsets subsets = determiner.determineCounts(new String[][] { TEST_WORDS },
                new SegmentationDefinition[] { new SegmentationDefinition(new int[0], new int[0][0], null) })[0];
        Assert.assertArrayEquals(EXPECTED_COUNTS, subsets.counts);
    } finally {
        // close the adapter even if one of the assertions failed
        if (adapter != null) {
            adapter.close();
        }
    }
}

Source File: BooleanSlidingWindowFrequencyDeterminerSumCreationTest.java From Palmetto with GNU Affero General Public License v3.0

4 votes

/**
 * Stub implementation that does not provide any word positions.
 *
 * @param words ignored
 * @param docLengths ignored; nothing is inserted
 * @return always {@code null}
 */
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    // NOTE(review): presumably this method is never called by the tests of this class - a caller
    // that uses the result would fail with a NullPointerException. Confirm.
    return null;
}

Source File: WindowSupportingAdapter.java From Palmetto with GNU Affero General Public License v3.0

2 votes

/**
 * Returns the positions of the given words inside the documents of the corpus.
 * 
 * @param words
 *            the words for which the positions inside the documents should
 *            be determined
 * @param docLengths
 *            empty int int map in which the document lengths and counts are
 *            inserted (NOTE(review): presumably keyed by document id -
 *            confirm against the implementations)
 * @return the positions of the given words inside the corpus as a map from
 *         document id to an array of position lists (NOTE(review):
 *         presumably one list per word, in the order of {@code words},
 *         with {@code null} for words missing in the document - confirm
 *         against the implementations)
 */
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String words[],
        IntIntOpenHashMap docLengths);

com.carrotsearch.hppc.IntObjectOpenHashMap Java Examples