com.carrotsearch.hppc.ObjectObjectOpenHashMap Java Examples

Source File: BitSetBasedBooleanDocumentFrequencyDeterminer.java From Palmetto with GNU Affero General Public License v3.0

6 votes

/**
 * Determines the counted subsets for the given word sets using bit sets.
 * <p>
 * Every distinct word of all word sets is registered once in a shared
 * word-to-documents map, the corpus adapter fills that map, and afterwards
 * one {@link CountedSubsets} object is created per segmentation definition.
 *
 * @param wordsets
 *            the word sets; {@code wordsets[i]} is combined with
 *            {@code definitions[i]}
 * @param definitions
 *            the segmentation definitions, one per word set
 * @return an array with one {@link CountedSubsets} per definition
 */
public CountedSubsets[] determineCounts(String[][] wordsets,
        SegmentationDefinition[] definitions) {
    ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping = new ObjectObjectOpenHashMap<String, IntOpenHashSet>();
    // Register every distinct word with an empty document set.
    for (String[] wordset : wordsets) {
        for (String word : wordset) {
            if (!wordDocMapping.containsKey(word)) {
                wordDocMapping.put(word, new IntOpenHashSet());
            }
        }
    }

    // Let the corpus adapter fill the document sets of all words at once.
    corpusAdapter.getDocumentsWithWordsAsSet(wordDocMapping);

    CountedSubsets[] result = new CountedSubsets[definitions.length];
    for (int i = 0; i < result.length; ++i) {
        SegmentationDefinition definition = definitions[i];
        result[i] = new CountedSubsets(definition.segments, definition.conditions,
                createCounts(createBitSets(wordDocMapping, wordsets[i])));
    }
    return result;
}

Source File: ListBasedBooleanDocumentFrequencyDeterminer.java From Palmetto with GNU Affero General Public License v3.0

6 votes

/**
 * Determines the counted subsets for the given word sets using sorted
 * document id lists.
 * <p>
 * Every distinct word of all word sets is registered once in a shared
 * word-to-documents map, the corpus adapter fills that map, and afterwards
 * the counts of every word set are computed and wrapped into one
 * {@link CountedSubsets} object per segmentation definition.
 *
 * @param wordsets
 *            the word sets; {@code wordsets[i]} is combined with
 *            {@code definitions[i]}
 * @param definitions
 *            the segmentation definitions, one per word set
 * @return an array with one {@link CountedSubsets} per definition
 */
public CountedSubsets[] determineCounts(String[][] wordsets,
        SegmentationDefinition[] definitions) {
    ObjectObjectOpenHashMap<String, IntArrayList> wordDocMapping = new ObjectObjectOpenHashMap<String, IntArrayList>();
    // Register every distinct word with an empty document list.
    for (String[] wordset : wordsets) {
        for (String word : wordset) {
            if (!wordDocMapping.containsKey(word)) {
                wordDocMapping.put(word, new IntArrayList());
            }
        }
    }

    // Let the corpus adapter fill the document lists of all words at once.
    corpusAdapter.getDocumentsWithWords(wordDocMapping);

    CountedSubsets[] result = new CountedSubsets[definitions.length];
    for (int i = 0; i < result.length; ++i) {
        int[] subsetCounts = createCounts(wordDocMapping, wordsets[i]);
        addCountsOfSubsets(subsetCounts);
        SegmentationDefinition definition = definitions[i];
        result[i] = new CountedSubsets(definition.segments, definition.conditions, subsetCounts);
    }
    return result;
}

Source File: SpotlightClient.java From gerbil with GNU Affero General Public License v3.0

5 votes

/**
 * Creates a client for the given service URL.
 * <p>
 * The URL is stored with a trailing slash (one is appended if missing) and
 * the type-prefix-to-URI lookup is filled from
 * {@code TYPE_PREFIX_URI_MAPPING}, whose rows are {@code {prefix, uri}}
 * pairs.
 *
 * @param serviceURL
 *            the URL of the service, with or without a trailing slash
 * @param annotator
 *            the annotator this client belongs to
 */
public SpotlightClient(String serviceURL, SpotlightAnnotator annotator) {
    if (serviceURL.endsWith("/")) {
        this.serviceURL = serviceURL;
    } else {
        this.serviceURL = serviceURL + "/";
    }
    this.annotator = annotator;
    typePrefixToUriMapping = new ObjectObjectOpenHashMap<String, String>();
    for (String[] prefixAndUri : TYPE_PREFIX_URI_MAPPING) {
        typePrefixToUriMapping.put(prefixAndUri[0], prefixAndUri[1]);
    }
}

Source File: LuceneCorpusAdapter.java From Palmetto with GNU Affero General Public License v3.0

5 votes

/**
 * Calls {@code getDocumentsWithWordAsSet} once for every entry of the given
 * map, passing the word (key) and the set stored as its value.
 * <p>
 * The map's internal slot arrays are walked directly; only slots flagged
 * as allocated hold an entry.
 *
 * @param wordDocMapping
 *            the mapping of words to the sets handed to
 *            {@code getDocumentsWithWordAsSet}
 */
public void getDocumentsWithWordsAsSet(ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping) {
    Object[] words = (Object[]) wordDocMapping.keys;
    Object[] documentSets = (Object[]) wordDocMapping.values;
    for (int slot = 0; slot < wordDocMapping.allocated.length; ++slot) {
        if (!wordDocMapping.allocated[slot]) {
            // Empty slot of the hash table, nothing to look up.
            continue;
        }
        getDocumentsWithWordAsSet((String) words[slot], (IntOpenHashSet) documentSets[slot]);
    }
}

Source File: LuceneCorpusAdapter.java From Palmetto with GNU Affero General Public License v3.0

5 votes

/**
 * Calls {@code getDocumentsWithWord} once for every entry of the given map,
 * passing the word (key) and the list stored as its value.
 * <p>
 * The map's internal slot arrays are walked directly; only slots flagged
 * as allocated hold an entry.
 *
 * @param wordDocMapping
 *            the mapping of words to the lists handed to
 *            {@code getDocumentsWithWord}
 */
@Override
public void getDocumentsWithWords(ObjectObjectOpenHashMap<String, IntArrayList> wordDocMapping) {
    Object[] words = (Object[]) wordDocMapping.keys;
    Object[] documentLists = (Object[]) wordDocMapping.values;
    for (int slot = 0; slot < wordDocMapping.allocated.length; ++slot) {
        if (!wordDocMapping.allocated[slot]) {
            // Empty slot of the hash table, nothing to look up.
            continue;
        }
        getDocumentsWithWord((String) words[slot], (IntArrayList) documentLists[slot]);
    }
}

Source File: BitSetBasedBooleanDocumentFrequencyDeterminer.java From Palmetto with GNU Affero General Public License v3.0

5 votes

/**
 * Looks up the document set of every word of the given word set, builds the
 * union of these sets and delegates to the array-based
 * {@code createBitSets} overload.
 *
 * @param wordDocMapping
 *            the mapping of words to the ids of the documents containing
 *            them
 * @param wordset
 *            the words for which the bit sets are created
 * @return the result of the delegate overload for the per-word sets and
 *         their union
 */
private BitSet[] createBitSets(
        ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping,
        String[] wordset) {
    IntOpenHashSet[] docsPerWord = new IntOpenHashSet[wordset.length];
    IntOpenHashSet allDocs = new IntOpenHashSet();
    int wordIndex = 0;
    for (String word : wordset) {
        IntOpenHashSet docsOfWord = wordDocMapping.get(word);
        docsPerWord[wordIndex++] = docsOfWord;
        allDocs.addAll(docsOfWord);
    }
    return createBitSets(docsPerWord, allDocs);
}

Source File: ListBasedBooleanDocumentFrequencyDeterminer.java From Palmetto with GNU Affero General Public License v3.0

5 votes

/**
 * Counts, for the given word set, how many documents fall under each
 * combination of words.
 * <p>
 * The result has {@code 2^wordset.length} entries and is indexed by a bit
 * signature: bit {@code i} of the index is set if the document id occurs in
 * the list of {@code wordset[i]}. The document lists are merged in ascending
 * id order, so every id is visited once and counted under the signature of
 * exactly those lists that contain it.
 * <p>
 * Side effect: the used part of every looked-up list's buffer is sorted in
 * place, i.e. the lists stored in {@code wordDocMapping} are modified.
 * <p>
 * NOTE(review): the merge appears to assume that a list does not contain the
 * same id twice and that no id equals {@link Integer#MAX_VALUE} (used as
 * the "no more documents" sentinel) - confirm against the adapter.
 *
 * @param wordDocMapping
 *            the mapping of words to the ids of the documents containing
 *            them; must contain a list for every word of {@code wordset}
 * @param wordset
 *            the words for which the counts are determined
 * @return the counts indexed by word signature; index 0 ends up as 0
 */
private int[] createCounts(ObjectObjectOpenHashMap<String, IntArrayList> wordDocMapping, String[] wordset) {
    int counts[] = new int[(1 << wordset.length)];
    IntArrayList wordDocuments[] = new IntArrayList[wordset.length];
    for (int i = 0; i < wordDocuments.length; ++i) {
        wordDocuments[i] = wordDocMapping.get(wordset[i]);
        // sort only the filled part of the backing array (in place)
        Arrays.sort(wordDocuments[i].buffer, 0, wordDocuments[i].elementsCount);
    }

    // read position inside each sorted list
    int posInList[] = new int[wordDocuments.length];
    int nextDocId;
    int documentSignature = 0;
    // The loop counts the signature found in the PREVIOUS iteration. The
    // first iteration has no previous document and increments counts[0], so
    // start at -1 to cancel that out.
    counts[0] = -1;
    do {
        ++counts[documentSignature];
        nextDocId = Integer.MAX_VALUE;
        // find the smallest id at the current list heads and collect the
        // lists whose head equals it
        for (int i = 0; i < wordDocuments.length; ++i) {
            if ((posInList[i] < wordDocuments[i].elementsCount)
                    && (wordDocuments[i].buffer[posInList[i]] <= nextDocId)) {
                if (wordDocuments[i].buffer[posInList[i]] < nextDocId) {
                    // new minimum: bits collected for the larger id are void
                    nextDocId = wordDocuments[i].buffer[posInList[i]];
                    documentSignature = 0;
                }
                documentSignature |= 1 << i;
            }
        }
        // advance every list that contributed to the current signature
        for (int i = 0; i < posInList.length; ++i) {
            if ((documentSignature & (1 << i)) > 0) {
                ++posInList[i];
            }
        }
        // nextDocId is still the sentinel if all lists are exhausted
    } while (nextDocId != Integer.MAX_VALUE);
    return counts;
}

Source File: SimpleLuceneIndexCreatorTest.java From Palmetto with GNU Affero General Public License v3.0

5 votes

/**
 * Creates a Lucene index from {@code DOCUMENTS} in a temporary directory and
 * checks, through a {@link LuceneCorpusAdapter} opened on that directory,
 * that every word of {@code TEST_WORDS} is found in the number of documents
 * given by the matching entry of {@code EXPECTED_DOC_COUNTS}.
 *
 * @throws CorruptIndexException
 *             declared because the index handling may throw it
 * @throws IOException
 *             declared because the index handling may throw it
 */
@Test
public void test() throws CorruptIndexException, IOException {
    File indexDir = createTempDirectory();
    Iterator<String> docIterator = Arrays.asList(DOCUMENTS).iterator();
    // create the index
    SimpleLuceneIndexCreator creator = new SimpleLuceneIndexCreator(Palmetto.DEFAULT_TEXT_INDEX_FIELD_NAME);
    Assert.assertTrue(creator.createIndex(indexDir, docIterator));

    // test the created index
    // create an adapter
    LuceneCorpusAdapter adapter = LuceneCorpusAdapter.create(indexDir.getAbsolutePath(),
            Palmetto.DEFAULT_TEXT_INDEX_FIELD_NAME);
    // query the test words
    ObjectObjectOpenHashMap<String, IntArrayList> wordDocMapping = new ObjectObjectOpenHashMap<String, IntArrayList>();
    for (int i = 0; i < TEST_WORDS.length; ++i) {
        wordDocMapping.put(TEST_WORDS[i], new IntArrayList());
    }
    adapter.getDocumentsWithWords(wordDocMapping);
    // compare the result with the expected counts
    int retrievedDocs;
    for (int i = 0; i < TEST_WORDS.length; ++i) {
        retrievedDocs = wordDocMapping.get(TEST_WORDS[i]).elementsCount;
        // failure message fixed: "form the index" -> "from the index"
        Assert.assertEquals("Expected " + EXPECTED_DOC_COUNTS[i] + " documents containing the word \""
                + TEST_WORDS[i] + "\", but got " + retrievedDocs + " documents from the index.",
                EXPECTED_DOC_COUNTS[i], retrievedDocs);
    }
}

Source File: AbstractBooleanDocumentSupportingAdapterBasedTest.java From Palmetto with GNU Affero General Public License v3.0

5 votes

/**
 * Test implementation: every key of the given map is parsed as an integer
 * index into {@code wordDocuments}, and the entry found at that index is
 * added to the set stored as the key's value.
 *
 * @param wordDocMapping
 *            the mapping whose keys are decimal indexes into
 *            {@code wordDocuments} and whose sets receive the result
 */
@Override
public void getDocumentsWithWordsAsSet(ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping) {
    Object[] words = (Object[]) wordDocMapping.keys;
    Object[] documentSets = (Object[]) wordDocMapping.values;
    for (int slot = 0; slot < wordDocMapping.allocated.length; ++slot) {
        if (!wordDocMapping.allocated[slot]) {
            // Empty slot of the hash table.
            continue;
        }
        IntOpenHashSet target = (IntOpenHashSet) documentSets[slot];
        int wordIndex = Integer.parseInt((String) words[slot]);
        target.add(wordDocuments[wordIndex]);
    }
}

Source File: AbstractBooleanDocumentSupportingAdapterBasedTest.java From Palmetto with GNU Affero General Public License v3.0

5 votes

/**
 * Test implementation: every key of the given map is parsed as an integer
 * index into {@code wordDocuments}, and the entry found at that index is
 * added to the list stored as the key's value.
 *
 * @param wordDocMapping
 *            the mapping whose keys are decimal indexes into
 *            {@code wordDocuments} and whose lists receive the result
 */
@Override
public void getDocumentsWithWords(ObjectObjectOpenHashMap<String, IntArrayList> wordDocMapping) {
    Object[] words = (Object[]) wordDocMapping.keys;
    Object[] documentLists = (Object[]) wordDocMapping.values;
    for (int slot = 0; slot < wordDocMapping.allocated.length; ++slot) {
        if (!wordDocMapping.allocated[slot]) {
            // Empty slot of the hash table.
            continue;
        }
        IntArrayList target = (IntArrayList) documentLists[slot];
        int wordIndex = Integer.parseInt((String) words[slot]);
        target.add(wordDocuments[wordIndex]);
    }
}

Source File: BooleanDocumentSupportingAdapter.java From Palmetto with GNU Affero General Public License v3.0

2 votes

/**
 * Determines the documents containing the words used as keys in the given
 * map. For every key, the ids of the documents containing that word are
 * added to the set stored as the key's value.
 * <p>
 * NOTE(review): the implementations seen so far write into the existing
 * value objects rather than putting new ones into the map, so every key
 * presumably has to be mapped to a non-null set before the call - confirm.
 * 
 * @param wordDocMapping
 *            a mapping of words to document id sets in which the results
 *            are stored
 */
public void getDocumentsWithWordsAsSet(ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping);

Source File: BooleanDocumentSupportingAdapter.java From Palmetto with GNU Affero General Public License v3.0

2 votes

/**
 * Determines the documents containing the words used as keys in the given
 * map. For every key, the ids of the documents containing that word are
 * added to the int list stored as the key's value.
 * <p>
 * NOTE(review): the implementations seen so far write into the existing
 * value objects rather than putting new ones into the map, so every key
 * presumably has to be mapped to a non-null list before the call - confirm.
 * 
 * @param wordDocMapping
 *            a mapping of words to document id lists in which the results
 *            are stored
 */
public void getDocumentsWithWords(ObjectObjectOpenHashMap<String, IntArrayList> wordDocMapping);

com.carrotsearch.hppc.ObjectObjectOpenHashMap Java Examples