com.carrotsearch.hppc.ObjectObjectOpenHashMap Java Examples
The following examples show how to use
com.carrotsearch.hppc.ObjectObjectOpenHashMap.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BitSetBasedBooleanDocumentFrequencyDeterminer.java From Palmetto with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Creates one {@link CountedSubsets} instance per segmentation definition.
 *
 * <p>Every distinct word of all word sets is first registered in a shared
 * word-to-documents map with an empty set. The map is then handed to the
 * corpus adapter. Afterwards, the bit sets and counts of the i-th word set
 * are combined with the segments and conditions of the i-th definition.
 *
 * @param wordsets
 *            the word sets; the i-th word set belongs to the i-th definition
 * @param definitions
 *            the segmentation definitions
 * @return an array containing one {@link CountedSubsets} per definition
 */
public CountedSubsets[] determineCounts(String[][] wordsets, SegmentationDefinition[] definitions) {
    ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping =
            new ObjectObjectOpenHashMap<String, IntOpenHashSet>();
    // Register every distinct word exactly once with an empty document set.
    for (String[] wordset : wordsets) {
        for (String word : wordset) {
            if (wordDocMapping.containsKey(word)) {
                continue;
            }
            wordDocMapping.put(word, new IntOpenHashSet());
        }
    }
    corpusAdapter.getDocumentsWithWordsAsSet(wordDocMapping);

    CountedSubsets[] result = new CountedSubsets[definitions.length];
    int index = 0;
    while (index < definitions.length) {
        SegmentationDefinition definition = definitions[index];
        result[index] = new CountedSubsets(definition.segments, definition.conditions,
                createCounts(createBitSets(wordDocMapping, wordsets[index])));
        ++index;
    }
    return result;
}
Example #2
Source File: ListBasedBooleanDocumentFrequencyDeterminer.java From Palmetto with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Creates one {@link CountedSubsets} instance per segmentation definition.
 *
 * <p>Every distinct word of all word sets is first registered in a shared
 * word-to-documents map with an empty list. The map is then handed to the
 * corpus adapter. Afterwards, the counts of the i-th word set are created,
 * passed through {@code addCountsOfSubsets} and combined with the segments
 * and conditions of the i-th definition.
 *
 * @param wordsets
 *            the word sets; the i-th word set belongs to the i-th definition
 * @param definitions
 *            the segmentation definitions
 * @return an array containing one {@link CountedSubsets} per definition
 */
public CountedSubsets[] determineCounts(String[][] wordsets, SegmentationDefinition[] definitions) {
    ObjectObjectOpenHashMap<String, IntArrayList> wordDocMapping =
            new ObjectObjectOpenHashMap<String, IntArrayList>();
    // Register every distinct word exactly once with an empty document list.
    for (String[] wordset : wordsets) {
        for (String word : wordset) {
            if (wordDocMapping.containsKey(word)) {
                continue;
            }
            wordDocMapping.put(word, new IntArrayList());
        }
    }
    corpusAdapter.getDocumentsWithWords(wordDocMapping);

    CountedSubsets[] result = new CountedSubsets[definitions.length];
    int index = 0;
    while (index < definitions.length) {
        int[] subsetCounts = createCounts(wordDocMapping, wordsets[index]);
        addCountsOfSubsets(subsetCounts);
        SegmentationDefinition definition = definitions[index];
        result[index] = new CountedSubsets(definition.segments, definition.conditions, subsetCounts);
        ++index;
    }
    return result;
}
Example #3
Source File: SpotlightClient.java From gerbil with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Creates a client for the given service.
 *
 * <p>The service URL is stored with a trailing slash, which is appended if
 * it is missing. The type prefix to URI mapping is filled from the rows of
 * {@code TYPE_PREFIX_URI_MAPPING}, using the first element of each row as
 * key and the second element as value.
 *
 * @param serviceURL
 *            the URL of the service, with or without a trailing slash
 * @param annotator
 *            the annotator this client is used by
 */
public SpotlightClient(String serviceURL, SpotlightAnnotator annotator) {
    if (serviceURL.endsWith("/")) {
        this.serviceURL = serviceURL;
    } else {
        this.serviceURL = serviceURL + "/";
    }
    this.annotator = annotator;
    typePrefixToUriMapping = new ObjectObjectOpenHashMap<String, String>();
    for (String[] prefixAndUri : TYPE_PREFIX_URI_MAPPING) {
        typePrefixToUriMapping.put(prefixAndUri[0], prefixAndUri[1]);
    }
}
Example #4
Source File: LuceneCorpusAdapter.java From Palmetto with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Calls {@code getDocumentsWithWordAsSet} for every entry of the given map,
 * passing the word (key) together with the set stored for it (value).
 *
 * <p>The entries are visited by walking over the internal slot arrays of
 * the map; only slots that are marked as allocated are processed.
 *
 * @param wordDocMapping
 *            the mapping of words to their document sets
 */
public void getDocumentsWithWordsAsSet(ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping) {
    // The slot arrays are accessed as Object[], exactly as the generic map exposes them.
    final Object[] slotKeys = (Object[]) wordDocMapping.keys;
    final Object[] slotValues = (Object[]) wordDocMapping.values;
    for (int slot = 0; slot < wordDocMapping.allocated.length; ++slot) {
        if (!wordDocMapping.allocated[slot]) {
            continue; // empty slot
        }
        String word = (String) slotKeys[slot];
        IntOpenHashSet documents = (IntOpenHashSet) slotValues[slot];
        getDocumentsWithWordAsSet(word, documents);
    }
}
Example #5
Source File: LuceneCorpusAdapter.java From Palmetto with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Calls {@code getDocumentsWithWord} for every entry of the given map,
 * passing the word (key) together with the list stored for it (value).
 *
 * <p>The entries are visited by walking over the internal slot arrays of
 * the map; only slots that are marked as allocated are processed.
 *
 * @param wordDocMapping
 *            the mapping of words to their document lists
 */
@Override
public void getDocumentsWithWords(ObjectObjectOpenHashMap<String, IntArrayList> wordDocMapping) {
    // The slot arrays are accessed as Object[], exactly as the generic map exposes them.
    final Object[] slotKeys = (Object[]) wordDocMapping.keys;
    final Object[] slotValues = (Object[]) wordDocMapping.values;
    for (int slot = 0; slot < wordDocMapping.allocated.length; ++slot) {
        if (!wordDocMapping.allocated[slot]) {
            continue; // empty slot
        }
        String word = (String) slotKeys[slot];
        IntArrayList documents = (IntArrayList) slotValues[slot];
        getDocumentsWithWord(word, documents);
    }
}
Example #6
Source File: BitSetBasedBooleanDocumentFrequencyDeterminer.java From Palmetto with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Looks up the document set of every word of the given word set, collects
 * the union of all these sets and delegates to
 * {@code createBitSets(IntOpenHashSet[], IntOpenHashSet)}.
 *
 * @param wordDocMapping
 *            the mapping of words to the sets of their documents
 * @param wordset
 *            the words for which the bit sets should be created
 * @return the bit sets created by the delegate from the per-word sets and
 *         their union
 */
private BitSet[] createBitSets(ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping,
        String[] wordset) {
    IntOpenHashSet[] documentsPerWord = new IntOpenHashSet[wordset.length];
    IntOpenHashSet allDocuments = new IntOpenHashSet();
    int position = 0;
    while (position < documentsPerWord.length) {
        IntOpenHashSet documents = wordDocMapping.get(wordset[position]);
        documentsPerWord[position] = documents;
        allDocuments.addAll(documents);
        ++position;
    }
    return createBitSets(documentsPerWord, allDocuments);
}
Example #7
Source File: ListBasedBooleanDocumentFrequencyDeterminer.java From Palmetto with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Counts, for every combination of words of the given word set, the
 * document ids that occur in the lists of exactly this combination.
 *
 * <p>A combination is encoded as a bit signature: bit {@code i} is set if
 * the id was found in the list of {@code wordset[i]}. The returned array has
 * {@code 1 << wordset.length} elements and is indexed by this signature.
 *
 * <p>The document lists stored in the mapping are sorted in place. They are
 * then traversed in parallel, always picking the smallest id the per-list
 * cursors currently point at.
 *
 * @param wordDocMapping
 *            the mapping of words to the lists of their document ids
 * @param wordset
 *            the words for which the counts should be created
 * @return the counts, indexed by document signature
 */
private int[] createCounts(ObjectObjectOpenHashMap<String, IntArrayList> wordDocMapping, String[] wordset) {
    final int[] counts = new int[1 << wordset.length];
    final IntArrayList[] sortedDocIds = new IntArrayList[wordset.length];
    for (int word = 0; word < sortedDocIds.length; ++word) {
        IntArrayList docIds = wordDocMapping.get(wordset[word]);
        sortedDocIds[word] = docIds;
        Arrays.sort(docIds.buffer, 0, docIds.elementsCount);
    }

    final int[] cursors = new int[sortedDocIds.length];
    int signature = 0;
    // The first pass increments counts[0] before any document has been seen;
    // starting at -1 cancels this out.
    counts[0] = -1;
    int smallestDocId;
    do {
        // Count the document found in the previous pass.
        ++counts[signature];
        smallestDocId = Integer.MAX_VALUE;
        for (int word = 0; word < sortedDocIds.length; ++word) {
            IntArrayList docIds = sortedDocIds[word];
            if (cursors[word] >= docIds.elementsCount) {
                continue; // this list is exhausted
            }
            int candidate = docIds.buffer[cursors[word]];
            if (candidate > smallestDocId) {
                continue;
            }
            if (candidate < smallestDocId) {
                // A smaller id has been found; the signature starts from scratch.
                smallestDocId = candidate;
                signature = 0;
            }
            signature |= 1 << word;
        }
        // Move the cursors of all lists that are part of the current signature.
        for (int word = 0; word < cursors.length; ++word) {
            if ((signature & (1 << word)) > 0) {
                ++cursors[word];
            }
        }
    } while (smallestDocId != Integer.MAX_VALUE);
    return counts;
}
Example #8
Source File: SimpleLuceneIndexCreatorTest.java From Palmetto with GNU Affero General Public License v3.0 | 5 votes |
@Test public void test() throws CorruptIndexException, IOException { File indexDir = createTempDirectory(); Iterator<String> docIterator = Arrays.asList(DOCUMENTS).iterator(); // create the index SimpleLuceneIndexCreator creator = new SimpleLuceneIndexCreator(Palmetto.DEFAULT_TEXT_INDEX_FIELD_NAME); Assert.assertTrue(creator.createIndex(indexDir, docIterator)); // test the created index // create an adapter LuceneCorpusAdapter adapter = LuceneCorpusAdapter.create(indexDir.getAbsolutePath(), Palmetto.DEFAULT_TEXT_INDEX_FIELD_NAME); // query the test words ObjectObjectOpenHashMap<String, IntArrayList> wordDocMapping = new ObjectObjectOpenHashMap<String, IntArrayList>(); for (int i = 0; i < TEST_WORDS.length; ++i) { wordDocMapping.put(TEST_WORDS[i], new IntArrayList()); } adapter.getDocumentsWithWords(wordDocMapping); // compare the result with the expected counts int retrievedDocs; for (int i = 0; i < TEST_WORDS.length; ++i) { retrievedDocs = wordDocMapping.get(TEST_WORDS[i]).elementsCount; Assert.assertEquals("Expected " + EXPECTED_DOC_COUNTS[i] + " documents containing the word \"" + TEST_WORDS[i] + "\", but got " + retrievedDocs + " documents form the index.", EXPECTED_DOC_COUNTS[i], retrievedDocs); } }
Example #9
Source File: AbstractBooleanDocumentSupportingAdapterBasedTest.java From Palmetto with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Test implementation: every key of the map is parsed as an integer and
 * used as index into {@code wordDocuments}; the element found there is
 * added to the set stored for this key.
 *
 * @param wordDocMapping
 *            the mapping whose keys are the textual indexes and whose values
 *            are the sets that are filled
 */
@Override
public void getDocumentsWithWordsAsSet(ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping) {
    // The slot arrays are accessed as Object[], exactly as the generic map exposes them.
    final Object[] slotKeys = (Object[]) wordDocMapping.keys;
    final Object[] slotValues = (Object[]) wordDocMapping.values;
    for (int slot = 0; slot < wordDocMapping.allocated.length; ++slot) {
        if (!wordDocMapping.allocated[slot]) {
            continue; // empty slot
        }
        IntOpenHashSet documents = (IntOpenHashSet) slotValues[slot];
        documents.add(wordDocuments[Integer.parseInt((String) slotKeys[slot])]);
    }
}
Example #10
Source File: AbstractBooleanDocumentSupportingAdapterBasedTest.java From Palmetto with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Test implementation: every key of the map is parsed as an integer and
 * used as index into {@code wordDocuments}; the element found there is
 * added to the list stored for this key.
 *
 * @param wordDocMapping
 *            the mapping whose keys are the textual indexes and whose values
 *            are the lists that are filled
 */
@Override
public void getDocumentsWithWords(ObjectObjectOpenHashMap<String, IntArrayList> wordDocMapping) {
    // The slot arrays are accessed as Object[], exactly as the generic map exposes them.
    final Object[] slotKeys = (Object[]) wordDocMapping.keys;
    final Object[] slotValues = (Object[]) wordDocMapping.values;
    for (int slot = 0; slot < wordDocMapping.allocated.length; ++slot) {
        if (!wordDocMapping.allocated[slot]) {
            continue; // empty slot
        }
        IntArrayList documents = (IntArrayList) slotValues[slot];
        documents.add(wordDocuments[Integer.parseInt((String) slotKeys[slot])]);
    }
}
Example #11
Source File: BooleanDocumentSupportingAdapter.java From Palmetto with GNU Affero General Public License v3.0 | 2 votes |
/**
 * Determines the documents containing the words that are used as keys in
 * the given map. The ids of the documents found for a word are stored in
 * the set the map holds for that word.
 *
 * <p>NOTE(review): the implementations and callers shown with this
 * declaration fill the sets that are already present as values, i.e. the
 * caller is presumably expected to provide a set for every key; confirm
 * before relying on it.
 *
 * @param wordDocMapping
 *            a mapping of words to sets of document ids in which the
 *            results are stored
 */
public void getDocumentsWithWordsAsSet(ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping);
Example #12
Source File: BooleanDocumentSupportingAdapter.java From Palmetto with GNU Affero General Public License v3.0 | 2 votes |
/**
 * Determines the documents containing the words that are used as keys in
 * the given map. The ids of the documents found for a word are stored in
 * the int list the map holds for that word.
 *
 * <p>NOTE(review): the implementations and callers shown with this
 * declaration fill the lists that are already present as values, i.e. the
 * caller is presumably expected to provide a list for every key; confirm
 * before relying on it.
 *
 * @param wordDocMapping
 *            a mapping of words to lists of document ids in which the
 *            results are stored
 */
public void getDocumentsWithWords(ObjectObjectOpenHashMap<String, IntArrayList> wordDocMapping);