it.unimi.dsi.fastutil.ints.IntOpenHashSet Java Examples
The following examples show how to use it.unimi.dsi.fastutil.ints.IntOpenHashSet.
They are taken from open source projects; the originating project, source file, and license are listed above each example.
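Before the project examples, here is a minimal, self-contained sketch of everyday IntOpenHashSet usage: construction with an expected size, add/contains, unboxed iteration via IntIterator, and conversion to a primitive array. The class name is illustrative; only standard fastutil methods are used.

import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;

public class IntOpenHashSetBasics {
    public static void main(String[] args) {
        // Pre-sizing avoids rehashing when the number of elements is roughly known.
        IntOpenHashSet set = new IntOpenHashSet(16);
        set.add(3);
        set.add(7);
        set.add(3);                        // duplicate, ignored
        System.out.println(set.size());    // 2

        // Primitive-specialized membership test: no boxing of the int argument.
        System.out.println(set.contains(7));   // true

        // Iterate without boxing via IntIterator.nextInt().
        for (IntIterator it = set.iterator(); it.hasNext(); ) {
            System.out.println(it.nextInt());
        }

        set.trim();                        // optionally shrink the backing table
        int[] asArray = set.toIntArray();  // copy out as a primitive array
        System.out.println(asArray.length);
    }
}

Because the set stores primitive ints directly, there is no per-element boxing, which is the main reason the projects below prefer it over java.util.HashSet<Integer>.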
Example #1
Source File: SlimUDTF.java From incubator-hivemall with Apache License 2.0
private void replayTrain(@Nonnull final ByteBuffer buf) {
    final int itemI = buf.getInt();
    final int knnSize = buf.getInt();

    final Int2ObjectMap<Int2FloatMap> knnItems = new Int2ObjectOpenHashMap<>(1024);
    final IntSet pairItems = new IntOpenHashSet();
    for (int i = 0; i < knnSize; i++) {
        int user = buf.getInt();
        int ruSize = buf.getInt();
        Int2FloatMap ru = new Int2FloatOpenHashMap(ruSize);
        ru.defaultReturnValue(0.f);

        for (int j = 0; j < ruSize; j++) {
            int itemK = buf.getInt();
            pairItems.add(itemK);
            float ruk = buf.getFloat();
            ru.put(itemK, ruk);
        }
        knnItems.put(user, ru);
    }

    for (int itemJ : pairItems) {
        train(itemI, knnItems, itemJ);
    }
}
Example #2
Source File: CallGraphGenerator.java From fasten with Apache License 2.0
/**
 * Generate a random DAG using preferential attachment. First an independent set of <code>n0</code> nodes is generated.
 * Then <code>n-n0</code> more nodes are generated: for each node, the outdegree is determined using
 * <code>outdegreeDistribution.nextInt()</code> minimized with the number of existing nodes. For each arc, the target
 * is the existing node <code>i</code> with probability proportional to <code>k+1</code>, where <code>k</code> is
 * <code>i</code>'s current outdegree.
 *
 * @param n number of nodes.
 * @param n0 number of initial nodes.
 * @param outdegreeDistribution distribution from which outdegrees are sampled.
 * @param random generator used to produce the arcs.
 * @return the generated DAG.
 */
public static ArrayListMutableGraph preferentialAttachmentDAG(final int n, final int n0,
        final IntegerDistribution outdegreeDistribution, final RandomGenerator random) {
    final ArrayListMutableGraph g = new ArrayListMutableGraph(n);
    final FenwickTree ft = new FenwickTree(n);
    // Initial independent set
    for (int source = 0; source < n0; source++) ft.incrementCount(source + 1);
    // Rest of the graph
    final IntOpenHashSet s = new IntOpenHashSet();
    for (int source = n0; source < n; source++) {
        final int m = Math.min(outdegreeDistribution.sample(), source - 1); // Outdegree
        s.clear();
        while (s.size() < m) {
            final int t = ft.sample(random);
            if (s.add(t)) {
                ft.incrementCount(t);
                g.addArc(source, t - 1);
            }
        }
        ft.incrementCount(source + 1);
    }

    return g;
}
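A call to the generator above might look like the following sketch. It assumes the commons-math3 distribution and RNG types and the webgraph ArrayListMutableGraph that the signature already implies; the numbers and the class name are illustrative, and the import of CallGraphGenerator itself is omitted because its package depends on the project layout.

import it.unimi.dsi.webgraph.ArrayListMutableGraph;
import it.unimi.dsi.webgraph.ImmutableGraph;
import org.apache.commons.math3.distribution.BinomialDistribution;
import org.apache.commons.math3.random.Well19937c;

public class PreferentialAttachmentDemo {
    public static void main(String[] args) {
        // Hypothetical driver: a 1000-node DAG whose outdegrees follow Binomial(20, 0.25),
        // grown from an initial independent set of 10 nodes with a seeded RNG.
        ArrayListMutableGraph dag = CallGraphGenerator.preferentialAttachmentDAG(
                1000, 10, new BinomialDistribution(20, 0.25), new Well19937c(42L));
        ImmutableGraph view = dag.immutableView();
        System.out.println(view.numNodes() + " nodes, " + view.numArcs() + " arcs");
    }
}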
Example #3
Source File: ColumnIndexBuilder.java From parquet-mr with Apache License 2.0
@Override
public <T extends Comparable<T>> PrimitiveIterator.OfInt visit(NotEq<T> notEq) {
    T value = notEq.getValue();
    if (value == null) {
        return IndexIterator.filter(getPageCount(), pageIndex -> !nullPages[pageIndex]);
    }
    if (nullCounts == null) {
        // Nulls match so if we don't have null related statistics we have to return all pages
        return IndexIterator.all(getPageCount());
    }

    // Merging value filtering with pages containing nulls
    IntSet matchingIndexes = new IntOpenHashSet();
    getBoundaryOrder().notEq(createValueComparator(value))
        .forEachRemaining((int index) -> matchingIndexes.add(index));
    return IndexIterator.filter(getPageCount(),
        pageIndex -> nullCounts[pageIndex] > 0 || matchingIndexes.contains(pageIndex));
}
Example #4
Source File: ExpReplay.java From deeplearning4j with Apache License 2.0
public ArrayList<Transition<A>> getBatch(int size) {
    ArrayList<Transition<A>> batch = new ArrayList<>(size);
    int storageSize = storage.size();
    int actualBatchSize = Math.min(storageSize, size);

    int[] actualIndex = new int[actualBatchSize];
    IntSet set = new IntOpenHashSet();
    for (int i = 0; i < actualBatchSize; i++) {
        int next = rnd.nextInt(storageSize);
        while (set.contains(next)) {
            next = rnd.nextInt(storageSize);
        }
        set.add(next);
        actualIndex[i] = next;
    }

    for (int i = 0; i < actualBatchSize; i++) {
        Transition<A> trans = storage.get(actualIndex[i]);
        batch.add(trans.dup());
    }

    return batch;
}
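The rejection loop above is a common way to draw distinct random indices. A stripped-down, standalone version of the same idea (hypothetical names, a plain java.util.Random in place of the class's rnd field, and add()'s boolean return used instead of a separate contains() check) might look like this:

import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.ints.IntSet;
import java.util.Random;

public final class DistinctSampler {
    /** Draws k distinct indices uniformly from [0, bound), assuming k <= bound. */
    static int[] sampleDistinct(int k, int bound, Random rnd) {
        IntSet seen = new IntOpenHashSet(k);
        int[] result = new int[k];
        for (int i = 0; i < k; i++) {
            int next = rnd.nextInt(bound);
            while (!seen.add(next)) {      // add() returns false for duplicates
                next = rnd.nextInt(bound);
            }
            result[i] = next;
        }
        return result;
    }

    public static void main(String[] args) {
        System.out.println(java.util.Arrays.toString(sampleDistinct(5, 100, new Random(7))));
    }
}

Relying on add()'s return value saves one hash lookup per accepted index compared with the contains-then-add form above.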
Example #5
Source File: IntDictionaryMap.java From tablesaw with Apache License 2.0
@Override
public Selection selectIsIn(String... strings) {
    IntOpenHashSet keys = new IntOpenHashSet(strings.length);
    for (String string : strings) {
        int key = getKeyForValue(string);
        if (key != DEFAULT_RETURN_VALUE) {
            keys.add(key);
        }
    }

    Selection results = new BitmapBackedSelection();
    for (int i = 0; i < values.size(); i++) {
        if (keys.contains(values.getInt(i))) {
            results.add(i);
        }
    }
    return results;
}
Example #6
Source File: PositionListIndex.java From metanome-algorithms with Apache License 2.0
@Override
public int hashCode() {
    final int prime = 31;
    int result = 1;

    List<IntOpenHashSet> setCluster = this.convertClustersToSets(this.clusters);

    Collections.sort(setCluster, new Comparator<IntSet>() {
        @Override
        public int compare(IntSet o1, IntSet o2) {
            return o1.hashCode() - o2.hashCode();
        }
    });
    result = prime * result + (setCluster.hashCode());
    return result;
}
Example #7
Source File: NotInPredicateEvaluatorFactory.java From incubator-pinot with Apache License 2.0
DictionaryBasedNotInPredicateEvaluator(NotInPredicate notInPredicate, Dictionary dictionary) {
    List<String> values = notInPredicate.getValues();
    _nonMatchingDictIdSet = new IntOpenHashSet(HashUtil.getMinHashSetSize(values.size()));
    for (String value : values) {
        int dictId = dictionary.indexOf(value);
        if (dictId >= 0) {
            _nonMatchingDictIdSet.add(dictId);
        }
    }
    _numNonMatchingDictIds = _nonMatchingDictIdSet.size();
    if (_numNonMatchingDictIds == 0) {
        _alwaysTrue = true;
    } else if (dictionary.length() == _numNonMatchingDictIds) {
        _alwaysFalse = true;
    }
    _dictionary = dictionary;
}
Example #8
Source File: IntervalTest.java From database with GNU General Public License v2.0
public void testSubsets() {
    for (int i = 0; i < 10; i++)
        for (int j = i - 1; j < 10; j++) {
            Interval interval = j < i ? EMPTY_INTERVAL : Interval.valueOf(i, j);
            IntSortedSet set = toSortedSet(interval);
            assertEquals(set, interval);
            assertTrue(Arrays.equals(IntIterators.unwrap(set.iterator()), IntIterators.unwrap(set.iterator())));
            assertEquals(new IntOpenHashSet(set), interval);

            for (int k = j - 1; k <= i + 1; k++) {
                assertTrue(Arrays.equals(IntIterators.unwrap(set.iterator(k)), IntIterators.unwrap(set.iterator(k))));
                assertEquals(set.headSet(k), interval.headSet(k));
                assertEquals(set.tailSet(k), interval.tailSet(k));
                for (int l = k; l <= i + 1; l++)
                    assertEquals(set.subSet(k, l), interval.subSet(k, l));
            }
        }
}
Example #9
Source File: ThresholdMapFlattenerTest.java From metanome-algorithms with Apache License 2.0
@Test
public void test() {
    Collection<To> row1 = Arrays.asList(
        To.builder().similarity(0.5).records(new IntOpenHashSet(Arrays.asList(Integer.valueOf(1), Integer.valueOf(2)))).build(),
        To.builder().similarity(0.6).records(IntSets.singleton(3)).build());
    Collection<To> row2 = Collections.singletonList(
        To.builder().similarity(0.4).records(IntSets.singleton(1)).build());
    ThresholdMapFlattener flattener = createFlattener(3);
    ThresholdMap map = CollectingThresholdMap.builder()
        .add(1, row1)
        .add(2, row2)
        .build(flattener);

    assertThat(map.greaterOrEqual(1, 0.5)).hasSize(3);
    assertThat(map.greaterOrEqual(1, 0.5)).contains(Integer.valueOf(1), Integer.valueOf(2), Integer.valueOf(3));
    assertThat(map.greaterOrEqual(1, 0.6)).hasSize(1);
    assertThat(map.greaterOrEqual(1, 0.6)).contains(Integer.valueOf(3));
    assertThat(map.greaterOrEqual(1, 0.7)).isEmpty();
    assertThat(map.greaterOrEqual(2, 0.4)).hasSize(1);
    assertThat(map.greaterOrEqual(2, 0.4)).contains(Integer.valueOf(1));
}
Example #10
Source File: CollectingThresholdMapTest.java From metanome-algorithms with Apache License 2.0
@Test
public void test() {
    Collection<To> row1 = Arrays.asList(
        To.builder().similarity(0.5).records(new IntOpenHashSet(Arrays.asList(Integer.valueOf(1), Integer.valueOf(2)))).build(),
        To.builder().similarity(0.6).records(IntSets.singleton(3)).build());
    Collection<To> row2 = Collections.singletonList(
        To.builder().similarity(0.4).records(IntSets.singleton(1)).build());
    ThresholdMap map = CollectingThresholdMap.builder()
        .add(1, row1)
        .add(2, row2)
        .build();

    assertThat(map.greaterOrEqual(1, 0.5)).hasSize(3);
    assertThat(map.greaterOrEqual(1, 0.5)).contains(Integer.valueOf(1), Integer.valueOf(2), Integer.valueOf(3));
    assertThat(map.greaterOrEqual(1, 0.6)).hasSize(1);
    assertThat(map.greaterOrEqual(1, 0.6)).contains(Integer.valueOf(3));
    assertThat(map.greaterOrEqual(1, 0.7)).isEmpty();
    assertThat(map.greaterOrEqual(2, 0.4)).hasSize(1);
    assertThat(map.greaterOrEqual(2, 0.4)).contains(Integer.valueOf(1));
}
Example #11
Source File: InPredicateEvaluatorFactory.java From incubator-pinot with Apache License 2.0
DictionaryBasedInPredicateEvaluator(InPredicate inPredicate, Dictionary dictionary) {
    List<String> values = inPredicate.getValues();
    _matchingDictIdSet = new IntOpenHashSet(HashUtil.getMinHashSetSize(values.size()));
    for (String value : values) {
        int dictId = dictionary.indexOf(value);
        if (dictId >= 0) {
            _matchingDictIdSet.add(dictId);
        }
    }
    _numMatchingDictIds = _matchingDictIdSet.size();
    if (_numMatchingDictIds == 0) {
        _alwaysFalse = true;
    } else if (dictionary.length() == _numMatchingDictIds) {
        _alwaysTrue = true;
    }
}
Example #12
Source File: OneClassPreferenceFMData.java From RankSys with Mozilla Public License 2.0
@Override
public Stream<? extends FMInstance> stream() {
    return uidxs.stream()
        .flatMap(uidx -> {
            IntSet uidxIidxs = new IntOpenHashSet();
            prefs.getUidxIidxs(uidx).forEachRemaining(uidxIidxs::add);

            List<FMInstance> instances = new ArrayList<>();

            // adding positive examples
            uidxIidxs
                .forEach(iidx -> instances.add(getInstance(uidx, iidx, 1.0)));

            // adding negative examples
            rnd.ints(iidxs.size(), 0, iidxs.size()).map(iidxs::getInt)
                .filter(jidx -> !uidxIidxs.contains(jidx))
                .distinct()
                .limit((int) (negativeProp * uidxIidxs.size()))
                .forEach(jidx -> instances.add(getInstance(uidx, jidx, 0.0)));

            Collections.shuffle(instances);

            return instances.stream();
        });
}
Example #13
Source File: NbestListUtils.java From phrasal with GNU General Public License v3.0
/**
 * Baseline implementation. Augments the "standard" list with alternatives.
 *
 * @param standard
 * @param alt
 * @param maxAltItems
 * @return
 */
public static <TK,FV> List<RichTranslation<TK,FV>> mergeAndDedup(List<RichTranslation<TK,FV>> standard,
        List<RichTranslation<TK,FV>> alt, int maxAltItems) {

    IntSet hashCodeSet = new IntOpenHashSet(standard.size());
    for (RichTranslation<TK,FV> s : standard) {
        hashCodeSet.add(derivationHashCode(s.getFeaturizable().derivation));
    }

    List<RichTranslation<TK,FV>> returnList = new ArrayList<>(standard);
    for (int i = 0, sz = Math.min(maxAltItems, alt.size()); i < sz; ++i) {
        RichTranslation<TK,FV> t = alt.get(i);
        int hashCode = derivationHashCode(t.getFeaturizable().derivation);
        if (! hashCodeSet.contains(hashCode)) returnList.add(t);
    }
    Collections.sort(returnList);

    return returnList;
}
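The same trick, remembering an IntOpenHashSet of int hash codes instead of a HashSet of whole objects, applies to any merge-and-dedup over elements with a cheap, well-distributed hash. A generic sketch follows (hypothetical class and method names, not part of Phrasal):

import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.ints.IntSet;
import java.util.ArrayList;
import java.util.List;

public final class HashCodeDedup {
    /**
     * Appends to the merged result those elements of {@code extra} whose hash code has
     * not been seen yet. Hash collisions can drop distinct elements; as in the n-best
     * merge above, that is the accepted trade-off for storing one int per element.
     */
    static <T> List<T> mergeByHashCode(List<T> base, List<T> extra) {
        IntSet seen = new IntOpenHashSet(base.size());
        for (T item : base) {
            seen.add(item.hashCode());
        }
        List<T> merged = new ArrayList<>(base);
        for (T item : extra) {
            if (seen.add(item.hashCode())) {   // true only for unseen hash codes
                merged.add(item);
            }
        }
        return merged;
    }
}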
Example #14
Source File: GraphFunctions.java From data-polygamy with BSD 3-Clause "New" or "Revised" License
public GraphFunctions(int[][] edges2D, int noNodes) {
    try {
        nv = noNodes;
        nodes = new IntOpenHashSet[nv];
        for (int i = 0; i < nv; i++) {
            nodes[i] = new IntOpenHashSet();
        }
        for (int i = 0; i < edges2D.length; i++) {
            int v1 = edges2D[i][0];
            int v2 = edges2D[i][1];
            nodes[v1].add(v2);
            nodes[v2].add(v1);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
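With the adjacency list stored as one IntOpenHashSet per node, adjacency tests and degree queries become constant-time set operations. A self-contained sketch of the same layout and the queries it enables (the class name and sample edges are made up, not from data-polygamy):

import it.unimi.dsi.fastutil.ints.IntOpenHashSet;

public final class AdjacencyDemo {
    public static void main(String[] args) {
        int[][] edges = { {0, 1}, {1, 2}, {0, 2} };
        int n = 3;

        // Same layout as above: one IntOpenHashSet of neighbours per node.
        IntOpenHashSet[] nodes = new IntOpenHashSet[n];
        for (int i = 0; i < n; i++) {
            nodes[i] = new IntOpenHashSet();
        }
        for (int[] e : edges) {
            nodes[e[0]].add(e[1]);
            nodes[e[1]].add(e[0]);
        }

        // Constant-time neighbourhood queries.
        System.out.println(nodes[0].contains(2));   // true: 0 and 2 are adjacent
        System.out.println(nodes[1].size());        // 2: degree of node 1
    }
}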
Example #15
Source File: IntDictionaryMap.java From tablesaw with Apache License 2.0
@Override
public Selection selectIsIn(Collection<String> strings) {
    IntOpenHashSet keys = new IntOpenHashSet(strings.size());
    for (String string : strings) {
        int key = getKeyForValue(string);
        if (key != DEFAULT_RETURN_VALUE) {
            keys.add(key);
        }
    }

    Selection results = new BitmapBackedSelection();
    for (int i = 0; i < values.size(); i++) {
        if (keys.contains(values.getInt(i))) {
            results.add(i);
        }
    }
    return results;
}
Example #16
Source File: ObjectSerDeUtilsTest.java From incubator-pinot with Apache License 2.0
@Test
public void testIntSet() {
    for (int i = 0; i < NUM_ITERATIONS; i++) {
        int size = RANDOM.nextInt(100);
        IntSet expected = new IntOpenHashSet(size);
        for (int j = 0; j < size; j++) {
            expected.add(RANDOM.nextInt());
        }

        byte[] bytes = ObjectSerDeUtils.serialize(expected);
        IntSet actual = ObjectSerDeUtils.deserialize(bytes, ObjectSerDeUtils.ObjectType.IntSet);

        // NOTE: use Object comparison instead of Collection comparison because the order might be different
        assertEquals((Object) actual, expected, ERROR_MESSAGE);
    }
}
Example #17
Source File: IntColumn.java From tablesaw with Apache License 2.0
@Override
public int countUnique() {
    IntSet uniqueElements = new IntOpenHashSet();
    for (int i = 0; i < size(); i++) {
        uniqueElements.add(getInt(i));
    }
    return uniqueElements.size();
}
Example #18
Source File: FastFilters.java From RankSys with Mozilla Public License 2.0
/**
 * Item filter that discards items in the training preference data.
 *
 * @param <U> type of the users
 * @param <I> type of the items
 * @param trainData preference data
 * @return item filters for each user returning true if the
 * user-item pair was not observed in the preference data
 */
public static <U, I> Function<U, IntPredicate> notInTrain(FastPreferenceData<U, I> trainData) {
    return user -> {
        IntSet set = new IntOpenHashSet();
        trainData.getUidxPreferences(trainData.user2uidx(user))
            .mapToInt(IdxPref::v1)
            .forEach(set::add);

        return iidx -> !set.contains(iidx);
    };
}
Example #19
Source File: IntColumn.java From tablesaw with Apache License 2.0
@Override
public IntColumn unique() {
    final IntSet values = new IntOpenHashSet();
    for (int i = 0; i < size(); i++) {
        values.add(getInt(i));
    }
    final IntColumn column = IntColumn.create(name() + " Unique values");
    for (int value : values) {
        column.append(value);
    }
    return column;
}
Example #20
Source File: DatasetWikiIdExporter.java From gerbil with GNU Affero General Public License v3.0
private IntOpenHashSet analyzeAsD2W(DatasetConfiguration config) throws GerbilException {
    C2WDataset dataset = (C2WDataset) config.getDataset(ExperimentType.C2KB);
    if (dataset == null) {
        return null;
    }
    List<HashSet<Tag>> goldStandard = dataset.getC2WGoldStandardList();
    IntOpenHashSet ids = new IntOpenHashSet();
    for (HashSet<Tag> tags : goldStandard) {
        for (Tag tag : tags) {
            ids.add(tag.getConcept());
        }
    }
    return ids;
}
Example #21
Source File: NotInPredicateEvaluatorFactory.java From incubator-pinot with Apache License 2.0
IntRawValueBasedNotInPredicateEvaluator(NotInPredicate notInPredicate) {
    List<String> values = notInPredicate.getValues();
    _nonMatchingValues = new IntOpenHashSet(HashUtil.getMinHashSetSize(values.size()));
    for (String value : values) {
        _nonMatchingValues.add(Integer.parseInt(value));
    }
}
Example #22
Source File: AccessTrace.java From cache2k-benchmark with Apache License 2.0
private void initStatistics() {
    IntSet _values = new IntOpenHashSet();
    for (int v : getArray()) {
        _values.add(v);
        if (v < lowValue) {
            lowValue = v;
        }
        if (v > highValue) {
            highValue = v;
        }
    }
    valueCount = _values.size();
}
Example #23
Source File: TimeColumn.java From tablesaw with Apache License 2.0
@Override
public TimeColumn unique() {
    IntSet ints = new IntOpenHashSet(data);
    TimeColumn column = emptyCopy(ints.size());
    column.data = IntArrayList.wrap(ints.toIntArray());
    column.setName(name() + " Unique values");
    return column;
}
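The copy constructor used above accepts any IntCollection, so deduplicating a primitive list takes only a couple of lines. A standalone sketch of the same copy-construct-and-wrap idiom (class name hypothetical):

import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;

public final class DedupDemo {
    public static void main(String[] args) {
        // Deduplicate a primitive int list the same way TimeColumn.unique() does:
        // copy-construct a set from the IntCollection, then wrap the distinct values.
        IntArrayList data = IntArrayList.wrap(new int[] {5, 3, 5, 9, 3});
        IntOpenHashSet distinct = new IntOpenHashSet(data);
        IntArrayList unique = IntArrayList.wrap(distinct.toIntArray());
        System.out.println(unique.size());   // 3
    }
}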
Example #24
Source File: DistinctCountAggregationFunction.java From incubator-pinot with Apache License 2.0
@Override
public IntOpenHashSet extractAggregationResult(AggregationResultHolder aggregationResultHolder) {
    IntOpenHashSet valueSet = aggregationResultHolder.getResult();
    if (valueSet == null) {
        return new IntOpenHashSet();
    } else {
        return valueSet;
    }
}
Example #25
Source File: IgnoreWIDs.java From tagme with Apache License 2.0
@Override
protected IntSet parseSet() throws IOException {
    log.info("Loading data...");
    Object2IntMap<String> titles = new TitlesToWIDMap(lang).getDataset();
    IntOpenHashSet ids = new IntOpenHashSet(titles.size());

    Pattern p_date = WikiPatterns.getPattern(lang, Type.PAGE_DATE);
    Pattern p_other = WikiPatterns.getPattern(lang, Type.PAGE_IGNORE);

    PLogger plog = new PLogger(log, "titles", "dates", "others")
        .setEnd(0, titles.size())
        .start("Parsing ignore-pages...");

    for (String title : titles.keySet()) {
        plog.update(0);
        if (p_date.matcher(title).find()) {
            plog.update(1);
            ids.add(titles.get(title));
        } else if (p_other.matcher(title).find()) {
            plog.update(2);
            ids.add(titles.get(title));
        }
    }
    plog.stop();

    ids.trim();
    return ids;
}
Example #26
Source File: PageToCategoryIDs.java From tagme with Apache License 2.0
@Override
protected int[][] parseSet() throws IOException {
    final Int2ObjectMap<IntSet> map = new Int2ObjectOpenHashMap<IntSet>(3000000);
    final IntSet hidden = DatasetLoader.get(new HiddenCategoriesWIDs(lang));
    File input = WikipediaFiles.CAT_LINKS.getSourceFile(lang);
    final Object2IntMap<String> categories = DatasetLoader.get(new CategoriesToWIDMap(lang));

    SQLWikiParser parser = new SQLWikiParser(log) {
        @Override
        public boolean compute(ArrayList<String> values) throws IOException {
            String c_title = cleanPageName(values.get(SQLWikiParser.CATLINKS_TITLE_TO));
            int id = Integer.parseInt(values.get(SQLWikiParser.CATLINKS_ID_FROM));
            if (categories.containsKey(c_title) && !hidden.contains(categories.get(c_title).intValue())) {
                if (map.containsKey(id)) {
                    map.get(id).add(categories.get(c_title).intValue());
                } else {
                    IntSet set = new IntOpenHashSet();
                    set.add(categories.get(c_title).intValue());
                    map.put(id, set);
                }
                return true;
            } else
                return false;
        }
    };

    InputStreamReader reader = new InputStreamReader(new FileInputStream(input), Charset.forName("UTF-8"));
    parser.compute(reader);
    reader.close();

    return createDump(map);
}
Example #27
Source File: DatasetWikiIdExporter.java From gerbil with GNU Affero General Public License v3.0
public void analyzeDataset(DatasetConfiguration config, PrintStream output) throws GerbilException {
    IntOpenHashSet ids = analyzeAsD2W(config);
    if (ids == null) {
        ids = analyzeAsC2W(config);
    }
    printIds(ids, output);
}