it.unimi.dsi.fastutil.ints.IntOpenHashSet Java Examples
The following examples show how to use it.unimi.dsi.fastutil.ints.IntOpenHashSet.
They are taken from open source projects; the originating project, source file, and license are listed above each example.
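Before the project examples, here is a minimal, self-contained sketch of everyday IntOpenHashSet usage: construction with an expected size, add/contains, unboxed iteration via IntIterator, and conversion to a primitive array. The class name is illustrative; only standard fastutil methods are used.

import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;

public class IntOpenHashSetBasics {
    public static void main(String[] args) {
        // Pre-sizing avoids rehashing when the number of elements is roughly known.
        IntOpenHashSet set = new IntOpenHashSet(16);
        set.add(3);
        set.add(7);
        set.add(3);                        // duplicate, ignored
        System.out.println(set.size());    // 2

        // Primitive-specialized membership test: no boxing of the int argument.
        System.out.println(set.contains(7));   // true

        // Iterate without boxing via IntIterator.nextInt().
        for (IntIterator it = set.iterator(); it.hasNext(); ) {
            System.out.println(it.nextInt());
        }

        set.trim();                        // optionally shrink the backing table
        int[] asArray = set.toIntArray();  // copy out as a primitive array
        System.out.println(asArray.length);
    }
}

Because the set stores primitive ints directly, there is no per-element boxing, which is the main reason the projects below prefer it over java.util.HashSet<Integer>.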
Example #1
Source File: SlimUDTF.java From incubator-hivemall with Apache License 2.0
private void replayTrain(@Nonnull final ByteBuffer buf) {
    final int itemI = buf.getInt();
    final int knnSize = buf.getInt();

    final Int2ObjectMap<Int2FloatMap> knnItems = new Int2ObjectOpenHashMap<>(1024);
    final IntSet pairItems = new IntOpenHashSet();
    for (int i = 0; i < knnSize; i++) {
        int user = buf.getInt();
        int ruSize = buf.getInt();
        Int2FloatMap ru = new Int2FloatOpenHashMap(ruSize);
        ru.defaultReturnValue(0.f);

        for (int j = 0; j < ruSize; j++) {
            int itemK = buf.getInt();
            pairItems.add(itemK);
            float ruk = buf.getFloat();
            ru.put(itemK, ruk);
        }
        knnItems.put(user, ru);
    }

    for (int itemJ : pairItems) {
        train(itemI, knnItems, itemJ);
    }
}
Example #2
Source File: CallGraphGenerator.java From fasten with Apache License 2.0
/**
 * Generate a random DAG using preferential attachment. First an independent set of <code>n0</code> nodes is generated.
 * Then <code>n-n0</code> more nodes are generated: for each node, the outdegree is determined using
 * <code>outdegreeDistribution.nextInt()</code> minimized with the number of existing nodes. For each arc, the target
 * is the existing node <code>i</code> with probability proportional to <code>k+1</code>, where <code>k</code> is
 * <code>i</code>'s current outdegree.
 *
 * @param n number of nodes.
 * @param n0 number of initial nodes.
 * @param outdegreeDistribution distribution from which outdegrees are sampled.
 * @param random generator used to produce the arcs.
 * @return the generated DAG.
 */
public static ArrayListMutableGraph preferentialAttachmentDAG(final int n, final int n0,
        final IntegerDistribution outdegreeDistribution, final RandomGenerator random) {
    final ArrayListMutableGraph g = new ArrayListMutableGraph(n);
    final FenwickTree ft = new FenwickTree(n);
    // Initial independent set
    for (int source = 0; source < n0; source++) ft.incrementCount(source + 1);
    // Rest of the graph
    final IntOpenHashSet s = new IntOpenHashSet();
    for (int source = n0; source < n; source++) {
        final int m = Math.min(outdegreeDistribution.sample(), source - 1); // Outdegree
        s.clear();
        while (s.size() < m) {
            final int t = ft.sample(random);
            if (s.add(t)) {
                ft.incrementCount(t);
                g.addArc(source, t - 1);
            }
        }
        ft.incrementCount(source + 1);
    }

    return g;
}
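A call to the generator above might look like the following sketch. It assumes the commons-math3 distribution and RNG types and the webgraph ArrayListMutableGraph that the signature already implies; the numbers and the class name are illustrative, and the import of CallGraphGenerator itself is omitted because its package depends on the project layout.

import it.unimi.dsi.webgraph.ArrayListMutableGraph;
import it.unimi.dsi.webgraph.ImmutableGraph;
import org.apache.commons.math3.distribution.BinomialDistribution;
import org.apache.commons.math3.random.Well19937c;

public class PreferentialAttachmentDemo {
    public static void main(String[] args) {
        // Hypothetical driver: a 1000-node DAG whose outdegrees follow Binomial(20, 0.25),
        // grown from an initial independent set of 10 nodes with a seeded RNG.
        ArrayListMutableGraph dag = CallGraphGenerator.preferentialAttachmentDAG(
                1000, 10, new BinomialDistribution(20, 0.25), new Well19937c(42L));
        ImmutableGraph view = dag.immutableView();
        System.out.println(view.numNodes() + " nodes, " + view.numArcs() + " arcs");
    }
}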
Example #3
Source File: ColumnIndexBuilder.java From parquet-mr with Apache License 2.0
@Override
public <T extends Comparable<T>> PrimitiveIterator.OfInt visit(NotEq<T> notEq) {
    T value = notEq.getValue();
    if (value == null) {
        return IndexIterator.filter(getPageCount(), pageIndex -> !nullPages[pageIndex]);
    }
    if (nullCounts == null) {
        // Nulls match so if we don't have null related statistics we have to return all pages
        return IndexIterator.all(getPageCount());
    }

    // Merging value filtering with pages containing nulls
    IntSet matchingIndexes = new IntOpenHashSet();
    getBoundaryOrder().notEq(createValueComparator(value))
        .forEachRemaining((int index) -> matchingIndexes.add(index));
    return IndexIterator.filter(getPageCount(),
        pageIndex -> nullCounts[pageIndex] > 0 || matchingIndexes.contains(pageIndex));
}
Example #4
Source File: ExpReplay.java From deeplearning4j with Apache License 2.0
public ArrayList<Transition<A>> getBatch(int size) {
    ArrayList<Transition<A>> batch = new ArrayList<>(size);
    int storageSize = storage.size();
    int actualBatchSize = Math.min(storageSize, size);

    int[] actualIndex = new int[actualBatchSize];
    IntSet set = new IntOpenHashSet();
    for (int i = 0; i < actualBatchSize; i++) {
        int next = rnd.nextInt(storageSize);
        while (set.contains(next)) {
            next = rnd.nextInt(storageSize);
        }
        set.add(next);
        actualIndex[i] = next;
    }

    for (int i = 0; i < actualBatchSize; i++) {
        Transition<A> trans = storage.get(actualIndex[i]);
        batch.add(trans.dup());
    }

    return batch;
}
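The rejection loop above is a common way to draw distinct random indices. A stripped-down, standalone version of the same idea (hypothetical names, a plain java.util.Random in place of the class's rnd field, and add()'s boolean return used instead of a separate contains() check) might look like this:

import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.ints.IntSet;
import java.util.Random;

public final class DistinctSampler {
    /** Draws k distinct indices uniformly from [0, bound), assuming k <= bound. */
    static int[] sampleDistinct(int k, int bound, Random rnd) {
        IntSet seen = new IntOpenHashSet(k);
        int[] result = new int[k];
        for (int i = 0; i < k; i++) {
            int next = rnd.nextInt(bound);
            while (!seen.add(next)) {      // add() returns false for duplicates
                next = rnd.nextInt(bound);
            }
            result[i] = next;
        }
        return result;
    }

    public static void main(String[] args) {
        System.out.println(java.util.Arrays.toString(sampleDistinct(5, 100, new Random(7))));
    }
}

Relying on add()'s return value saves one hash lookup per accepted index compared with the contains-then-add form above.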
Example #5
Source File: IntDictionaryMap.java From tablesaw with Apache License 2.0
@Override
public Selection selectIsIn(String... strings) {
    IntOpenHashSet keys = new IntOpenHashSet(strings.length);
    for (String string : strings) {
        int key = getKeyForValue(string);
        if (key != DEFAULT_RETURN_VALUE) {
            keys.add(key);
        }
    }

    Selection results = new BitmapBackedSelection();
    for (int i = 0; i < values.size(); i++) {
        if (keys.contains(values.getInt(i))) {
            results.add(i);
        }
    }
    return results;
}
Example #6
Source File: PositionListIndex.java From metanome-algorithms with Apache License 2.0
@Override
public int hashCode() {
    final int prime = 31;
    int result = 1;

    List<IntOpenHashSet> setCluster = this.convertClustersToSets(this.clusters);

    Collections.sort(setCluster, new Comparator<IntSet>() {
        @Override
        public int compare(IntSet o1, IntSet o2) {
            return o1.hashCode() - o2.hashCode();
        }
    });
    result = prime * result + (setCluster.hashCode());
    return result;
}
Example #7
Source File: NotInPredicateEvaluatorFactory.java From incubator-pinot with Apache License 2.0
DictionaryBasedNotInPredicateEvaluator(NotInPredicate notInPredicate, Dictionary dictionary) {
    List<String> values = notInPredicate.getValues();
    _nonMatchingDictIdSet = new IntOpenHashSet(HashUtil.getMinHashSetSize(values.size()));
    for (String value : values) {
        int dictId = dictionary.indexOf(value);
        if (dictId >= 0) {
            _nonMatchingDictIdSet.add(dictId);
        }
    }
    _numNonMatchingDictIds = _nonMatchingDictIdSet.size();
    if (_numNonMatchingDictIds == 0) {
        _alwaysTrue = true;
    } else if (dictionary.length() == _numNonMatchingDictIds) {
        _alwaysFalse = true;
    }
    _dictionary = dictionary;
}
Example #8
Source File: IntervalTest.java From database with GNU General Public License v2.0
public void testSubsets() {
    for (int i = 0; i < 10; i++)
        for (int j = i - 1; j < 10; j++) {
            Interval interval = j < i ? EMPTY_INTERVAL : Interval.valueOf(i, j);
            IntSortedSet set = toSortedSet(interval);
            assertEquals(set, interval);
            assertTrue(Arrays.equals(IntIterators.unwrap(set.iterator()), IntIterators.unwrap(set.iterator())));
            assertEquals(new IntOpenHashSet(set), interval);

            for (int k = j - 1; k <= i + 1; k++) {
                assertTrue(Arrays.equals(IntIterators.unwrap(set.iterator(k)), IntIterators.unwrap(set.iterator(k))));
                assertEquals(set.headSet(k), interval.headSet(k));
                assertEquals(set.tailSet(k), interval.tailSet(k));
                for (int l = k; l <= i + 1; l++)
                    assertEquals(set.subSet(k, l), interval.subSet(k, l));
            }
        }
}
Example #9
Source File: ThresholdMapFlattenerTest.java From metanome-algorithms with Apache License 2.0
@Test
public void test() {
    Collection<To> row1 = Arrays.asList(
        To.builder().similarity(0.5).records(new IntOpenHashSet(Arrays.asList(Integer.valueOf(1), Integer.valueOf(2)))).build(),
        To.builder().similarity(0.6).records(IntSets.singleton(3)).build());
    Collection<To> row2 = Collections.singletonList(
        To.builder().similarity(0.4).records(IntSets.singleton(1)).build());
    ThresholdMapFlattener flattener = createFlattener(3);
    ThresholdMap map = CollectingThresholdMap.builder()
        .add(1, row1)
        .add(2, row2)
        .build(flattener);

    assertThat(map.greaterOrEqual(1, 0.5)).hasSize(3);
    assertThat(map.greaterOrEqual(1, 0.5)).contains(Integer.valueOf(1), Integer.valueOf(2), Integer.valueOf(3));
    assertThat(map.greaterOrEqual(1, 0.6)).hasSize(1);
    assertThat(map.greaterOrEqual(1, 0.6)).contains(Integer.valueOf(3));
    assertThat(map.greaterOrEqual(1, 0.7)).isEmpty();
    assertThat(map.greaterOrEqual(2, 0.4)).hasSize(1);
    assertThat(map.greaterOrEqual(2, 0.4)).contains(Integer.valueOf(1));
}
Example #10
Source File: CollectingThresholdMapTest.java From metanome-algorithms with Apache License 2.0
@Test
public void test() {
    Collection<To> row1 = Arrays.asList(
        To.builder().similarity(0.5).records(new IntOpenHashSet(Arrays.asList(Integer.valueOf(1), Integer.valueOf(2)))).build(),
        To.builder().similarity(0.6).records(IntSets.singleton(3)).build());
    Collection<To> row2 = Collections.singletonList(
        To.builder().similarity(0.4).records(IntSets.singleton(1)).build());
    ThresholdMap map = CollectingThresholdMap.builder()
        .add(1, row1)
        .add(2, row2)
        .build();

    assertThat(map.greaterOrEqual(1, 0.5)).hasSize(3);
    assertThat(map.greaterOrEqual(1, 0.5)).contains(Integer.valueOf(1), Integer.valueOf(2), Integer.valueOf(3));
    assertThat(map.greaterOrEqual(1, 0.6)).hasSize(1);
    assertThat(map.greaterOrEqual(1, 0.6)).contains(Integer.valueOf(3));
    assertThat(map.greaterOrEqual(1, 0.7)).isEmpty();
    assertThat(map.greaterOrEqual(2, 0.4)).hasSize(1);
    assertThat(map.greaterOrEqual(2, 0.4)).contains(Integer.valueOf(1));
}
Example #11
Source File: InPredicateEvaluatorFactory.java From incubator-pinot with Apache License 2.0
DictionaryBasedInPredicateEvaluator(InPredicate inPredicate, Dictionary dictionary) {
    List<String> values = inPredicate.getValues();
    _matchingDictIdSet = new IntOpenHashSet(HashUtil.getMinHashSetSize(values.size()));
    for (String value : values) {
        int dictId = dictionary.indexOf(value);
        if (dictId >= 0) {
            _matchingDictIdSet.add(dictId);
        }
    }
    _numMatchingDictIds = _matchingDictIdSet.size();
    if (_numMatchingDictIds == 0) {
        _alwaysFalse = true;
    } else if (dictionary.length() == _numMatchingDictIds) {
        _alwaysTrue = true;
    }
}
Example #12
Source File: OneClassPreferenceFMData.java From RankSys with Mozilla Public License 2.0
@Override
public Stream<? extends FMInstance> stream() {
    return uidxs.stream()
        .flatMap(uidx -> {
            IntSet uidxIidxs = new IntOpenHashSet();
            prefs.getUidxIidxs(uidx).forEachRemaining(uidxIidxs::add);

            List<FMInstance> instances = new ArrayList<>();

            // adding positive examples
            uidxIidxs
                .forEach(iidx -> instances.add(getInstance(uidx, iidx, 1.0)));

            // adding negative examples
            rnd.ints(iidxs.size(), 0, iidxs.size()).map(iidxs::getInt)
                .filter(jidx -> !uidxIidxs.contains(jidx))
                .distinct()
                .limit((int) (negativeProp * uidxIidxs.size()))
                .forEach(jidx -> instances.add(getInstance(uidx, jidx, 0.0)));

            Collections.shuffle(instances);

            return instances.stream();
        });
}
Example #13
Source File: NbestListUtils.java From phrasal with GNU General Public License v3.0
/**
 * Baseline implementation. Augments the "standard" list with alternatives.
 *
 * @param standard
 * @param alt
 * @param maxAltItems
 * @return
 */
public static <TK,FV> List<RichTranslation<TK,FV>> mergeAndDedup(List<RichTranslation<TK,FV>> standard,
        List<RichTranslation<TK,FV>> alt, int maxAltItems) {

    IntSet hashCodeSet = new IntOpenHashSet(standard.size());
    for (RichTranslation<TK,FV> s : standard) {
        hashCodeSet.add(derivationHashCode(s.getFeaturizable().derivation));
    }

    List<RichTranslation<TK,FV>> returnList = new ArrayList<>(standard);
    for (int i = 0, sz = Math.min(maxAltItems, alt.size()); i < sz; ++i) {
        RichTranslation<TK,FV> t = alt.get(i);
        int hashCode = derivationHashCode(t.getFeaturizable().derivation);
        if (! hashCodeSet.contains(hashCode)) returnList.add(t);
    }
    Collections.sort(returnList);

    return returnList;
}
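The same trick, remembering an IntOpenHashSet of int hash codes instead of a HashSet of whole objects, applies to any merge-and-dedup over elements with a cheap, well-distributed hash. A generic sketch follows (hypothetical class and method names, not part of Phrasal):

import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.ints.IntSet;
import java.util.ArrayList;
import java.util.List;

public final class HashCodeDedup {
    /**
     * Appends to the merged result those elements of {@code extra} whose hash code has
     * not been seen yet. Hash collisions can drop distinct elements; as in the n-best
     * merge above, that is the accepted trade-off for storing one int per element.
     */
    static <T> List<T> mergeByHashCode(List<T> base, List<T> extra) {
        IntSet seen = new IntOpenHashSet(base.size());
        for (T item : base) {
            seen.add(item.hashCode());
        }
        List<T> merged = new ArrayList<>(base);
        for (T item : extra) {
            if (seen.add(item.hashCode())) {   // true only for unseen hash codes
                merged.add(item);
            }
        }
        return merged;
    }
}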
Example #14
Source File: GraphFunctions.java From data-polygamy with BSD 3-Clause "New" or "Revised" License
public GraphFunctions(int[][] edges2D, int noNodes) {
    try {
        nv = noNodes;
        nodes = new IntOpenHashSet[nv];
        for (int i = 0; i < nv; i++) {
            nodes[i] = new IntOpenHashSet();
        }
        for (int i = 0; i < edges2D.length; i++) {
            int v1 = edges2D[i][0];
            int v2 = edges2D[i][1];
            nodes[v1].add(v2);
            nodes[v2].add(v1);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
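With the adjacency list stored as one IntOpenHashSet per node, adjacency tests and degree queries become constant-time set operations. A self-contained sketch of the same layout and the queries it enables (the class name and sample edges are made up, not from data-polygamy):

import it.unimi.dsi.fastutil.ints.IntOpenHashSet;

public final class AdjacencyDemo {
    public static void main(String[] args) {
        int[][] edges = { {0, 1}, {1, 2}, {0, 2} };
        int n = 3;

        // Same layout as above: one IntOpenHashSet of neighbours per node.
        IntOpenHashSet[] nodes = new IntOpenHashSet[n];
        for (int i = 0; i < n; i++) {
            nodes[i] = new IntOpenHashSet();
        }
        for (int[] e : edges) {
            nodes[e[0]].add(e[1]);
            nodes[e[1]].add(e[0]);
        }

        // Constant-time neighbourhood queries.
        System.out.println(nodes[0].contains(2));   // true: 0 and 2 are adjacent
        System.out.println(nodes[1].size());        // 2: degree of node 1
    }
}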
Example #15
Source File: IntDictionaryMap.java From tablesaw with Apache License 2.0
@Override
public Selection selectIsIn(Collection<String> strings) {
    IntOpenHashSet keys = new IntOpenHashSet(strings.size());
    for (String string : strings) {
        int key = getKeyForValue(string);
        if (key != DEFAULT_RETURN_VALUE) {
            keys.add(key);
        }
    }

    Selection results = new BitmapBackedSelection();
    for (int i = 0; i < values.size(); i++) {
        if (keys.contains(values.getInt(i))) {
            results.add(i);
        }
    }
    return results;
}
Example #16
Source File: ObjectSerDeUtilsTest.java From incubator-pinot with Apache License 2.0
@Test
public void testIntSet() {
    for (int i = 0; i < NUM_ITERATIONS; i++) {
        int size = RANDOM.nextInt(100);
        IntSet expected = new IntOpenHashSet(size);
        for (int j = 0; j < size; j++) {
            expected.add(RANDOM.nextInt());
        }

        byte[] bytes = ObjectSerDeUtils.serialize(expected);
        IntSet actual = ObjectSerDeUtils.deserialize(bytes, ObjectSerDeUtils.ObjectType.IntSet);

        // NOTE: use Object comparison instead of Collection comparison because the order might be different
        assertEquals((Object) actual, expected, ERROR_MESSAGE);
    }
}
Example #17
Source File: IntColumn.java From tablesaw with Apache License 2.0
@Override
public int countUnique() {
    IntSet uniqueElements = new IntOpenHashSet();
    for (int i = 0; i < size(); i++) {
        uniqueElements.add(getInt(i));
    }
    return uniqueElements.size();
}
Example #18
Source File: FastFilters.java From RankSys with Mozilla Public License 2.0
/**
 * Item filter that discards items in the training preference data.
 *
 * @param <U> type of the users
 * @param <I> type of the items
 * @param trainData preference data
 * @return item filters for each user returning true if the
 * user-item pair was not observed in the preference data
 */
public static <U, I> Function<U, IntPredicate> notInTrain(FastPreferenceData<U, I> trainData) {
    return user -> {
        IntSet set = new IntOpenHashSet();
        trainData.getUidxPreferences(trainData.user2uidx(user))
            .mapToInt(IdxPref::v1)
            .forEach(set::add);

        return iidx -> !set.contains(iidx);
    };
}
Example #19
Source File: IntColumn.java From tablesaw with Apache License 2.0
@Override
public IntColumn unique() {
    final IntSet values = new IntOpenHashSet();
    for (int i = 0; i < size(); i++) {
        values.add(getInt(i));
    }
    final IntColumn column = IntColumn.create(name() + " Unique values");
    for (int value : values) {
        column.append(value);
    }
    return column;
}
Example #20
Source File: DatasetWikiIdExporter.java From gerbil with GNU Affero General Public License v3.0
private IntOpenHashSet analyzeAsD2W(DatasetConfiguration config) throws GerbilException {
    C2WDataset dataset = (C2WDataset) config.getDataset(ExperimentType.C2KB);
    if (dataset == null) {
        return null;
    }
    List<HashSet<Tag>> goldStandard = dataset.getC2WGoldStandardList();
    IntOpenHashSet ids = new IntOpenHashSet();
    for (HashSet<Tag> tags : goldStandard) {
        for (Tag tag : tags) {
            ids.add(tag.getConcept());
        }
    }
    return ids;
}
Example #21
Source File: NotInPredicateEvaluatorFactory.java From incubator-pinot with Apache License 2.0
IntRawValueBasedNotInPredicateEvaluator(NotInPredicate notInPredicate) {
    List<String> values = notInPredicate.getValues();
    _nonMatchingValues = new IntOpenHashSet(HashUtil.getMinHashSetSize(values.size()));
    for (String value : values) {
        _nonMatchingValues.add(Integer.parseInt(value));
    }
}
Example #22
Source File: AccessTrace.java From cache2k-benchmark with Apache License 2.0
private void initStatistics() {
    IntSet _values = new IntOpenHashSet();
    for (int v : getArray()) {
        _values.add(v);
        if (v < lowValue) {
            lowValue = v;
        }
        if (v > highValue) {
            highValue = v;
        }
    }
    valueCount = _values.size();
}
Example #23
Source File: TimeColumn.java From tablesaw with Apache License 2.0
@Override
public TimeColumn unique() {
    IntSet ints = new IntOpenHashSet(data);
    TimeColumn column = emptyCopy(ints.size());
    column.data = IntArrayList.wrap(ints.toIntArray());
    column.setName(name() + " Unique values");
    return column;
}
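The copy constructor used above accepts any IntCollection, so deduplicating a primitive list takes only a couple of lines. A standalone sketch of the same copy-construct-and-wrap idiom (class name hypothetical):

import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;

public final class DedupDemo {
    public static void main(String[] args) {
        // Deduplicate a primitive int list the same way TimeColumn.unique() does:
        // copy-construct a set from the IntCollection, then wrap the distinct values.
        IntArrayList data = IntArrayList.wrap(new int[] {5, 3, 5, 9, 3});
        IntOpenHashSet distinct = new IntOpenHashSet(data);
        IntArrayList unique = IntArrayList.wrap(distinct.toIntArray());
        System.out.println(unique.size());   // 3
    }
}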
Example #24
Source File: DistinctCountAggregationFunction.java From incubator-pinot with Apache License 2.0
@Override
public IntOpenHashSet extractAggregationResult(AggregationResultHolder aggregationResultHolder) {
    IntOpenHashSet valueSet = aggregationResultHolder.getResult();
    if (valueSet == null) {
        return new IntOpenHashSet();
    } else {
        return valueSet;
    }
}
Example #25
Source File: IgnoreWIDs.java From tagme with Apache License 2.0
@Override
protected IntSet parseSet() throws IOException {
    log.info("Loading data...");
    Object2IntMap<String> titles = new TitlesToWIDMap(lang).getDataset();
    IntOpenHashSet ids = new IntOpenHashSet(titles.size());

    Pattern p_date = WikiPatterns.getPattern(lang, Type.PAGE_DATE);
    Pattern p_other = WikiPatterns.getPattern(lang, Type.PAGE_IGNORE);

    PLogger plog = new PLogger(log, "titles", "dates", "others")
        .setEnd(0, titles.size())
        .start("Parsing ignore-pages...");

    for (String title : titles.keySet()) {
        plog.update(0);
        if (p_date.matcher(title).find()) {
            plog.update(1);
            ids.add(titles.get(title));
        } else if (p_other.matcher(title).find()) {
            plog.update(2);
            ids.add(titles.get(title));
        }
    }
    plog.stop();

    ids.trim();
    return ids;
}
Example #26
Source File: PageToCategoryIDs.java From tagme with Apache License 2.0
@Override
protected int[][] parseSet() throws IOException {
    final Int2ObjectMap<IntSet> map = new Int2ObjectOpenHashMap<IntSet>(3000000);
    final IntSet hidden = DatasetLoader.get(new HiddenCategoriesWIDs(lang));
    File input = WikipediaFiles.CAT_LINKS.getSourceFile(lang);
    final Object2IntMap<String> categories = DatasetLoader.get(new CategoriesToWIDMap(lang));

    SQLWikiParser parser = new SQLWikiParser(log) {
        @Override
        public boolean compute(ArrayList<String> values) throws IOException {
            String c_title = cleanPageName(values.get(SQLWikiParser.CATLINKS_TITLE_TO));
            int id = Integer.parseInt(values.get(SQLWikiParser.CATLINKS_ID_FROM));
            if (categories.containsKey(c_title) && !hidden.contains(categories.get(c_title).intValue())) {
                if (map.containsKey(id)) {
                    map.get(id).add(categories.get(c_title).intValue());
                } else {
                    IntSet set = new IntOpenHashSet();
                    set.add(categories.get(c_title).intValue());
                    map.put(id, set);
                }
                return true;
            } else
                return false;
        }
    };

    InputStreamReader reader = new InputStreamReader(new FileInputStream(input), Charset.forName("UTF-8"));
    parser.compute(reader);
    reader.close();

    return createDump(map);
}
Example #27
Source File: DatasetWikiIdExporter.java From gerbil with GNU Affero General Public License v3.0
public void analyzeDataset(DatasetConfiguration config, PrintStream output) throws GerbilException {
    IntOpenHashSet ids = analyzeAsD2W(config);
    if (ids == null) {
        ids = analyzeAsC2W(config);
    }
    printIds(ids, output);
}