org.apache.mahout.common.RandomUtils Java Examples
The following examples show how to use
org.apache.mahout.common.RandomUtils.
You can vote up the examples you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example #1
Source File: TDigestTest.java From streaminer with Apache License 2.0 | 6 votes |
@Test() public void testSizeControl() throws IOException { // very slow running data generator. Don't want to run this normally. To run slow tests use // mvn test -DrunSlowTests=true assumeTrue(Boolean.parseBoolean(System.getProperty("runSlowTests"))); Random gen = RandomUtils.getRandom(); PrintWriter out = new PrintWriter(new FileOutputStream("scaling.tsv")); out.printf("k\tsamples\tcompression\tsize1\tsize2\n"); for (int k = 0; k < 20; k++) { for (int size : new int[]{10, 100, 1000, 10000}) { for (double compression : new double[]{2, 5, 10, 20, 50, 100, 200, 500, 1000}) { TDigest dist = new TDigest(compression, gen); for (int i = 0; i < size * 1000; i++) { dist.add(gen.nextDouble()); } out.printf("%d\t%d\t%.0f\t%d\t%d\n", k, size, compression, dist.smallByteSize(), dist.byteSize()); out.flush(); } } } out.printf("\n"); out.close(); new File("scaling.tsv").delete(); }
Example #2
Source File: FastByIDMap.java From elasticsearch-taste with Apache License 2.0 | 6 votes |
/**
 * Creates a new {@link FastByIDMap} whose capacity can accommodate the given number of entries without rehash.
 *
 * @param size desired capacity
 * @param maxSize max capacity
 * @param loadFactor ratio of internal hash table size to current size
 * @throws IllegalArgumentException if size is less than 0, maxSize is less than 1
 *         or at least half of {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME}, or
 *         loadFactor is less than 1
 */
public FastByIDMap(final int size, final int maxSize, final float loadFactor) {
    Preconditions.checkArgument(size >= 0, "size must be at least 0");
    Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0");
    this.loadFactor = loadFactor;
    // Largest requestable size: the table itself can never exceed
    // MAX_INT_SMALLER_TWIN_PRIME entries once scaled by the load factor.
    final int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor);
    Preconditions.checkArgument(size < max, "size must be less than " + max);
    Preconditions.checkArgument(maxSize >= 1, "maxSize must be at least 1");
    // Internal table length is a twin prime at least loadFactor * size,
    // presumably chosen to suit the probing scheme — see RandomUtils.nextTwinPrime.
    final int hashSize = RandomUtils.nextTwinPrime((int) (loadFactor * size));
    keys = new long[hashSize];
    Arrays.fill(keys, NULL); // NULL sentinel marks empty slots
    values = (V[]) new Object[hashSize];
    this.maxSize = maxSize;
    // Access counting (and the recently-accessed bit set) is only needed when a
    // real capacity cap was supplied, i.e. entries may have to be evicted.
    this.countingAccesses = maxSize != Integer.MAX_VALUE;
    this.recentlyAccessed = countingAccesses ? new BitSet(hashSize) : null;
}
Example #3
Source File: TDigestTest.java From streaminer with Apache License 2.0 | 6 votes |
/**
 * Feeds a strictly increasing sequence (steps of PI * 1e-5) into the digest and
 * checks quantile accuracy via runTest. Sorted input is a worst case for some
 * sketches, so this guards against order-dependent bias.
 */
@Test
public void testSequentialPoints() {
    Random gen = RandomUtils.getRandom();
    for (int i = 0; i < repeats(); i++) {
        runTest(new AbstractContinousDistribution() {
            double base = 0;

            @Override
            public double nextDouble() {
                // Deterministic monotone generator; irrational step avoids exact ties.
                base += Math.PI * 1e-5;
                return base;
            }
        }, 100, new double[]{0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999}, "sequential", true, gen);
    }
}
Example #4
Source File: TDigestTest.java From streaminer with Apache License 2.0 | 6 votes |
/**
 * Tests a 50/50 mixture of Uniform(-1, 1) and Normal(0, 1e-5).
 * This mixture of a uniform and normal distribution has a very narrow peak
 * which is centered near the median. Our system should be scale invariant and
 * work well regardless.
 */
@Test
public void testNarrowNormal() {
    final Random gen = RandomUtils.getRandom();
    AbstractContinousDistribution mix = new AbstractContinousDistribution() {
        AbstractContinousDistribution normal = new Normal(0, 1e-5, gen);
        AbstractContinousDistribution uniform = new Uniform(-1, 1, gen);

        @Override
        public double nextDouble() {
            // Flip a fair coin to choose which component distribution to sample.
            double x;
            if (gen.nextDouble() < 0.5) {
                x = uniform.nextDouble();
            } else {
                x = normal.nextDouble();
            }
            return x;
        }
    };
    for (int i = 0; i < repeats(); i++) {
        runTest(mix, 100, new double[]{0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99, 0.999},
                "mixture", false, gen);
    }
}
Example #5
Source File: ParallelSGDFactorizer.java From elasticsearch-taste with Apache License 2.0 | 6 votes |
/**
 * Initializes the user and item factor matrices for SGD training.
 * Column 0 and the two bias columns get fixed starting values; the remaining
 * features start as small Gaussian noise.
 */
protected void initialize() {
    final RandomWrapper random = RandomUtils.getRandom();
    userVectors = new double[dataModel.getNumUsers()][rank];
    itemVectors = new double[dataModel.getNumItems()][rank];
    final double globalAverage = getAveragePreference();
    for (int userIndex = 0; userIndex < userVectors.length; userIndex++) {
        userVectors[userIndex][0] = globalAverage;
        userVectors[userIndex][USER_BIAS_INDEX] = 0; // will store user bias
        userVectors[userIndex][ITEM_BIAS_INDEX] = 1; // corresponding item feature contains item bias
        for (int feature = FEATURE_OFFSET; feature < rank; feature++) {
            userVectors[userIndex][feature] = random.nextGaussian() * NOISE;
        }
    }
    for (int itemIndex = 0; itemIndex < itemVectors.length; itemIndex++) {
        itemVectors[itemIndex][0] = 1; // corresponding user feature contains global average
        itemVectors[itemIndex][USER_BIAS_INDEX] = 1; // corresponding user feature contains user bias
        itemVectors[itemIndex][ITEM_BIAS_INDEX] = 0; // will store item bias
        for (int feature = FEATURE_OFFSET; feature < rank; feature++) {
            itemVectors[itemIndex][feature] = random.nextGaussian() * NOISE;
        }
    }
}
Example #6
Source File: FastMap.java From elasticsearch-taste with Apache License 2.0 | 6 votes |
/**
 * Creates a new {@link FastMap} whose capacity can accommodate the given number of entries without rehash.
 *
 * @param size desired capacity
 * @param maxSize max capacity
 * @param loadFactor ratio of internal hash table size to current size
 * @throws IllegalArgumentException if size is less than 0, maxSize is less than 1
 *         or at least half of {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME}, or
 *         loadFactor is less than 1
 */
public FastMap(final int size, final int maxSize, final float loadFactor) {
    Preconditions.checkArgument(size >= 0, "size must be at least 0");
    Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0");
    this.loadFactor = loadFactor;
    // Largest requestable size: the internal table may not exceed
    // MAX_INT_SMALLER_TWIN_PRIME entries once scaled by the load factor.
    final int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor);
    Preconditions.checkArgument(size < max, "size must be less than " + max);
    Preconditions.checkArgument(maxSize >= 1, "maxSize must be at least 1");
    // Table length is a twin prime at least loadFactor * size, presumably to
    // suit the probing scheme — see RandomUtils.nextTwinPrime.
    final int hashSize = RandomUtils.nextTwinPrime((int) (loadFactor * size));
    keys = (K[]) new Object[hashSize];
    values = (V[]) new Object[hashSize];
    this.maxSize = maxSize;
    // Access tracking is only needed when a real capacity cap was supplied.
    this.countingAccesses = maxSize != Integer.MAX_VALUE;
    this.recentlyAccessed = countingAccesses ? new BitSet(hashSize) : null;
}
Example #7
Source File: ALSWRFactorizer.java From elasticsearch-taste with Apache License 2.0 | 6 votes |
/**
 * Builds the initial feature matrices for ALS-WR: the item matrix M gets the
 * item's average rating in column 0 and small random values (scaled by 0.1)
 * elsewhere; the user matrix U is left zero-initialized.
 */
Features(final ALSWRFactorizer factorizer) {
    dataModel = factorizer.dataModel;
    numFeatures = factorizer.numFeatures;
    final Random random = RandomUtils.getRandom();
    M = new double[dataModel.getNumItems()][numFeatures];
    final LongPrimitiveIterator itemIDsIterator = dataModel.getItemIDs();
    while (itemIDsIterator.hasNext()) {
        final long itemID = itemIDsIterator.nextLong();
        final int itemIDIndex = factorizer.itemIndex(itemID);
        // NOTE(review): "averateRating" looks like a typo for "averageRating",
        // but the method is declared elsewhere — rename at its definition site.
        M[itemIDIndex][0] = averateRating(itemID);
        for (int feature = 1; feature < numFeatures; feature++) {
            M[itemIDIndex][feature] = random.nextDouble() * 0.1;
        }
    }
    U = new double[dataModel.getNumUsers()][numFeatures];
}
Example #8
Source File: FixedSizeSamplingIterator.java From elasticsearch-taste with Apache License 2.0 | 6 votes |
/**
 * Draws a uniform fixed-size sample from {@code source} using reservoir
 * sampling: the first {@code size} elements fill the buffer, then each later
 * element replaces a random slot with probability size/sofar.
 *
 * @param size maximum number of elements to retain
 * @param source iterator to sample from (fully consumed here)
 */
public FixedSizeSamplingIterator(final int size, final Iterator<T> source) {
    final List<T> buf = Lists.newArrayListWithCapacity(size);
    int sofar = 0; // number of source elements seen so far
    final Random random = RandomUtils.getRandom();
    while (source.hasNext()) {
        final T v = source.next();
        sofar++;
        if (buf.size() < size) {
            buf.add(v);
        } else {
            // Position is uniform over [0, sofar); only indexes < size replace.
            final int position = random.nextInt(sofar);
            if (position < buf.size()) {
                buf.set(position, v);
            }
        }
    }
    delegate = buf.iterator();
}
Example #9
Source File: RatingSGDFactorizer.java From elasticsearch-taste with Apache License 2.0 | 5 votes |
/**
 * Randomly permutes the cached preferences in place so SGD visits them in a
 * random order each epoch.
 */
protected void shufflePreferences() {
    final RandomWrapper random = RandomUtils.getRandom();
    /* Durstenfeld shuffle */
    for (int currentPos = cachedUserIDs.length - 1; currentPos > 0; currentPos--) {
        final int swapPos = random.nextInt(currentPos + 1);
        swapCachedPreferences(currentPos, swapPos);
    }
}
Example #10
Source File: UpperQuantileTest.java From log-synth with Apache License 2.0 | 5 votes |
/**
 * Test fixture: fills the UpperQuantile under test and a parallel sorted
 * reference array with 1001 uniform random doubles. Uses the fixed test seed
 * so runs are reproducible.
 */
@Before
public void generate() {
    RandomUtils.useTestSeed();
    uq = new UpperQuantile(101);
    data = new double[1001];
    Random gen = RandomUtils.getRandom();
    for (int i = 0; i < 1001; i++) {
        double x = gen.nextDouble();
        data[i] = x;
        uq.add(x);
    }
    // Sorted copy serves as ground truth for quantile checks.
    Arrays.sort(data);
}
Example #11
Source File: User.java From log-synth with Apache License 2.0 | 5 votes |
/**
 * Creates a simulated user with an exponentially distributed inter-session
 * time and schedules the first session.
 *
 * @param address the user's source IP address
 * @param geoCode geographic code attributed to the user
 * @param terms generator for the user's query terms
 * @param period parameter of the session-time Exponential distribution
 *               (NOTE(review): stored in a field named "rate" — confirm
 *               whether this is a rate or a mean period)
 */
public User(InetAddress address, String geoCode, TermGenerator terms, double period) {
    this.terms = terms;
    this.geoCode = geoCode;
    this.address = address;
    this.rate = period;
    this.sessionTimeDistribution = new Exponential(period, RandomUtils.getRandom());
    // Monotonically increasing counter gives each user a unique id.
    id = idCounter.addAndGet(1);
    nextSession = sessionTimeDistribution.nextDouble();
}
Example #12
Source File: TDigestTest.java From streaminer with Apache License 2.0 | 5 votes |
/**
 * Records quantile estimation error versus compression factor for uniform
 * data, writing results to error-scaling.tsv. The file is deleted afterwards;
 * it exists only so intermediate output can be inspected if the run is killed.
 */
@Test
public void testScaling() throws FileNotFoundException {
    Random gen = RandomUtils.getRandom();
    PrintWriter out = new PrintWriter(new FileOutputStream("error-scaling.tsv"));
    try {
        out.printf("pass\tcompression\tq\terror\tsize\n");
        // change to 50 passes for better graphs
        int n = repeats() * repeats();
        for (int k = 0; k < n; k++) {
            List<Double> data = Lists.newArrayList();
            for (int i = 0; i < 100000; i++) {
                data.add(gen.nextDouble());
            }
            Collections.sort(data);
            for (double compression : new double[]{2, 5, 10, 20, 50, 100, 200, 500, 1000}) {
                TDigest dist = new TDigest(compression, gen);
                for (Double x : data) {
                    dist.add(x);
                }
                dist.compress();
                for (double q : new double[]{0.001, 0.01, 0.1, 0.5}) {
                    double estimate = dist.quantile(q);
                    // Ground truth is the empirical quantile of the sorted sample.
                    double actual = data.get((int) (q * data.size()));
                    out.printf("%d\t%.0f\t%.3f\t%.9f\t%d\n", k, compression, q, estimate - actual, dist.byteSize());
                    out.flush();
                }
            }
        }
    } finally {
        out.close();
        new File("error-scaling.tsv").delete();
    }
}
Example #13
Source File: TDigestTest.java From streaminer with Apache License 2.0 | 5 votes |
/**
 * Compares TDigest accuracy against QDigest (via the compare helper) on a
 * heavily skewed Gamma(0.1, 0.1) and on Uniform(0, 1) data.
 */
@Test
public void compareToQDigest() {
    Random rand = RandomUtils.getRandom();
    for (int i = 0; i < repeats(); i++) {
        compare(new Gamma(0.1, 0.1, rand), "gamma", 1L << 48, rand);
        compare(new Uniform(0, 1, rand), "uniform", 1L << 48, rand);
    }
}
Example #14
Source File: TDigestTest.java From streaminer with Apache License 2.0 | 5 votes |
/**
 * Exercises the digest on Gamma(0.1, 0.1) data.
 * This Gamma distribution is very heavily skewed. The 0.1%-ile is 6.07e-30 while
 * the median is 0.006 and the 99.9th %-ile is 33.6 while the mean is 1.
 * This severe skew means that we have to have positional accuracy that
 * varies by over 11 orders of magnitude.
 */
@Test
public void testGamma() {
    Random gen = RandomUtils.getRandom();
    for (int i = 0; i < repeats(); i++) {
        runTest(new Gamma(0.1, 0.1, gen), 100,
                // new double[]{6.0730483624079e-30, 6.0730483624079e-20, 6.0730483627432e-10, 5.9339110446023e-03,
                // 2.6615455373884e+00, 1.5884778179295e+01, 3.3636770117188e+01},
                new double[]{0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999}, "gamma", true, gen);
    }
}
Example #15
Source File: TDigestTest.java From streaminer with Apache License 2.0 | 5 votes |
/**
 * Baseline accuracy test: quantile estimates for Uniform(0, 1) data at a
 * standard set of probe quantiles.
 */
@Test
public void testUniform() {
    Random gen = RandomUtils.getRandom();
    for (int i = 0; i < repeats(); i++) {
        runTest(new Uniform(0, 1, gen), 100,
                new double[]{0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999}, "uniform", true, gen);
    }
}
Example #16
Source File: RatingSGDFactorizer.java From elasticsearch-taste with Apache License 2.0 | 5 votes |
/**
 * Initializes user/item factor matrices (fixed bias columns plus Gaussian
 * noise for the remaining features), then caches and shuffles the
 * preferences ready for SGD training.
 */
protected void prepareTraining() {
    final RandomWrapper random = RandomUtils.getRandom();
    userVectors = new double[dataModel.getNumUsers()][numFeatures];
    itemVectors = new double[dataModel.getNumItems()][numFeatures];
    final double globalAverage = getAveragePreference();
    for (int userIndex = 0; userIndex < userVectors.length; userIndex++) {
        userVectors[userIndex][0] = globalAverage;
        userVectors[userIndex][USER_BIAS_INDEX] = 0; // will store user bias
        userVectors[userIndex][ITEM_BIAS_INDEX] = 1; // corresponding item feature contains item bias
        for (int feature = FEATURE_OFFSET; feature < numFeatures; feature++) {
            userVectors[userIndex][feature] = random.nextGaussian() * randomNoise;
        }
    }
    for (int itemIndex = 0; itemIndex < itemVectors.length; itemIndex++) {
        itemVectors[itemIndex][0] = 1; // corresponding user feature contains global average
        itemVectors[itemIndex][USER_BIAS_INDEX] = 1; // corresponding user feature contains user bias
        itemVectors[itemIndex][ITEM_BIAS_INDEX] = 0; // will store item bias
        for (int feature = FEATURE_OFFSET; feature < numFeatures; feature++) {
            itemVectors[itemIndex][feature] = random.nextGaussian() * randomNoise;
        }
    }
    cachePreferences();
    shufflePreferences();
}
Example #17
Source File: FastIDSet.java From elasticsearch-taste with Apache License 2.0 | 5 votes |
/**
 * Creates a new {@link FastIDSet} whose capacity can accommodate the given
 * number of IDs without rehash.
 *
 * @param size desired capacity
 * @param loadFactor ratio of internal hash table size to current size
 * @throws IllegalArgumentException if size is less than 0, size is too large
 *         for the given load factor, or loadFactor is less than 1
 */
public FastIDSet(final int size, final float loadFactor) {
    Preconditions.checkArgument(size >= 0, "size must be at least 0");
    Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0");
    this.loadFactor = loadFactor;
    // Largest requestable size given the load factor and the maximum table length.
    final int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor);
    Preconditions.checkArgument(size < max, "size must be less than %d", max);
    // Table length is a twin prime at least loadFactor * size.
    final int hashSize = RandomUtils.nextTwinPrime((int) (loadFactor * size));
    keys = new long[hashSize];
    Arrays.fill(keys, NULL); // NULL sentinel marks empty slots
}
Example #18
Source File: AbstractDifferenceRecommenderEvaluator.java From elasticsearch-taste with Apache License 2.0 | 4 votes |
/**
 * Initializes the evaluator's random source from Mahout's RandomUtils so test
 * runs can be made repeatable via its seeding hooks.
 */
protected AbstractDifferenceRecommenderEvaluator() {
    random = RandomUtils.getRandom();
}
Example #19
Source File: GenericItemSimilarity.java From elasticsearch-taste with Apache License 2.0 | 4 votes |
/**
 * Hash combines both item IDs (truncated to int) with the similarity value's
 * hash via XOR; symmetric in itemID1/itemID2, consistent with equals on the
 * same fields.
 */
@Override
public int hashCode() {
    return (int) itemID1 ^ (int) itemID2 ^ RandomUtils.hashDouble(value);
}
Example #20
Source File: GenericUserSimilarity.java From elasticsearch-taste with Apache License 2.0 | 4 votes |
/**
 * Hash combines both user IDs (truncated to int) with the similarity value's
 * hash via XOR; symmetric in userID1/userID2, consistent with equals on the
 * same fields.
 */
@Override
public int hashCode() {
    return (int) userID1 ^ (int) userID2 ^ RandomUtils.hashDouble(value);
}
Example #21
Source File: DateSampler.java From log-synth with Apache License 2.0 | 4 votes |
/**
 * Sets the end of the sampled date range and rebuilds the underlying uniform
 * sampler over [0, end - start).
 *
 * NOTE(review): this creates a freshly, default-seeded Random — any seed set
 * earlier appears to be discarded; confirm that is intended.
 *
 * @param end end date, parsed with this sampler's date format
 * @throws ParseException if the string cannot be parsed
 */
@SuppressWarnings("UnusedDeclaration")
public void setEnd(String end) throws ParseException {
    this.end = df.parse(end).getTime();
    base = new Uniform(0, this.end - this.start, RandomUtils.getRandom());
}
Example #22
Source File: DateSampler.java From log-synth with Apache License 2.0 | 4 votes |
/**
 * Sets the start of the sampled date range and rebuilds the underlying
 * uniform sampler over [0, end - start).
 *
 * NOTE(review): this creates a freshly, default-seeded Random — any seed set
 * earlier appears to be discarded; confirm that is intended.
 *
 * @param start start date, parsed with this sampler's date format
 * @throws ParseException if the string cannot be parsed
 */
@SuppressWarnings("UnusedDeclaration")
public void setStart(String start) throws ParseException {
    this.start = df.parse(start).getTime();
    base = new Uniform(0, this.end - this.start, RandomUtils.getRandom());
}
Example #23
Source File: ArrivalSampler.java From log-synth with Apache License 2.0 | 4 votes |
/** Reseeds the sampler's random source for reproducible arrival sequences. */
@Override
public void setSeed(long seed) {
    base = RandomUtils.getRandom(seed);
}
Example #24
Source File: ArrivalSampler.java From log-synth with Apache License 2.0 | 4 votes |
/** Creates an arrival sampler backed by Mahout's default random source. */
public ArrivalSampler() {
    base = RandomUtils.getRandom();
}
Example #25
Source File: ZipSampler.java From log-synth with Apache License 2.0 | 4 votes |
/** Reseeds the sampler's random source for reproducible ZIP selection. */
@Override
@SuppressWarnings("unused")
public void setSeed(long seed) {
    rand = RandomUtils.getRandom(seed);
}
Example #26
Source File: IntegerSampler.java From log-synth with Apache License 2.0 | 4 votes |
/** Reseeds the sampler's random source for reproducible integer sequences. */
@Override
public void setSeed(long seed) {
    base = RandomUtils.getRandom(seed);
}
Example #27
Source File: IntegerSampler.java From log-synth with Apache License 2.0 | 4 votes |
/** Creates an integer sampler backed by Mahout's default random source. */
@SuppressWarnings("WeakerAccess")
public IntegerSampler() {
    base = RandomUtils.getRandom();
}
Example #28
Source File: TDigestTest.java From t-digest with Apache License 2.0 | 4 votes |
/** Fixes RandomUtils' seed once for the class so all tests are reproducible. */
@BeforeClass
public static void freezeSeed() {
    RandomUtils.useTestSeed();
}
Example #29
Source File: AVLGroupTreeTest.java From t-digest with Apache License 2.0 | 4 votes |
/** Fixes RandomUtils' seed before each test so runs are reproducible. */
@Before
public void setUp() {
    RandomUtils.useTestSeed();
}
Example #30
Source File: MergingDigestTest.java From t-digest with Apache License 2.0 | 4 votes |
/** Fixes RandomUtils' seed before each test so runs are reproducible. */
@Before
public void testSetUp() {
    RandomUtils.useTestSeed();
}