org.apache.mahout.common.RandomUtils Java Examples
The following examples show how to use
org.apache.mahout.common.RandomUtils.
You can vote up the examples you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example #1
Source File: TDigestTest.java From streaminer with Apache License 2.0 | 6 votes |
@Test() public void testSizeControl() throws IOException { // very slow running data generator. Don't want to run this normally. To run slow tests use // mvn test -DrunSlowTests=true assumeTrue(Boolean.parseBoolean(System.getProperty("runSlowTests"))); Random gen = RandomUtils.getRandom(); PrintWriter out = new PrintWriter(new FileOutputStream("scaling.tsv")); out.printf("k\tsamples\tcompression\tsize1\tsize2\n"); for (int k = 0; k < 20; k++) { for (int size : new int[]{10, 100, 1000, 10000}) { for (double compression : new double[]{2, 5, 10, 20, 50, 100, 200, 500, 1000}) { TDigest dist = new TDigest(compression, gen); for (int i = 0; i < size * 1000; i++) { dist.add(gen.nextDouble()); } out.printf("%d\t%d\t%.0f\t%d\t%d\n", k, size, compression, dist.smallByteSize(), dist.byteSize()); out.flush(); } } } out.printf("\n"); out.close(); new File("scaling.tsv").delete(); }
Example #2
Source File: FastByIDMap.java From elasticsearch-taste with Apache License 2.0 | 6 votes |
/**
 * Creates a new {@link FastByIDMap} whose capacity can accommodate the given number of entries without rehash.
 *
 * @param size desired capacity
 * @param maxSize max capacity
 * @param loadFactor ratio of internal hash table size to current size
 * @throws IllegalArgumentException if size is less than 0, maxSize is less than 1
 *         or at least half of {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME}, or
 *         loadFactor is less than 1
 */
public FastByIDMap(final int size, final int maxSize, final float loadFactor) {
    Preconditions.checkArgument(size >= 0, "size must be at least 0");
    Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0");
    this.loadFactor = loadFactor;
    // Largest requestable size: the table itself can never exceed
    // MAX_INT_SMALLER_TWIN_PRIME entries once scaled by the load factor.
    final int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor);
    Preconditions.checkArgument(size < max, "size must be less than " + max);
    Preconditions.checkArgument(maxSize >= 1, "maxSize must be at least 1");
    // Internal table length is a twin prime at least loadFactor * size,
    // presumably chosen to suit the probing scheme — see RandomUtils.nextTwinPrime.
    final int hashSize = RandomUtils.nextTwinPrime((int) (loadFactor * size));
    keys = new long[hashSize];
    Arrays.fill(keys, NULL); // NULL sentinel marks empty slots
    values = (V[]) new Object[hashSize];
    this.maxSize = maxSize;
    // Access counting (and the recently-accessed bit set) is only needed when a
    // real capacity cap was supplied, i.e. entries may have to be evicted.
    this.countingAccesses = maxSize != Integer.MAX_VALUE;
    this.recentlyAccessed = countingAccesses ? new BitSet(hashSize) : null;
}
Example #3
Source File: TDigestTest.java From streaminer with Apache License 2.0 | 6 votes |
/**
 * Feeds a strictly increasing sequence (steps of PI * 1e-5) into the digest and
 * checks quantile accuracy via runTest. Sorted input is a worst case for some
 * sketches, so this guards against order-dependent bias.
 */
@Test
public void testSequentialPoints() {
    Random gen = RandomUtils.getRandom();
    for (int i = 0; i < repeats(); i++) {
        runTest(new AbstractContinousDistribution() {
            double base = 0;

            @Override
            public double nextDouble() {
                // Deterministic monotone generator; irrational step avoids exact ties.
                base += Math.PI * 1e-5;
                return base;
            }
        }, 100, new double[]{0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999}, "sequential", true, gen);
    }
}
Example #4
Source File: TDigestTest.java From streaminer with Apache License 2.0 | 6 votes |
/**
 * Tests a 50/50 mixture of Uniform(-1, 1) and Normal(0, 1e-5).
 * This mixture of a uniform and normal distribution has a very narrow peak
 * which is centered near the median. Our system should be scale invariant and
 * work well regardless.
 */
@Test
public void testNarrowNormal() {
    final Random gen = RandomUtils.getRandom();
    AbstractContinousDistribution mix = new AbstractContinousDistribution() {
        AbstractContinousDistribution normal = new Normal(0, 1e-5, gen);
        AbstractContinousDistribution uniform = new Uniform(-1, 1, gen);

        @Override
        public double nextDouble() {
            // Flip a fair coin to choose which component distribution to sample.
            double x;
            if (gen.nextDouble() < 0.5) {
                x = uniform.nextDouble();
            } else {
                x = normal.nextDouble();
            }
            return x;
        }
    };
    for (int i = 0; i < repeats(); i++) {
        runTest(mix, 100, new double[]{0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99, 0.999},
                "mixture", false, gen);
    }
}
Example #5
Source File: ParallelSGDFactorizer.java From elasticsearch-taste with Apache License 2.0 | 6 votes |
/**
 * Initializes the user and item factor matrices for SGD training.
 * Column 0 and the two bias columns get fixed starting values; the remaining
 * features start as small Gaussian noise.
 */
protected void initialize() {
    final RandomWrapper random = RandomUtils.getRandom();
    userVectors = new double[dataModel.getNumUsers()][rank];
    itemVectors = new double[dataModel.getNumItems()][rank];
    final double globalAverage = getAveragePreference();
    for (int userIndex = 0; userIndex < userVectors.length; userIndex++) {
        userVectors[userIndex][0] = globalAverage;
        userVectors[userIndex][USER_BIAS_INDEX] = 0; // will store user bias
        userVectors[userIndex][ITEM_BIAS_INDEX] = 1; // corresponding item feature contains item bias
        for (int feature = FEATURE_OFFSET; feature < rank; feature++) {
            userVectors[userIndex][feature] = random.nextGaussian() * NOISE;
        }
    }
    for (int itemIndex = 0; itemIndex < itemVectors.length; itemIndex++) {
        itemVectors[itemIndex][0] = 1; // corresponding user feature contains global average
        itemVectors[itemIndex][USER_BIAS_INDEX] = 1; // corresponding user feature contains user bias
        itemVectors[itemIndex][ITEM_BIAS_INDEX] = 0; // will store item bias
        for (int feature = FEATURE_OFFSET; feature < rank; feature++) {
            itemVectors[itemIndex][feature] = random.nextGaussian() * NOISE;
        }
    }
}
Example #6
Source File: FastMap.java From elasticsearch-taste with Apache License 2.0 | 6 votes |
/**
 * Creates a new {@link FastMap} whose capacity can accommodate the given number of entries without rehash.
 *
 * @param size desired capacity
 * @param maxSize max capacity
 * @param loadFactor ratio of internal hash table size to current size
 * @throws IllegalArgumentException if size is less than 0, maxSize is less than 1
 *         or at least half of {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME}, or
 *         loadFactor is less than 1
 */
public FastMap(final int size, final int maxSize, final float loadFactor) {
    Preconditions.checkArgument(size >= 0, "size must be at least 0");
    Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0");
    this.loadFactor = loadFactor;
    // Largest requestable size: the internal table may not exceed
    // MAX_INT_SMALLER_TWIN_PRIME entries once scaled by the load factor.
    final int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor);
    Preconditions.checkArgument(size < max, "size must be less than " + max);
    Preconditions.checkArgument(maxSize >= 1, "maxSize must be at least 1");
    // Table length is a twin prime at least loadFactor * size, presumably to
    // suit the probing scheme — see RandomUtils.nextTwinPrime.
    final int hashSize = RandomUtils.nextTwinPrime((int) (loadFactor * size));
    keys = (K[]) new Object[hashSize];
    values = (V[]) new Object[hashSize];
    this.maxSize = maxSize;
    // Access tracking is only needed when a real capacity cap was supplied.
    this.countingAccesses = maxSize != Integer.MAX_VALUE;
    this.recentlyAccessed = countingAccesses ? new BitSet(hashSize) : null;
}
Example #7
Source File: ALSWRFactorizer.java From elasticsearch-taste with Apache License 2.0 | 6 votes |
/**
 * Builds the initial feature matrices for ALS-WR: the item matrix M gets the
 * item's average rating in column 0 and small random values (scaled by 0.1)
 * elsewhere; the user matrix U is left zero-initialized.
 */
Features(final ALSWRFactorizer factorizer) {
    dataModel = factorizer.dataModel;
    numFeatures = factorizer.numFeatures;
    final Random random = RandomUtils.getRandom();
    M = new double[dataModel.getNumItems()][numFeatures];
    final LongPrimitiveIterator itemIDsIterator = dataModel.getItemIDs();
    while (itemIDsIterator.hasNext()) {
        final long itemID = itemIDsIterator.nextLong();
        final int itemIDIndex = factorizer.itemIndex(itemID);
        // NOTE(review): "averateRating" looks like a typo for "averageRating",
        // but the method is declared elsewhere — rename at its definition site.
        M[itemIDIndex][0] = averateRating(itemID);
        for (int feature = 1; feature < numFeatures; feature++) {
            M[itemIDIndex][feature] = random.nextDouble() * 0.1;
        }
    }
    U = new double[dataModel.getNumUsers()][numFeatures];
}
Example #8
Source File: FixedSizeSamplingIterator.java From elasticsearch-taste with Apache License 2.0 | 6 votes |
/**
 * Draws a uniform fixed-size sample from {@code source} using reservoir
 * sampling: the first {@code size} elements fill the buffer, then each later
 * element replaces a random slot with probability size/sofar.
 *
 * @param size maximum number of elements to retain
 * @param source iterator to sample from (fully consumed here)
 */
public FixedSizeSamplingIterator(final int size, final Iterator<T> source) {
    final List<T> buf = Lists.newArrayListWithCapacity(size);
    int sofar = 0; // number of source elements seen so far
    final Random random = RandomUtils.getRandom();
    while (source.hasNext()) {
        final T v = source.next();
        sofar++;
        if (buf.size() < size) {
            buf.add(v);
        } else {
            // Position is uniform over [0, sofar); only indexes < size replace.
            final int position = random.nextInt(sofar);
            if (position < buf.size()) {
                buf.set(position, v);
            }
        }
    }
    delegate = buf.iterator();
}
Example #9
Source File: RatingSGDFactorizer.java From elasticsearch-taste with Apache License 2.0 | 5 votes |
/**
 * Randomly permutes the cached preferences in place so SGD visits them in a
 * random order each epoch.
 */
protected void shufflePreferences() {
    final RandomWrapper random = RandomUtils.getRandom();
    /* Durstenfeld shuffle */
    for (int currentPos = cachedUserIDs.length - 1; currentPos > 0; currentPos--) {
        final int swapPos = random.nextInt(currentPos + 1);
        swapCachedPreferences(currentPos, swapPos);
    }
}
Example #10
Source File: UpperQuantileTest.java From log-synth with Apache License 2.0 | 5 votes |
/**
 * Test fixture: fills the UpperQuantile under test and a parallel sorted
 * reference array with 1001 uniform random doubles. Uses the fixed test seed
 * so runs are reproducible.
 */
@Before
public void generate() {
    RandomUtils.useTestSeed();
    uq = new UpperQuantile(101);
    data = new double[1001];
    Random gen = RandomUtils.getRandom();
    for (int i = 0; i < 1001; i++) {
        double x = gen.nextDouble();
        data[i] = x;
        uq.add(x);
    }
    // Sorted copy serves as ground truth for quantile checks.
    Arrays.sort(data);
}
Example #11
Source File: User.java From log-synth with Apache License 2.0 | 5 votes |
/**
 * Creates a simulated user with an exponentially distributed inter-session
 * time and schedules the first session.
 *
 * @param address the user's source IP address
 * @param geoCode geographic code attributed to the user
 * @param terms generator for the user's query terms
 * @param period parameter of the session-time Exponential distribution
 *               (NOTE(review): stored in a field named "rate" — confirm
 *               whether this is a rate or a mean period)
 */
public User(InetAddress address, String geoCode, TermGenerator terms, double period) {
    this.terms = terms;
    this.geoCode = geoCode;
    this.address = address;
    this.rate = period;
    this.sessionTimeDistribution = new Exponential(period, RandomUtils.getRandom());
    // Monotonically increasing counter gives each user a unique id.
    id = idCounter.addAndGet(1);
    nextSession = sessionTimeDistribution.nextDouble();
}
Example #12
Source File: TDigestTest.java From streaminer with Apache License 2.0 | 5 votes |
/**
 * Records quantile estimation error versus compression factor for uniform
 * data, writing results to error-scaling.tsv. The file is deleted afterwards;
 * it exists only so intermediate output can be inspected if the run is killed.
 */
@Test
public void testScaling() throws FileNotFoundException {
    Random gen = RandomUtils.getRandom();
    PrintWriter out = new PrintWriter(new FileOutputStream("error-scaling.tsv"));
    try {
        out.printf("pass\tcompression\tq\terror\tsize\n");
        // change to 50 passes for better graphs
        int n = repeats() * repeats();
        for (int k = 0; k < n; k++) {
            List<Double> data = Lists.newArrayList();
            for (int i = 0; i < 100000; i++) {
                data.add(gen.nextDouble());
            }
            Collections.sort(data);
            for (double compression : new double[]{2, 5, 10, 20, 50, 100, 200, 500, 1000}) {
                TDigest dist = new TDigest(compression, gen);
                for (Double x : data) {
                    dist.add(x);
                }
                dist.compress();
                for (double q : new double[]{0.001, 0.01, 0.1, 0.5}) {
                    double estimate = dist.quantile(q);
                    // Ground truth is the empirical quantile of the sorted sample.
                    double actual = data.get((int) (q * data.size()));
                    out.printf("%d\t%.0f\t%.3f\t%.9f\t%d\n", k, compression, q, estimate - actual, dist.byteSize());
                    out.flush();
                }
            }
        }
    } finally {
        out.close();
        new File("error-scaling.tsv").delete();
    }
}
Example #13
Source File: TDigestTest.java From streaminer with Apache License 2.0 | 5 votes |
/**
 * Compares TDigest accuracy against QDigest (via the compare helper) on a
 * heavily skewed Gamma(0.1, 0.1) and on Uniform(0, 1) data.
 */
@Test
public void compareToQDigest() {
    Random rand = RandomUtils.getRandom();
    for (int i = 0; i < repeats(); i++) {
        compare(new Gamma(0.1, 0.1, rand), "gamma", 1L << 48, rand);
        compare(new Uniform(0, 1, rand), "uniform", 1L << 48, rand);
    }
}
Example #14
Source File: TDigestTest.java From streaminer with Apache License 2.0 | 5 votes |
/**
 * Exercises the digest on Gamma(0.1, 0.1) data.
 * This Gamma distribution is very heavily skewed. The 0.1%-ile is 6.07e-30 while
 * the median is 0.006 and the 99.9th %-ile is 33.6 while the mean is 1.
 * This severe skew means that we have to have positional accuracy that
 * varies by over 11 orders of magnitude.
 */
@Test
public void testGamma() {
    Random gen = RandomUtils.getRandom();
    for (int i = 0; i < repeats(); i++) {
        runTest(new Gamma(0.1, 0.1, gen), 100,
                // new double[]{6.0730483624079e-30, 6.0730483624079e-20, 6.0730483627432e-10, 5.9339110446023e-03,
                // 2.6615455373884e+00, 1.5884778179295e+01, 3.3636770117188e+01},
                new double[]{0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999}, "gamma", true, gen);
    }
}
Example #15
Source File: TDigestTest.java From streaminer with Apache License 2.0 | 5 votes |
/**
 * Baseline accuracy test: quantile estimates for Uniform(0, 1) data at a
 * standard set of probe quantiles.
 */
@Test
public void testUniform() {
    Random gen = RandomUtils.getRandom();
    for (int i = 0; i < repeats(); i++) {
        runTest(new Uniform(0, 1, gen), 100,
                new double[]{0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999}, "uniform", true, gen);
    }
}
Example #16
Source File: RatingSGDFactorizer.java From elasticsearch-taste with Apache License 2.0 | 5 votes |
/**
 * Initializes user/item factor matrices (fixed bias columns plus Gaussian
 * noise for the remaining features), then caches and shuffles the
 * preferences ready for SGD training.
 */
protected void prepareTraining() {
    final RandomWrapper random = RandomUtils.getRandom();
    userVectors = new double[dataModel.getNumUsers()][numFeatures];
    itemVectors = new double[dataModel.getNumItems()][numFeatures];
    final double globalAverage = getAveragePreference();
    for (int userIndex = 0; userIndex < userVectors.length; userIndex++) {
        userVectors[userIndex][0] = globalAverage;
        userVectors[userIndex][USER_BIAS_INDEX] = 0; // will store user bias
        userVectors[userIndex][ITEM_BIAS_INDEX] = 1; // corresponding item feature contains item bias
        for (int feature = FEATURE_OFFSET; feature < numFeatures; feature++) {
            userVectors[userIndex][feature] = random.nextGaussian() * randomNoise;
        }
    }
    for (int itemIndex = 0; itemIndex < itemVectors.length; itemIndex++) {
        itemVectors[itemIndex][0] = 1; // corresponding user feature contains global average
        itemVectors[itemIndex][USER_BIAS_INDEX] = 1; // corresponding user feature contains user bias
        itemVectors[itemIndex][ITEM_BIAS_INDEX] = 0; // will store item bias
        for (int feature = FEATURE_OFFSET; feature < numFeatures; feature++) {
            itemVectors[itemIndex][feature] = random.nextGaussian() * randomNoise;
        }
    }
    cachePreferences();
    shufflePreferences();
}
Example #17
Source File: FastIDSet.java From elasticsearch-taste with Apache License 2.0 | 5 votes |
/**
 * Creates a new {@link FastIDSet} whose capacity can accommodate the given
 * number of IDs without rehash.
 *
 * @param size desired capacity
 * @param loadFactor ratio of internal hash table size to current size
 * @throws IllegalArgumentException if size is less than 0, size is too large
 *         for the given load factor, or loadFactor is less than 1
 */
public FastIDSet(final int size, final float loadFactor) {
    Preconditions.checkArgument(size >= 0, "size must be at least 0");
    Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0");
    this.loadFactor = loadFactor;
    // Largest requestable size given the load factor and the maximum table length.
    final int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor);
    Preconditions.checkArgument(size < max, "size must be less than %d", max);
    // Table length is a twin prime at least loadFactor * size.
    final int hashSize = RandomUtils.nextTwinPrime((int) (loadFactor * size));
    keys = new long[hashSize];
    Arrays.fill(keys, NULL); // NULL sentinel marks empty slots
}
Example #18
Source File: AbstractDifferenceRecommenderEvaluator.java From elasticsearch-taste with Apache License 2.0 | 4 votes |
/**
 * Initializes the evaluator's random source from Mahout's RandomUtils so test
 * runs can be made repeatable via its seeding hooks.
 */
protected AbstractDifferenceRecommenderEvaluator() {
    random = RandomUtils.getRandom();
}
Example #19
Source File: GenericItemSimilarity.java From elasticsearch-taste with Apache License 2.0 | 4 votes |
/**
 * Hash combines both item IDs (truncated to int) with the similarity value's
 * hash via XOR; symmetric in itemID1/itemID2, consistent with equals on the
 * same fields.
 */
@Override
public int hashCode() {
    return (int) itemID1 ^ (int) itemID2 ^ RandomUtils.hashDouble(value);
}
Example #20
Source File: GenericUserSimilarity.java From elasticsearch-taste with Apache License 2.0 | 4 votes |
/**
 * Hash combines both user IDs (truncated to int) with the similarity value's
 * hash via XOR; symmetric in userID1/userID2, consistent with equals on the
 * same fields.
 */
@Override
public int hashCode() {
    return (int) userID1 ^ (int) userID2 ^ RandomUtils.hashDouble(value);
}
Example #21
Source File: DateSampler.java From log-synth with Apache License 2.0 | 4 votes |
/**
 * Sets the end of the sampled date range and rebuilds the underlying uniform
 * sampler over [0, end - start).
 *
 * NOTE(review): this creates a freshly, default-seeded Random — any seed set
 * earlier appears to be discarded; confirm that is intended.
 *
 * @param end end date, parsed with this sampler's date format
 * @throws ParseException if the string cannot be parsed
 */
@SuppressWarnings("UnusedDeclaration")
public void setEnd(String end) throws ParseException {
    this.end = df.parse(end).getTime();
    base = new Uniform(0, this.end - this.start, RandomUtils.getRandom());
}
Example #22
Source File: DateSampler.java From log-synth with Apache License 2.0 | 4 votes |
/**
 * Sets the start of the sampled date range and rebuilds the underlying
 * uniform sampler over [0, end - start).
 *
 * NOTE(review): this creates a freshly, default-seeded Random — any seed set
 * earlier appears to be discarded; confirm that is intended.
 *
 * @param start start date, parsed with this sampler's date format
 * @throws ParseException if the string cannot be parsed
 */
@SuppressWarnings("UnusedDeclaration")
public void setStart(String start) throws ParseException {
    this.start = df.parse(start).getTime();
    base = new Uniform(0, this.end - this.start, RandomUtils.getRandom());
}
Example #23
Source File: ArrivalSampler.java From log-synth with Apache License 2.0 | 4 votes |
/** Reseeds the sampler's random source for reproducible arrival sequences. */
@Override
public void setSeed(long seed) {
    base = RandomUtils.getRandom(seed);
}
Example #24
Source File: ArrivalSampler.java From log-synth with Apache License 2.0 | 4 votes |
/** Creates an arrival sampler backed by Mahout's default random source. */
public ArrivalSampler() {
    base = RandomUtils.getRandom();
}
Example #25
Source File: ZipSampler.java From log-synth with Apache License 2.0 | 4 votes |
/** Reseeds the sampler's random source for reproducible ZIP selection. */
@Override
@SuppressWarnings("unused")
public void setSeed(long seed) {
    rand = RandomUtils.getRandom(seed);
}
Example #26
Source File: IntegerSampler.java From log-synth with Apache License 2.0 | 4 votes |
/** Reseeds the sampler's random source for reproducible integer sequences. */
@Override
public void setSeed(long seed) {
    base = RandomUtils.getRandom(seed);
}
Example #27
Source File: IntegerSampler.java From log-synth with Apache License 2.0 | 4 votes |
/** Creates an integer sampler backed by Mahout's default random source. */
@SuppressWarnings("WeakerAccess")
public IntegerSampler() {
    base = RandomUtils.getRandom();
}
Example #28
Source File: TDigestTest.java From t-digest with Apache License 2.0 | 4 votes |
/** Fixes RandomUtils' seed once for the class so all tests are reproducible. */
@BeforeClass
public static void freezeSeed() {
    RandomUtils.useTestSeed();
}
Example #29
Source File: AVLGroupTreeTest.java From t-digest with Apache License 2.0 | 4 votes |
/** Fixes RandomUtils' seed before each test so runs are reproducible. */
@Before
public void setUp() {
    RandomUtils.useTestSeed();
}
Example #30
Source File: MergingDigestTest.java From t-digest with Apache License 2.0 | 4 votes |
/** Fixes RandomUtils' seed before each test so runs are reproducible. */
@Before
public void testSetUp() {
    RandomUtils.useTestSeed();
}