Java Code Examples for org.apache.mahout.common.RandomUtils#getRandom()
The following examples show how to use
org.apache.mahout.common.RandomUtils#getRandom() .
You can vote up the examples you like or vote down the ones you don't like,
and you can go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: TDigestTest.java From streaminer with Apache License 2.0 | 6 votes |
@Test() public void testSizeControl() throws IOException { // very slow running data generator. Don't want to run this normally. To run slow tests use // mvn test -DrunSlowTests=true assumeTrue(Boolean.parseBoolean(System.getProperty("runSlowTests"))); Random gen = RandomUtils.getRandom(); PrintWriter out = new PrintWriter(new FileOutputStream("scaling.tsv")); out.printf("k\tsamples\tcompression\tsize1\tsize2\n"); for (int k = 0; k < 20; k++) { for (int size : new int[]{10, 100, 1000, 10000}) { for (double compression : new double[]{2, 5, 10, 20, 50, 100, 200, 500, 1000}) { TDigest dist = new TDigest(compression, gen); for (int i = 0; i < size * 1000; i++) { dist.add(gen.nextDouble()); } out.printf("%d\t%d\t%.0f\t%d\t%d\n", k, size, compression, dist.smallByteSize(), dist.byteSize()); out.flush(); } } } out.printf("\n"); out.close(); new File("scaling.tsv").delete(); }
Example 2
Source File: ALSWRFactorizer.java From elasticsearch-taste with Apache License 2.0 | 6 votes |
/**
 * Builds the initial item-feature matrix M for ALS-WR: column 0 of each item
 * row is seeded with the item's mean rating, the remaining features with
 * small random values in [0, 0.1). U is allocated but left zeroed.
 */
Features(final ALSWRFactorizer factorizer) {
    dataModel = factorizer.dataModel;
    numFeatures = factorizer.numFeatures;
    final Random rng = RandomUtils.getRandom();
    M = new double[dataModel.getNumItems()][numFeatures];
    final LongPrimitiveIterator itemIds = dataModel.getItemIDs();
    while (itemIds.hasNext()) {
        final long itemID = itemIds.nextLong();
        final int row = factorizer.itemIndex(itemID);
        // NOTE(review): "averateRating" is the helper's actual (misspelled) name
        M[row][0] = averateRating(itemID);
        for (int feature = 1; feature < numFeatures; feature++) {
            M[row][feature] = rng.nextDouble() * 0.1;
        }
    }
    U = new double[dataModel.getNumUsers()][numFeatures];
}
Example 3
Source File: ParallelSGDFactorizer.java From elasticsearch-taste with Apache License 2.0 | 6 votes |
protected void initialize() { final RandomWrapper random = RandomUtils.getRandom(); userVectors = new double[dataModel.getNumUsers()][rank]; itemVectors = new double[dataModel.getNumItems()][rank]; final double globalAverage = getAveragePreference(); for (int userIndex = 0; userIndex < userVectors.length; userIndex++) { userVectors[userIndex][0] = globalAverage; userVectors[userIndex][USER_BIAS_INDEX] = 0; // will store user bias userVectors[userIndex][ITEM_BIAS_INDEX] = 1; // corresponding item feature contains item bias for (int feature = FEATURE_OFFSET; feature < rank; feature++) { userVectors[userIndex][feature] = random.nextGaussian() * NOISE; } } for (int itemIndex = 0; itemIndex < itemVectors.length; itemIndex++) { itemVectors[itemIndex][0] = 1; // corresponding user feature contains global average itemVectors[itemIndex][USER_BIAS_INDEX] = 1; // corresponding user feature contains user bias itemVectors[itemIndex][ITEM_BIAS_INDEX] = 0; // will store item bias for (int feature = FEATURE_OFFSET; feature < rank; feature++) { itemVectors[itemIndex][feature] = random.nextGaussian() * NOISE; } } }
Example 4
Source File: TDigestTest.java From streaminer with Apache License 2.0 | 6 votes |
@Test public void testNarrowNormal() { // this mixture of a uniform and normal distribution has a very narrow peak which is centered // near the median. Our system should be scale invariant and work well regardless. final Random gen = RandomUtils.getRandom(); AbstractContinousDistribution mix = new AbstractContinousDistribution() { AbstractContinousDistribution normal = new Normal(0, 1e-5, gen); AbstractContinousDistribution uniform = new Uniform(-1, 1, gen); @Override public double nextDouble() { double x; if (gen.nextDouble() < 0.5) { x = uniform.nextDouble(); } else { x = normal.nextDouble(); } return x; } }; for (int i = 0; i < repeats(); i++) { runTest(mix, 100, new double[]{0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99, 0.999}, "mixture", false, gen); } }
Example 5
Source File: TDigestTest.java From streaminer with Apache License 2.0 | 5 votes |
@Test public void testScaling() throws FileNotFoundException { Random gen = RandomUtils.getRandom(); PrintWriter out = new PrintWriter(new FileOutputStream("error-scaling.tsv")); try { out.printf("pass\tcompression\tq\terror\tsize\n"); // change to 50 passes for better graphs int n = repeats() * repeats(); for (int k = 0; k < n; k++) { List<Double> data = Lists.newArrayList(); for (int i = 0; i < 100000; i++) { data.add(gen.nextDouble()); } Collections.sort(data); for (double compression : new double[]{2, 5, 10, 20, 50, 100, 200, 500, 1000}) { TDigest dist = new TDigest(compression, gen); for (Double x : data) { dist.add(x); } dist.compress(); for (double q : new double[]{0.001, 0.01, 0.1, 0.5}) { double estimate = dist.quantile(q); double actual = data.get((int) (q * data.size())); out.printf("%d\t%.0f\t%.3f\t%.9f\t%d\n", k, compression, q, estimate - actual, dist.byteSize()); out.flush(); } } } } finally { out.close(); new File("error-scaling.tsv").delete(); } }
Example 6
Source File: TDigestTest.java From streaminer with Apache License 2.0 | 5 votes |
@Test
public void compareToQDigest() {
    // Benchmark against QDigest on a skewed (gamma) and a flat (uniform)
    // distribution over a 48-bit value range.
    Random rng = RandomUtils.getRandom();
    long range = 1L << 48;
    for (int rep = 0; rep < repeats(); rep++) {
        compare(new Gamma(0.1, 0.1, rng), "gamma", range, rng);
        compare(new Uniform(0, 1, rng), "uniform", range, rng);
    }
}
Example 7
Source File: UpperQuantileTest.java From log-synth with Apache License 2.0 | 5 votes |
/**
 * Fixture setup: fills {@code uq} and the sorted reference array
 * {@code data} with the same 1001 uniform samples.
 */
@Before
public void generate() {
    RandomUtils.useTestSeed(); // deterministic RNG across runs
    uq = new UpperQuantile(101);
    data = new double[1001];
    Random rng = RandomUtils.getRandom();
    for (int i = 0; i < 1001; i++) {
        double sample = rng.nextDouble();
        data[i] = sample;
        uq.add(sample);
    }
    // keep a sorted copy for exact-quantile comparisons
    Arrays.sort(data);
}
Example 8
Source File: User.java From log-synth with Apache License 2.0 | 5 votes |
/**
 * Creates a simulated user whose inter-session times are exponentially
 * distributed with mean {@code period}, and schedules the first session.
 */
public User(InetAddress address, String geoCode, TermGenerator terms, double period) {
    this.address = address;
    this.geoCode = geoCode;
    this.terms = terms;
    this.rate = period;
    // each user gets a unique, monotonically increasing id
    id = idCounter.addAndGet(1);
    this.sessionTimeDistribution = new Exponential(period, RandomUtils.getRandom());
    nextSession = sessionTimeDistribution.nextDouble();
}
Example 9
Source File: RatingSGDFactorizer.java From elasticsearch-taste with Apache License 2.0 | 5 votes |
/**
 * Randomly permutes the cached preference arrays using a Durstenfeld
 * (Fisher-Yates) shuffle.
 */
protected void shufflePreferences() {
    final RandomWrapper rng = RandomUtils.getRandom();
    // walk backwards, swapping each slot with a uniformly chosen slot at or below it
    for (int pos = cachedUserIDs.length - 1; pos > 0; pos--) {
        swapCachedPreferences(pos, rng.nextInt(pos + 1));
    }
}
Example 10
Source File: TDigestTest.java From streaminer with Apache License 2.0 | 5 votes |
@Test
public void testUniform() {
    // exercise quantile accuracy on a flat distribution over [0, 1)
    Random rng = RandomUtils.getRandom();
    for (int rep = 0; rep < repeats(); rep++) {
        runTest(new Uniform(0, 1, rng), 100,
                new double[]{0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999},
                "uniform", true, rng);
    }
}
Example 11
Source File: DateSampler.java From log-synth with Apache License 2.0 | 4 votes |
/**
 * Sets the end of the date range and rebuilds the uniform sampler over the
 * (start, end) interval, measured in epoch milliseconds.
 *
 * @throws ParseException if {@code end} does not match the configured format
 */
@SuppressWarnings("UnusedDeclaration")
public void setEnd(String end) throws ParseException {
    long endMillis = df.parse(end).getTime();
    this.end = endMillis;
    base = new Uniform(0, endMillis - this.start, RandomUtils.getRandom());
}
Example 12
Source File: LogisticTest.java From ml-models with Apache License 2.0 | 4 votes |
@Test public void makeModel() throws Exception { String csvFile = "/Users/laurenshin/documents/linreg-graph-analytics/src/test/resources/iris-full.csv"; String line = ""; String csvSplitBy = ","; List<Map<String,Double>> data = new ArrayList<>(); List<String> target = new ArrayList<>(); List<Integer> order = new ArrayList<>(); /*Map<String, Integer> stringToInt = new HashMap<>(); Map<Integer, String> intToString = new HashMap<>(); stringToInt.put("Iris-setosa", 0); stringToInt.put("Iris-versicolor", 1); stringToInt.put("Iris-virginica", 2); intToString.put(0, "Iris-setosa"); intToString.put(1, "Iris-versicolor"); intToString.put(2, "Iris-virginica");*/ try (BufferedReader br = new BufferedReader(new FileReader(csvFile))){ br.readLine(); //skip headers int i = 0; while ((line = br.readLine()) != null) { String[] flower = line.split(csvSplitBy); Map<String, Double> v = new HashMap<>(4); v.put("sepallength", Double.parseDouble(flower[1])); //sepal length v.put("sepalwidth", Double.parseDouble(flower[2])); //sepal width v.put("petallength", Double.parseDouble(flower[3])); //petal length v.put("petalwidth", Double.parseDouble(flower[4])); //petal width data.add(v); target.add(flower[5]); //class order.add(i++); } } catch (IOException e) { e.printStackTrace(); Assert.fail("unable to read csv file for test data"); } RandomUtils.useTestSeed(); Random random = RandomUtils.getRandom(); Collections.shuffle(order, random); List<Integer> train = order.subList(0, 100); List<Integer> test = order.subList(100, 150); db.execute("CALL regression.logistic.create('model', ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], " + "{sepallength:'float', sepalwidth:'float', petallength:'float', petalwidth:'float'}, {prior:'L2'})").close(); for (int pass = 0; pass < 30; pass++) { Collections.shuffle(train, random); for (int j : train) { db.execute("CALL regression.logistic.add('model', {output}, {inputs})", map("inputs", data.get(j), "output", target.get(j))); } } int successes = 0; int 
failures = 0; for (int k : test) { String t; String guess = ((String) db.execute("RETURN regression.logistic.predict('model', {inputs}) as prediction", map("inputs", data.get(k))).next().get("prediction")); if (guess.equals(target.get(k))) { t = "SUCCESS!"; successes++; } else { t = "FAIL!"; failures++; } System.out.format("Expected: %s, Actual: %s %s%n", target.get(k), guess, t); } System.out.format("SUCCESSES: %d%n", successes); System.out.format("FAILURES: %d%n", failures); db.execute("CALL regression.logistic.delete('model')"); }
Example 13
Source File: ArrivalSampler.java From log-synth with Apache License 2.0 | 4 votes |
/** Reseeds the underlying RNG so repeated runs produce identical arrivals. */
@Override
public void setSeed(long seed) {
    base = RandomUtils.getRandom(seed);
}
Example 14
Source File: ZipSampler.java From log-synth with Apache License 2.0 | 4 votes |
/** Reseeds the sampler's RNG for reproducible zip-code generation. */
@Override
@SuppressWarnings("unused")
public void setSeed(long seed) {
    rand = RandomUtils.getRandom(seed);
}
Example 15
Source File: SVDPlusPlusFactorizer.java From elasticsearch-taste with Apache License 2.0 | 4 votes |
/**
 * Allocates the SVD++ user (p) and item (y) feature matrices — bias slots
 * zeroed, learned features seeded with Gaussian noise — and caches each
 * user's internal item indexes for the training loop.
 */
@Override
protected void prepareTraining() {
    super.prepareTraining();
    final Random rng = RandomUtils.getRandom();

    // user features
    p = new double[dataModel.getNumUsers()][numFeatures];
    for (int row = 0; row < p.length; row++) {
        for (int f = 0; f < FEATURE_OFFSET; f++) {
            p[row][f] = 0;
        }
        for (int f = FEATURE_OFFSET; f < numFeatures; f++) {
            p[row][f] = rng.nextGaussian() * randomNoise;
        }
    }

    // item features, initialized the same way
    y = new double[dataModel.getNumItems()][numFeatures];
    for (int row = 0; row < y.length; row++) {
        for (int f = 0; f < FEATURE_OFFSET; f++) {
            y[row][f] = 0;
        }
        for (int f = FEATURE_OFFSET; f < numFeatures; f++) {
            y[row][f] = rng.nextGaussian() * randomNoise;
        }
    }

    /* get internal item IDs which we will need several times */
    itemsByUser = Maps.newHashMap();
    final LongPrimitiveIterator userIDs = dataModel.getUserIDs();
    while (userIDs.hasNext()) {
        final long userId = userIDs.nextLong();
        final int userIndex = userIndex(userId);
        final FastIDSet userItems = dataModel.getItemIDsFromUser(userId);
        final List<Integer> itemIndexes = Lists.newArrayListWithCapacity(userItems.size());
        itemsByUser.put(userIndex, itemIndexes);
        for (final long itemID : userItems) {
            itemIndexes.add(itemIndex(itemID));
        }
    }
}
Example 16
Source File: DateSampler.java From log-synth with Apache License 2.0 | 4 votes |
/**
 * Sets the start of the date range and rebuilds the uniform sampler over
 * the (start, end) interval, measured in epoch milliseconds.
 *
 * @throws ParseException if {@code start} does not match the configured format
 */
@SuppressWarnings("UnusedDeclaration")
public void setStart(String start) throws ParseException {
    long startMillis = df.parse(start).getTime();
    this.start = startMillis;
    base = new Uniform(0, this.end - startMillis, RandomUtils.getRandom());
}
Example 17
Source File: SamplingLongPrimitiveIterator.java From elasticsearch-taste with Apache License 2.0 | 4 votes |
/**
 * Convenience constructor that samples from {@code delegate} at
 * {@code samplingRate} using a freshly obtained RNG.
 */
public SamplingLongPrimitiveIterator(final LongPrimitiveIterator delegate,
        final double samplingRate) {
    this(RandomUtils.getRandom(), delegate, samplingRate);
}
Example 18
Source File: IntegerSampler.java From log-synth with Apache License 2.0 | 4 votes |
/** Reseeds the sampler's RNG for reproducible integer generation. */
@Override
public void setSeed(long seed) {
    base = RandomUtils.getRandom(seed);
}
Example 19
Source File: GenericRecommenderIRStatsEvaluator.java From elasticsearch-taste with Apache License 2.0 | 4 votes |
/**
 * @param dataSplitter strategy used to pick each user's relevant items;
 *                     must not be null
 */
public GenericRecommenderIRStatsEvaluator(final RelevantItemsDataSplitter dataSplitter) {
    Preconditions.checkNotNull(dataSplitter);
    this.dataSplitter = dataSplitter;
    random = RandomUtils.getRandom();
}
Example 20
Source File: AbstractDifferenceEvaluator.java From elasticsearch-taste with Apache License 2.0 | 4 votes |
/**
 * Initializes the RNG and marks the preference bounds as not yet computed
 * (NaN sentinels).
 */
protected AbstractDifferenceEvaluator() {
    random = RandomUtils.getRandom();
    minPreference = Float.NaN;
    maxPreference = Float.NaN;
}