org.apache.commons.math3.distribution.ZipfDistribution Java Examples
The following examples show how to use
org.apache.commons.math3.distribution.ZipfDistribution.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: RandomDataGeneratorTest.java From astor with GNU General Public License v2.0 | 6 votes |
/**
 * Draws 1000 values through {@code randomData.nextZipf} (after re-seeding with 1000) and
 * compares the per-point counts against {@code sampleSize * density} using
 * {@code TestUtils.assertChiSquareAccept} with {@code .001} as its last argument.
 */
@Test
public void testNextZipf() {
    final ZipfDistributionTest reference = new ZipfDistributionTest();
    final int[] points = reference.makeDensityTestPoints();
    final double[] densities = reference.makeDensityTestValues();
    final int sampleSize = 1000;
    // The returned value is used as the number of points to compare.
    final int binCount = TestUtils.eliminateZeroMassPoints(points, densities);
    final ZipfDistribution zipf = (ZipfDistribution) reference.makeDistribution();

    final double[] expected = new double[binCount];
    final long[] observed = new long[binCount];
    for (int bin = 0; bin < binCount; bin++) {
        expected[bin] = sampleSize * densities[bin];
    }

    randomData.reSeed(1000);
    int drawn = 0;
    while (drawn < sampleSize) {
        final int sample = randomData.nextZipf(zipf.getNumberOfElements(), zipf.getExponent());
        // Scan every point (no early exit) and bump each bin whose point equals the sample.
        for (int bin = 0; bin < binCount; bin++) {
            if (sample == points[bin]) {
                observed[bin]++;
            }
        }
        drawn++;
    }

    TestUtils.assertChiSquareAccept(points, expected, observed, .001);
}
Example #2
Source File: RedisHashLoadGenerator.java From yb-sample-apps with Apache License 2.0 | 6 votes |
public RedisHashLoadGenerator(String prefix, int numKeys, int numSubKeys, double keyZipfExp, double subkeyZipfExp, int numSubkeysPerWrite, int numSubkeysPerRead) { this.prefix = prefix; this.numKeys = numKeys; this.keyFreqDist = (keyZipfExp > 0 ? new ZipfDistribution(numKeys, keyZipfExp) : new UniformIntegerDistribution(1, numKeys)); printInfoAboutZifpian(numKeys, keyZipfExp, keyZipfExpThreshold); this.numSubKeys = numSubKeys; this.subkeyFreqDist = (subkeyZipfExp > 0 ? new ZipfDistribution(numSubKeys, subkeyZipfExp) : new UniformIntegerDistribution(1, numSubKeys)); printInfoAboutZifpian(numSubKeys, subkeyZipfExp, subkeyZipfExpThreshold); this.numSubkeysPerRead = numSubkeysPerRead; this.numSubkeysPerWrite = numSubkeysPerWrite; int numWrites = numKeys * (int)Math.ceil((double) numSubKeys / numSubkeysPerWrite); // Generates 0 ... (numWrites - 1). this.loadGenerator = new SimpleLoadGenerator(0, numWrites, -1); }
Example #3
Source File: RandomDataGeneratorTest.java From astor with GNU General Public License v2.0 | 6 votes |
/**
 * Draws 1000 values through {@code randomData.nextZipf} (after re-seeding with 1000) and
 * compares the per-point counts against {@code sampleSize * density} using
 * {@code TestUtils.assertChiSquareAccept} with {@code .001} as its last argument.
 */
@Test
public void testNextZipf() {
    final ZipfDistributionTest reference = new ZipfDistributionTest();
    final int[] points = reference.makeDensityTestPoints();
    final double[] densities = reference.makeDensityTestValues();
    final int sampleSize = 1000;
    // The returned value is used as the number of points to compare.
    final int binCount = TestUtils.eliminateZeroMassPoints(points, densities);
    final ZipfDistribution zipf = (ZipfDistribution) reference.makeDistribution();

    final double[] expected = new double[binCount];
    final long[] observed = new long[binCount];
    for (int bin = 0; bin < binCount; bin++) {
        expected[bin] = sampleSize * densities[bin];
    }

    randomData.reSeed(1000);
    int drawn = 0;
    while (drawn < sampleSize) {
        final int sample = randomData.nextZipf(zipf.getNumberOfElements(), zipf.getExponent());
        // Scan every point (no early exit) and bump each bin whose point equals the sample.
        for (int bin = 0; bin < binCount; bin++) {
            if (sample == points[bin]) {
                observed[bin]++;
            }
        }
        drawn++;
    }

    TestUtils.assertChiSquareAccept(points, expected, observed, .001);
}
Example #4
Source File: RandomDataGeneratorTest.java From astor with GNU General Public License v2.0 | 6 votes |
/**
 * Draws 1000 values through {@code randomData.nextZipf} (after re-seeding with 1000) and
 * compares the per-point counts against {@code sampleSize * density} using
 * {@code TestUtils.assertChiSquareAccept} with {@code .001} as its last argument.
 */
@Test
public void testNextZipf() {
    final ZipfDistributionTest reference = new ZipfDistributionTest();
    final int[] points = reference.makeDensityTestPoints();
    final double[] densities = reference.makeDensityTestValues();
    final int sampleSize = 1000;
    // The returned value is used as the number of points to compare.
    final int binCount = TestUtils.eliminateZeroMassPoints(points, densities);
    final ZipfDistribution zipf = (ZipfDistribution) reference.makeDistribution();

    final double[] expected = new double[binCount];
    final long[] observed = new long[binCount];
    for (int bin = 0; bin < binCount; bin++) {
        expected[bin] = sampleSize * densities[bin];
    }

    randomData.reSeed(1000);
    int drawn = 0;
    while (drawn < sampleSize) {
        final int sample = randomData.nextZipf(zipf.getNumberOfElements(), zipf.getExponent());
        // Scan every point (no early exit) and bump each bin whose point equals the sample.
        for (int bin = 0; bin < binCount; bin++) {
            if (sample == points[bin]) {
                observed[bin]++;
            }
        }
        drawn++;
    }

    TestUtils.assertChiSquareAccept(points, expected, observed, .001);
}
Example #5
Source File: RandomDataTest.java From astor with GNU General Public License v2.0 | 6 votes |
/**
 * Draws 1000 values through {@code randomData.nextZipf} (after re-seeding with 1000) and
 * compares the per-point counts against {@code sampleSize * density} using
 * {@code TestUtils.assertChiSquareAccept} with {@code .001} as its last argument.
 *
 * @throws Exception declared by the signature; propagated from the calls made here
 */
@Test
public void testNextZipf() throws Exception {
    final ZipfDistributionTest reference = new ZipfDistributionTest();
    final int[] points = reference.makeDensityTestPoints();
    final double[] densities = reference.makeDensityTestValues();
    final int sampleSize = 1000;
    // The returned value is used as the number of points to compare.
    final int binCount = TestUtils.eliminateZeroMassPoints(points, densities);
    final ZipfDistribution zipf = (ZipfDistribution) reference.makeDistribution();

    final double[] expected = new double[binCount];
    final long[] observed = new long[binCount];
    for (int bin = 0; bin < binCount; bin++) {
        expected[bin] = sampleSize * densities[bin];
    }

    randomData.reSeed(1000);
    int drawn = 0;
    while (drawn < sampleSize) {
        final int sample = randomData.nextZipf(zipf.getNumberOfElements(), zipf.getExponent());
        // Scan every point (no early exit) and bump each bin whose point equals the sample.
        for (int bin = 0; bin < binCount; bin++) {
            if (sample == points[bin]) {
                observed[bin]++;
            }
        }
        drawn++;
    }

    TestUtils.assertChiSquareAccept(points, expected, observed, .001);
}
Example #6
Source File: RandomDataGeneratorTest.java From astor with GNU General Public License v2.0 | 6 votes |
/**
 * Draws 1000 values through {@code randomData.nextZipf} (after re-seeding with 1000) and
 * compares the per-point counts against {@code sampleSize * density} using
 * {@code TestUtils.assertChiSquareAccept} with {@code .001} as its last argument.
 */
@Test
public void testNextZipf() {
    final ZipfDistributionTest reference = new ZipfDistributionTest();
    final int[] points = reference.makeDensityTestPoints();
    final double[] densities = reference.makeDensityTestValues();
    final int sampleSize = 1000;
    // The returned value is used as the number of points to compare.
    final int binCount = TestUtils.eliminateZeroMassPoints(points, densities);
    final ZipfDistribution zipf = (ZipfDistribution) reference.makeDistribution();

    final double[] expected = new double[binCount];
    final long[] observed = new long[binCount];
    for (int bin = 0; bin < binCount; bin++) {
        expected[bin] = sampleSize * densities[bin];
    }

    randomData.reSeed(1000);
    int drawn = 0;
    while (drawn < sampleSize) {
        final int sample = randomData.nextZipf(zipf.getNumberOfElements(), zipf.getExponent());
        // Scan every point (no early exit) and bump each bin whose point equals the sample.
        for (int bin = 0; bin < binCount; bin++) {
            if (sample == points[bin]) {
                observed[bin]++;
            }
        }
        drawn++;
    }

    TestUtils.assertChiSquareAccept(points, expected, observed, .001);
}
Example #7
Source File: RandomDataTest.java From astor with GNU General Public License v2.0 | 6 votes |
/**
 * Draws 1000 values through {@code randomData.nextZipf} (after re-seeding with 1000) and
 * compares the per-point counts against {@code sampleSize * density} using
 * {@code TestUtils.assertChiSquareAccept} with {@code .001} as its last argument.
 */
@Test
public void testNextZipf() {
    final ZipfDistributionTest reference = new ZipfDistributionTest();
    final int[] points = reference.makeDensityTestPoints();
    final double[] densities = reference.makeDensityTestValues();
    final int sampleSize = 1000;
    // The returned value is used as the number of points to compare.
    final int binCount = TestUtils.eliminateZeroMassPoints(points, densities);
    final ZipfDistribution zipf = (ZipfDistribution) reference.makeDistribution();

    final double[] expected = new double[binCount];
    final long[] observed = new long[binCount];
    for (int bin = 0; bin < binCount; bin++) {
        expected[bin] = sampleSize * densities[bin];
    }

    randomData.reSeed(1000);
    int drawn = 0;
    while (drawn < sampleSize) {
        final int sample = randomData.nextZipf(zipf.getNumberOfElements(), zipf.getExponent());
        // Scan every point (no early exit) and bump each bin whose point equals the sample.
        for (int bin = 0; bin < binCount; bin++) {
            if (sample == points[bin]) {
                observed[bin]++;
            }
        }
        drawn++;
    }

    TestUtils.assertChiSquareAccept(points, expected, observed, .001);
}
Example #8
Source File: RandomDataTest.java From astor with GNU General Public License v2.0 | 6 votes |
/**
 * Draws 1000 values through {@code randomData.nextZipf} (after re-seeding with 1000) and
 * compares the per-point counts against {@code sampleSize * density} using
 * {@code TestUtils.assertChiSquareAccept} with {@code .001} as its last argument.
 */
@Test
public void testNextZipf() {
    final ZipfDistributionTest reference = new ZipfDistributionTest();
    final int[] points = reference.makeDensityTestPoints();
    final double[] densities = reference.makeDensityTestValues();
    final int sampleSize = 1000;
    // The returned value is used as the number of points to compare.
    final int binCount = TestUtils.eliminateZeroMassPoints(points, densities);
    final ZipfDistribution zipf = (ZipfDistribution) reference.makeDistribution();

    final double[] expected = new double[binCount];
    final long[] observed = new long[binCount];
    for (int bin = 0; bin < binCount; bin++) {
        expected[bin] = sampleSize * densities[bin];
    }

    randomData.reSeed(1000);
    int drawn = 0;
    while (drawn < sampleSize) {
        final int sample = randomData.nextZipf(zipf.getNumberOfElements(), zipf.getExponent());
        // Scan every point (no early exit) and bump each bin whose point equals the sample.
        for (int bin = 0; bin < binCount; bin++) {
            if (sample == points[bin]) {
                observed[bin]++;
            }
        }
        drawn++;
    }

    TestUtils.assertChiSquareAccept(points, expected, observed, .001);
}
Example #9
Source File: RandomDataGeneratorTest.java From astor with GNU General Public License v2.0 | 6 votes |
/**
 * Draws 1000 values through {@code randomData.nextZipf} (after re-seeding with 1000) and
 * compares the per-point counts against {@code sampleSize * density} using
 * {@code TestUtils.assertChiSquareAccept} with {@code .001} as its last argument.
 */
@Test
public void testNextZipf() {
    final ZipfDistributionTest reference = new ZipfDistributionTest();
    final int[] points = reference.makeDensityTestPoints();
    final double[] densities = reference.makeDensityTestValues();
    final int sampleSize = 1000;
    // The returned value is used as the number of points to compare.
    final int binCount = TestUtils.eliminateZeroMassPoints(points, densities);
    final ZipfDistribution zipf = (ZipfDistribution) reference.makeDistribution();

    final double[] expected = new double[binCount];
    final long[] observed = new long[binCount];
    for (int bin = 0; bin < binCount; bin++) {
        expected[bin] = sampleSize * densities[bin];
    }

    randomData.reSeed(1000);
    int drawn = 0;
    while (drawn < sampleSize) {
        final int sample = randomData.nextZipf(zipf.getNumberOfElements(), zipf.getExponent());
        // Scan every point (no early exit) and bump each bin whose point equals the sample.
        for (int bin = 0; bin < binCount; bin++) {
            if (sample == points[bin]) {
                observed[bin]++;
            }
        }
        drawn++;
    }

    TestUtils.assertChiSquareAccept(points, expected, observed, .001);
}
Example #10
Source File: ContentService.java From EdgeSim with MIT License | 5 votes |
/**
 * Initialize the popularity according to power law distribution.
 *
 * <p>One value per list element is sampled from a {@link ZipfDistribution} built from
 * {@code Parameter.ZIPF_SAMPLE} and {@code Parameter.ZIPF_CONEFFICIENT}; the i-th sample is
 * set as the popularity of the i-th element's content. An empty list is passed straight to
 * the sort step, because {@code sample(int)} rejects a non-positive sample size.
 *
 * @param noPopularityMediaList entries whose content popularity is to be assigned (mutated in place)
 * @return the result of {@code sortByInitialPopularity} applied to the same list
 */
public static List<SingleLocalHobby> initialPopularity(List<SingleLocalHobby> noPopularityMediaList) {
    ZipfDistribution zd = new ZipfDistribution(Parameter.ZIPF_SAMPLE, Parameter.ZIPF_CONEFFICIENT);
    final int mediaCount = noPopularityMediaList.size();
    // Guard: sampling zero values throws, and there is nothing to assign anyway.
    if (mediaCount > 0) {
        int[] popularity = zd.sample(mediaCount);
        for (int i = 0; i < mediaCount; i++) {
            noPopularityMediaList.get(i).getSingleContent().setPopularity(popularity[i]);
        }
    }
    return sortByInitialPopularity(noPopularityMediaList);
}
Example #11
Source File: TopNCounterTest.java From kylin with Apache License 2.0 | 5 votes |
/**
 * Writes {@code TOTAL_RECORDS} lines to a temporary UTF-8 text file. Each line is one of
 * {@code KEY_SPACE} keys produced by {@code RandomStringUtils.randomAlphabetic(10)}, picked
 * by sampling a {@link ZipfDistribution} with exponent 0.5.
 *
 * @return absolute path of the generated file
 * @throws IOException if the temporary file cannot be created, deleted or written
 */
protected String prepareTestDate() throws IOException {
    String[] allKeys = new String[KEY_SPACE];
    for (int i = 0; i < KEY_SPACE; i++) {
        allKeys[i] = RandomStringUtils.randomAlphabetic(10);
    }

    outputMsg("Start to create test random data...");
    long startTime = System.currentTimeMillis();
    ZipfDistribution zipf = new ZipfDistribution(KEY_SPACE, 0.5);

    File tempFile = File.createTempFile("ZipfDistribution", ".txt");
    if (tempFile.exists()) {
        FileUtils.forceDelete(tempFile);
    }

    // try-with-resources closes the writer on every path and keeps a write failure as the
    // primary exception instead of letting a close() failure replace it.
    try (Writer fw = new OutputStreamWriter(new FileOutputStream(tempFile), StandardCharsets.UTF_8)) {
        for (int i = 0; i < TOTAL_RECORDS; i++) {
            // The sampled value is shifted down by one to index into allKeys.
            int keyIndex = zipf.sample() - 1;
            fw.write(allKeys[keyIndex]);
            fw.write('\n');
        }
    }

    outputMsg("Create test data takes : " + (System.currentTimeMillis() - startTime) / 1000 + " seconds.");
    outputMsg("Test data in : " + tempFile.getAbsolutePath());
    return tempFile.getAbsolutePath();
}
Example #12
Source File: SyntheticBoundedSourceTest.java From beam with Apache License 2.0 | 5 votes |
/** Resets {@code testSourceOptions} to the fixed values below before each test. */
@Before
public void setUp() {
    // Record and size settings.
    testSourceOptions.splitPointFrequencyRecords = 1;
    testSourceOptions.numRecords = 10;
    testSourceOptions.keySizeBytes = 10;
    testSourceOptions.valueSizeBytes = 20;

    // Hot-key settings.
    testSourceOptions.numHotKeys = 3;
    testSourceOptions.hotKeyFraction = 0.3;

    testSourceOptions.setSeed(123456);

    // Bundle sizes come from a Zipf distribution with 100 elements and exponent 2.5.
    final ZipfDistribution bundleSizes = new ZipfDistribution(100, 2.5);
    testSourceOptions.bundleSizeDistribution = fromIntegerDistribution(bundleSizes);
    testSourceOptions.forceNumInitialBundles = null;
}
Example #13
Source File: ZipFDistributionEvaluator.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Builds a {@link ZipfDistribution} from the two evaluated operands.
 *
 * @param first  number of elements; cast to {@link Number} and read via {@code intValue()}
 * @param second exponent; cast to {@link Number} and read via {@code doubleValue()}
 * @return a new {@code ZipfDistribution(size, exponent)}
 * @throws IOException if either operand is {@code null}
 */
@Override
public Object doWork(Object first, Object second) throws IOException {
    if (first == null) {
        throw new IOException(String.format(Locale.ROOT,
            "Invalid expression %s - null found for the first value",
            toExpression(constructingFactory)));
    }
    if (second == null) {
        throw new IOException(String.format(Locale.ROOT,
            "Invalid expression %s - null found for the second value",
            toExpression(constructingFactory)));
    }

    // Both casts happen before either value is read, as in the original.
    final Number elementCount = (Number) first;
    final Number exponent = (Number) second;
    return new ZipfDistribution(elementCount.intValue(), exponent.doubleValue());
}
Example #14
Source File: RedisHashLoadGenerator.java From yb-sample-apps with Apache License 2.0 | 5 votes |
/**
 * Logs the cumulative probabilities of a Zipf distribution at roughly ten evenly spaced
 * points, and warns when the exponent reaches {@code warnThreshold}.
 * Does nothing when {@code zifpExp <= 0}.
 *
 * @param numElements   number of elements of the distribution
 * @param zifpExp       Zipf exponent
 * @param warnThreshold exponent at or above which a warning is logged
 */
private static void printInfoAboutZifpian(int numElements, double zifpExp, double warnThreshold) {
    if (zifpExp <= 0) {
        return;
    }

    if (zifpExp >= warnThreshold) {
        final String warning =
            "Zipf exponent of " + zifpExp + " may cause the probabilities to decay too fast.";
        LOG.warn(warning);
    }

    LOG.info("Printing distribution for Zifpian n = " + numElements + " exp = " + zifpExp);
    final ZipfDistribution dist = new ZipfDistribution(numElements, zifpExp);

    // Stride of one tenth of the range, but never less than 1.
    final int stride = Math.max(1, numElements / 10);
    int point = stride;
    while (point <= numElements) {
        LOG.info("p[ x <= " + point + "] = " + dist.cumulativeProbability(point));
        point += stride;
    }
}
Example #15
Source File: TopNCounterTest.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
/**
 * Writes {@code TOTAL_RECORDS} lines to a temporary UTF-8 text file. Each line is one of
 * {@code KEY_SPACE} keys produced by {@code RandomStringUtils.randomAlphabetic(10)}, picked
 * by sampling a {@link ZipfDistribution} with exponent 0.5.
 *
 * @return absolute path of the generated file
 * @throws IOException if the temporary file cannot be created, deleted or written
 */
protected String prepareTestDate() throws IOException {
    String[] allKeys = new String[KEY_SPACE];
    for (int i = 0; i < KEY_SPACE; i++) {
        allKeys[i] = RandomStringUtils.randomAlphabetic(10);
    }

    outputMsg("Start to create test random data...");
    long startTime = System.currentTimeMillis();
    ZipfDistribution zipf = new ZipfDistribution(KEY_SPACE, 0.5);

    File tempFile = File.createTempFile("ZipfDistribution", ".txt");
    if (tempFile.exists()) {
        FileUtils.forceDelete(tempFile);
    }

    // try-with-resources closes the writer on every path and keeps a write failure as the
    // primary exception instead of letting a close() failure replace it.
    try (Writer fw = new OutputStreamWriter(new FileOutputStream(tempFile), StandardCharsets.UTF_8)) {
        for (int i = 0; i < TOTAL_RECORDS; i++) {
            // The sampled value is shifted down by one to index into allKeys.
            int keyIndex = zipf.sample() - 1;
            fw.write(allKeys[keyIndex]);
            fw.write('\n');
        }
    }

    outputMsg("Create test data takes : " + (System.currentTimeMillis() - startTime) / 1000 + " seconds.");
    outputMsg("Test data in : " + tempFile.getAbsolutePath());
    return tempFile.getAbsolutePath();
}
Example #16
Source File: RedisHashPipelined.java From yb-sample-apps with Apache License 2.0 | 4 votes |
public RedisHashPipelined() { int kMinValueSize = 10; // Give enough room for the checksum. int kMaxValueSize = appConfig.maxValueSize; if (appConfig.valueSizeZipfExponent > 0) { int minBits = log2ceil(kMinValueSize); int maxBits = log2ceil(kMaxValueSize); AbstractIntegerDistribution valueSizeDist = new ZipfDistribution( maxBits - minBits + 1, appConfig.valueSizeZipfExponent); // Get (1 + numSubKey) value-sizes from the above distribution. // Scale up/down the values such that the expected-mean value is // appConfig.valueSize // Adjust values to make sure they are within [kMinValueSize, // kMaxValueSize] subkeyValueSize = valueSizeDist.sample(appConfig.numSubkeysPerKey + 1); Arrays.sort(subkeyValueSize); // Estimate the expected size of the subkey value size. AbstractIntegerDistribution freqDist = getRedisHashLoadGenerator().getSubkeyDistribution(); double expected_size = 0; for (int i = 0; i < subkeyValueSize.length; i++) { subkeyValueSize[i] = (1 << (subkeyValueSize[i] + minBits - 1)); expected_size += freqDist.probability(i) * subkeyValueSize[i]; } LOG.debug("Expected size for the distribution is " + valueSizeDist.getNumericalMean()); // Update the sizes so that the expected is appConfig.valueSize. for (int i = 0; i < subkeyValueSize.length; i++) { subkeyValueSize[i] = (int)Math.round( subkeyValueSize[i] * appConfig.valueSize / expected_size); // Set the min value size to be at least kMinValueSize. 
if (subkeyValueSize[i] < kMinValueSize) { LOG.debug("Updating value size for subkey[ " + i + "] from " + subkeyValueSize[i] + " to " + kMinValueSize); subkeyValueSize[i] = kMinValueSize; } if (subkeyValueSize[i] > kMaxValueSize) { LOG.debug("Updating value size for subkey[ " + i + "] from " + subkeyValueSize[i] + " to " + kMaxValueSize); subkeyValueSize[i] = kMaxValueSize; } LOG.info("Value size for subkey[ " + i + "] is " + subkeyValueSize[i]); } } else { subkeyValueSize = new int[appConfig.numSubkeysPerKey + 1]; Arrays.fill(subkeyValueSize, appConfig.valueSize); } subkeyValueBuffers = new byte[subkeyValueSize.length][]; for (int i = 0; i < subkeyValueSize.length; i++) { subkeyValueBuffers[i] = new byte[subkeyValueSize[i]]; } }
Example #17
Source File: RandomDataGenerator.java From astor with GNU General Public License v2.0 | 2 votes |
/**
 * Draws a single random value from a {@link ZipfDistribution Zipf Distribution} that is
 * backed by the generator returned from {@code getRan()}.
 *
 * @param numberOfElements the number of elements of the ZipfDistribution
 * @param exponent the exponent of the ZipfDistribution
 * @return random value sampled from the Zipf(numberOfElements, exponent) distribution
 * @exception NotStrictlyPositiveException if {@code numberOfElements <= 0}
 * or {@code exponent <= 0}.
 */
public int nextZipf(int numberOfElements, double exponent) throws NotStrictlyPositiveException {
    final ZipfDistribution zipf = new ZipfDistribution(getRan(), numberOfElements, exponent);
    return zipf.sample();
}
Example #18
Source File: RandomDataGenerator.java From astor with GNU General Public License v2.0 | 2 votes |
/**
 * Draws a single random value from a {@link ZipfDistribution Zipf Distribution} that is
 * backed by the generator returned from {@code getRandomGenerator()}.
 *
 * @param numberOfElements the number of elements of the ZipfDistribution
 * @param exponent the exponent of the ZipfDistribution
 * @return random value sampled from the Zipf(numberOfElements, exponent) distribution
 * @exception NotStrictlyPositiveException if {@code numberOfElements <= 0}
 * or {@code exponent <= 0}.
 */
public int nextZipf(int numberOfElements, double exponent) throws NotStrictlyPositiveException {
    final ZipfDistribution zipf =
        new ZipfDistribution(getRandomGenerator(), numberOfElements, exponent);
    return zipf.sample();
}
Example #19
Source File: RandomDataImpl.java From astor with GNU General Public License v2.0 | 2 votes |
/**
 * Draws a single random value from a {@link ZipfDistribution Zipf Distribution}.
 * The value is obtained by passing the distribution to
 * {@link #nextInversionDeviate(IntegerDistribution) inversion}.
 *
 * @param numberOfElements the number of elements of the ZipfDistribution
 * @param exponent the exponent of the ZipfDistribution
 * @return random value sampled from the Zipf(numberOfElements, exponent) distribution
 * @since 2.2
 */
public int nextZipf(int numberOfElements, double exponent) {
    final ZipfDistribution zipf = new ZipfDistribution(numberOfElements, exponent);
    return nextInversionDeviate(zipf);
}
Example #20
Source File: RandomDataGenerator.java From astor with GNU General Public License v2.0 | 2 votes |
/**
 * Draws a single random value from a {@link ZipfDistribution Zipf Distribution} that is
 * backed by the generator returned from {@code getRandomGenerator()}.
 *
 * @param numberOfElements the number of elements of the ZipfDistribution
 * @param exponent the exponent of the ZipfDistribution
 * @return random value sampled from the Zipf(numberOfElements, exponent) distribution
 * @exception NotStrictlyPositiveException if {@code numberOfElements <= 0}
 * or {@code exponent <= 0}.
 */
public int nextZipf(int numberOfElements, double exponent) throws NotStrictlyPositiveException {
    final ZipfDistribution zipf =
        new ZipfDistribution(getRandomGenerator(), numberOfElements, exponent);
    return zipf.sample();
}
Example #21
Source File: RandomDataImpl.java From astor with GNU General Public License v2.0 | 2 votes |
/**
 * Draws a single random value from a {@link ZipfDistribution Zipf Distribution}.
 * The value is obtained by passing the distribution to
 * {@link #nextInversionDeviate(IntegerDistribution) inversion}.
 *
 * @param numberOfElements the number of elements of the ZipfDistribution
 * @param exponent the exponent of the ZipfDistribution
 * @return random value sampled from the Zipf(numberOfElements, exponent) distribution
 * @since 2.2
 */
public int nextZipf(int numberOfElements, double exponent) {
    final ZipfDistribution zipf = new ZipfDistribution(numberOfElements, exponent);
    return nextInversionDeviate(zipf);
}
Example #22
Source File: RandomDataGenerator.java From astor with GNU General Public License v2.0 | 2 votes |
/**
 * Draws a single random value from a {@link ZipfDistribution Zipf Distribution} that is
 * backed by the generator returned from {@code getRandomGenerator()}.
 *
 * @param numberOfElements the number of elements of the ZipfDistribution
 * @param exponent the exponent of the ZipfDistribution
 * @return random value sampled from the Zipf(numberOfElements, exponent) distribution
 * @exception NotStrictlyPositiveException if {@code numberOfElements <= 0}
 * or {@code exponent <= 0}.
 */
public int nextZipf(int numberOfElements, double exponent) throws NotStrictlyPositiveException {
    final ZipfDistribution zipf =
        new ZipfDistribution(getRandomGenerator(), numberOfElements, exponent);
    return zipf.sample();
}
Example #23
Source File: RandomDataGenerator.java From astor with GNU General Public License v2.0 | 2 votes |
/**
 * Draws a single random value from a {@link ZipfDistribution Zipf Distribution} that is
 * backed by the generator returned from {@code getRandomGenerator()}.
 *
 * @param numberOfElements the number of elements of the ZipfDistribution
 * @param exponent the exponent of the ZipfDistribution
 * @return random value sampled from the Zipf(numberOfElements, exponent) distribution
 * @exception NotStrictlyPositiveException if {@code numberOfElements <= 0}
 * or {@code exponent <= 0}.
 */
public int nextZipf(int numberOfElements, double exponent) throws NotStrictlyPositiveException {
    final ZipfDistribution zipf =
        new ZipfDistribution(getRandomGenerator(), numberOfElements, exponent);
    return zipf.sample();
}
Example #24
Source File: RandomDataGenerator.java From astor with GNU General Public License v2.0 | 2 votes |
/**
 * Draws a single random value from a {@link ZipfDistribution Zipf Distribution} that is
 * backed by the generator returned from {@code getRandomGenerator()}.
 *
 * @param numberOfElements the number of elements of the ZipfDistribution
 * @param exponent the exponent of the ZipfDistribution
 * @return random value sampled from the Zipf(numberOfElements, exponent) distribution
 * @exception NotStrictlyPositiveException if {@code numberOfElements <= 0}
 * or {@code exponent <= 0}.
 */
public int nextZipf(int numberOfElements, double exponent) throws NotStrictlyPositiveException {
    final ZipfDistribution zipf =
        new ZipfDistribution(getRandomGenerator(), numberOfElements, exponent);
    return zipf.sample();
}