Java Code Examples for org.apache.commons.math3.random.Well1024a#nextLong()
The following examples show how to use org.apache.commons.math3.random.Well1024a#nextLong().
You can go to the original project or source file by following the links above each example.
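Before the project examples, a minimal, self-contained sketch of the method itself may help: it seeds a WELL1024a generator and draws a few 64-bit values. The class name Well1024aNextLongDemo and the seed value are chosen purely for illustration; Well1024a and nextLong() are the commons-math3 API used in the examples below.

import org.apache.commons.math3.random.Well1024a;

public class Well1024aNextLongDemo {
    public static void main(String[] args) {
        // Seed the generator explicitly so the output is reproducible (demo seed)
        Well1024a rng = new Well1024a(42L);
        // Each call returns the next 64-bit pseudo-random value from the WELL1024a stream
        for (int i = 0; i < 3; i++) {
            System.out.println(rng.nextLong());
        }
    }
}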
Example 1
Source File: LibMatrixDatagen.java (From systemds with Apache License 2.0)
private static long[] generateSeedsForCP(Well1024a bigrand, int nrb, int ncb) {
    int numBlocks = nrb * ncb;
    long[] seeds = new long[numBlocks];
    for( int l = 0; l < numBlocks; l++ )
        seeds[l] = bigrand.nextLong();
    return seeds;
}
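Here nextLong() derives one seed per matrix block from a single master generator, so the blocks can later be filled in parallel while the whole matrix stays reproducible for a given top-level seed. Since generateSeedsForCP is private to LibMatrixDatagen, the sketch below repeats the same loop inline; the class name, demo seed, and block counts are illustrative, not SystemDS API.

import org.apache.commons.math3.random.Well1024a;

public class PerBlockSeedsSketch {
    public static void main(String[] args) {
        Well1024a bigrand = new Well1024a(1234L); // master generator (demo seed)
        int nrb = 2, ncb = 3;                     // row-block and column-block counts
        long[] seeds = new long[nrb * ncb];
        for (int l = 0; l < seeds.length; l++)
            seeds[l] = bigrand.nextLong();        // one seed per block

        // Each block gets its own generator, so blocks are independent yet reproducible
        for (int b = 0; b < seeds.length; b++) {
            Well1024a blockRng = new Well1024a(seeds[b]);
            System.out.println("block " + b + " first draw: " + blockRng.nextDouble());
        }
    }
}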
Example 2
Source File: RandSPInstruction.java (From systemds with Apache License 2.0)
/**
 * Helper function to construct a sample.
 *
 * @param sec spark execution context
 */
private void generateSample(SparkExecutionContext sec) {
    long lrows = sec.getScalarInput(rows).getLongValue();
    if ( maxValue < lrows && !replace )
        throw new DMLRuntimeException("Sample (size=" + rows + ") larger than population (size="
            + maxValue + ") can only be generated with replacement.");

    if( LOG.isTraceEnabled() )
        LOG.trace("Process RandSPInstruction sample with range=" + maxValue + ", size=" + lrows
            + ", replace=" + replace + ", seed=" + seed);

    // sampling rate that guarantees a sample of size >= sampleSizeLowerBound 99.99% of the time.
    double fraction = SamplingUtils.computeFractionForSampleSize((int)lrows, UtilFunctions.toLong(maxValue), replace);

    Well1024a bigrand = LibMatrixDatagen.setupSeedsForRand(seed);

    // divide the population range across numPartitions by creating SampleTasks
    double hdfsBlockSize = InfrastructureAnalyzer.getHDFSBlockSize();
    long outputSize = MatrixBlock.estimateSizeDenseInMemory(lrows, 1);
    int numPartitions = (int) Math.ceil(outputSize / hdfsBlockSize);
    long partitionSize = (long) Math.ceil(maxValue / numPartitions);

    ArrayList<SampleTask> offsets = new ArrayList<>();
    long st = 1;
    while (st <= maxValue) {
        SampleTask s = new SampleTask();
        s.range_start = st;
        s.seed = bigrand.nextLong();
        offsets.add(s);
        st = st + partitionSize;
    }
    JavaRDD<SampleTask> offsetRDD = sec.getSparkContext().parallelize(offsets, numPartitions);

    // Construct the sample in a distributed manner
    JavaRDD<Double> rdd = offsetRDD.flatMap(
        new GenerateSampleBlock(replace, fraction, (long) maxValue, partitionSize));

    // Randomize the sampled elements
    JavaRDD<Double> randomizedRDD = rdd.mapToPair(new AttachRandom()).sortByKey().values();

    // Trim the sampled list to required size & attach matrix indexes to randomized elements
    JavaPairRDD<MatrixIndexes, MatrixCell> miRDD = randomizedRDD
        .zipWithIndex()
        .filter( new TrimSample(lrows) )
        .mapToPair( new Double2MatrixCell() );

    DataCharacteristics mcOut = new MatrixCharacteristics(lrows, 1, blocksize, lrows);

    // Construct BinaryBlock representation
    JavaPairRDD<MatrixIndexes, MatrixBlock> mbRDD =
        RDDConverterUtils.binaryCellToBinaryBlock(sec.getSparkContext(), miRDD, mcOut, true);

    sec.getDataCharacteristics(output.getName()).setNonZeros(lrows);
    sec.setRDDHandleForVariable(output.getName(), mbRDD);
}
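Stripped of the Spark plumbing, the seeding pattern in this example is the same one as in Example 1: a single master Well1024a hands out one nextLong() per partition-sized range, so every SampleTask samples from its own reproducible stream. The sketch below shows just that core loop; the class name, seed, and sizes are illustrative, and the master generator is seeded directly rather than via LibMatrixDatagen.setupSeedsForRand.

import java.util.ArrayList;
import java.util.List;
import org.apache.commons.math3.random.Well1024a;

public class SeedPerRangeSketch {
    public static void main(String[] args) {
        Well1024a bigrand = new Well1024a(7L); // master generator (demo seed)
        long maxValue = 1_000_000L;            // population size
        long partitionSize = 250_000L;         // range covered by each task

        // One {rangeStart, seed} pair per range, mirroring the SampleTask loop above
        List<long[]> tasks = new ArrayList<>();
        for (long st = 1; st <= maxValue; st += partitionSize)
            tasks.add(new long[] { st, bigrand.nextLong() });

        for (long[] t : tasks)
            System.out.println("range start " + t[0] + " -> seed " + t[1]);
    }
}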