org.apache.spark.util.random.BernoulliCellSampler Scala Examples
The following examples show how to use org.apache.spark.util.random.BernoulliCellSampler.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: SampleNode.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.local

import org.apache.spark.sql.SQLConf
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.util.random.{BernoulliCellSampler, PoissonSampler}

/**
 * Local node that emits a random sample of the rows produced by `child`.
 *
 * When `withReplacement` is set, rows are drawn through a [[PoissonSampler]] constructed with
 * `upperBound - lowerBound`; otherwise through a [[BernoulliCellSampler]] constructed with
 * `(lowerBound, upperBound)`. The chosen sampler is seeded with `seed` when the node is opened.
 */
case class SampleNode(
    conf: SQLConf,
    lowerBound: Double,
    upperBound: Double,
    withReplacement: Boolean,
    seed: Long,
    child: LocalNode) extends UnaryLocalNode(conf) {

  // Sampled view over the child's rows; assigned in open().
  private[this] var sampledRows: Iterator[InternalRow] = _

  // Row most recently pulled from `sampledRows` by next().
  private[this] var fetchedRow: InternalRow = _

  /** The output attributes are exactly those of the child. */
  override def output: Seq[Attribute] = child.output

  /** Opens the child, then builds and seeds the sampler and wraps the child's rows with it. */
  override def open(): Unit = {
    child.open()
    val sampler =
      if (!withReplacement) {
        new BernoulliCellSampler[InternalRow](lowerBound, upperBound)
      } else {
        // Disable gap sampling since the gap sampling method buffers two rows internally,
        // requiring us to copy the row, which is more expensive than the random number generator.
        new PoissonSampler[InternalRow](upperBound - lowerBound, useGapSamplingIfPossible = false)
      }
    sampler.setSeed(seed)
    sampledRows = sampler.sample(child.asIterator)
  }

  /**
   * Advances to the next sampled row, if any.
   *
   * @return true when a row was loaded and can be read with `fetch()`; false when the
   *         sampled iterator has no more rows
   */
  override def next(): Boolean = {
    val hasRow = sampledRows.hasNext
    if (hasRow) {
      fetchedRow = sampledRows.next()
    }
    hasRow
  }

  /** Returns the row loaded by the most recent successful `next()`. */
  override def fetch(): InternalRow = fetchedRow

  /** Closes the child node. */
  override def close(): Unit = child.close()
}
Example 2
Source File: SampleNodeSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.local

import org.apache.spark.util.random.{BernoulliCellSampler, PoissonSampler}

/**
 * Checks that [[SampleNode]] yields exactly the rows obtained by running an identically
 * configured and identically seeded sampler directly over the same input data.
 */
class SampleNodeSuite extends LocalNodeTest {

  /**
   * Registers one test case comparing the node's output against a reference sampler.
   *
   * @param withReplacement whether to sample with replacement (reference: PoissonSampler)
   *                        or without (reference: BernoulliCellSampler)
   */
  private def testSample(withReplacement: Boolean): Unit = {
    val seed = 0L
    val lowerBound = 0.0
    val upperBound = 0.3
    val maybeOut = if (withReplacement) "" else "out"

    test(s"with$maybeOut replacement") {
      // Pairs (1, 1) through (1000, 1000).
      val rows = Array.tabulate(1000) { idx => (idx + 1, idx + 1) }

      // Reference result: apply the same kind of sampler, with the same seed, to the raw input.
      val referenceSampler =
        if (!withReplacement) {
          new BernoulliCellSampler[(Int, Int)](lowerBound, upperBound)
        } else {
          new PoissonSampler[(Int, Int)](upperBound - lowerBound, useGapSamplingIfPossible = false)
        }
      referenceSampler.setSeed(seed)
      val expectedOutput = referenceSampler.sample(rows.iterator).toArray

      // Result under test: the same input routed through a SampleNode.
      val source = new DummyNode(kvIntAttributes, rows)
      val node = new SampleNode(conf, lowerBound, upperBound, withReplacement, seed, source)
      val actualOutput = node.collect().map { row => (row.getInt(0), row.getInt(1)) }

      assert(actualOutput === expectedOutput)
    }
  }

  // Register the with-replacement case first, then the without-replacement case.
  Seq(true, false).foreach { flag => testSample(withReplacement = flag) }
}