scala.collection.GenSeq Scala Examples
The following examples show how to use scala.collection.GenSeq.
Follow the link above each example to go to the original project or source file.
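As background: GenSeq is the common supertype of sequential Seq and parallel ParSeq, so a method written against GenSeq accepts both. A minimal sketch, assuming Scala 2.12 or earlier (GenSeq was removed from the standard library in 2.13); sumOfSquares is an illustrative name, not part of any project below:

import scala.collection.GenSeq

// One implementation serves both sequential and parallel sequences.
def sumOfSquares(xs: GenSeq[Double]): Double =
  xs.aggregate(0.0)((acc, x) => acc + x * x, _ + _)

val xs = Vector(1.0, 2.0, 3.0)
sumOfSquares(xs)     // 14.0, computed sequentially
sumOfSquares(xs.par) // 14.0, potentially computed in parallel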
Example 1
Source File: UtilSpark.scala From Clustering4Ever with Apache License 2.0
package org.clustering4ever.sparktools

import scala.language.higherKinds
import org.apache.spark.rdd.RDD
import org.apache.spark.HashPartitioner
import scala.reflect.runtime.universe.TypeTag
import scala.util.Random
import scala.reflect.ClassTag
import scala.collection.{GenSeq, mutable}
import org.clustering4ever.preprocessing.Preprocessable
import org.clustering4ever.hashing.HashingScalar
import org.clustering4ever.vectors.{GVector, ScalarVector}

object UtilSpark {

  type IndexPartition = Int
  type HasConverged = Boolean
  type IsOriginalDot = Boolean

  final def generateDataLocalityOnHashsedDS[
    O,
    Pz[B, C <: GVector[C]] <: Preprocessable[B, C, Pz]
  ](
    rddToPartitioned: RDD[Pz[O, ScalarVector]],
    nbblocs1: Int,
    nbBucketRange: Int
  ): RDD[(IndexPartition, (Pz[O, ScalarVector], IsOriginalDot, HasConverged))] = {
    val isOriginalPoint = true
    val hasConverged = true
    val bucketRange = 1 to nbBucketRange
    val lshRDD = rddToPartitioned.map((_, isOriginalPoint, !hasConverged))
    val localityPerPartitionRDD = lshRDD.mapPartitionsWithIndex{ (idx, it) =>
      val ar = it.toList
      def rightNeighbourhood = ar.flatMap{ case (cz, _, _) =>
        bucketRange.collect{
          case i if (idx + i < nbblocs1) => (idx + i, (cz, !isOriginalPoint, !hasConverged))
        }
      }
      def leftNeighbourhood = ar.flatMap{ case (cz, _, _) =>
        bucketRange.collect{
          case i if (idx - i >= 0) => (idx - i, (cz, !isOriginalPoint, !hasConverged))
        }
      }
      val composing =
        if (idx == 0) ar.map((idx, _)) ::: rightNeighbourhood
        else if (idx == nbblocs1 - 1) ar.map((idx, _)) ::: leftNeighbourhood
        else ar.map((idx, _)) ::: leftNeighbourhood ::: rightNeighbourhood
      composing.toIterator
    }.partitionBy(new HashPartitioner(nbblocs1))
    localityPerPartitionRDD
  }

  final def generateDataLocalityLD[
    O,
    Pz[B, C <: GVector[C]] <: Preprocessable[B, C, Pz],
    Hasher <: HashingScalar
  ](
    rddToPartitioned: RDD[Pz[O, ScalarVector]],
    hashing: Hasher,
    nbblocs1: Int,
    nbBucketRange: Int
  ): RDD[(IndexPartition, (Pz[O, ScalarVector], IsOriginalDot, HasConverged))] = {
    val hashedRDD = rddToPartitioned.sortBy(cz => hashing.hf(cz.v), ascending = true, nbblocs1)
    generateDataLocalityOnHashsedDS(hashedRDD, nbblocs1, nbBucketRange)
  }
}
Example 2
Source File: K-Modes.scala From Clustering4Ever with Apache License 2.0
package org.clustering4ever.clustering.kcenters.scala

  final def fit[D <: BinaryDistance, GS[Y] <: GenSeq[Y]](
    data: GS[Array[Int]],
    k: Int,
    metric: D,
    maxIterations: Int,
    minShift: Double
  ): KModesModel[D] = {
    KModes(k, metric, minShift, maxIterations).fit(binaryToClusterizable(data))
  }
}
Example 3
Source File: KPPInitializer.scala From Clustering4Ever with Apache License 2.0
package org.clustering4ever.clustering.kcenters.scala

  final def kppInit[
    O,
    V <: GVector[V],
    Cz[Y, Z <: GVector[Z]] <: Clusterizable[Y, Z, Cz],
    D <: Distance[V]
  ](data: GenSeq[Cz[O, V]], metric: D, k: Int): immutable.HashMap[Int, V] = {

    val centers = mutable.ArrayBuffer(data(Random.nextInt(data.size)).v)

    def obtainNearestCenter(v: V): V = centers.minBy(metric.d(_, v))

    @annotation.tailrec
    def go(i: Int): Unit = {
      val preprocessed = data.map{ cz =>
        val toPow2 = metric.d(cz.v, obtainNearestCenter(cz.v))
        (cz.v, toPow2 * toPow2)
      }
      val phi = preprocessed.aggregate(0D)((agg, e) => agg + e._2, _ + _)
      val probabilities = preprocessed.map{ case (v, toPow2) => (v, toPow2 / phi) }.seq
      val shuffled = Random.shuffle(probabilities)
      centers += Stats.obtainMedianFollowingWeightedDistribution[V](shuffled)
      if (i < k - 2) go(i + 1)
    }

    go(0)

    immutable.HashMap(centers.zipWithIndex.map{ case (center, clusterID) => (clusterID, center) }:_*)
  }
}
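This is the k-means++ seeding scheme: after a uniformly random first center, each subsequent center is drawn with probability proportional to the squared distance to its nearest existing center. A minimal, library-free sketch of the same idea, assuming plain Vector[Double] points and Euclidean distance (hypothetical code, not the Clustering4Ever API):

import scala.util.Random

def euclidean(a: Vector[Double], b: Vector[Double]): Double =
  math.sqrt(a.zip(b).map { case (x, y) => (x - y) * (x - y) }.sum)

def kppSeeds(points: Vector[Vector[Double]], k: Int): Vector[Vector[Double]] =
  (1 until k).foldLeft(Vector(points(Random.nextInt(points.size)))) { (centers, _) =>
    // Weight each point by its squared distance to the nearest chosen center.
    val weights = points.map { p =>
      val d = centers.map(euclidean(p, _)).min
      d * d
    }
    // Inverse-CDF sampling: draw p in [0, total weight), then walk the
    // cumulative weights until they reach p.
    val p = Random.nextDouble() * weights.sum
    val cumulative = weights.scanLeft(0.0)(_ + _).tail
    val idx = cumulative.indexWhere(_ >= p)
    centers :+ points(if (idx >= 0) idx else points.size - 1)
  }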
Example 4
Source File: K-Means.scala From Clustering4Ever with Apache License 2.0
package org.clustering4ever.clustering.kcenters.scala

  final def fit[D <: ContinuousDistance, GS[Y] <: GenSeq[Y]](
    data: GS[Array[Double]],
    k: Int,
    metric: D,
    minShift: Double,
    maxIterations: Int
  ): KMeansModel[D] = {
    KMeans(k, metric, minShift, maxIterations, immutable.HashMap.empty[Int, ScalarVector]).fit(scalarToClusterizable(data))
  }
}
Example 5
Source File: Statistics.scala From Clustering4Ever with Apache License 2.0
package org.clustering4ever.stats

  final def obtainMedianFollowingWeightedDistribution[V](distribution: Seq[(V, Double)]): V = {
    val p = scala.util.Random.nextDouble * distribution.foldLeft(0D)((agg, e) => agg + e._2)
    @annotation.tailrec
    def go(accum: Double, i: Int): Int = {
      if (accum < p) go(accum + distribution(i)._2, i + 1)
      else i
    }
    val cpt = go(0D, 0)
    if (cpt == 0) distribution.head._1 else distribution(cpt - 1)._1
  }
}
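Despite the "median" in its name, the helper above performs weighted random sampling: it draws p uniformly between 0 and the total weight, then walks the cumulative weights until they reach p (inverse-CDF sampling). A hypothetical call, with the method on the Stats object referenced in Example 3; note the weights need not be normalised:

val distribution = Seq(("a", 1.0), ("b", 3.0), ("c", 6.0))

// Returns "c" about 60% of the time, "b" about 30%, "a" about 10%.
val drawn: String = Stats.obtainMedianFollowingWeightedDistribution(distribution)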
Example 6
Source File: SortingTools.scala From Clustering4Ever with Apache License 2.0
package org.clustering4ever.utils

  final def bucketSort(toSort: Array[Double], b: Int) = {
    val buckets = parallel.mutable.ParArray.fill(b)(mutable.ArrayBuffer.empty[Double])
    val m = toSort.max
    @annotation.tailrec
    def go(i: Int): Unit = {
      if (i < toSort.size) {
        buckets((toSort(i) / m * (b - 1)).toInt) += toSort(i)
        go(i + 1)
      }
    }
    go(0)
    buckets.flatMap(_.sorted)
  }
}
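A hypothetical invocation, assuming the imports the snippet relies on (scala.collection.mutable and scala.collection.parallel, with ParArray from the Scala 2.12 standard library). Values are spread across b buckets by magnitude and each bucket is sorted, so the flattened result is globally sorted; the bucket-index computation assumes non-negative inputs:

import scala.collection.mutable
import scala.collection.parallel

// Four buckets over non-negative doubles.
val sorted = bucketSort(Array(0.42, 0.07, 0.93, 0.25, 0.66), b = 4)
// sorted: ParArray(0.07, 0.25, 0.42, 0.66, 0.93)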