scala.collection.GenSeq Scala Examples

The following examples show how to use scala.collection.GenSeq. They are extracted from the Clustering4Ever project (Apache License 2.0); each example's heading names the source file it comes from.
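Before the project examples, a minimal sketch (not from Clustering4Ever) of what GenSeq buys you on Scala 2.12 and earlier: the same function can accept a sequential Seq or a parallel ParSeq, since both extend scala.collection.GenSeq.

import scala.collection.GenSeq

// Hypothetical helper: one implementation serves sequential and parallel sequences.
def sumOfSquares(xs: GenSeq[Double]): Double =
  xs.aggregate(0.0)((acc, x) => acc + x * x, _ + _)

val sequential = Seq(1.0, 2.0, 3.0)
val parallelSeq = sequential.par        // a ParSeq, which is also a GenSeq

sumOfSquares(sequential)                // 14.0
sumOfSquares(parallelSeq)               // 14.0, folded in parallel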
Example 1
Source File: UtilSpark.scala (from Clustering4Ever, Apache License 2.0)
package org.clustering4ever.sparktools

import scala.language.higherKinds
import org.apache.spark.rdd.RDD
import org.apache.spark.HashPartitioner
import scala.reflect.runtime.universe.TypeTag
import scala.util.Random
import scala.reflect.ClassTag
import scala.collection.{GenSeq, mutable}
import org.clustering4ever.preprocessing.Preprocessable
import org.clustering4ever.hashing.HashingScalar
import org.clustering4ever.vectors.{GVector, ScalarVector}

object UtilSpark {

	type IndexPartition = Int
	type HasConverged = Boolean
	type IsOriginalDot = Boolean


	// Replicates every point of a partition into its neighbouring partitions so that
	// locality-sensitive computations can later run per partition without shuffling.
	final def generateDataLocalityOnHashsedDS[
		O,
		Pz[B, C <: GVector[C]] <: Preprocessable[B, C, Pz]
	](
		rddToPartitioned: RDD[Pz[O, ScalarVector]],
		nbblocs1: Int,
		nbBucketRange: Int
	): RDD[(IndexPartition, (Pz[O, ScalarVector], IsOriginalDot, HasConverged))] = {
		val isOriginalPoint = true
		val hasConverged = true
		val bucketRange = 1 to nbBucketRange

		val lshRDD = rddToPartitioned.map((_, isOriginalPoint, !hasConverged))

		val localityPerPartitionRDD = lshRDD.mapPartitionsWithIndex{ (idx, it) =>
			val ar = it.toList
			// Copies of every point sent to the nbBucketRange partitions on each side,
			// flagged as non-original and non-converged
			def rightNeighbourhood = ar.flatMap{ case (cz, _, _) => bucketRange.collect{ case i if(idx + i < nbblocs1) => (idx + i, (cz, !isOriginalPoint, !hasConverged)) } }
			def leftNeighbourhood = ar.flatMap{ case (cz, _, _) => bucketRange.collect{ case i if(idx - i >= 0) => (idx - i, (cz, !isOriginalPoint, !hasConverged)) } }
			val composing = if(idx == 0) ar.map((idx, _)) ::: rightNeighbourhood
				else if(idx == nbblocs1 - 1) ar.map((idx, _)) ::: leftNeighbourhood
				else ar.map((idx, _)) ::: leftNeighbourhood ::: rightNeighbourhood

			composing.toIterator

		}.partitionBy(new HashPartitioner(nbblocs1))

		localityPerPartitionRDD
	}

	// Sorts the RDD by the hash of each vector so that close points land in the same or
	// neighbouring partitions, then applies the locality replication above.
	final def generateDataLocalityLD[
		O,
		Pz[B, C <: GVector[C]] <: Preprocessable[B, C, Pz],
		Hasher <: HashingScalar
	](
		rddToPartitioned: RDD[Pz[O, ScalarVector]],
		hashing: Hasher,
		nbblocs1: Int,
		nbBucketRange: Int
	): RDD[(IndexPartition, (Pz[O, ScalarVector], IsOriginalDot, HasConverged))] = {
		val hashedRDD = rddToPartitioned.sortBy(cz => hashing.hf(cz.v), ascending = true, nbblocs1)
		generateDataLocalityOnHashsedDS(hashedRDD, nbblocs1, nbBucketRange)
	}

} 
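The replication scheme inside generateDataLocalityOnHashsedDS can be sketched without Spark. The helper below is hypothetical and only illustrates the idea, not the project's API: every point of partition idx is also copied, flagged as a non-original point, into the nbBucketRange partitions on each side.

// Minimal sketch (no Spark, hypothetical names) of the locality replication scheme.
def replicateToNeighbours[T](
  partitioned: Seq[(Int, T)],          // (partition index, point)
  nbPartitions: Int,
  nbBucketRange: Int
): Seq[(Int, (T, Boolean))] = {
  partitioned.flatMap { case (idx, point) =>
    val original = Seq((idx, (point, true)))
    val copies = (1 to nbBucketRange).flatMap { i =>
      Seq(idx - i, idx + i).collect {
        case j if j >= 0 && j < nbPartitions => (j, (point, false))  // non-original copy
      }
    }
    original ++ copies
  }
}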
Example 2
Source File: K-Modes.scala (from Clustering4Ever, Apache License 2.0)
package org.clustering4ever.clustering.kcenters.scala

	// Excerpt: the enclosing companion object, its imports and the binaryToClusterizable
	// helper are omitted here.
	final def fit[D <: BinaryDistance, GS[Y] <: GenSeq[Y]](
		data: GS[Array[Int]],
		k: Int,
		metric: D,
		maxIterations: Int,
		minShift: Double
	): KModesModel[D] = {
		KModes(k, metric, minShift, maxIterations).fit(binaryToClusterizable(data))
	}
} 
Example 3
Source File: KPPInitializer.scala (from Clustering4Ever, Apache License 2.0)
package org.clustering4ever.clustering.kcenters.scala

	// Excerpt: the enclosing object and its imports (scala.util.Random,
	// scala.collection.{GenSeq, immutable, mutable} and project types) are omitted.
	// k-means++ style seeding of the k initial centers.
	final def kppInit[
		O,
		V <: GVector[V],
		Cz[Y, Z <: GVector[Z]] <: Clusterizable[Y, Z, Cz],
		D <: Distance[V]
	](data: GenSeq[Cz[O, V]], metric: D, k: Int): immutable.HashMap[Int, V] = {

		val centers = mutable.ArrayBuffer(data(Random.nextInt(data.size)).v)

		def obtainNearestCenter(v: V): V = centers.minBy(metric.d(_, v))

		@annotation.tailrec
		def go(i: Int): Unit = {
			val preprocessed = data.map{ cz =>
				// Squared distance from each point to its nearest already-chosen center
				val toPow2 = metric.d(cz.v, obtainNearestCenter(cz.v))
				(cz.v, toPow2 * toPow2)
			}
			// Normalise the squared distances into a probability distribution and draw the
			// next center proportionally to it (the k-means++ rule)
			val phi = preprocessed.aggregate(0D)((agg, e) => agg + e._2, _ + _)
			val probabilities = preprocessed.map{ case (v, toPow2) => (v, toPow2 / phi) }.seq
			val shuffled = Random.shuffle(probabilities)
			centers += Stats.obtainMedianFollowingWeightedDistribution[V](shuffled)
			if(i < k - 2) go(i + 1)
		}

		go(0)
		
		immutable.HashMap(centers.zipWithIndex.map{ case (center, clusterID) => (clusterID, center) }:_*)

	}
} 
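The same seeding rule can be shown without the Clusterizable machinery. Below is a self-contained sketch on one-dimensional data (a hypothetical helper, not part of Clustering4Ever): each new center is drawn with probability proportional to its squared distance to the nearest center already chosen, and any GenSeq, sequential or parallel, works as input.

import scala.collection.GenSeq
import scala.util.Random

// Hypothetical 1-D illustration of k-means++ seeding over any GenSeq.
def kppSeed(data: GenSeq[Double], k: Int): Vector[Double] = {
  // Draw one value with probability proportional to its (unnormalised) weight.
  def drawWeighted(weighted: Seq[(Double, Double)]): Double = {
    val p = Random.nextDouble * weighted.map(_._2).sum
    val cumulative = weighted.scanLeft(0D)(_ + _._2).tail
    val i = cumulative.indexWhere(_ >= p)
    weighted(if (i < 0) weighted.size - 1 else i)._1
  }
  (1 until k).foldLeft(Vector(data(Random.nextInt(data.size)))) { (centers, _) =>
    val weighted = data.map { x =>
      val d = centers.map(c => math.abs(x - c)).min  // distance to nearest chosen center
      (x, d * d)
    }.seq
    centers :+ drawWeighted(weighted)
  }
}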
Example 4
Source File: K-Means.scala (from Clustering4Ever, Apache License 2.0)
package org.clustering4ever.clustering.kcenters.scala

	// Excerpt: the enclosing companion object, its imports and the scalarToClusterizable
	// helper are omitted here.
	final def fit[D <: ContinuousDistance, GS[Y] <: GenSeq[Y]](
		data: GS[Array[Double]],
		k: Int,
		metric: D,
		minShift: Double,
		maxIterations: Int
	): KMeansModel[D] = {
		KMeans(k, metric, minShift, maxIterations, immutable.HashMap.empty[Int, ScalarVector]).fit(scalarToClusterizable(data))
	}
} 
Example 5
Source File: Statistics.scala (from Clustering4Ever, Apache License 2.0)
package org.clustering4ever.stats

	// Draws one element from a weighted distribution: p is uniform over [0, total weight)
	// and the cumulative weights are walked until they reach p.
	final def obtainMedianFollowingWeightedDistribution[V](distribution: Seq[(V, Double)]): V = {
		val p = scala.util.Random.nextDouble * distribution.foldLeft(0D)((agg, e) => agg + e._2)
		@annotation.tailrec
		def go(accum: Double, i: Int): Int = {
			if(accum < p) go(accum + distribution(i)._2, i + 1)
			else i
		}
		val cpt = go(0D, 0)
		if(cpt == 0) distribution.head._1 else distribution(cpt - 1)._1
	}
} 
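A quick usage sketch, assuming the method above lives in the Stats object of the org.clustering4ever.stats package, as the call in Example 3 suggests. Weights need not be normalised; an element is returned with probability proportional to its weight.

import org.clustering4ever.stats.Stats  // assumed location of the object shown above

val distribution = Seq(("low", 1.0), ("mid", 3.0), ("high", 6.0))
val drawn = Stats.obtainMedianFollowingWeightedDistribution(distribution)
// "high" comes back roughly 60% of the time over repeated calls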
Example 6
Source File: SortingTools.scala (from Clustering4Ever, Apache License 2.0)
package org.clustering4ever.utils

import scala.collection.{mutable, parallel}

    // Excerpt: the enclosing object declaration is omitted. Values are spread over b
    // buckets by magnitude, each bucket is sorted, then the buckets are concatenated
    // (values are assumed to be non-negative).
    final def bucketSort(toSort: Array[Double], b: Int) = {
      val buckets = parallel.mutable.ParArray.fill(b)(mutable.ArrayBuffer.empty[Double])
      val m = toSort.max
      @annotation.tailrec
      def go(i: Int) : Unit = {
        if(i < toSort.size) {
            buckets((toSort(i) / m * (b - 1)).toInt) += toSort(i)
            go(i + 1)
        }
      }
      go(0)
      buckets.flatMap(_.sorted)
    }

}
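A usage sketch, assuming the enclosing object of bucketSort is named SortingTools as the file name suggests. The result is a ParArray with the values in ascending order.

import org.clustering4ever.utils.SortingTools  // assumed name of the enclosing object

val sorted = SortingTools.bucketSort(Array(0.3, 0.9, 0.1, 0.7), b = 2)
// ParArray(0.1, 0.3, 0.7, 0.9)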