org.apache.commons.math3.ml.clustering.KMeansPlusPlusClusterer Java Examples

Source File: MyTest2.java From ACManager with GNU General Public License v3.0

6 votes

/**
 * Clusters seven one-dimensional points into three groups with k-means++ and prints
 * the input list, a marker line, the number of clusters and the points of each cluster.
 */
@Test
public void test6() throws Exception {
    // The sample values; each one becomes a one-dimensional point.
    double[] values = {1, 1.5, 1.8, 3.5, 3.6, 4, 4.2};

    List<DoublePoint> list = new ArrayList<>();
    for (double value : values) {
        list.add(new DoublePoint(new double[]{value}));
    }
    System.out.println(list);

    Clusterer<DoublePoint> clusterer = new KMeansPlusPlusClusterer<DoublePoint>(3);
    List<? extends Cluster<DoublePoint>> res = clusterer.cluster(list);

    System.out.println("!!!");
    System.out.println(res.size());
    for (Cluster<DoublePoint> group : res) {
        System.out.println(group.getPoints());
    }
}

Source File: KmeansSampling.java From AILibs with GNU Affero General Public License v3.0

5 votes

/**
 * Performs the next step of the algorithm, depending on its current state.
 *
 * <p>In state CREATED an empty copy of the input is prepared as the sample, a k of -1 is
 * replaced by the sample size, and the input is clustered with k-means++ unless cluster
 * results are already present; afterwards the algorithm is activated. In state ACTIVE one
 * regular algorithm step is executed.
 *
 * @return the event produced by activating the algorithm or by the executed step
 * @throws AlgorithmException if the empty copy of the dataset cannot be created
 * @throws IllegalStateException if the algorithm is in any other state
 */
@SuppressWarnings("unchecked")
@Override
public IAlgorithmEvent nextWithException() throws AlgorithmException, InterruptedException, AlgorithmTimeoutedException, AlgorithmExecutionCanceledException {
	switch (this.getState()) {
	case CREATED:
		// The sample starts out as an empty dataset derived from the input.
		try {
			this.sample = (D) this.getInput().createEmptyCopy();
		} catch (DatasetCreationException e) {
			throw new AlgorithmException("Could not create a copy of the dataset.", e);
		}

		// A k of -1 is replaced by the sample size.
		if (this.k == -1) {
			this.k = this.sampleSize;
		}

		// Cluster only when no cluster results are available yet.
		if (this.clusterResults == null) {
			JDKRandomGenerator seededGenerator = new JDKRandomGenerator();
			seededGenerator.setSeed(this.seed);
			KMeansPlusPlusClusterer<I> kMeans = new KMeansPlusPlusClusterer<I>(this.k, -1, this.distanceMeassure, seededGenerator);
			this.clusterResults = kMeans.cluster(this.getInput()); // this is not interruptible!!
		}
		return this.activate();
	case ACTIVE:
		return this.doAlgorithmStep();
	default:
		throw new IllegalStateException("Unknown algorithm state " + this.getState());
	}
}

Source File: GMeansStratiAmountSelectorAndAssigner.java From AILibs with GNU Affero General Public License v3.0

5 votes

/**
 * Stores the dataset and, if no clusterer or no clusters are available yet, clusters the
 * dataset with k-means++ into {@code stratiAmount} clusters.
 *
 * @param dataset the dataset to assign to strati
 * @param stratiAmount the number of clusters to build
 */
@Override
public void init(final IDataset<?> dataset, final int stratiAmount) {
	this.setDataset(dataset);

	// Nothing to do when both a clusterer and its clusters already exist.
	if (this.clusterer != null && this.getClusters() != null) {
		return;
	}

	// This object was not used for strati amount selection.
	// Perform k-means clustering to get the correct strati amounts.
	final JDKRandomGenerator seededGenerator = new JDKRandomGenerator();
	seededGenerator.setSeed(this.randomSeed);
	final KMeansPlusPlusClusterer<Clusterable> kMeans = new KMeansPlusPlusClusterer<>(stratiAmount, -1, this.distanceMeasure, seededGenerator);
	final ListView<Clusterable> points = new ListView<Clusterable>(dataset);
	this.setClusters(kMeans.cluster(points));
}

Source File: KMeansStratiAssigner.java From AILibs with GNU Affero General Public License v3.0

5 votes

/**
 * Stores the dataset and clusters it with k-means++ into {@code stratiAmount} clusters,
 * logging before and after the clustering.
 *
 * @param dataset the dataset to cluster; it is cast to a list of {@code Clusterable}
 * @param stratiAmount the number of clusters to build
 */
@Override
public void init(final IDataset<?> dataset, final int stratiAmount) {
	this.setDataset(dataset);

	// Seeded generator for the clustering.
	final JDKRandomGenerator seededGenerator = new JDKRandomGenerator();
	seededGenerator.setSeed(this.randomSeed);

	// The clusterer works on Clusterable elements, so the dataset is viewed as such a list.
	final List<Clusterable> points = (List<Clusterable>) dataset;
	final KMeansPlusPlusClusterer<Clusterable> kMeans = new KMeansPlusPlusClusterer<>(stratiAmount, -1, this.distanceMeasure, seededGenerator);

	this.logger.info("Clustering dataset with {} instances.", dataset.size());
	this.setClusters(kMeans.cluster(points));
	this.logger.info("Finished clustering");
}

Source File: KmeansEvaluator.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Clusters the rows of an observation matrix with k-means++.
 *
 * <p>Each row becomes one cluster point. The point is labelled with the matching row label
 * of the matrix, or with the row index when the matrix has no row labels.
 *
 * @param value1 the observation matrix; must be a {@code Matrix}
 * @param value2 the number of clusters k; must be a {@code Number}
 * @return a {@code ClusterTuple} built from the run parameters, the clusters and the
 *         column labels of the matrix
 * @throws IOException if one of the parameters does not have the expected type
 */
@Override
@SuppressWarnings({"unchecked"})
public Object doWork(Object value1, Object value2) throws IOException {

  // Validate both parameters up front, in the order they are given.
  if(!(value1 instanceof Matrix)) {
    throw new IOException("The first parameter for kmeans should be the observation matrix.");
  }
  if(!(value2 instanceof Number)) {
    throw new IOException("The second parameter for kmeans should be k.");
  }

  Matrix matrix = (Matrix)value1;
  int k = ((Number)value2).intValue();

  @SuppressWarnings({"rawtypes"})
  KMeansPlusPlusClusterer<ClusterPoint> kmeans = new KMeansPlusPlusClusterer(k, maxIterations);

  double[][] rows = matrix.getData();
  List<String> labels = matrix.getRowLabels();

  // One cluster point per matrix row; fall back to the row index without labels.
  List<ClusterPoint> points = new ArrayList<>();
  for(int row=0; row<rows.length; row++) {
    String id = (labels != null) ? labels.get(row) : Integer.toString(row);
    points.add(new ClusterPoint(id, rows[row]));
  }

  @SuppressWarnings({"rawtypes"})
  Map fields = new HashMap();

  fields.put("k", k);
  fields.put("distance", "euclidean");
  fields.put("maxIterations", maxIterations);

  return new ClusterTuple(fields, kmeans.cluster(points), matrix.getColumnLabels());
}

Source File: MultiKmeansEvaluator.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Clusters the rows of an observation matrix with k-means++, run through a multi-trial
 * clusterer configured with the requested number of trials.
 *
 * <p>Each row becomes one cluster point. The point is labelled with the matching row label
 * of the matrix; when the matrix has no row labels the row index is used instead, so an
 * unlabelled matrix no longer fails with a {@code NullPointerException}.
 *
 * @param values exactly three values: the observation matrix ({@code Matrix}), the number of
 *        clusters k ({@code Number}) and the number of trials ({@code Number})
 * @return a {@code KmeansEvaluator.ClusterTuple} built from the run parameters, the clusters
 *         and the column labels of the matrix
 * @throws IOException if the number of parameters is not three or a parameter does not have
 *         the expected type
 */
@Override
@SuppressWarnings({"unchecked"})
public Object doWork(Object... values) throws IOException {

  if(values.length != 3) {
    throw new IOException("The multiKmeans function expects three parameters; a matrix to cluster, k and number of trials.");
  }

  Object value1 = values[0];
  Object value2 = values[1];
  Object value3 = values[2];

  Matrix matrix = null;
  int k = 0;
  int trials=0;

  if(value1 instanceof Matrix) {
    matrix = (Matrix)value1;
  } else {
    throw new IOException("The first parameter for multiKmeans should be the observation matrix.");
  }

  if(value2 instanceof Number) {
    k = ((Number)value2).intValue();
  } else {
    throw new IOException("The second parameter for multiKmeans should be k.");
  }

  if(value3 instanceof Number) {
    trials= ((Number)value3).intValue();
  } else {
    throw new IOException("The third parameter for multiKmeans should be trials.");
  }

  @SuppressWarnings({"rawtypes"})
  KMeansPlusPlusClusterer<KmeansEvaluator.ClusterPoint> kmeans = new KMeansPlusPlusClusterer(k, maxIterations);
  @SuppressWarnings({"rawtypes"})
  MultiKMeansPlusPlusClusterer multiKmeans = new MultiKMeansPlusPlusClusterer(kmeans, trials);

  List<KmeansEvaluator.ClusterPoint> points = new ArrayList<>();
  double[][] data = matrix.getData();

  // May be null when the matrix carries no row labels.
  List<String> ids = matrix.getRowLabels();

  for(int i=0; i<data.length; i++) {
    double[] vec = data[i];
    // Fall back to the row index as label, as the single-run kmeans evaluator does.
    String id = (ids != null) ? ids.get(i) : Integer.toString(i);
    points.add(new KmeansEvaluator.ClusterPoint(id, vec));
  }

  @SuppressWarnings({"rawtypes"})
  Map fields = new HashMap();

  fields.put("k", k);
  fields.put("trials", trials);
  fields.put("distance", "euclidean");
  fields.put("maxIterations", maxIterations);

  return new KmeansEvaluator.ClusterTuple(fields, multiKmeans.cluster(points), matrix.getColumnLabels());
}

Source File: Stats.java From gama with GNU General Public License v3.0

4 votes

/**
 * Clusters the given data with the k-means++ implementation of Apache Commons Math, using
 * the Euclidean distance and a Mersenne Twister seeded from the scope's random generator.
 *
 * @param scope the current scope; supplies the seed and is used for value conversion
 * @param data a list whose elements are lists of values; each inner list becomes one point
 * @param k the number of clusters
 * @param maxIt the maximum number of iterations passed to the clusterer
 * @return one list per cluster, holding the ids of the instances in that cluster; every
 *         instance is created with its position in {@code data} as id
 * @throws GamaRuntimeException declared by the operator signature
 */
@operator (
		value = "kmeans",
		can_be_const = false,
		type = IType.LIST,
		category = { IOperatorCategory.STATISTICAL },
		concept = { IConcept.STATISTIC, IConcept.CLUSTERING })
@doc (
		value = "returns the list of clusters (list of instance indices) computed with the kmeans++ "
				+ "algorithm from the first operand data according to the number of clusters to split"
				+ " the data into (k) and the maximum number of iterations to run the algorithm for "
				+ "(If negative, no maximum will be used) (maxIt). Usage: kmeans(data,k,maxit)",
		special_cases = "if the lengths of two vectors in the right-hand aren't equal, returns 0",
		examples = { @example (
				value = "kmeans ([[2,4,5], [3,8,2], [1,1,3], [4,3,4]],2,10)",
				equals = "[[0,2,3],[1]]") })
public static IList<IList> KMeansPlusplusApache(final IScope scope, final IList data, final Integer k,
		final Integer maxIt) throws GamaRuntimeException {
	final long seed = scope.getRandom().getSeed().longValue();
	final MersenneTwister generator = new MersenneTwister(seed);

	// Convert every inner list into a point that remembers its position in the data.
	final List<DoublePoint> points = new ArrayList<>();
	for (int row = 0; row < data.size(); row++) {
		final IList values = (IList) data.get(row);
		final double[] coordinates = new double[values.size()];
		for (int col = 0; col < coordinates.length; col++) {
			coordinates[col] = Cast.asFloat(scope, values.get(col));
		}
		points.add(new Instance(row, coordinates));
	}

	final KMeansPlusPlusClusterer<DoublePoint> clusterer =
			new KMeansPlusPlusClusterer<>(k, maxIt, new EuclideanDistance(), generator);
	final List<CentroidCluster<DoublePoint>> clusters = clusterer.cluster(points);

	// Translate each cluster back into the list of ids of its members.
	try (final Collector.AsList results = Collector.getList()) {
		for (final Cluster<DoublePoint> cluster : clusters) {
			final IList memberIds = GamaListFactory.create();
			for (final DoublePoint member : cluster.getPoints()) {
				memberIds.addValue(scope, ((Instance) member).getId());
			}
			results.add(memberIds);
		}
		return results.items();
	}
}

org.apache.commons.math3.ml.clustering.KMeansPlusPlusClusterer Java Examples