weka.clusterers.SimpleKMeans Java Examples

Source File: EvaluationUtils.java From AILibs with GNU Affero General Public License v3.0

6 votes

/**
 * Clusters the given instances with a {@link FilteredClusterer} and scores the
 * outcome through {@code predictAccuracy}.
 * <p>
 * The clusterer is built on a copy of the data from which the class attribute
 * has been removed and is afterwards evaluated on the original instances. The
 * requested number of clusters equals the number of values of the class
 * attribute.
 *
 * @param insts the instances to cluster; the code reads their class index and
 *              class attribute
 * @return the value returned by {@code predictAccuracy} for the evaluated
 *         clustering
 * @throws Exception if filtering, building or evaluating the clusterer fails
 */
private static double performClustering(final Instances insts) throws Exception {
	logger.debug("Starting cluster evaluation...");

	final FilteredClusterer filteredClusterer = new FilteredClusterer();

	// Produce a class-free copy of the data; Remove is handed classIndex() + 1.
	final Remove classRemover = new Remove();
	classRemover.setAttributeIndices(String.valueOf(insts.classIndex() + 1));
	classRemover.setInputFormat(insts);
	final Instances unlabeled = Filter.useFilter(insts, classRemover);

	// Ask the wrapped k-means for one cluster per class value.
	final SimpleKMeans kMeans = (SimpleKMeans) filteredClusterer.getClusterer();
	final String clusterCount = String.valueOf(insts.classAttribute().numValues());
	kMeans.setOptions(new String[] { "-N", clusterCount });

	filteredClusterer.buildClusterer(unlabeled);

	final ClusterEvaluation evaluation = new ClusterEvaluation();
	evaluation.setClusterer(filteredClusterer);
	evaluation.evaluateClusterer(insts);

	final int[] classesToClusters = evaluation.getClassesToClusters();
	final double[] assignments = evaluation.getClusterAssignments();
	return predictAccuracy(insts, classesToClusters, assignments);
}

Source File: WekaClusterers.java From apogen with Apache License 2.0

6 votes

/**
 * Evaluates a WEKA {@link SimpleKMeans} clusterer on an ARFF file via
 * {@link ClusterEvaluation} and parses the textual report.
 * <p>
 * The clusterer is invoked with the options {@code -t filename},
 * {@code -init init}, {@code -N numClusters}, {@code -I 100} and
 * {@code -c first}.
 *
 * @param filename    path of the ARFF file passed as {@code -t}
 * @param numClusters number of clusters to search for, passed as {@code -N}
 * @param init        initialisation method selector passed as {@code -init}
 *                    (plain k-means or k-means++)
 * @return the result of {@code parseKMeansOutput} applied to the evaluation
 *         report
 * @throws Exception if the evaluation fails
 */
public static LinkedHashMap<Integer, LinkedList<String>> runKmeans(String filename, String numClusters, String init)
		throws Exception {

	final String[] evaluationOptions = {
			"-t", filename,
			"-init", init,
			"-N", numClusters,
			"-I", "100",
			"-c", "first"
	};

	final String report = ClusterEvaluation.evaluateClusterer(new SimpleKMeans(), evaluationOptions);
	return parseKMeansOutput(report, numClusters);
}

Source File: KMeans.java From Java-Data-Analysis with MIT License

6 votes

/**
 * Loads the data set, clusters it into {@code K} groups with
 * {@link SimpleKMeans} and prints, for every instance, its first two attribute
 * values together with the cluster it was assigned to.
 * <p>
 * Any exception raised while clustering is printed to standard error.
 *
 * @param args command line arguments (unused)
 */
public static void main(String[] args) {
    final Instances points = load(DATA);
    final SimpleKMeans clusterer = new SimpleKMeans();
    System.out.printf("%d clusters:%n", K);
    try {
        clusterer.setNumClusters(K);
        clusterer.buildClusterer(points);
        for (Instance point : points) {
            final double first = point.value(0);
            final double second = point.value(1);
            final int cluster = clusterer.clusterInstance(point);
            System.out.printf("(%.0f,%.0f): %s%n", first, second, cluster);
        }
    } catch (Exception failure) {
        System.err.println(failure);
    }
}

Source File: WekaClusterTest.java From Java-Data-Science-Cookbook with MIT License

6 votes

/**
 * Clusters the {@code cpu} data set into 10 groups with a freshly created
 * {@link SimpleKMeans} (seed 10) and prints the cluster assigned to every
 * instance.
 * <p>
 * Instance order is preserved so that the assignment array can be read back
 * by instance position. A failure during clustering is reported on standard
 * error instead of being silently discarded.
 */
public void clusterData(){
	kmeans = new SimpleKMeans();
	kmeans.setSeed(10);
	try {
		kmeans.setPreserveInstancesOrder(true);
		kmeans.setNumClusters(10);
		kmeans.buildClusterer(cpu);
		int[] assignments = kmeans.getAssignments();
		for (int i = 0; i < assignments.length; i++) {
			System.out.printf("Instance %d -> Cluster %d\n", i, assignments[i]);
		}
	} catch (Exception e1) {
		// Do not swallow the failure: without this the method produced no
		// output and no hint about what went wrong.
		System.err.println("Clustering failed: " + e1);
	}
}

Source File: ClusterEval.java From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License

6 votes

/**
 * Builds a 3-cluster {@link SimpleKMeans} model on the weather training file,
 * prints the model, evaluates it on the weather test file and prints the
 * evaluation report and the number of clusters.
 * <p>
 * If anything fails, only the exception message is printed to standard output.
 *
 * @param args the command line arguments (unused)
 */
public static void main(String[] args) {
    try {
        final DataSource trainingSource =
                new DataSource("/Users/admin/Documents/NetBeansProjects/ClusterEval/weather.arff");
        final Instances trainingData = trainingSource.getDataSet();

        final SimpleKMeans kMeans = new SimpleKMeans();
        kMeans.setNumClusters(3);
        kMeans.buildClusterer(trainingData);
        System.out.println(kMeans);

        final ClusterEvaluation evaluation = new ClusterEvaluation();
        final DataSource testSource =
                new DataSource("/Users/admin/Documents/NetBeansProjects/ClusterEval/weather.test.arff");
        final Instances testData = testSource.getDataSet();
        evaluation.setClusterer(kMeans);
        evaluation.evaluateClusterer(testData);

        System.out.println(evaluation.clusterResultsToString());
        System.out.println("# of clusters: " + evaluation.getNumClusters());
    } catch (Exception failure) {
        System.out.println(failure.getMessage());
    }
}

Source File: Clustering.java From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License

6 votes

/**
 * Loads the weather data set, builds a 3-cluster {@link SimpleKMeans} model
 * on it and prints the model.
 * <p>
 * If anything fails, only the exception message is printed to standard output.
 *
 * @param args the command line arguments (unused)
 */
public static void main(String[] args) {
    try {
        final DataSource source =
                new DataSource("/Users/admin/Documents/NetBeansProjects/Clustering/weather.arff");
        final Instances weather = source.getDataSet();

        final SimpleKMeans kMeans = new SimpleKMeans();
        kMeans.setNumClusters(3);
        kMeans.buildClusterer(weather);

        System.out.println(kMeans);
    } catch (Exception failure) {
        System.out.println(failure.getMessage());
    }
}

Source File: SimpleKMeansClusterer.java From mzmine2 with GNU General Public License v2.0

5 votes

/**
 * Clusters the data set with WEKA's {@link SimpleKMeans} and wraps the
 * per-instance cluster indices in a {@link ClusteringResult}.
 * <p>
 * The number of clusters is taken from
 * {@code SimpleKMeansClustererParameters.numberOfGroups}. Any exception raised
 * while configuring, building or applying the clusterer is logged at SEVERE
 * level and {@code null} is returned.
 * <p>
 * NOTE(review): the visualization value is looked up through
 * {@code EMClustererParameters} rather than the k-means parameter class;
 * confirm that this is intended.
 *
 * @param dataset the instances to cluster
 * @param parameters the parameter set holding the number of groups and the
 *        visualization setting
 * @return the clustering result, or {@code null} if clustering failed
 */
@Override
public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) {

  final int groupCount =
      parameters.getParameter(SimpleKMeansClustererParameters.numberOfGroups).getValue();
  final String[] kMeansOptions = {"-N", String.valueOf(groupCount)};

  final SimpleKMeans kMeans = new SimpleKMeans();
  final List<Integer> assignments = new ArrayList<Integer>();

  try {
    kMeans.setOptions(kMeansOptions);
    kMeans.buildClusterer(dataset);

    // Record the cluster index of every instance, in data set order.
    for (Enumeration<?> remaining = dataset.enumerateInstances(); remaining.hasMoreElements();) {
      final Instance current = (Instance) remaining.nextElement();
      assignments.add(kMeans.clusterInstance(current));
    }

    return new ClusteringResult(assignments, null, kMeans.numberOfClusters(),
        parameters.getParameter(EMClustererParameters.visualization).getValue());

  } catch (Exception ex) {
    logger.log(Level.SEVERE, null, ex);
    return null;
  }
}

Source File: SimpleKMeansClusterer.java From mzmine3 with GNU General Public License v2.0

5 votes

/**
 * Runs WEKA's {@link SimpleKMeans} on the data set and returns the cluster
 * index of every instance as a {@link ClusteringResult}.
 * <p>
 * The cluster count comes from
 * {@code SimpleKMeansClustererParameters.numberOfGroups}. If configuring,
 * building or applying the clusterer throws, the exception is logged at SEVERE
 * level and {@code null} is returned.
 * <p>
 * NOTE(review): the visualization value is read via
 * {@code EMClustererParameters}, not the k-means parameter class; confirm
 * that this is intended.
 *
 * @param dataset the instances to cluster
 * @param parameters the parameter set providing the number of groups and the
 *        visualization setting
 * @return the clustering result, or {@code null} if clustering failed
 */
@Override
public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) {

  final int groupCount =
      parameters.getParameter(SimpleKMeansClustererParameters.numberOfGroups).getValue();
  final String[] kMeansOptions = {"-N", String.valueOf(groupCount)};

  final SimpleKMeans kMeans = new SimpleKMeans();
  final List<Integer> assignments = new ArrayList<Integer>();

  try {
    kMeans.setOptions(kMeansOptions);
    kMeans.buildClusterer(dataset);

    // Collect one cluster index per instance, following data set order.
    for (Enumeration<?> remaining = dataset.enumerateInstances(); remaining.hasMoreElements();) {
      final Instance current = (Instance) remaining.nextElement();
      assignments.add(kMeans.clusterInstance(current));
    }

    return new ClusteringResult(assignments, null, kMeans.numberOfClusters(),
        parameters.getParameter(EMClustererParameters.visualization).getValue());

  } catch (Exception ex) {
    logger.log(Level.SEVERE, null, ex);
    return null;
  }
}

Source File: Clustering.java From java-ml-projects with Apache License 2.0

5 votes

/**
 * Clusters {@code data} into three groups and returns one chart series per
 * cluster, each holding the chart points of the instances assigned to it.
 * <p>
 * Attribute 3 (the class information) is removed before clustering. The
 * series are named "0", "1" and "2" after their cluster index.
 *
 * @return three series, indexed by cluster number
 * @throws Exception if filtering or clustering fails
 */
private List<Series<Number, Number>> buildClusteredSeries() throws Exception {
	List<XYChart.Series<Number, Number>> seriesPerCluster = new ArrayList<>();

	// Cluster on a copy of the data without the class attribute.
	Remove classRemover = new Remove();
	classRemover.setAttributeIndices("3");
	classRemover.setInputFormat(data);
	Instances unlabeled = Filter.useFilter(data, classRemover);

	SimpleKMeans clusterer = new SimpleKMeans();
	clusterer.setSeed(10);
	// Needed so getAssignments() can be matched to instances by position.
	clusterer.setPreserveInstancesOrder(true);
	clusterer.setNumClusters(3);
	clusterer.buildClusterer(unlabeled);

	// One empty, named series per cluster.
	for (int cluster = 0; cluster < 3; cluster++) {
		Series<Number, Number> series = new XYChart.Series<>();
		series.setName(String.valueOf(cluster));
		seriesPerCluster.add(series);
	}

	// Route every original instance to the series of its cluster.
	int position = 0;
	for (int cluster : clusterer.getAssignments()) {
		seriesPerCluster.get(cluster).getData().add(instancetoChartData(data.get(position)));
		position++;
	}

	return seriesPerCluster;
}

Source File: RBFNetwork.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Returns the default capabilities of the classifier, derived from those of
 * Logistic, LinearRegression and SimpleKMeans.
 * <p>
 * The Logistic and LinearRegression capabilities are or-ed together, the
 * class capabilities of that combination are captured, the combination is
 * and-ed with the SimpleKMeans capabilities, and finally the captured class
 * capabilities are or-ed back in.
 *
 * @return      the capabilities of this classifier
 * @see         Logistic
 * @see         LinearRegression
 */
public Capabilities getCapabilities() {
  Capabilities combined = new Logistic().getCapabilities();
  Capabilities regressionCapabilities = new LinearRegression().getCapabilities();
  combined.or(regressionCapabilities);

  // Captured before the and() below and merged back in afterwards.
  Capabilities classCapabilities = combined.getClassCapabilities();

  Capabilities clustererCapabilities = new SimpleKMeans().getCapabilities();
  combined.and(clustererCapabilities);
  combined.or(classCapabilities);

  return combined;
}

Source File: LearnShapelets.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Initialises the shapelets of every scale with the centroids of a k-means
 * clustering of the normalised training segments.
 * <p>
 * For each scale {@code r} all segments of length {@code L[r]} are cut out of
 * the training series, normalised, converted to WEKA instances and clustered
 * into {@code K} groups (at most 100 iterations, seed drawn from
 * {@code rand}). The centroids become {@code shapelets[r]}.
 *
 * @throws Exception if building the clusterer fails
 */
public void initializeShapeletsKMeans() throws Exception {
    for (int r = 0; r < R; r++) {
        final int segmentsPerSeries = numberOfSegments[r];
        final int segmentLength = L[r];
        final int totalSegments = train.length * segmentsPerSeries;
        final double[][] segments = new double[totalSegments][segmentLength];

        // Cut every window of the current length out of every training series.
        for (int series = 0; series < train.length; series++) {
            for (int start = 0; start < segmentsPerSeries; start++) {
                final double[] window = segments[series * segmentsPerSeries + start];
                for (int offset = 0; offset < segmentLength; offset++) {
                    window[offset] = train[series][start + offset];
                }
            }
        }

        // Normalise each segment on its own.
        for (int row = 0; row < totalSegments; row++) {
            segments[row] = StatisticalUtilities.normalize(segments[row]);
        }

        final Instances segmentInstances = InstanceTools.toWekaInstances(segments);

        final SimpleKMeans clusterer = new SimpleKMeans();
        clusterer.setNumClusters(K);
        clusterer.setMaxIterations(100);
        clusterer.setSeed((int) (rand.nextDouble() * 1000) );
        clusterer.buildClusterer(segmentInstances);

        // The cluster centroids serve as the initial shapelets of this scale.
        final Instances centroids = clusterer.getClusterCentroids();
        shapelets[r] = InstanceTools.fromWekaInstancesArray(centroids, false);

        // Report a failed conversion.
        if (shapelets[r] == null) {
            print("P not set");
        }
    }
}

Source File: LearnShapelets.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Initialises the shapelets of every scale with the centroids of a k-means
 * clustering of the normalised training segments.
 * <p>
 * For each scale {@code r} all segments of length {@code L[r]} are cut out of
 * the training series, normalised, converted to WEKA instances and clustered
 * into {@code K} groups (at most 100 iterations, seed drawn from
 * {@code rand}). The centroids become {@code shapelets[r]}.
 * <p>
 * NOTE(review): despite the method name, nothing in this body reads from a
 * file; confirm whether that is intended.
 *
 * @throws Exception if building the clusterer fails
 */
public void initializeShapeletsFromFile() throws Exception {
    for (int r = 0; r < R; r++) {
        final int segmentsPerSeries = numberOfSegments[r];
        final int segmentLength = L[r];
        final int totalSegments = train.length * segmentsPerSeries;
        final double[][] segments = new double[totalSegments][segmentLength];

        // Cut every window of the current length out of every training series.
        for (int series = 0; series < train.length; series++) {
            for (int start = 0; start < segmentsPerSeries; start++) {
                final double[] window = segments[series * segmentsPerSeries + start];
                for (int offset = 0; offset < segmentLength; offset++) {
                    window[offset] = train[series][start + offset];
                }
            }
        }

        // Normalise each segment on its own.
        for (int row = 0; row < totalSegments; row++) {
            segments[row] = StatisticalUtilities.normalize(segments[row]);
        }

        final Instances segmentInstances = InstanceTools.toWekaInstances(segments);

        final SimpleKMeans clusterer = new SimpleKMeans();
        clusterer.setNumClusters(K);
        clusterer.setMaxIterations(100);
        clusterer.setSeed((int) (rand.nextDouble() * 1000) );
        clusterer.buildClusterer(segmentInstances);

        // The cluster centroids serve as the initial shapelets of this scale.
        final Instances centroids = clusterer.getClusterCentroids();
        shapelets[r] = InstanceTools.fromWekaInstancesArray(centroids, false);

        // Report a failed conversion.
        if (shapelets[r] == null) {
            print("P not set");
        }
    }
}

Source File: BoTSWEnsemble.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Sums, over all assigned instances, the squared differences between each
 * attribute value of the instance and the same attribute of the centroid of
 * its cluster.
 * <p>
 * Instances are paired with assignments by position, i.e. assignment
 * {@code i} is applied to {@code input.get(i)}.
 *
 * @param kmeans the clusterer supplying centroids and assignments
 * @param input  the instances the assignments refer to
 * @return the total squared distance of all instances to their centroids
 * @throws Exception if the clusterer cannot provide its assignments
 */
public static double compactnessOfClustering(SimpleKMeans kmeans, Instances input) throws Exception {
    final Instances centres = kmeans.getClusterCentroids();
    final int[] clusterOf = kmeans.getAssignments();

    double sumOfSquares = 0.0;
    int position = 0;
    for (int cluster : clusterOf) {
        final Instance point = input.get(position);
        final Instance centre = centres.get(cluster);

        for (int attribute = 0; attribute < point.numAttributes(); ++attribute) {
            final double difference = point.value(attribute) - centre.value(attribute);
            sumOfSquares += difference * difference;
        }
        ++position;
    }
    return sumOfSquares;
}

Source File: ClassificationViaClustering.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Default constructor; installs a new {@link SimpleKMeans} as the clusterer.
 */
public ClassificationViaClustering() {
  super();

  this.m_Clusterer = new SimpleKMeans();
}

Source File: Ex06_Clusterers.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Demonstrates clustering on the ItalyPowerDemand sample: first with
 * {@code UnsupervisedShapelets}, then with WEKA's {@link SimpleKMeans}. For
 * both, the cluster assignments and the Rand index are printed.
 *
 * @param args command line arguments (unused)
 * @throws Exception if loading the data or building a clusterer fails
 */
public static void main(String[] args) throws Exception {

    // Train and test split are merged into one data set for clustering.
    final int seed = 0;
    final Instances[] trainAndTest = DatasetLoading.sampleItalyPowerDemand(seed);
    final Instances allData = trainAndTest[0];
    final Instances testPart = trainAndTest[1];
    allData.addAll(testPart);

    // Time series clusterer: the number of clusters is set to the number of classes.
    final UnsupervisedShapelets shapeletClusterer = new UnsupervisedShapelets();
    shapeletClusterer.setNumberOfClusters(allData.numClasses());
    shapeletClusterer.buildClusterer(allData);

    // Position i of the assignment array belongs to instance i of the data.
    final int[] shapeletAssignments = shapeletClusterer.getAssignments();
    System.out.println("UnsupervisedShapelets cluster assignments:");
    System.out.println(Arrays.toString(shapeletAssignments));

    // Rand index of the assignments, computed by the utility method.
    final double shapeletRandIndex = ClusteringUtilities.randIndex(shapeletAssignments, allData);
    System.out.println("UnsupervisedShapelets Rand index:");
    System.out.println(shapeletRandIndex);

    // WEKA clusterers need the class value removed, so work on a stripped copy.
    final Instances unlabeled = new Instances(allData);
    deleteClassAttribute(unlabeled);
    final SimpleKMeans kMeans = new SimpleKMeans();
    kMeans.setNumClusters(allData.numClasses());
    kMeans.setPreserveInstancesOrder(true);
    kMeans.buildClusterer(unlabeled);

    final int[] kMeansAssignments = kMeans.getAssignments();
    System.out.println("SimpleKMeans cluster assignments:");
    System.out.println(Arrays.toString(kMeansAssignments));

    final double kMeansRandIndex = ClusteringUtilities.randIndex(kMeansAssignments, allData);
    System.out.println("SimpleKMeans Rand index:");
    System.out.println(kMeansRandIndex);
}

Source File: EvaluationUtils.java From AILibs with GNU Affero General Public License v3.0

4 votes

/**
 * Evaluates clusterings of a stratified sample of the given instances, once
 * without a kernel and once per kernel returned by
 * {@code getKernelsWithInstances}, and combines the individual scores via
 * {@code evaluateFutures}.
 * <p>
 * All evaluations are submitted to a fixed thread pool of {@code numThreads}
 * threads. The pool is always released: after a successful run it is shut
 * down in an orderly fashion, and if this method exits exceptionally (for
 * example because the calling thread was interrupted) outstanding tasks are
 * cancelled via {@code shutdownNow()}.
 *
 * @param instances  the data to evaluate; a stratified split (seed 42,
 *                   portion {@code kernelSplitPortion}) is taken from it
 * @param numThreads number of worker threads of the pool
 * @return the value computed by {@code evaluateFutures} from all submitted
 *         evaluations
 * @throws InterruptedException if the calling thread is found interrupted
 *                              while submitting tasks
 * @throws Exception            if splitting or evaluating fails
 */
public static double performKernelClustering(final Instances instances, final int numThreads) throws Exception {
	logger.debug("Starting kernelized cluster evaluation...");

	List<Instances> split = WekaUtil.getStratifiedSplit(instances, 42, kernelSplitPortion);

	ExecutorService execService = Executors.newFixedThreadPool(numThreads);
	boolean finished = false;
	try {
		List<Future<Double>> futures = new ArrayList<>();

		// Baseline: plain clustering without any kernel.
		Future<Double> result0 = execService.submit(() ->
		performClustering(new Instances(split.get(0)))
				);
		futures.add(result0);

		for (Map.Entry<Kernel, Instances> entry : getKernelsWithInstances(split.get(0))) {
			if (Thread.currentThread().isInterrupted()) {
				throw new InterruptedException(EVALUATION_STOPPED_MESSAGE);
			}

			Future<Double> result = execService.submit(() -> {
				Kernel kernel = entry.getKey();
				Instances insts = entry.getValue();

				FilteredClusterer clusterer = new FilteredClusterer();

				// Class-free copy of the data used to build the clusterer.
				Remove filter = new Remove();
				filter.setAttributeIndices("" + (insts.classIndex() + 1));
				filter.setInputFormat(insts);

				Instances removedClassInstances = Filter.useFilter(insts, filter);
				Nystroem kernelFilter = new Nystroem();

				kernelFilter.setKernel(kernel);
				clusterer.setFilter(kernelFilter);
				// One cluster per class value.
				((SimpleKMeans) clusterer.getClusterer())
				.setOptions(new String[]{"-N", String.valueOf(insts.classAttribute().numValues())});

				clusterer.buildClusterer(removedClassInstances);

				ClusterEvaluation clusterEval = new ClusterEvaluation();
				clusterEval.setClusterer(clusterer);
				clusterEval.evaluateClusterer(insts);

				return predictAccuracy(insts, clusterEval.getClassesToClusters(), clusterEval.getClusterAssignments());
			});
			futures.add(result);
		}

		double combined = evaluateFutures(futures);
		finished = true;
		return combined;
	} finally {
		// Without a shutdown the pool's worker threads would outlive this call.
		if (finished) {
			execService.shutdown();
		} else {
			execService.shutdownNow();
		}
	}
}

Source File: ClusterEvaluationTest.java From AILibs with GNU Affero General Public License v3.0

4 votes

/**
 * Downloads the segment data set from OpenML, clusters a stratified portion of
 * it with a {@link FilteredClusterer} wrapping {@link SimpleKMeans}, evaluates
 * the clustering and asserts that the resulting accuracy is positive.
 * <p>
 * The reader used to parse the downloaded file is closed as soon as the data
 * set has been loaded.
 *
 * @throws Exception if downloading, parsing, clustering or evaluating fails
 */
@Test
public void evaluateTest() throws Exception {
	logger.info("Starting cluster evaluation test...");

	/* load dataset and create a train-test-split */
	OpenmlConnector connector = new OpenmlConnector();
	DataSetDescription ds = connector.dataGet(DataSetUtils.SEGMENT_ID);
	File file = connector.datasetGet(ds);
	Instances data;
	// try-with-resources so the file handle is released after parsing
	try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
		data = new Instances(reader);
	}
	data.setClassIndex(data.numAttributes() - 1);
	List<Instances> split = StratifyUtil.stratifiedSplit(data, 42, .25);

	Instances insts = split.get(0);

	long timeStart = System.currentTimeMillis();

	FilteredClusterer clusterer = new FilteredClusterer();

	/* build the clusterer on a copy of the data without the class attribute */
	Remove filter = new Remove();
	filter.setAttributeIndices("" + (insts.classIndex() + 1));
	filter.setInputFormat(insts);
	Instances removedClassInstances = Filter.useFilter(insts, filter);

	/* one execution slot per available processor, one cluster per class value */
	((SimpleKMeans) clusterer.getClusterer()).setOptions(new String[] { "-num-slots", String.valueOf(Runtime.getRuntime().availableProcessors()), "-N", String.valueOf(insts.classAttribute().numValues()) });
	SimpleKMeans kMeans = (SimpleKMeans) clusterer.getClusterer();
	kMeans.setDistanceFunction(new EuclideanDistance());

	clusterer.buildClusterer(removedClassInstances);

	long timeStartEval = System.currentTimeMillis();

	ClusterEvaluation clusterEval = new ClusterEvaluation();
	clusterEval.setClusterer(clusterer);
	clusterEval.evaluateClusterer(insts);

	long timeTaken = System.currentTimeMillis() - timeStart;
	long timeTakenEval = System.currentTimeMillis() - timeStartEval;

	logger.debug("ClusterEvaluator results: " + clusterEval.clusterResultsToString());

	double acc = EvaluationUtils.predictAccuracy(insts, clusterEval.getClassesToClusters(), clusterEval.getClusterAssignments());
	Assert.assertTrue(acc > 0);
	logger.info("Acc: " + acc);
	logger.debug("Clustering took " + (timeTaken / 1000) + " s.");
	logger.debug("Clustering eval took " + (timeTakenEval / 1000) + " s.");
}

Source File: LearnShapeletsLearningAlgorithm.java From AILibs with GNU Affero General Public License v3.0

4 votes

/**
 * Initializes the tensor <code>S</code> storing the shapelets for each scale.
 * The initialization is done by deriving initial shapelets from all normalized
 * segments: for every scale <code>r</code> the segments of length
 * <code>(r + 1) * minShapeletLength</code> are extracted from the training
 * matrix, z-normalized and clustered with k-Means; the cluster centroids
 * become the initial shapelets of that scale.
 * <p>
 * If k-Means fails for a scale, a warning is logged and a zero matrix with
 * the same shapelet length as the segments of that scale is used instead.
 *
 * @param trainingMatrix
 *            The training matrix used for the initialization of <code>S</code>.
 * @return Return the initialized tensor storing an initial guess for the
 *         shapelets based on the clustering
 * @throws TrainingException
 *             if the number of segments for a scale is lower than 1
 */
public double[][][] initializeS(final double[][] trainingMatrix) throws TrainingException {
	LOGGER.debug("Initializing S...");

	/* read config locally */
	final int scaleR = this.getConfig().scaleR();
	final long seed = this.getConfig().seed();
	final int minShapeLength = this.getConfig().minShapeletLength();

	final double[][][] result = new double[scaleR][][];

	for (int r = 0; r < scaleR; r++) {
		final int numberOfSegments = getNumberOfSegments(this.q, minShapeLength, r);
		if (numberOfSegments < 1) {
			throw new TrainingException("The number of segments is lower than 1. Can not train the LearnShapelets model.");
		}

		// Shapelet (and segment) length of the current scale
		final int shapeletLength = (r + 1) * minShapeLength;

		final double[][] tmpSegments = new double[trainingMatrix.length * numberOfSegments][shapeletLength];

		// Prepare training data for finding the centroids
		for (int i = 0; i < trainingMatrix.length; i++) {
			for (int j = 0; j < numberOfSegments; j++) {
				for (int l = 0; l < shapeletLength; l++) {
					tmpSegments[i * numberOfSegments + j][l] = trainingMatrix[i][j + l];
				}
				tmpSegments[i * numberOfSegments + j] = TimeSeriesUtil.zNormalize(tmpSegments[i * numberOfSegments + j], USE_BIAS_CORRECTION);
			}
		}

		// Transform instances
		Instances wekaInstances = WekaTimeseriesUtil.matrixToWekaInstances(tmpSegments);

		// Cluster using k-Means
		SimpleKMeans kMeans = new SimpleKMeans();
		try {
			kMeans.setNumClusters(this.getConfig().numShapelets());
			kMeans.setSeed((int) seed);
			kMeans.setMaxIterations(100);
			kMeans.buildClusterer(wekaInstances);
		} catch (Exception e) {
			LOGGER.warn("Could not initialize matrix S using kMeans clustering for r={} due to the following problem: {}. " + "Using zero matrix instead (possibly leading to a poor training performance).", r, e.getMessage());
			// The fallback must have the shapelet length of this scale; using
			// r * minShapeLength would yield zero-length shapelets for r = 0.
			result[r] = new double[this.getConfig().numShapelets()][shapeletLength];
			continue;
		}
		Instances clusterCentroids = kMeans.getClusterCentroids();

		double[][] tmpResult = new double[clusterCentroids.numInstances()][clusterCentroids.numAttributes()];
		for (int j = 0; j < tmpResult.length; j++) {
			double[] instValues = clusterCentroids.get(j).toDoubleArray();
			tmpResult[j] = Arrays.copyOf(instValues, tmpResult[j].length);
		}
		result[r] = tmpResult;
	}

	LOGGER.debug("Initialized S.");

	return result;
}

Source File: ClassificationViaClustering.java From tsml with GNU General Public License v3.0

2 votes

/**
 * Returns the class name of the default clusterer, {@link SimpleKMeans}.
 * 
 * @return		the name reported by {@code SimpleKMeans.class.getName()}
 */
protected String defaultClustererString() {
  final String defaultClassName = SimpleKMeans.class.getName();
  return defaultClassName;
}

weka.clusterers.SimpleKMeans Java Examples