weka.clusterers.SimpleKMeans Java Examples
The following examples show how to use
weka.clusterers.SimpleKMeans.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: EvaluationUtils.java From AILibs with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Clusters the given instances with the default clusterer of a {@link FilteredClusterer}
 * (cast to {@link SimpleKMeans}) and scores the result against the class labels.
 *
 * <p>The class attribute is removed before the clusterer is built, the number of clusters is
 * set to the number of values of the class attribute, and the evaluation is run on the
 * original, still labelled instances.
 *
 * @param insts the labelled instances to cluster
 * @return the value {@code predictAccuracy} derives from the classes-to-clusters mapping and
 *         the cluster assignments of the evaluation
 * @throws Exception if filtering, clustering or evaluation fails
 */
private static double performClustering(final Instances insts) throws Exception {
    logger.debug("Starting cluster evaluation...");

    // The index string handed to the Remove filter is the class index plus one.
    final Remove classRemover = new Remove();
    classRemover.setAttributeIndices(String.valueOf(insts.classIndex() + 1));
    classRemover.setInputFormat(insts);
    final Instances unlabelled = Filter.useFilter(insts, classRemover);

    final FilteredClusterer clusterer = new FilteredClusterer();
    final SimpleKMeans kMeans = (SimpleKMeans) clusterer.getClusterer();
    final String numClusters = String.valueOf(insts.classAttribute().numValues());
    kMeans.setOptions(new String[] {"-N", numClusters});
    clusterer.buildClusterer(unlabelled);

    final ClusterEvaluation evaluation = new ClusterEvaluation();
    evaluation.setClusterer(clusterer);
    evaluation.evaluateClusterer(insts);

    return predictAccuracy(insts, evaluation.getClassesToClusters(), evaluation.getClusterAssignments());
}
Example #2
Source File: WekaClusterers.java From apogen with Apache License 2.0 | 6 votes |
/**
 * Runs WEKA's {@link SimpleKMeans} through {@code ClusterEvaluation.evaluateClusterer} on an
 * ARFF file and parses the textual report.
 *
 * <p>The clusterer is invoked with the options
 * {@code -t <filename> -init <init> -N <numClusters> -I 100 -c first}.
 *
 * @param filename    path of the ARFF file, passed as the {@code -t} option
 * @param numClusters number of clusters to search for, passed as the {@code -N} option
 * @param init        initialisation method selector, passed as the {@code -init} option
 * @return the result of {@code parseKMeansOutput} applied to the evaluation report
 * @throws Exception if the evaluation fails
 */
public static LinkedHashMap<Integer, LinkedList<String>> runKmeans(String filename, String numClusters, String init)
        throws Exception {
    final String[] options = {
        "-t", filename,
        "-init", init,
        "-N", numClusters,
        "-I", "100",
        "-c", "first"
    };

    final String evaluationReport = ClusterEvaluation.evaluateClusterer(new SimpleKMeans(), options);
    return parseKMeansOutput(evaluationReport, numClusters);
}
Example #3
Source File: KMeans.java From Java-Data-Analysis with MIT License | 6 votes |
/**
 * Loads the data set, clusters it into {@code K} clusters with {@link SimpleKMeans} and prints,
 * for every instance, its first two attribute values followed by the assigned cluster.
 *
 * @param args command line arguments (unused)
 */
public static void main(String[] args) {
    final Instances dataset = load(DATA);
    final SimpleKMeans clusterer = new SimpleKMeans();
    System.out.printf("%d clusters:%n", K);

    try {
        clusterer.setNumClusters(K);
        clusterer.buildClusterer(dataset);

        final int total = dataset.numInstances();
        for (int idx = 0; idx < total; idx++) {
            final Instance current = dataset.instance(idx);
            final double first = current.value(0);
            final double second = current.value(1);
            final int cluster = clusterer.clusterInstance(current);
            System.out.printf("(%.0f,%.0f): %s%n", first, second, cluster);
        }
    } catch (Exception e) {
        // Any failure ends the run; the exception is written to standard error.
        System.err.println(e);
    }
}
Example #4
Source File: WekaClusterTest.java From Java-Data-Science-Cookbook with MIT License | 6 votes |
/**
 * Clusters the {@code cpu} data set into 10 clusters with {@link SimpleKMeans} (seed 10) and
 * prints the cluster assigned to every instance.
 *
 * <p>Instance order is preserved so that the assignment array lines up with the order of the
 * instances. Failures are reported on standard error instead of being silently discarded;
 * the method still returns normally in that case.
 */
public void clusterData() {
    kmeans = new SimpleKMeans();
    kmeans.setSeed(10);
    try {
        // Must be enabled before building, otherwise getAssignments() has nothing to report.
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(10);
        kmeans.buildClusterer(cpu);

        int[] assignments = kmeans.getAssignments();
        for (int i = 0; i < assignments.length; i++) {
            System.out.printf("Instance %d -> Cluster %d\n", i, assignments[i]);
        }
    } catch (Exception e1) {
        // Previously an empty catch block: a failed clustering produced no output at all.
        System.err.println("Clustering failed: " + e1);
    }
}
Example #5
Source File: ClusterEval.java From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License | 6 votes |
/** * @param args the command line arguments */ public static void main(String[] args) { // TODO code application logic here try{ DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/ClusterEval/weather.arff"); Instances dt = src.getDataSet(); SimpleKMeans model = new SimpleKMeans(); model.setNumClusters(3); model.buildClusterer(dt); System.out.println(model); ClusterEvaluation eval = new ClusterEvaluation(); DataSource src1 = new DataSource("/Users/admin/Documents/NetBeansProjects/ClusterEval/weather.test.arff"); Instances tdt = src1.getDataSet(); eval.setClusterer(model); eval.evaluateClusterer(tdt); System.out.println(eval.clusterResultsToString()); System.out.println("# of clusters: " + eval.getNumClusters()); } catch(Exception e) { System.out.println(e.getMessage()); } }
Example #6
Source File: Clustering.java From Hands-On-Artificial-Intelligence-with-Java-for-Beginners with MIT License | 6 votes |
/** * @param args the command line arguments */ public static void main(String[] args) { // TODO code application logic here try{ DataSource src = new DataSource("/Users/admin/Documents/NetBeansProjects/Clustering/weather.arff"); Instances dt = src.getDataSet(); SimpleKMeans model = new SimpleKMeans(); model.setNumClusters(3); model.buildClusterer(dt); System.out.println(model); } catch(Exception e){ System.out.println(e.getMessage()); } }
Example #7
Source File: SimpleKMeansClusterer.java From mzmine2 with GNU General Public License v2.0 | 5 votes |
/**
 * Clusters the data set with {@link SimpleKMeans}, using the number of groups configured in
 * the parameter set, and collects the cluster index of every instance.
 *
 * @param dataset    the instances to cluster
 * @param parameters the parameter set providing the number of groups and the visualization
 *                   setting
 * @return the clustering result, or {@code null} if clustering failed (the failure is logged
 *         at {@code SEVERE} level)
 */
@Override
public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) {
    List<Integer> clusters = new ArrayList<Integer>();
    SimpleKMeans clusterer = new SimpleKMeans();

    int numberOfGroups =
            parameters.getParameter(SimpleKMeansClustererParameters.numberOfGroups).getValue();
    String[] options = {"-N", String.valueOf(numberOfGroups)};

    try {
        clusterer.setOptions(options);
        clusterer.buildClusterer(dataset);

        for (Enumeration<?> instances = dataset.enumerateInstances(); instances.hasMoreElements();) {
            Instance current = (Instance) instances.nextElement();
            clusters.add(clusterer.clusterInstance(current));
        }

        // NOTE(review): the visualization setting is looked up through EMClustererParameters
        // rather than SimpleKMeansClustererParameters - confirm this is intentional.
        return new ClusteringResult(clusters, null, clusterer.numberOfClusters(),
                parameters.getParameter(EMClustererParameters.visualization).getValue());
    } catch (Exception ex) {
        logger.log(Level.SEVERE, null, ex);
        return null;
    }
}
Example #8
Source File: SimpleKMeansClusterer.java From mzmine3 with GNU General Public License v2.0 | 5 votes |
/**
 * Runs {@link SimpleKMeans} on the data set with the configured number of groups and records
 * which cluster each instance falls into.
 *
 * @param dataset    the instances to cluster
 * @param parameters the parameter set supplying the number of groups and the visualization
 *                   setting
 * @return the clustering result; {@code null} when an exception occurred, in which case the
 *         exception is logged at {@code SEVERE} level
 */
@Override
public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) {
    List<Integer> clusters = new ArrayList<Integer>();
    SimpleKMeans clusterer = new SimpleKMeans();

    int numberOfGroups =
            parameters.getParameter(SimpleKMeansClustererParameters.numberOfGroups).getValue();
    String[] options = new String[] {"-N", String.valueOf(numberOfGroups)};

    try {
        clusterer.setOptions(options);
        clusterer.buildClusterer(dataset);

        Enumeration<?> remaining = dataset.enumerateInstances();
        while (remaining.hasMoreElements()) {
            Instance next = (Instance) remaining.nextElement();
            int clusterIndex = clusterer.clusterInstance(next);
            clusters.add(clusterIndex);
        }

        // NOTE(review): the visualization setting is read via EMClustererParameters, not
        // SimpleKMeansClustererParameters - verify that this is the intended parameter.
        return new ClusteringResult(clusters, null, clusterer.numberOfClusters(),
                parameters.getParameter(EMClustererParameters.visualization).getValue());
    } catch (Exception ex) {
        logger.log(Level.SEVERE, null, ex);
        return null;
    }
}
Example #9
Source File: Clustering.java From java-ml-projects with Apache License 2.0 | 5 votes |
private List<Series<Number, Number>> buildClusteredSeries() throws Exception { List<XYChart.Series<Number, Number>> clusteredSeries = new ArrayList<>(); // to build the cluster we remove the class information Remove remove = new Remove(); remove.setAttributeIndices("3"); remove.setInputFormat(data); Instances dataToBeClustered = Filter.useFilter(data, remove); SimpleKMeans kmeans = new SimpleKMeans(); kmeans.setSeed(10); kmeans.setPreserveInstancesOrder(true); kmeans.setNumClusters(3); kmeans.buildClusterer(dataToBeClustered); IntStream.range(0, 3).mapToObj(i -> { Series<Number, Number> newSeries = new XYChart.Series<>(); newSeries.setName(String.valueOf(i)); return newSeries; }).forEach(clusteredSeries::add); int[] assignments = kmeans.getAssignments(); for (int i = 0; i < assignments.length; i++) { int clusterNum = assignments[i]; clusteredSeries.get(clusterNum).getData().add(instancetoChartData(data.get(i))); } return clusteredSeries; }
Example #10
Source File: RBFNetwork.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Returns the default capabilities of the classifier.
 *
 * <p>The capabilities of {@link Logistic} and {@link LinearRegression} are combined with
 * "or" and then restricted with "and" to those of {@link SimpleKMeans}. The class
 * capabilities captured before that restriction are finally added back with "or".
 *
 * @return the capabilities of this classifier
 * @see Logistic
 * @see LinearRegression
 */
public Capabilities getCapabilities() {
    final Capabilities combined = new Logistic().getCapabilities();
    final Capabilities linearRegressionCaps = new LinearRegression().getCapabilities();
    combined.or(linearRegressionCaps);

    // Taken before the "and" below so the class capabilities can be restored afterwards.
    final Capabilities classCaps = combined.getClassCapabilities();

    final Capabilities kMeansCaps = new SimpleKMeans().getCapabilities();
    combined.and(kMeansCaps);
    combined.or(classCaps);

    return combined;
}
Example #11
Source File: LearnShapelets.java From tsml with GNU General Public License v3.0 | 5 votes |
public void initializeShapeletsKMeans() throws Exception { //for each scale r, i.e. for each set of K shapelets at // length L_min*(r+1) for (int r=0; r<R; r++) { double[][] segments_r = new double[train.length * numberOfSegments[r]][L[r]]; //construct the segments from the train set. for (int i = 0; i < train.length; i++) for (int j = 0; j < numberOfSegments[r]; j++) for (int l = 0; l < L[r]; l++) segments_r[i * numberOfSegments[r] + j][l] = train[i][j + l]; // normalize segments for (int i = 0; i < train.length; i++) for (int j = 0; j < numberOfSegments[r]; j++) segments_r[i * numberOfSegments[r] + j] = StatisticalUtilities.normalize(segments_r[i * numberOfSegments[r] + j]); Instances ins = InstanceTools.toWekaInstances(segments_r); SimpleKMeans skm = new SimpleKMeans(); skm.setNumClusters(K); skm.setMaxIterations(100); //skm.setInitializeUsingKMeansPlusPlusMethod(true); skm.setSeed((int) (rand.nextDouble() * 1000) ); skm.buildClusterer( ins ); Instances centroidsWeka = skm.getClusterCentroids(); shapelets[r] = InstanceTools.fromWekaInstancesArray(centroidsWeka, false); // initialize the gradient history of shapelets if (shapelets[r] == null) print("P not set"); } }
Example #12
Source File: LearnShapelets.java From tsml with GNU General Public License v3.0 | 5 votes |
public void initializeShapeletsFromFile() throws Exception { //for each scale r, i.e. for each set of K shapelets at // length L_min*(r+1) for (int r=0; r<R; r++) { double[][] segments_r = new double[train.length * numberOfSegments[r]][L[r]]; //construct the segments from the train set. for (int i = 0; i < train.length; i++) for (int j = 0; j < numberOfSegments[r]; j++) for (int l = 0; l < L[r]; l++) segments_r[i * numberOfSegments[r] + j][l] = train[i][j + l]; // normalize segments for (int i = 0; i < train.length; i++) for (int j = 0; j < numberOfSegments[r]; j++) segments_r[i * numberOfSegments[r] + j] = StatisticalUtilities.normalize(segments_r[i * numberOfSegments[r] + j]); Instances ins = InstanceTools.toWekaInstances(segments_r); SimpleKMeans skm = new SimpleKMeans(); skm.setNumClusters(K); skm.setMaxIterations(100); //skm.setInitializeUsingKMeansPlusPlusMethod(true); skm.setSeed((int) (rand.nextDouble() * 1000) ); skm.buildClusterer( ins ); Instances centroidsWeka = skm.getClusterCentroids(); shapelets[r] = InstanceTools.fromWekaInstancesArray(centroidsWeka, false); // initialize the gradient history of shapelets if (shapelets[r] == null) print("P not set"); } }
Example #13
Source File: BoTSWEnsemble.java From tsml with GNU General Public License v3.0 | 5 votes |
/**
 * Sums the squared differences between every instance and the centroid of the cluster it was
 * assigned to, over all attributes of the instance.
 *
 * @param kmeans the built clusterer providing centroids and per-instance assignments
 * @param input  the instances; position {@code i} is matched with assignment {@code i}
 * @return the total squared distance of all instances to their assigned centroids
 * @throws Exception if the assignments cannot be obtained from the clusterer
 */
public static double compactnessOfClustering(SimpleKMeans kmeans, Instances input) throws Exception {
    final Instances centroids = kmeans.getClusterCentroids();
    final int[] assignments = kmeans.getAssignments();

    double totalSqDist = 0.0;
    for (int idx = 0; idx < assignments.length; ++idx) {
        final Instance sample = input.get(idx);
        final Instance centroid = centroids.get(assignments[idx]);

        for (int attr = 0; attr < sample.numAttributes(); ++attr) {
            final double diff = sample.value(attr) - centroid.value(attr);
            totalSqDist += diff * diff;
        }
    }
    return totalSqDist;
}
Example #14
Source File: ClassificationViaClustering.java From tsml with GNU General Public License v3.0 | 4 votes |
/**
 * Default constructor; installs a fresh {@link SimpleKMeans} as the clusterer.
 */
public ClassificationViaClustering() {
    // The no-argument superclass constructor runs implicitly before this assignment.
    this.m_Clusterer = new SimpleKMeans();
}
Example #15
Source File: Ex06_Clusterers.java From tsml with GNU General Public License v3.0 | 4 votes |
public static void main(String[] args) throws Exception { // We'll use this data throughout, see Ex01_Datahandling int seed = 0; Instances[] trainTest = DatasetLoading.sampleItalyPowerDemand(seed); Instances inst = trainTest[0]; Instances inst2 = trainTest[1]; inst.addAll(inst2); // Create an object from one of the time series or vector clusters implemented. // Call the buildClusterer method with your data. Most clusters will need the number of clusters k to be set. UnsupervisedShapelets us = new UnsupervisedShapelets(); us.setNumberOfClusters(inst.numClasses()); us.buildClusterer(inst); // You can find the cluster assignments for each data instance by calling getAssignments(). // The index of assignments array will match the Instances object, i.e. index 0 with value 1 == first instance // of data assigned to cluster 1. int[] tsAssignments = us.getAssignments(); System.out.println("UnsupervisedShapelets cluster assignments:"); System.out.println(Arrays.toString(tsAssignments)); // A popular metric for cluster evaluation is the Rand index. A utility method is available for calculating // this. double tsRandIndex = ClusteringUtilities.randIndex(tsAssignments, inst); System.out.println("UnsupervisedShapelets Rand index:"); System.out.println(tsRandIndex); // weka also implements a range of clustering algorithms. Any class value must be removed prior to use. Instances copy = new Instances(inst); deleteClassAttribute(copy); SimpleKMeans km = new SimpleKMeans(); km.setNumClusters(inst.numClasses()); km.setPreserveInstancesOrder(true); km.buildClusterer(copy); int[] wekaAssignments = km.getAssignments(); System.out.println("SimpleKMeans cluster assignments:"); System.out.println(Arrays.toString(wekaAssignments)); double wekaRandIndex = ClusteringUtilities.randIndex(wekaAssignments, inst); System.out.println("SimpleKMeans Rand index:"); System.out.println(wekaRandIndex); }
Example #16
Source File: EvaluationUtils.java From AILibs with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Evaluates k-means clustering on a stratified split of the instances, once on the plain
 * data and once per kernel returned by {@code getKernelsWithInstances}, running the
 * evaluations on a fixed-size thread pool.
 *
 * <p>For each kernel the class attribute is removed, a {@code Nystroem} filter with that
 * kernel is installed on a {@link FilteredClusterer}, and its {@link SimpleKMeans} is
 * configured with as many clusters as the class attribute has values. The per-task results
 * are combined by {@code evaluateFutures}.
 *
 * <p>The thread pool is shut down when the method exits, whether normally or through an
 * exception, so its worker threads do not outlive the call. Tasks that were already
 * submitted are not cancelled by this shutdown.
 *
 * @param instances  the labelled instances to evaluate
 * @param numThreads the size of the thread pool
 * @return the value computed by {@code evaluateFutures} from all submitted evaluations
 * @throws InterruptedException if the calling thread is interrupted while submitting tasks
 * @throws Exception            if splitting the data or evaluating the futures fails
 */
public static double performKernelClustering(final Instances instances, final int numThreads) throws Exception {
    logger.debug("Starting kernelized cluster evaluation...");

    List<Instances> split = WekaUtil.getStratifiedSplit(instances, 42, kernelSplitPortion);

    ExecutorService execService = Executors.newFixedThreadPool(numThreads);
    try {
        List<Future<Double>> futures = new ArrayList<>();

        // Baseline: clustering without any kernel, on a copy of the first split.
        Future<Double> result0 = execService.submit(() -> performClustering(new Instances(split.get(0))));
        futures.add(result0);

        for (Map.Entry<Kernel, Instances> entry : getKernelsWithInstances(split.get(0))) {
            if (Thread.currentThread().isInterrupted()) {
                throw new InterruptedException(EVALUATION_STOPPED_MESSAGE);
            }

            Future<Double> result = execService.submit(() -> {
                Kernel kernel = entry.getKey();
                Instances insts = entry.getValue();

                FilteredClusterer clusterer = new FilteredClusterer();

                // The index string handed to the Remove filter is the class index plus one.
                Remove filter = new Remove();
                filter.setAttributeIndices("" + (insts.classIndex() + 1));
                filter.setInputFormat(insts);
                Instances removedClassInstances = Filter.useFilter(insts, filter);

                Nystroem kernelFilter = new Nystroem();
                kernelFilter.setKernel(kernel);
                clusterer.setFilter(kernelFilter);

                ((SimpleKMeans) clusterer.getClusterer())
                        .setOptions(new String[] {"-N", String.valueOf(insts.classAttribute().numValues())});
                clusterer.buildClusterer(removedClassInstances);

                ClusterEvaluation clusterEval = new ClusterEvaluation();
                clusterEval.setClusterer(clusterer);
                clusterEval.evaluateClusterer(insts);

                return predictAccuracy(insts, clusterEval.getClassesToClusters(),
                        clusterEval.getClusterAssignments());
            });
            futures.add(result);
        }

        return evaluateFutures(futures);
    } finally {
        // Without this the pool's worker threads are never released.
        execService.shutdown();
    }
}
Example #17
Source File: ClusterEvaluationTest.java From AILibs with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Downloads the segment data set from OpenML, clusters a stratified split of it with a
 * {@link FilteredClusterer} wrapping {@link SimpleKMeans}, and asserts that the accuracy
 * derived from the cluster evaluation is greater than zero.
 *
 * <p>The reader used to parse the data set file is closed as soon as the instances have
 * been loaded.
 *
 * @throws Exception if downloading, parsing, clustering or evaluation fails
 */
@Test
public void evaluateTest() throws Exception {
    logger.info("Starting cluster evaluation test...");

    /* load dataset and create a train-test-split */
    OpenmlConnector connector = new OpenmlConnector();
    DataSetDescription ds = connector.dataGet(DataSetUtils.SEGMENT_ID);
    File file = connector.datasetGet(ds);

    Instances data;
    // try-with-resources: the file handle was previously never released.
    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
        data = new Instances(reader);
    }
    data.setClassIndex(data.numAttributes() - 1);

    List<Instances> split = StratifyUtil.stratifiedSplit(data, 42, .25);
    Instances insts = split.get(0);

    long timeStart = System.currentTimeMillis();

    FilteredClusterer clusterer = new FilteredClusterer();

    // The index string handed to the Remove filter is the class index plus one.
    Remove filter = new Remove();
    filter.setAttributeIndices("" + (insts.classIndex() + 1));
    filter.setInputFormat(insts);
    Instances removedClassInstances = Filter.useFilter(insts, filter);

    ((SimpleKMeans) clusterer.getClusterer()).setOptions(new String[] {
        "-num-slots", String.valueOf(Runtime.getRuntime().availableProcessors()),
        "-N", String.valueOf(insts.classAttribute().numValues())
    });
    SimpleKMeans kMeans = (SimpleKMeans) clusterer.getClusterer();
    kMeans.setDistanceFunction(new EuclideanDistance());

    clusterer.buildClusterer(removedClassInstances);

    long timeStartEval = System.currentTimeMillis();

    ClusterEvaluation clusterEval = new ClusterEvaluation();
    clusterEval.setClusterer(clusterer);
    clusterEval.evaluateClusterer(insts);

    // timeTaken covers clustering plus evaluation, timeTakenEval the evaluation only.
    long timeTaken = System.currentTimeMillis() - timeStart;
    long timeTakenEval = System.currentTimeMillis() - timeStartEval;

    logger.debug("ClusterEvaluator results: " + clusterEval.clusterResultsToString());

    double acc = EvaluationUtils.predictAccuracy(insts, clusterEval.getClassesToClusters(),
            clusterEval.getClusterAssignments());
    Assert.assertTrue(acc > 0);

    logger.info("Acc: " + acc);
    logger.debug("Clustering took " + (timeTaken / 1000) + " s.");
    logger.debug("Clustering eval took " + (timeTakenEval / 1000) + " s.");
}
Example #18
Source File: LearnShapeletsLearningAlgorithm.java From AILibs with GNU Affero General Public License v3.0 | 4 votes |
/** * Initializes the tensor <code>S</code> storing the shapelets for each scale. * The initialization is done by deriving inital shapelets from all normalized * segments. * * @param trainingMatrix * The training matrix used for the initialization of <code>S</code>. * @return Return the initialized tensor storing an initial guess for the * shapelets based on the clustering * @throws TrainingException */ public double[][][] initializeS(final double[][] trainingMatrix) throws TrainingException { LOGGER.debug("Initializing S..."); /* read config locally */ final int scaleR = this.getConfig().scaleR(); final long seed = this.getConfig().seed(); final int minShapeLength = this.getConfig().minShapeletLength(); final double[][][] result = new double[scaleR][][]; for (int r = 0; r < scaleR; r++) { final int numberOfSegments = getNumberOfSegments(this.q, minShapeLength, r); if (numberOfSegments < 1) { throw new TrainingException("The number of segments is lower than 1. Can not train the LearnShapelets model."); } final int L = (r + 1) * minShapeLength; final double[][] tmpSegments = new double[trainingMatrix.length * numberOfSegments][L]; // Prepare training data for finding the centroids for (int i = 0; i < trainingMatrix.length; i++) { for (int j = 0; j < numberOfSegments; j++) { for (int l = 0; l < L; l++) { tmpSegments[i * numberOfSegments + j][l] = trainingMatrix[i][j + l]; } tmpSegments[i * numberOfSegments + j] = TimeSeriesUtil.zNormalize(tmpSegments[i * numberOfSegments + j], USE_BIAS_CORRECTION); } } // Transform instances Instances wekaInstances = WekaTimeseriesUtil.matrixToWekaInstances(tmpSegments); // Cluster using k-Means SimpleKMeans kMeans = new SimpleKMeans(); try { kMeans.setNumClusters(this.getConfig().numShapelets()); kMeans.setSeed((int) seed); kMeans.setMaxIterations(100); kMeans.buildClusterer(wekaInstances); } catch (Exception e) { LOGGER.warn("Could not initialize matrix S using kMeans clustering for r={} due to the following problem: {}. 
" + "Using zero matrix instead (possibly leading to a poor training performance).", r, e.getMessage()); result[r] = new double[this.getConfig().numShapelets()][r * minShapeLength]; continue; } Instances clusterCentroids = kMeans.getClusterCentroids(); double[][] tmpResult = new double[clusterCentroids.numInstances()][clusterCentroids.numAttributes()]; for (int j = 0; j < tmpResult.length; j++) { double[] instValues = clusterCentroids.get(j).toDoubleArray(); tmpResult[j] = Arrays.copyOf(instValues, tmpResult[j].length); } result[r] = tmpResult; } LOGGER.debug("Initialized S."); return result; }
Example #19
Source File: ClassificationViaClustering.java From tsml with GNU General Public License v3.0 | 2 votes |
/**
 * Names the clusterer that is used by default.
 *
 * @return the fully qualified class name of {@link SimpleKMeans}
 */
protected String defaultClustererString() {
    final Class<?> defaultClustererType = SimpleKMeans.class;
    return defaultClustererType.getName();
}