Java Code Examples for weka.clusterers.HierarchicalClusterer#buildClusterer()
The following examples show how to use
weka.clusterers.HierarchicalClusterer#buildClusterer().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: WekaHierarchicalClustering.java From Java-Data-Analysis with MIT License | 6 votes |
public static void main(String[] args) { Instances dataset = load(DATA); HierarchicalClusterer hc = new HierarchicalClusterer(); hc.setLinkType(new SelectedTag(4, TAGS_LINK_TYPE)); // CENTROID hc.setNumClusters(3); try { hc.buildClusterer(dataset); for (Instance instance : dataset) { System.out.printf("(%.0f,%.0f): %s%n", instance.value(0), instance.value(1), hc.clusterInstance(instance)); } } catch (Exception e) { System.err.println(e); } }
Example 2
Source File: WekaHierarchicalClustering2.java From Java-Data-Analysis with MIT License | 6 votes |
public static void main(String[] args) { Instances dataset = load(DATA); HierarchicalClusterer hc = new HierarchicalClusterer(); hc.setLinkType(new SelectedTag(4, TAGS_LINK_TYPE)); // CENTROID hc.setNumClusters(1); try { hc.buildClusterer(dataset); for (Instance instance : dataset) { System.out.printf("(%.0f,%.0f): %s%n", instance.value(0), instance.value(1), hc.clusterInstance(instance)); } displayDendrogram(hc.graph()); } catch (Exception e) { System.err.println(e); } }
Example 3
Source File: WekaClusterers.java From apogen with Apache License 2.0 | 5 votes |
/** * Run WEKA Hierarchical clustering on the parameter ARFF file searching for * numClusters clusters * * @param filename * @param numClusters * @param linkage * @return * @throws Exception */ public static LinkedHashMap<Integer, LinkedList<String>> runHierarchical(String filename, String numClusters, String linkage) throws Exception { String[] options = new String[6]; options[0] = "-t"; options[1] = filename; options[2] = "-N"; options[3] = numClusters; options[4] = "-L"; options[5] = linkage; HierarchicalClusterer c = new HierarchicalClusterer(); c.setNumClusters(Integer.parseInt(numClusters)); c.setDebug(false); c.setPrintNewick(true); BufferedReader reader = new BufferedReader(new FileReader(filename)); ArffReader arff = new ArffReader(reader); Instances data = arff.getData(); data.setClassIndex(0); c.buildClusterer(data); LinkedHashMap<Integer, LinkedList<String>> output = new LinkedHashMap<Integer, LinkedList<String>>(); // initialize clusters map for (int i = 0; i < Integer.parseInt(numClusters); i++) { output.put(new Integer(i), new LinkedList<String>()); } for (Instance instance : data) { // System.out.println(instance.stringValue(0) + "\t" + // c.clusterInstance(instance)); output.get(c.clusterInstance(instance)).add(instance.stringValue(0)); } return output; }
Example 4
Source File: APICallClustererMAPO.java From api-mining with GNU General Public License v3.0 | 5 votes |
/** * Cluster API call sequences as described in MAPO * * @return Multimap of cluster IDs to API call sequences */ public static Multimap<Integer, String> clusterAPICallSeqs(final String arffFile, final double threshold) throws Exception { // Clusterer settings final HierarchicalClusterer clusterer = new HierarchicalClusterer(); clusterer.setOptions(new String[] { "-L", "COMPLETE" }); // link type clusterer.setDebug(true); clusterer.setNumClusters(1); clusterer.setDistanceFunction(MAPOSimilarity); clusterer.setDistanceIsBranchLength(false); // Read in API call seqs final DataSource source = new DataSource(arffFile); final Instances data = source.getDataSet(); // Cluster API call seqs clusterer.buildClusterer(data); // Assign seqs to clusters based on dendrogram final String newick = clusterer.graph().replace("Newick:", "") + ":0"; if (newick.equals("(no,clusters):0")) // Handle no clusters return HashMultimap.create(); final Multimap<Integer, String> clusters = NewickTreeParser.getClusters(newick, threshold); System.out.println("No. clusters: " + clusters.keySet().size()); final Multimap<Integer, String> assignments = HashMultimap.create(); for (int i = 0; i < data.numInstances(); i++) { for (final int id : clusters.keySet()) { if (clusters.get(id).contains(data.instance(i).stringValue(0))) assignments.put(id, data.instance(i).stringValue(1)); } } // showDendrogram(clusterer); return assignments; }
Example 5
Source File: APICallClustererUPMiner.java From api-mining with GNU General Public License v3.0 | 5 votes |
/** * Cluster API call sequences as described in UPMiner * * @return Multimap of cluster IDs to API call sequences */ public static Multimap<Integer, String> clusterAPICallSeqs(final String arffFile, final double threshold) throws Exception { // Clusterer settings final HierarchicalClusterer clusterer = new HierarchicalClusterer(); clusterer.setOptions(new String[] { "-L", "COMPLETE" }); // link type clusterer.setDebug(true); clusterer.setNumClusters(1); clusterer.setDistanceFunction(SeqSimilarity); clusterer.setDistanceIsBranchLength(false); // Read in API call seqs final DataSource source = new DataSource(arffFile); final Instances data = source.getDataSet(); // Cluster API call seqs clusterer.buildClusterer(data); // Assign seqs to clusters based on dendrogram final String newick = clusterer.graph().replace("Newick:", "") + ":0"; if (newick.equals("(no,clusters):0")) // Handle no clusters return HashMultimap.create(); final Multimap<Integer, String> clusters = NewickTreeParser.getClusters(newick, threshold); System.out.println("No. clusters: " + clusters.keySet().size()); final Multimap<Integer, String> assignments = HashMultimap.create(); for (int i = 0; i < data.numInstances(); i++) { for (final int id : clusters.keySet()) { if (clusters.get(id).contains(data.instance(i).stringValue(0))) assignments.put(id, data.instance(i).stringValue(1)); } } // showDendrogram(clusterer); return assignments; }
Example 6
Source File: HierarClusterer.java From mzmine3 with GNU General Public License v2.0 | 4 votes |
@Override public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) { HierarchicalClusterer clusterer = new HierarchicalClusterer(); String[] options = new String[5]; LinkType link = parameters.getParameter(HierarClustererParameters.linkType).getValue(); DistanceType distanceType = parameters.getParameter(HierarClustererParameters.distanceType).getValue(); options[0] = "-L"; options[1] = link.name(); options[2] = "-A"; switch (distanceType) { case EUCLIDIAN: options[3] = "weka.core.EuclideanDistance"; break; case CHEBYSHEV: options[3] = "weka.core.ChebyshevDistance"; break; case MANHATTAN: options[3] = "weka.core.ManhattanDistance"; break; case MINKOWSKI: options[3] = "weka.core.MinkowskiDistance"; break; } options[4] = "-P"; try { clusterer.setOptions(options); clusterer.setPrintNewick(true); clusterer.buildClusterer(dataset); // clusterer.graph() gives only the first cluster and in the case // there // are more than one cluster the variables in the second cluster are // missing. // I'm using clusterer.toString() which contains all the clusters in // Newick format. ClusteringResult result = new ClusteringResult(null, clusterer.toString(), clusterer.getNumClusters(), null); return result; } catch (Exception ex) { logger.log(Level.SEVERE, null, ex); return null; } }
Example 7
Source File: HierarClusterer.java From mzmine2 with GNU General Public License v2.0 | 4 votes |
@Override public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) { HierarchicalClusterer clusterer = new HierarchicalClusterer(); String[] options = new String[5]; LinkType link = parameters.getParameter(HierarClustererParameters.linkType).getValue(); DistanceType distanceType = parameters.getParameter(HierarClustererParameters.distanceType).getValue(); options[0] = "-L"; options[1] = link.name(); options[2] = "-A"; switch (distanceType) { case EUCLIDIAN: options[3] = "weka.core.EuclideanDistance"; break; case CHEBYSHEV: options[3] = "weka.core.ChebyshevDistance"; break; case MANHATTAN: options[3] = "weka.core.ManhattanDistance"; break; case MINKOWSKI: options[3] = "weka.core.MinkowskiDistance"; break; } options[4] = "-P"; try { clusterer.setOptions(options); clusterer.setPrintNewick(true); clusterer.buildClusterer(dataset); // clusterer.graph() gives only the first cluster and in the case // there // are more than one cluster the variables in the second cluster are // missing. // I'm using clusterer.toString() which contains all the clusters in // Newick format. ClusteringResult result = new ClusteringResult(null, clusterer.toString(), clusterer.getNumClusters(), null); return result; } catch (Exception ex) { logger.log(Level.SEVERE, null, ex); return null; } }