Java Code Examples for weka.clusterers.HierarchicalClusterer#buildClusterer()

The following examples show how to use weka.clusterers.HierarchicalClusterer#buildClusterer(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: WekaHierarchicalClustering.java    From Java-Data-Analysis with MIT License 6 votes vote down vote up
/**
 * Builds a hierarchical clusterer with three clusters over the data set
 * returned by {@code load(DATA)} and prints, for every instance, its first
 * two attribute values together with the cluster it is assigned to.
 * Any exception raised while clustering is printed to standard error.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    final Instances points = load(DATA);
    final HierarchicalClusterer clusterer = new HierarchicalClusterer();
    clusterer.setLinkType(new SelectedTag(4, TAGS_LINK_TYPE));  // CENTROID
    clusterer.setNumClusters(3);
    try {
        clusterer.buildClusterer(points);
        for (int i = 0; i < points.numInstances(); i++) {
            final Instance point = points.instance(i);
            final double x = point.value(0);
            final double y = point.value(1);
            final int cluster = clusterer.clusterInstance(point);
            System.out.printf("(%.0f,%.0f): %s%n", x, y, cluster);
        }
    } catch (Exception e) {
        System.err.println(e);
    }
}
 
Example 2
Source File: WekaHierarchicalClustering2.java    From Java-Data-Analysis with MIT License 6 votes vote down vote up
/**
 * Builds a hierarchical clusterer with a single cluster over the data set
 * returned by {@code load(DATA)}, prints the first two attribute values and
 * the assigned cluster of every instance, and then hands the clusterer's
 * graph to {@code displayDendrogram}.
 * Any exception raised along the way is printed to standard error.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    final Instances points = load(DATA);
    final HierarchicalClusterer clusterer = new HierarchicalClusterer();
    clusterer.setLinkType(new SelectedTag(4, TAGS_LINK_TYPE));  // CENTROID
    clusterer.setNumClusters(1);
    try {
        clusterer.buildClusterer(points);
        for (int i = 0; i < points.numInstances(); i++) {
            final Instance point = points.instance(i);
            final double x = point.value(0);
            final double y = point.value(1);
            final int cluster = clusterer.clusterInstance(point);
            System.out.printf("(%.0f,%.0f): %s%n", x, y, cluster);
        }
        final String dendrogram = clusterer.graph();
        displayDendrogram(dendrogram);
    } catch (Exception e) {
        System.err.println(e);
    }
}
 
Example 3
Source File: WekaClusterers.java    From apogen with Apache License 2.0 5 votes vote down vote up
/**
 * Run WEKA hierarchical clustering on the given ARFF file, searching for
 * numClusters clusters, and group the first-attribute value of every
 * instance by the cluster the instance is assigned to.
 * 
 * @param filename
 *            path of the ARFF file to read
 * @param numClusters
 *            number of clusters to search for, as a decimal integer string
 * @param linkage
 *            requested linkage. NOTE(review): this value is not applied to
 *            the clusterer in this method, so the link type stays at
 *            whatever a newly constructed HierarchicalClusterer uses --
 *            confirm whether that is intended
 * @return map with one entry per cluster index 0..numClusters-1 (in that
 *         insertion order), each holding the first-attribute string values
 *         of the instances assigned to that cluster
 * @throws Exception
 *             if numClusters is not a valid integer, the file cannot be
 *             read or parsed, or clustering fails
 */
public static LinkedHashMap<Integer, LinkedList<String>> runHierarchical(String filename, String numClusters,
		String linkage) throws Exception {

	// parse once instead of on every loop iteration
	final int clusterCount = Integer.parseInt(numClusters);

	HierarchicalClusterer c = new HierarchicalClusterer();

	c.setNumClusters(clusterCount);
	c.setDebug(false);
	c.setPrintNewick(true);

	// the reader is only needed while the data set is loaded; close it
	// afterwards so the file handle is not leaked
	Instances data;
	try (BufferedReader reader = new BufferedReader(new FileReader(filename))) {
		ArffReader arff = new ArffReader(reader);
		data = arff.getData();
	}
	data.setClassIndex(0);

	c.buildClusterer(data);

	LinkedHashMap<Integer, LinkedList<String>> output = new LinkedHashMap<Integer, LinkedList<String>>();

	// initialize clusters map
	for (int i = 0; i < clusterCount; i++) {
		output.put(i, new LinkedList<String>());
	}

	// file every instance under the cluster it is assigned to
	for (Instance instance : data) {
		output.get(c.clusterInstance(instance)).add(instance.stringValue(0));
	}

	return output;
}
 
Example 4
Source File: APICallClustererMAPO.java    From api-mining with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Cluster API call sequences as described in MAPO.
 *
 * <p>
 * The data set is read from the given ARFF file and clustered with
 * complete linkage using {@code MAPOSimilarity} as the distance function.
 * The resulting Newick tree is passed to {@code NewickTreeParser} together
 * with the threshold; every instance whose attribute-0 string value appears
 * in a cluster contributes its attribute-1 string value to that cluster.
 *
 * @param arffFile
 *            location of the ARFF data handed to {@code DataSource}
 * @param threshold
 *            value forwarded to {@code NewickTreeParser.getClusters}
 * @return Multimap of cluster IDs to API call sequences; empty when the
 *         clusterer reports no clusters
 * @throws Exception
 *             if loading the data or clustering fails
 */
public static Multimap<Integer, String> clusterAPICallSeqs(final String arffFile, final double threshold)
		throws Exception {

	// Configure the hierarchical clusterer (complete linkage, one root cluster)
	final HierarchicalClusterer hc = new HierarchicalClusterer();
	hc.setOptions(new String[] { "-L", "COMPLETE" });
	hc.setDebug(true);
	hc.setNumClusters(1);
	hc.setDistanceFunction(MAPOSimilarity);
	hc.setDistanceIsBranchLength(false);

	// Load the API call sequences and build the dendrogram
	final Instances seqs = new DataSource(arffFile).getDataSet();
	hc.buildClusterer(seqs);

	// Strip the "Newick:" marker and append a branch length for the root
	final String tree = hc.graph().replace("Newick:", "") + ":0";
	if ("(no,clusters):0".equals(tree)) {
		// Nothing was clustered
		return HashMultimap.create();
	}

	final Multimap<Integer, String> clustersById = NewickTreeParser.getClusters(tree, threshold);
	System.out.println("No. clusters: " + clustersById.keySet().size());

	// Map each instance onto every cluster that lists its attribute-0 value
	final Multimap<Integer, String> result = HashMultimap.create();
	final int total = seqs.numInstances();
	for (int row = 0; row < total; row++) {
		for (final int clusterId : clustersById.keySet()) {
			if (clustersById.containsEntry(clusterId, seqs.instance(row).stringValue(0))) {
				result.put(clusterId, seqs.instance(row).stringValue(1));
			}
		}
	}

	return result;
}
 
Example 5
Source File: APICallClustererUPMiner.java    From api-mining with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Cluster API call sequences as described in UPMiner.
 *
 * <p>
 * The data set is read from the given ARFF file and clustered with
 * complete linkage using {@code SeqSimilarity} as the distance function.
 * The resulting Newick tree is passed to {@code NewickTreeParser} together
 * with the threshold; every instance whose attribute-0 string value appears
 * in a cluster contributes its attribute-1 string value to that cluster.
 *
 * @param arffFile
 *            location of the ARFF data handed to {@code DataSource}
 * @param threshold
 *            value forwarded to {@code NewickTreeParser.getClusters}
 * @return Multimap of cluster IDs to API call sequences; empty when the
 *         clusterer reports no clusters
 * @throws Exception
 *             if loading the data or clustering fails
 */
public static Multimap<Integer, String> clusterAPICallSeqs(final String arffFile, final double threshold)
		throws Exception {

	// Configure the hierarchical clusterer (complete linkage, one root cluster)
	final HierarchicalClusterer hc = new HierarchicalClusterer();
	hc.setOptions(new String[] { "-L", "COMPLETE" });
	hc.setDebug(true);
	hc.setNumClusters(1);
	hc.setDistanceFunction(SeqSimilarity);
	hc.setDistanceIsBranchLength(false);

	// Load the API call sequences and build the dendrogram
	final Instances seqs = new DataSource(arffFile).getDataSet();
	hc.buildClusterer(seqs);

	// Strip the "Newick:" marker and append a branch length for the root
	final String tree = hc.graph().replace("Newick:", "") + ":0";
	if ("(no,clusters):0".equals(tree)) {
		// Nothing was clustered
		return HashMultimap.create();
	}

	final Multimap<Integer, String> clustersById = NewickTreeParser.getClusters(tree, threshold);
	System.out.println("No. clusters: " + clustersById.keySet().size());

	// Map each instance onto every cluster that lists its attribute-0 value
	final Multimap<Integer, String> result = HashMultimap.create();
	final int total = seqs.numInstances();
	for (int row = 0; row < total; row++) {
		for (final int clusterId : clustersById.keySet()) {
			if (clustersById.containsEntry(clusterId, seqs.instance(row).stringValue(0))) {
				result.put(clusterId, seqs.instance(row).stringValue(1));
			}
		}
	}

	return result;
}
 
Example 6
Source File: HierarClusterer.java    From mzmine3 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Runs WEKA hierarchical clustering on the data set using the link type and
 * distance type selected in the parameters.
 *
 * @param dataset instances to cluster
 * @param parameters parameter set supplying the link type and distance type
 * @return a result carrying the clusterer's string form and its number of
 *         clusters, or {@code null} if configuring or building the clusterer
 *         throws (the exception is logged at SEVERE level)
 */
@Override
public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) {
  HierarchicalClusterer hierarchical = new HierarchicalClusterer();

  LinkType link = parameters.getParameter(HierarClustererParameters.linkType).getValue();
  DistanceType distanceType =
      parameters.getParameter(HierarClustererParameters.distanceType).getValue();
  final String linkName = link.name();

  // Stays null when the distance type matches none of the cases below.
  String distanceClass = null;
  switch (distanceType) {
    case EUCLIDIAN:
      distanceClass = "weka.core.EuclideanDistance";
      break;
    case CHEBYSHEV:
      distanceClass = "weka.core.ChebyshevDistance";
      break;
    case MANHATTAN:
      distanceClass = "weka.core.ManhattanDistance";
      break;
    case MINKOWSKI:
      distanceClass = "weka.core.MinkowskiDistance";
      break;
  }

  // "-L" is followed by the link name, "-A" by the distance class name.
  final String[] wekaOptions = {"-L", linkName, "-A", distanceClass, "-P"};

  try {
    hierarchical.setOptions(wekaOptions);
    hierarchical.setPrintNewick(true);
    hierarchical.buildClusterer(dataset);
    // graph() only yields the first cluster, so with more than one cluster
    // the variables of the later clusters would be missing. toString()
    // contains all the clusters in Newick format and is used instead.
    return new ClusteringResult(null, hierarchical.toString(), hierarchical.getNumClusters(),
        null);
  } catch (Exception ex) {
    logger.log(Level.SEVERE, null, ex);
    return null;
  }
}
 
Example 7
Source File: HierarClusterer.java    From mzmine2 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Runs WEKA hierarchical clustering on the data set using the link type and
 * distance type selected in the parameters.
 *
 * @param dataset instances to cluster
 * @param parameters parameter set supplying the link type and distance type
 * @return a result carrying the clusterer's string form and its number of
 *         clusters, or {@code null} if configuring or building the clusterer
 *         throws (the exception is logged at SEVERE level)
 */
@Override
public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) {
  HierarchicalClusterer hierarchical = new HierarchicalClusterer();

  LinkType link = parameters.getParameter(HierarClustererParameters.linkType).getValue();
  DistanceType distanceType =
      parameters.getParameter(HierarClustererParameters.distanceType).getValue();
  final String linkName = link.name();

  // Stays null when the distance type matches none of the cases below.
  String distanceClass = null;
  switch (distanceType) {
    case EUCLIDIAN:
      distanceClass = "weka.core.EuclideanDistance";
      break;
    case CHEBYSHEV:
      distanceClass = "weka.core.ChebyshevDistance";
      break;
    case MANHATTAN:
      distanceClass = "weka.core.ManhattanDistance";
      break;
    case MINKOWSKI:
      distanceClass = "weka.core.MinkowskiDistance";
      break;
  }

  // "-L" is followed by the link name, "-A" by the distance class name.
  final String[] wekaOptions = {"-L", linkName, "-A", distanceClass, "-P"};

  try {
    hierarchical.setOptions(wekaOptions);
    hierarchical.setPrintNewick(true);
    hierarchical.buildClusterer(dataset);
    // graph() only yields the first cluster, so with more than one cluster
    // the variables of the later clusters would be missing. toString()
    // contains all the clusters in Newick format and is used instead.
    return new ClusteringResult(null, hierarchical.toString(), hierarchical.getNumClusters(),
        null);
  } catch (Exception ex) {
    logger.log(Level.SEVERE, null, ex);
    return null;
  }
}