net.sf.javaml.core.Dataset Java Examples

The following examples show how to use net.sf.javaml.core.Dataset. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: KMeans2.java    From HMMRATAC with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Rebuilds {@code clusterList} and {@code covMat} from clustering output, using three signals
 * (enrich1, enrich2, enrich3) per node.
 *
 * For every instance of every cluster the matching node is looked up in {@code map} by the
 * instance ID, wrapped in an {@code ATACClusterNode} tagged with the cluster index, and its
 * three enrichment values are fed into that cluster's covariance accumulator. One covariance
 * matrix per cluster is appended to {@code covMat} in cluster order; {@code clusterList} is
 * finally sorted with {@code ATACClusterNode.positionComparator}.
 *
 * @param clusters the clusters to flatten; index in this array becomes the cluster label
 * @param map lookup from instance ID to the node it was built from
 */
public void makeClusteredList3Signals(Dataset[] clusters,HashMap<Integer,MatrixNodeForKMeans> map){
	clusterList = new ArrayList<ATACClusterNode>();
	covMat = new ArrayList<double[][]>();
	for (int i = 0; i < clusters.length; i++){
		Dataset cluster = clusters[i];
		StorelessCovariance cov = new StorelessCovariance(3);

		for (int x = 0; x < cluster.size(); x++){
			MatrixNodeForKMeans node = map.get(cluster.get(x).getID());
			clusterList.add(new ATACClusterNode(node.getChrom(),node.getPos(),node.getEnrich1(),node.getEnrich2(),
					node.getEnrich3(),node.getIndex(),i));
			cov.increment(new double[] {node.getEnrich1(), node.getEnrich2(), node.getEnrich3()});
		}
		covMat.add(cov.getCovarianceMatrix().getData());
	}
	// The former "clusters = null; map = null;" only cleared the local parameter copies and
	// had no effect on the caller, so it has been dropped.
	Collections.sort(clusterList,ATACClusterNode.positionComparator);
}
 
Example #2
Source File: KMeans2.java    From HMMRATAC with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Rebuilds {@code clusterList} and {@code covMat} from clustering output, using two signals
 * (enrich1, enrich2) for the covariance computation.
 *
 * For every instance of every cluster the matching node is looked up in {@code map} by the
 * instance ID and wrapped in an {@code ATACClusterNode} tagged with the cluster index. The
 * node's enrich3 value is still passed to the {@code ATACClusterNode} constructor, but only
 * enrich1 and enrich2 are fed into the per-cluster covariance accumulator. One covariance
 * matrix per cluster is appended to {@code covMat} in cluster order; {@code clusterList} is
 * finally sorted with {@code ATACClusterNode.positionComparator}.
 *
 * @param clusters the clusters to flatten; index in this array becomes the cluster label
 * @param map lookup from instance ID to the node it was built from
 */
public void makeClusteredList2Signals(Dataset[] clusters,HashMap<Integer,MatrixNodeForKMeans> map){
	clusterList = new ArrayList<ATACClusterNode>();
	covMat = new ArrayList<double[][]>();
	for (int i = 0; i < clusters.length; i++){
		Dataset cluster = clusters[i];
		StorelessCovariance cov = new StorelessCovariance(2);

		for (int x = 0; x < cluster.size(); x++){
			MatrixNodeForKMeans node = map.get(cluster.get(x).getID());
			clusterList.add(new ATACClusterNode(node.getChrom(),node.getPos(),node.getEnrich1(),node.getEnrich2(),
					node.getEnrich3(),node.getIndex(),i));
			cov.increment(new double[] {node.getEnrich1(), node.getEnrich2()});
		}
		covMat.add(cov.getCovarianceMatrix().getData());
	}
	// The former "clusters = null; map = null;" only cleared the local parameter copies and
	// had no effect on the caller, so it has been dropped.
	Collections.sort(clusterList,ATACClusterNode.positionComparator);
}
 
Example #3
Source File: JMLNeurophSample.java    From NeurophFramework with Apache License 2.0 5 votes vote down vote up
/**
 * Demo entry point: loads a Java-ML data set from {@code datasets/iris.data}, applies a
 * {@code NormalizeMidrange(0, 1)} filter, converts it to a Neuroph data set and back, prints
 * all three, and finally runs the classifier test. Any exception is logged at SEVERE level.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    try {
        // Load the Java-ML data set from disk.
        final File irisFile = new File("datasets/iris.data");
        final Dataset source = FileHandler.loadDataset(irisFile, 4, ",");

        // Build the normalizer on the data set, then apply it to the same data set.
        final NormalizeMidrange normalizer = new NormalizeMidrange(0, 1);
        normalizer.build(source);
        normalizer.filter(source);

        // Dump the data set after build/filter have been applied.
        System.out.println(source);

        // Java-ML -> Neuroph -> Java-ML round trip.
        final DataSet asNeuroph = JMLDataSetConverter.convertJMLToNeurophDataset(source, 4, 3);
        final Dataset roundTripped = JMLDataSetConverter.convertNeurophToJMLDataset(asNeuroph);

        // Print all three representations so they can be compared by eye.
        System.out.println("Java-ML data set read from file");
        printDataset(source);
        System.out.println("Neuroph data set converted from Java-ML data set");
        printDataset(asNeuroph);
        System.out.println("Java-ML data set reconverted from Neuroph data set");
        printDataset(roundTripped);

        // Exercise the Neuroph-backed Java-ML classifier.
        System.out.println("JMLNeuroph classifier test");
        testJMLNeurophClassifier(source);
    } catch (Exception ex) {
        Logger.getLogger(JMLNeurophSample.class.getName()).log(Level.SEVERE, null, ex);
    }
}
 
Example #4
Source File: JMLNeurophSample.java    From NeurophFramework with Apache License 2.0 5 votes vote down vote up
/**
 * Prints a Java-ML data set to standard output.
 *
 * Writes a "JML dataset" header, then for each instance the line "inputs" followed by the
 * instance's values and its class value, each on its own line.
 *
 * @param jmlDataset Dataset Java-ML data set
 */
public static void printDataset(Dataset jmlDataset) {
    System.out.println("JML dataset");

    // Typed for-each instead of a raw Iterator plus cast.
    for (Instance instance : jmlDataset) {
        System.out.println("inputs");
        System.out.println(instance.values());
        System.out.println(instance.classValue());
    }
}
 
Example #5
Source File: JMLNeurophSample.java    From NeurophFramework with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Java-ML data set to a map from attribute vector to class label.
 *
 * Each instance contributes one entry: the key is a fresh {@code double[]} sized by
 * {@code jmlDataset.noAttributes()} and filled by iterating the instance, the value is
 * {@code classValue().toString()}.
 *
 * NOTE(review): {@code double[]} keys use identity-based {@code hashCode}/{@code equals}, so
 * entries can only be found again through the very same array object (or by iterating).
 *
 * @param jmlDataset Dataset Java-ML data set
 * @return Map converted from Java-ML data set
 */
private static Map<double[], String> convertJMLDatasetToMap(Dataset jmlDataset) {
    final int attributeCount = jmlDataset.noAttributes();
    final Map<double[], String> labelByAttributes = new HashMap<double[], String>();

    for (Instance row : jmlDataset) {
        // Copy the values of this instance into a primitive array.
        final double[] attributes = new double[attributeCount];
        int next = 0;
        for (Double attribute : row) {
            attributes[next++] = attribute;
        }

        labelByAttributes.put(attributes, row.classValue().toString());
    }

    return labelByAttributes;
}
 
Example #6
Source File: JMLNeurophSample.java    From NeurophFramework with Apache License 2.0 5 votes vote down vote up
/**
 * Test JMLNeurophClassifier: trains a Neuroph multi-layer perceptron on the given Java-ML
 * data set through the {@code JMLNeurophClassifier} adapter, then prints the predicted class
 * and the class distribution for one hard-coded test item.
 *
 * @param jmlDataset Dataset Java-ML data set
 */
private static void testJMLNeurophClassifier(Dataset jmlDataset) {
    // presumably 4 inputs, 16 hidden neurons, 3 outputs — confirm against MultiLayerPerceptron
    MultiLayerPerceptron neuralNet = new MultiLayerPerceptron(4, 16, 3);

    // set labels for output neurons
    neuralNet.getOutputNeurons().get(0).setLabel("Setosa");
    neuralNet.getOutputNeurons().get(1).setLabel("Versicolor");
    neuralNet.getOutputNeurons().get(2).setLabel("Virginica");

    // initialize NeurophJMLClassifier
    JMLNeurophClassifier jmlnClassifier = new JMLNeurophClassifier(neuralNet);

    // Process Java-ML data set
    jmlnClassifier.buildClassifier(jmlDataset);

    // test item; the raw measurement is {5.1, 3.5, 1.4, 0.2}, the normalized form is used here
    double[] item = {-0.27777777777777773, 0.1249999999999999, -0.4322033898305085, -0.45833333333333337};

    // Java-ML instance out of test item
    Instance instance = new DenseInstance(item);

    // Derive the printed label from the actual item: the previous hard-coded text showed a
    // different vector (0.2222..., 0.6249..., ...) than the one being classified.
    String itemText = java.util.Arrays.toString(item);
    System.out.println("NeurophJMLClassifier - classify of " + itemText);
    System.out.println(jmlnClassifier.classify(instance));
    System.out.println("NeurophJMLClassifier - classDistribution of " + itemText);
    System.out.println(jmlnClassifier.classDistribution(instance));
}
 
Example #7
Source File: JavaMLClusterers.java    From apogen with Apache License 2.0 5 votes vote down vote up
/**
 * Clusters the comma-separated data set stored in {@code filename} with {@code KMedoids}
 * (constructed with the parsed cluster count, 500 and a {@code EuclideanDistance}) and
 * returns the result of {@code convert} applied to the clusters.
 *
 * @param filename path of the data file to load
 * @param numClusters number of clusters, as a decimal string
 * @param distance NOTE(review): not read anywhere in this method
 * @return the converted clusters
 * @throws IOException declared for the data set loading step
 */
public static LinkedHashMap<Integer, LinkedList<String>> runKmedoid(String filename, String numClusters,
		boolean distance) throws IOException {

	final int clusterCount = Integer.parseInt(numClusters);
	final Clusterer clusterer = new KMedoids(clusterCount, 500, new EuclideanDistance());

	final Dataset input = FileHandler.loadDataset(new File(filename), 0, ",");

	return convert(clusterer.cluster(input));
}
 
Example #8
Source File: KMeans2.java    From HMMRATAC with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates a wrapper that stores the data, the cluster count and the iteration count, and
 * builds the underlying {@code KMeans} from the latter two.
 *
 * @param DATA the data set to keep for clustering
 * @param K value stored in {@code k} and passed as first {@code KMeans} argument
 * @param iter value stored in {@code numIter} and passed as second {@code KMeans} argument
 */
public KMeans2(Dataset DATA,int K,int iter){
	this.data = DATA;
	this.k = K;
	this.numIter = iter;
	this.kmeans = new KMeans(K, iter);
}
 
Example #9
Source File: TrackHolder.java    From HMMRATAC with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Exposes the tracks as a Java-ML Dataset, for kmeans.
 *
 * Every element of {@code tracks} is wrapped in a new {@code DenseInstance} and appended, in
 * order, to a freshly created {@code DefaultDataset}.
 *
 * @return a new Dataset holding one instance per track
 */
public Dataset getDataSet(){
	final Dataset result = new DefaultDataset();
	int index = 0;
	while (index < tracks.size()){
		result.add(new DenseInstance(tracks.get(index)));
		index++;
	}
	return result;
}
 
Example #10
Source File: WordFrequency.java    From apogen with Apache License 2.0 4 votes vote down vote up
/**
 * Fills {@code dataThal} from {@code wordsThalFrequenciesMap} and returns it.
 *
 * For each key, the values of the nested map are converted to a {@code double[]} in
 * iteration order and added as a {@code DenseInstance} built from that array and the key.
 * {@code dataThal} is not cleared here, so each call appends to whatever it already holds.
 *
 * @return {@code dataThal} after the instances have been added
 */
public Dataset createDatasetThal() {
	for (String word : wordsThalFrequenciesMap.keySet()) {
		Collection<BigDecimal> frequencies = wordsThalFrequenciesMap.get(word).values();

		// BigDecimal -> primitive double, preserving the collection's iteration order.
		double[] vector = new double[frequencies.size()];
		int next = 0;
		for (BigDecimal frequency : frequencies) {
			vector[next++] = frequency.doubleValue();
		}

		dataThal.add(new DenseInstance(vector, word));
	}
	return dataThal;
}
 
Example #11
Source File: KMeans2.java    From HMMRATAC with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Creates a wrapper that stores the data and the cluster count, and builds the underlying
 * {@code KMeans} from the cluster count alone.
 *
 * @param DATA the data set to keep for clustering
 * @param K value stored in {@code k} and passed to the {@code KMeans} constructor
 */
public KMeans2(Dataset DATA,int K){
	this.data = DATA;
	this.k = K;
	this.kmeans = new KMeans(K);
}
 
Example #12
Source File: KMeans2.java    From HMMRATAC with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Creates a wrapper that stores the data and builds the underlying {@code KMeans} with its
 * no-argument constructor.
 *
 * @param DATA the data set to keep for clustering
 */
public KMeans2(Dataset DATA){
	this.data = DATA;
	this.kmeans = new KMeans();
}
 
Example #13
Source File: TagFrequency.java    From apogen with Apache License 2.0 4 votes vote down vote up
/**
 * Exports the tags frequencies map into the Java-ML Dataset held in {@code data}.
 *
 * For each key of {@code tagsFrequenciesMap}, the values of the nested map are converted to
 * a {@code double[]} in iteration order and added as a {@code DenseInstance} built from that
 * array and the key. {@code data} is not cleared here, so each call appends to it.
 *
 * @return {@code data} after the instances have been added
 */
public Dataset createDataset() {
	for (String tag : tagsFrequenciesMap.keySet()) {
		Collection<BigDecimal> frequencies = tagsFrequenciesMap.get(tag).values();

		// BigDecimal -> primitive double, preserving the collection's iteration order.
		double[] vector = new double[frequencies.size()];
		int slot = 0;
		for (BigDecimal frequency : frequencies) {
			vector[slot] = frequency.doubleValue();
			slot += 1;
		}

		Instance row = new DenseInstance(vector, tag);
		data.add(row);
	}
	return data;
}
 
Example #14
Source File: JMLNeurophClassifier.java    From NeurophFramework with Apache License 2.0 4 votes vote down vote up
/**
 * Trains the wrapped neural network on a Java-ML data set.
 *
 * The data set is first converted to a Neuroph {@code DataSet}, using the network's own
 * input and output counts, and then handed to {@code neuralNet.learn}.
 *
 * @param dataSetJML Dataset Java-ML data set
 */
@Override
public void buildClassifier(Dataset dataSetJML) {
    final int inputCount = neuralNet.getInputsCount();
    final int outputCount = neuralNet.getOutputsCount();
    final DataSet trainingSet =
            JMLDataSetConverter.convertJMLToNeurophDataset(dataSetJML, inputCount, outputCount);
    neuralNet.learn(trainingSet);
}
 
Example #15
Source File: WordFrequency.java    From apogen with Apache License 2.0 4 votes vote down vote up
/**
 * Fills {@code dataBody} from {@code wordsBodyFrequenciesMap} and returns it.
 *
 * For each key, the values of the nested map are converted to a {@code double[]} in
 * iteration order and added as a {@code DenseInstance} built from that array and the key.
 * {@code dataBody} is not cleared here, so each call appends to whatever it already holds.
 *
 * @return {@code dataBody} after the instances have been added
 */
public Dataset createDatasetBody() {
	for (String word : wordsBodyFrequenciesMap.keySet()) {
		Collection<BigDecimal> frequencies = wordsBodyFrequenciesMap.get(word).values();

		// BigDecimal -> primitive double, preserving the collection's iteration order.
		double[] vector = new double[frequencies.size()];
		int next = 0;
		for (BigDecimal frequency : frequencies) {
			vector[next++] = frequency.doubleValue();
		}

		dataBody.add(new DenseInstance(vector, word));
	}
	return dataBody;
}
 
Example #16
Source File: JavaMLClusterers.java    From apogen with Apache License 2.0 4 votes vote down vote up
/**
 * Converts clustering output into a map from cluster index to the class values of the
 * cluster's members.
 *
 * Entries are inserted in array order (0, 1, ...). Each class value is rendered with
 * {@code String.valueOf}, so a {@code null} class value becomes the text "null".
 *
 * @param clusters the clusters to convert
 * @return insertion-ordered map: cluster index to list of member class values as strings
 */
public static LinkedHashMap<Integer, LinkedList<String>> convert(Dataset[] clusters) {

	LinkedHashMap<Integer, LinkedList<String>> output = new LinkedHashMap<Integer, LinkedList<String>>();

	for (int i = 0; i < clusters.length; i++) {

		LinkedList<String> list = new LinkedList<String>();

		for (int j = 0; j < clusters[i].size(); j++) {
			list.add(String.valueOf(clusters[i].classValue(j)));
		}

		// Integer.valueOf replaces the deprecated "new Integer(i)" boxing constructor.
		output.put(Integer.valueOf(i), list);
	}

	return output;
}
 
Example #17
Source File: UrlDistance.java    From apogen with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the URL distances matrix as a Java-ML Dataset held in {@code data}.
 *
 * For each key of {@code urlDistancesMap}, the values of the nested map are converted to a
 * {@code double[]} in iteration order and added as a {@code DenseInstance} built from that
 * array and the key. {@code data} is not cleared here, so each call appends to it.
 *
 * @return {@code data} after the instances have been added
 */
public Dataset createDataset() {
	for (String url : urlDistancesMap.keySet()) {
		Collection<BigDecimal> distances = urlDistancesMap.get(url).values();

		// BigDecimal -> primitive double, preserving the collection's iteration order.
		double[] rowValues = new double[distances.size()];
		int column = 0;
		for (BigDecimal distance : distances) {
			rowValues[column++] = distance.doubleValue();
		}

		data.add(new DenseInstance(rowValues, url));
	}
	return data;
}
 
Example #18
Source File: DomDistance.java    From apogen with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the DOM distances as a Java-ML Dataset held in {@code data}.
 *
 * For each key of {@code domDistancesMap}, the values of the nested map are converted to a
 * {@code double[]} in iteration order and added as a {@code DenseInstance} built from that
 * array and the key. {@code data} is not cleared here, so each call appends to it.
 *
 * @return {@code data} after the instances have been added
 */
public Dataset createDataset() {
	for (String page : domDistancesMap.keySet()) {
		Collection<BigDecimal> distances = domDistancesMap.get(page).values();

		// BigDecimal -> primitive double, preserving the collection's iteration order.
		double[] rowValues = new double[distances.size()];
		int column = 0;
		for (BigDecimal distance : distances) {
			rowValues[column] = distance.doubleValue();
			column += 1;
		}

		Instance row = new DenseInstance(rowValues, page);
		data.add(row);
	}
	return data;
}
 
Example #19
Source File: KMeansToHMM.java    From HMMRATAC with GNU General Public License v3.0 3 votes vote down vote up
/**
 * Constructor for creating new KMeansToHMM object
 * @param d a Dataset containing the data
 * @param K an integer representing the number of states to cluster
 * @param numIter an integer representing the number of iterations for kmeans clustering
 * @param diag a boolean to determine whether the resulting covariance matrix should be diagonal
 * @param equal a boolean to determine whether the initial probability vector should be equal
 * @param equal2 a boolean to determine whether the transition probability matrix should be equal
 */
@SuppressWarnings("unchecked")
public KMeansToHMM(Dataset d,int K,int numIter,boolean diag,boolean equal,boolean equal2){
	build(d,K,numIter, diag, equal,equal2);
	sort((Hmm<ObservationVector>) hmm);
	
}
 
Example #20
Source File: KMeans2.java    From HMMRATAC with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Runs the configured {@code kmeans} on {@code data}, stores the outcome in
 * {@code clusteredData} and returns it.
 *
 * @return the clusters produced by {@code kmeans.cluster(data)}
 */
public Dataset[] cluster(){
	final Dataset[] result = kmeans.cluster(data);
	clusteredData = result;
	return clusteredData;
}