weka.core.EuclideanDistance Java Examples

Source File: ED1NN.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Returns the sum of squared attribute differences between two instances,
 * taken over every attribute except the last one. The square root is not
 * applied on this path.
 *
 * If the class index of either instance is not its last attribute, the call is
 * delegated to a weka {@code EuclideanDistance} with normalisation switched
 * off, and {@code cutoff} is forwarded to it. On the direct path
 * {@code cutoff} is not used.
 *
 * @param first  the first instance
 * @param second the second instance
 * @param cutoff only forwarded to the fallback distance function
 * @return the summed squared differences, or the fallback's result
 */
public final double distance(Instance first, Instance second, double cutoff) {

    // The direct computation assumes the class value is the last attribute of
    // both instances; otherwise hand over to weka's EuclideanDistance.
    boolean classIsLast = first.classIndex() == first.numAttributes() - 1
            && second.classIndex() == second.numAttributes() - 1;
    if (!classIsLast) {
        EuclideanDistance fallback = new EuclideanDistance();
        fallback.setDontNormalize(true);
        return fallback.distance(first, second, cutoff);
    }

    double squaredSum = 0;
    for (int att = 0; att < first.numAttributes() - 1; att++) {
        double diff = first.value(att) - second.value(att);
        squaredSum += diff * diff;
    }

    // the squared sum is returned as-is (no Math.sqrt)
    return squaredSum;
}

Source File: LLGC.java From collective-classification-weka-package with GNU General Public License v3.0

6 votes

/**
 * Resets the members of this class to their initial values, after letting
 * the superclass initialize its own members.
 */
@Override
protected void initializeMembers() {
  super.initializeMembers();

  // datasets and matrices start out unset
  m_TrainsetNew = null;
  m_TestsetNew  = null;
  m_Data        = null;
  m_MatrixY     = null;
  m_MatrixW     = null;
  m_MatrixD     = null;
  m_MatrixS     = null;
  m_MatrixFStar = null;

  // numeric parameters
  m_Alpha   = 0.99;
  m_Sigma   = 1.0;
  m_Repeats = 0;

  // remaining settings
  m_SequenceLimit        = SEQ_LIMIT_GRAPHKERNEL;
  m_filterType           = SMO.FILTER_NORMALIZE;
  m_IncludeNumAttributes = true;
  m_DistanceFunction     = new EuclideanDistance();
}

Source File: KNNAugSpaceSampler.java From AILibs with GNU Affero General Public License v3.0

6 votes

/**
 * Creates a sampler backed by the given nearest neighbour search. The search
 * is configured with a Euclidean distance over the attribute range
 * {@code first-(numAttributes - 1)} of {@code preciseInsts} and is then given
 * those instances. Configuration failures are logged, not rethrown.
 *
 * @param preciseInsts The instances passed to the superclass and used to set up the search.
 * @param rng The random source passed to the superclass.
 * @param k The value stored in {@code this.k} (presumably the number of neighbours; confirm against usage).
 * @param nearestNeighbour The nearest neighbour search algorithm to use.
 * @author Michael
 *
 */
public KNNAugSpaceSampler(final Instances preciseInsts, final Random rng, final int k, final NearestNeighbourSearch nearestNeighbour) {
	super(preciseInsts, rng);
	this.k = k;
	DistanceFunction dist = new EuclideanDistance(preciseInsts);
	// Weka looks options up by exact array element, so the flag and its value
	// must be separate entries; a single "-R first-N" element is never matched
	// as -R. Plain concatenation also avoids locale-dependent digit formatting.
	String[] distOptions = {"-R", "first-" + (preciseInsts.numAttributes() - 1)};

	try {
		dist.setOptions(distOptions);
		nearestNeighbour.setDistanceFunction(dist);
		nearestNeighbour.setInstances(preciseInsts);
	} catch (Exception e) {
		// Pass the exception as the throwable argument so its stack trace is
		// logged and no unfilled placeholder is left in the message.
		logger.error("Could not configure distance function or setup nearest neighbour.", e);
	}
	nearestNeighbour.setMeasurePerformance(false);
	this.nearestNeighbour = nearestNeighbour;
}

Source File: TopDownConstructor.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Constructs the ball tree from the top down: resets the node, leaf and depth
 * counters, configures the splitter, creates a root node whose pivot is the
 * centroid of the instance list, and then splits from the root.
 *
 * @return The root node of the tree.
 * @throws Exception If there is problem building
 * the tree.
 */
public BallNode buildTree() throws Exception {
  // reset the tree statistics
  m_MaxDepth = 0;
  m_NumNodes = 0;
  m_NumLeaves = 1;

  // hand the data and the distance function to the splitter
  EuclideanDistance euclidean = (EuclideanDistance) m_DistanceFunction;
  m_Splitter.setInstances(m_Instances);
  m_Splitter.setInstanceList(m_InstList);
  m_Splitter.setEuclideanDistanceFunction(euclidean);

  // root node created with the index range 0 .. last entry of the instance list
  final int lastIdx = m_InstList.length - 1;
  BallNode root = new BallNode(0, lastIdx, 0);
  root.setPivot(BallNode.calcCentroidPivot(m_InstList, m_Instances));
  root.setRadius(
      BallNode.calcRadius(m_InstList, m_Instances, root.getPivot(), m_DistanceFunction));

  splitNodes(root, m_MaxDepth + 1, root.m_Radius);

  return root;
}

Source File: NearestNeighbourSearch.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Parses a given list of options. Valid options are:
 *
 <!-- options-start -->
 <!-- options-end -->
 *
 * The value of -A is read as a class name followed by that class's own
 * options and is instantiated as the distance function; when -A is absent a
 * default EuclideanDistance is used. The -P flag is passed to
 * setMeasurePerformance.
 *
 * @param options 	the list of options as an array of strings
 * @throws Exception 	if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  String distFuncSpec = Utils.getOption('A', options);
  if (distFuncSpec.length() == 0) {
    // nothing specified: fall back to the default distance function
    setDistanceFunction(new EuclideanDistance());
  } else {
    String[] specParts = Utils.splitOptions(distFuncSpec);
    if (specParts.length == 0) {
      throw new Exception("Invalid DistanceFunction specification string.");
    }
    // the first element is the class name; blank it so only options remain
    String className = specParts[0];
    specParts[0] = "";

    DistanceFunction distFunc =
        (DistanceFunction) Utils.forName(DistanceFunction.class, className, specParts);
    setDistanceFunction(distFunc);
  }

  setMeasurePerformance(Utils.getFlag('P', options));
}

Source File: BallTree.java From tsml with GNU General Public License v3.0

6 votes

/**
 * Builds the BallTree on the instances referenced by the m_Instances field
 * (supplied through setInstances(Instances)). Outside classes should not
 * call this method directly; they should use setInstances(Instances).
 *
 * @throws Exception If no instances are supplied
 * (m_Instances is null), or if some other error in the
 * supplied BallTreeConstructor occurs while building
 * the tree.
 */
protected void buildTree() throws Exception {
  if (m_Instances == null) {
    throw new Exception("No instances supplied yet. Have to call " +
                        "setInstances(instances) with a set of Instances " +
                        "first.");
  }

  // master index list: entry i initially refers to instance i
  final int count = m_Instances.numInstances();
  m_InstList = new int[count];
  for (int idx = 0; idx < count; idx++) {
    m_InstList[idx] = idx;
  }

  m_DistanceFunction.setInstances(m_Instances);

  // configure the tree constructor and let it build the tree
  EuclideanDistance euclidean = (EuclideanDistance) m_DistanceFunction;
  m_TreeConstructor.setInstances(m_Instances);
  m_TreeConstructor.setInstanceList(m_InstList);
  m_TreeConstructor.setEuclideanDistanceFunction(euclidean);

  m_Root = m_TreeConstructor.buildTree();
}

Source File: SimpleKMeans.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Sets the distance function to use for instance comparison. Only
 * EuclideanDistance and ManhattanDistance instances are accepted.
 *
 * @param df the new distance function to use
 * @throws Exception if df is neither a EuclideanDistance nor a
 *           ManhattanDistance
 */
public void setDistanceFunction(DistanceFunction df) throws Exception {
  boolean supported = (df instanceof EuclideanDistance)
      || (df instanceof ManhattanDistance);
  if (!supported) {
    throw new Exception(
        "SimpleKMeans currently only supports the Euclidean and Manhattan distances.");
  }
  m_DistanceFunction = df;
}

Source File: SimpleKMeans.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Assigns an already filtered instance to the cluster whose centroid is
 * closest under the current distance function.
 *
 * @param instance the instance to assign a cluster to
 * @param updateErrors if true, add this instance's error to the entry of the
 *          chosen cluster in m_squaredErrors
 * @param useFastDistCalc if true, the best distance found so far is passed
 *          to the distance function as a cut-off value
 * @return a cluster number
 */
private int clusterProcessedInstance(Instance instance, boolean updateErrors,
    boolean useFastDistCalc) {
  // NOTE(review): the search starts from Integer.MAX_VALUE, not a double limit
  double closest = Integer.MAX_VALUE;
  int closestIdx = 0;

  for (int c = 0; c < m_NumClusters; c++) {
    Instance centroid = m_ClusterCentroids.instance(c);
    double d = useFastDistCalc
        ? m_DistanceFunction.distance(instance, centroid, closest)
        : m_DistanceFunction.distance(instance, centroid);
    if (d < closest) {
      closest = d;
      closestIdx = c;
    }
  }

  if (!updateErrors) {
    return closestIdx;
  }

  // for the Euclidean case the distance is squared before it is accumulated
  double error = (m_DistanceFunction instanceof EuclideanDistance)
      ? closest * closest
      : closest;
  m_squaredErrors[closestIdx] += error;
  return closestIdx;
}

Source File: EnsembleProvider.java From AILibs with GNU Affero General Public License v3.0

5 votes

/**
 * Creates the CAWPE ensemble model: a {@link MajorityConfidenceVote} over five
 * Weka classifiers (SMO, KNN, J48, Logistic and MLP). It refers to
 * "Heterogeneous ensemble of standard classification algorithms" (HESCA) as
 * described in Lines, Jason & Taylor, Sarah & Bagnall, Anthony. (2018). Time
 * Series Classification with HIVE-COTE: The Hierarchical Vote Collective of
 * Transformation-Based Ensembles. ACM Transactions on Knowledge Discovery from
 * Data. 12. 1-35. 10.1145/3182382.
 *
 * @param seed
 *            Seed passed to the SMO and J48 members and to the
 *            {@link MajorityConfidenceVote}
 * @param numFolds
 *            Number of folds passed to the {@link MajorityConfidenceVote}
 * @return Returns an initialized (but untrained) ensemble model.
 * @throws Exception
 *             Thrown when the initialization has failed
 */
public static Classifier provideCAWPEEnsembleModel(final int seed, final int numFolds) throws Exception {
	Vote ensemble = new MajorityConfidenceVote(numFolds, seed);

	// SMO with a polynomial kernel of exponent 1
	SMO svm = new SMO();
	svm.turnChecksOff();
	svm.setBuildCalibrationModels(true);
	PolyKernel linearKernel = new PolyKernel();
	linearKernel.setExponent(1);
	svm.setKernel(linearKernel);
	svm.setRandomSeed(seed);

	// IBk with cross-validation enabled and a non-normalising Euclidean distance
	IBk knn = new IBk(100);
	knn.setCrossValidate(true);
	EuclideanDistance rawEuclidean = new EuclideanDistance();
	rawEuclidean.setDontNormalize(true);
	knn.getNearestNeighbourSearchAlgorithm().setDistanceFunction(rawEuclidean);

	J48 tree = new J48();
	tree.setSeed(seed);

	// Logistic and MultilayerPerceptron are used with their default settings
	Classifier[] members = { svm, knn, tree, new Logistic(), new MultilayerPerceptron() };

	ensemble.setClassifiers(members);
	return ensemble;
}

Source File: CAWPE.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Sets up the ensemble named "CAWPE-NoLogistic".
 * Comps: SVML, NN, C4.5, MLP (no Logistic component is added)
 * Weight: TrainAcc(4)
 * Vote: MajorityConfidence
 * Train estimator: CrossValidationEvaluator with 10 folds
 */
public final void setupDefaultSettings_NoLogistic() {
    this.ensembleName = "CAWPE-NoLogistic";

    this.weightingScheme = new TrainAcc(4);
    this.votingScheme = new MajorityConfidence();

    CrossValidationEvaluator evaluator = new CrossValidationEvaluator(seed, false, false, false, false);
    evaluator.setNumFolds(10);
    this.trainEstimator = evaluator;

    // SVML: SMO with a polynomial kernel of exponent 1
    SMO svml = new SMO();
    svml.turnChecksOff();
    svml.setBuildLogisticModels(true);
    PolyKernel linearKernel = new PolyKernel();
    linearKernel.setExponent(1);
    svml.setKernel(linearKernel);
    svml.setRandomSeed(seed);

    // NN: kNN with cross-validation on, normalisation off, Euclidean distance
    kNN nn = new kNN(100);
    nn.setCrossValidate(true);
    nn.normalise(false);
    nn.setDistanceFunction(new EuclideanDistance());

    // the remaining members use their default configuration
    Classifier[] members = { svml, nn, new J48(), new MultilayerPerceptron() };
    String[] memberNames = { "SVML", "NN", "C4.5", "MLP" };

    setClassifiers(members, memberNames, null);
}

Source File: KDTree.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Sets the distance function to use for nearest neighbour search. Only
 * EuclideanDistance instances are accepted; the instance is stored in both
 * m_EuclideanDistance and m_DistanceFunction.
 *
 * @param df		the distance function to use
 * @throws Exception	if not EuclideanDistance
 */
public void setDistanceFunction(DistanceFunction df) throws Exception {
  if (!(df instanceof EuclideanDistance)) {
    throw new Exception("KDTree currently only works with "
        + "EuclideanDistanceFunction.");
  }
  EuclideanDistance euclidean = (EuclideanDistance) df;
  m_EuclideanDistance = euclidean;
  m_DistanceFunction = euclidean;
}

Source File: KDTree.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Builds the KDTree on the supplied set of instances/points. It
 * is advisable to run the replace missing attributes filter
 * on the passed instances first.
 * NOTE: This method should not be called from outside this
 * class. Outside classes should call setInstances(Instances)
 * instead.
 *
 * @param instances	The instances to build the tree on
 * @throws Exception	if something goes wrong
 */
protected void buildKDTree(Instances instances) throws Exception {

  checkMissing(instances);

  // reuse the existing Euclidean distance if there is one, else create it
  if (m_EuclideanDistance != null) {
    m_EuclideanDistance.setInstances(instances);
  } else {
    m_EuclideanDistance = new EuclideanDistance(instances);
    m_DistanceFunction = m_EuclideanDistance;
  }

  m_Instances = instances;
  final int numInst = m_Instances.numInstances();

  // global index list: entry i initially refers to instance i
  m_InstList = new int[numInst];
  for (int idx = 0; idx < numInst; idx++) {
    m_InstList[idx] = idx;
  }

  // attribute ranges as reported by the distance function
  double[][] universe = m_EuclideanDistance.getRanges();

  // initialise the internal fields of the node splitter
  m_Splitter.setInstances(m_Instances);
  m_Splitter.setInstanceList(m_InstList);
  m_Splitter.setEuclideanDistanceFunction(m_EuclideanDistance);
  m_Splitter.setNodeWidthNormalization(m_NormalizeNodeWidth);

  // reset the statistics, create the root and split from there
  m_NumLeaves = 1;
  m_NumNodes = 1;
  m_MaxDepth = 0;
  m_Root = new KDTreeNode(m_NumNodes, 0, m_Instances.numInstances() - 1,
      universe);

  splitNodes(m_Root, universe, m_MaxDepth + 1);
}

Source File: CoverTree.java From tsml with GNU General Public License v3.0

5 votes

/**
 * Builds the tree on the given set of instances.
 * P.S.: For internal use only. Outside classes
 * should call setInstances().
 *
 * The first instance is used as the starting point: for every other instance
 * a DistanceNode holding its index and its distance value to that first
 * instance is pushed onto the point set, which is then handed to
 * batch_insert together with the scale of the maximum returned by max_set.
 *
 * @param insts The instances on which to build
 * the cover tree.
 * @throws Exception If the supplied set of
 * Instances is empty, or if there are missing
 * values.
 */
protected void buildCoverTree(Instances insts) throws Exception {
  if (insts.numInstances() == 0) {
    throw new Exception(
 "CoverTree: Empty set of instances. Cannot build tree.");
  }
  checkMissing(insts);

  // reuse the existing Euclidean distance if there is one, else create it
  if (m_EuclideanDistance == null) {
    m_DistanceFunction = m_EuclideanDistance = new EuclideanDistance(insts);
  } else {
    m_EuclideanDistance.setInstances(insts);
  }

  Stack<DistanceNode> point_set = new Stack<DistanceNode>();
  Stack<DistanceNode> consumed_set = new Stack<DistanceNode>();

  final int p_idx = 0;
  Instance point_p = insts.instance(p_idx);

  for (int i = 1; i < insts.numInstances(); i++) {
    DistanceNode temp = new DistanceNode();
    temp.dist = new Stack<Double>();
    // NOTE(review): the square root of the distance function's result is what
    // gets stored; confirm this is intended for a Euclidean distance.
    double dist = Math.sqrt(m_DistanceFunction.distance(point_p, insts.instance(i), Double.POSITIVE_INFINITY));
    temp.dist.push(dist);
    temp.idx = i;
    point_set.push(temp);
  }

  // The maximum is taken from max_set; the former in-loop maximum tracking
  // was overwritten here before it was ever read and has been removed.
  double max_dist = max_set(point_set);
  m_Root = batch_insert(p_idx, get_scale(max_dist), get_scale(max_dist),
                        point_set, consumed_set);
}

Source File: SimpleKMeansWithSilhouette.java From apogen with Apache License 2.0

5 votes

/**
 * Assigns an already filtered instance to the cluster whose centroid is
 * closest under the current distance function.
 * 
 * @param instance
 *            the instance to assign a cluster to
 * @param updateErrors
 *            if true, add this instance's error to the entry of the chosen
 *            cluster in m_squaredErrors
 * @param useFastDistCalc
 *            if true, the best distance found so far is passed to the
 *            distance function as a cut-off value
 * @param instanceCanopies
 *            the canopies covering the instance to be clustered, or null if not
 *            using the option to reduce the number of distance computations via
 *            canopies
 * @return a cluster number
 */
private int clusterProcessedInstance(Instance instance, boolean updateErrors, boolean useFastDistCalc,
		long[] instanceCanopies) {
	double closest = Integer.MAX_VALUE;
	int closestIdx = 0;

	for (int c = 0; c < m_NumClusters; c++) {
		double d;
		if (!useFastDistCalc) {
			d = m_DistanceFunction.distance(instance, m_ClusterCentroids.instance(c));
		} else {
			boolean checkCanopies = m_speedUpDistanceCompWithCanopies && instanceCanopies != null
					&& instanceCanopies.length > 0;
			if (checkCanopies) {
				try {
					// skip centroids that share no canopy with the instance
					if (!Canopy.nonEmptyCanopySetIntersection(m_centroidCanopyAssignments.get(c),
							instanceCanopies)) {
						continue;
					}
				} catch (Exception ex) {
					// a failed canopy check is reported; the distance is still computed
					ex.printStackTrace();
				}
			}
			d = m_DistanceFunction.distance(instance, m_ClusterCentroids.instance(c), closest);
		}

		if (d < closest) {
			closest = d;
			closestIdx = c;
		}
	}

	if (updateErrors) {
		if (m_DistanceFunction instanceof EuclideanDistance) {
			// squared distance multiplied by the instance weight; for other
			// distance functions the value is accumulated unchanged
			closest = closest * (closest * instance.weight());
		}
		m_squaredErrors[closestIdx] += closest;
	}
	return closestIdx;
}

Source File: BasicDTW.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Manual check: loads a dataset from a hard-coded local path and prints the
 * data followed by the distance between its first two instances, computed
 * with a non-normalising EuclideanDistance and with BasicDTW, each once
 * without and once with a cut-off argument.
 *
 * @param args not used
 */
public static void main(String[] args) {
    // Test BasicDTW
    Instances data = DatasetLoading.loadDataNullable("C:\\Users\\ajb\\Dropbox\\test\\Beef");
    BasicDTW dtwMeasure = new BasicDTW(data);
    EuclideanDistance euclidean = new EuclideanDistance(data);
    euclidean.setDontNormalize(true);

    System.out.println(" DATA \n" + data.toString());

    // Euclidean distance, without and with a cut-off
    System.out.println(" ED =" + euclidean.distance(data.instance(0), data.instance(1)));
    System.out.println(" ED =" + euclidean.distance(data.instance(0), data.instance(1), 2));

    // DTW distance, without and with a cut-off
    System.out.println(" DTW =" + dtwMeasure.distance(data.instance(0), data.instance(1)));
    System.out.println(" DTW =" + dtwMeasure.distance(data.instance(0), data.instance(1), 1));

    // Test Early abandon (nothing implemented here)
}

Source File: CAWPE.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Sets up the ensemble named "HESCA".
 * Comps: NN, NB, C4.5, SVML, SVMQ, RandF, RotF
 * Weight: TrainAcc
 * Vote: MajorityVote
 * Train estimator: CrossValidationEvaluator with 10 folds
 *
 * As used originally in ST_HESCA, COTE.
 * NOTE the original also contained Bayes Net (BN). We have removed it because the classifier crashes
 * unpredictably when discretising features (due to lack of variance in the feature, but not easily detected and
 * dealt with).
 */
public final void setupOriginalHESCASettings() {
    this.ensembleName = "HESCA";

    this.weightingScheme = new TrainAcc();
    this.votingScheme = new MajorityVote();

    CrossValidationEvaluator evaluator = new CrossValidationEvaluator(seed, false, false, false, false);
    evaluator.setNumFolds(10);
    this.trainEstimator = evaluator;

    // NN: kNN with cross-validation on, normalisation off, Euclidean distance
    kNN nn = new kNN(100);
    nn.setCrossValidate(true);
    nn.normalise(false);
    nn.setDistanceFunction(new EuclideanDistance());

    NaiveBayes nb = new NaiveBayes();
    J48 c45 = new J48();

    // SVML: SMO with a polynomial kernel of exponent 1
    SMO svml = new SMO();
    svml.turnChecksOff();
    PolyKernel linearKernel = new PolyKernel();
    linearKernel.setExponent(1);
    svml.setKernel(linearKernel);
    svml.setRandomSeed(seed);

    // SVMQ: SMO with a polynomial kernel of exponent 2.
    // Assumes no missing, all real valued and a discrete class variable
    SMO svmq = new SMO();
    svmq.turnChecksOff();
    PolyKernel quadraticKernel = new PolyKernel();
    quadraticKernel.setExponent(2);
    svmq.setKernel(quadraticKernel);
    svmq.setRandomSeed(seed);

    RandomForest randF = new RandomForest();
    randF.setNumTrees(500);
    randF.setSeed(seed);

    RotationForest rotF = new RotationForest();
    rotF.setNumIterations(50);
    rotF.setSeed(seed);

    // BayesNet is intentionally not part of the ensemble (see note above)
    Classifier[] members = { nn, nb, c45, svml, svmq, randF, rotF };
    String[] memberNames = { "NN", "NB", "C45", "SVML", "SVMQ", "RandF", "RotF" };

    setClassifiers(members, memberNames, null);
}

Source File: CAWPE.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Sets up the ensemble named "CAWPE" (the 'basic UCI' set up).
 * Comps: SVML, NN, C4.5, Logistic, MLP
 * Weight: TrainAcc(4) (train accuracies to the power 4)
 * Vote: MajorityConfidence (summing probability distributions)
 * Train estimator: CrossValidationEvaluator with 10 folds; no transform is set.
 */
@Override //Abstract Ensemble 
public final void setupDefaultEnsembleSettings() {
    this.ensembleName = "CAWPE";

    this.weightingScheme = new TrainAcc(4);
    this.votingScheme = new MajorityConfidence();
    this.transform = null;

    CrossValidationEvaluator evaluator = new CrossValidationEvaluator(seed, false, false, false, false);
    evaluator.setNumFolds(10);
    this.trainEstimator = evaluator;

    // SVML: SMO with a polynomial kernel of exponent 1
    SMO svml = new SMO();
    svml.turnChecksOff();
    svml.setBuildLogisticModels(true);
    PolyKernel linearKernel = new PolyKernel();
    linearKernel.setExponent(1);
    svml.setKernel(linearKernel);
    svml.setRandomSeed(seed);

    // NN: kNN with cross-validation on, normalisation off, Euclidean distance
    kNN nn = new kNN(100);
    nn.setCrossValidate(true);
    nn.normalise(false);
    nn.setDistanceFunction(new EuclideanDistance());

    // the remaining members use their default configuration
    Classifier[] members = { svml, nn, new J48(), new Logistic(), new MultilayerPerceptron() };
    String[] memberNames = { "SVML", "NN", "C4.5", "Logistic", "MLP" };

    setClassifiers(members, memberNames, null);
}

Source File: HierarchicalClusterer.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Parses a given list of options. <p/>
 *
  <!-- options-start -->
 * Valid options are: <p/>
 * 
  <!-- options-end -->
 *
 * Options read here: -P (print Newick), -N (number of clusters, 2 when
 * absent), -D (debug), -B (distance is branch length), -L (link type name)
 * and -A (distance function class with its options, EuclideanDistance when
 * absent). Any option left over afterwards is reported by
 * Utils.checkForRemainingOptions.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  m_bPrintNewick = Utils.getFlag('P', options);

  String numClustersStr = Utils.getOption('N', options);
  if (numClustersStr.length() == 0) {
    setNumClusters(2);
  } else {
    setNumClusters(Integer.parseInt(numClustersStr));
  }

  setDebug(Utils.getFlag('D', options));
  setDistanceIsBranchLength(Utils.getFlag('B', options));

  // an unrecognised (or missing) link type name leaves the link type untouched
  String linkTypeName = Utils.getOption('L', options);
  if (linkTypeName.equals("SINGLE")) {
    setLinkType(new SelectedTag(SINGLE, TAGS_LINK_TYPE));
  } else if (linkTypeName.equals("COMPLETE")) {
    setLinkType(new SelectedTag(COMPLETE, TAGS_LINK_TYPE));
  } else if (linkTypeName.equals("AVERAGE")) {
    setLinkType(new SelectedTag(AVERAGE, TAGS_LINK_TYPE));
  } else if (linkTypeName.equals("MEAN")) {
    setLinkType(new SelectedTag(MEAN, TAGS_LINK_TYPE));
  } else if (linkTypeName.equals("CENTROID")) {
    setLinkType(new SelectedTag(CENTROID, TAGS_LINK_TYPE));
  } else if (linkTypeName.equals("WARD")) {
    setLinkType(new SelectedTag(WARD, TAGS_LINK_TYPE));
  } else if (linkTypeName.equals("ADJCOMLPETE")) {
    setLinkType(new SelectedTag(ADJCOMLPETE, TAGS_LINK_TYPE));
  } else if (linkTypeName.equals("NEIGHBOR_JOINING")) {
    setLinkType(new SelectedTag(NEIGHBOR_JOINING, TAGS_LINK_TYPE));
  }

  String distFuncSpec = Utils.getOption('A', options);
  if (distFuncSpec.length() == 0) {
    setDistanceFunction(new EuclideanDistance());
  } else {
    String[] specParts = Utils.splitOptions(distFuncSpec);
    if (specParts.length == 0) {
      throw new Exception("Invalid DistanceFunction specification string.");
    }
    // the first element is the class name; blank it so only options remain
    String className = specParts[0];
    specParts[0] = "";

    DistanceFunction distFunc =
        (DistanceFunction) Utils.forName(DistanceFunction.class, className, specParts);
    setDistanceFunction(distFunc);
  }

  Utils.checkForRemainingOptions(options);
}

Source File: KMedoids.java From apogen with Apache License 2.0

4 votes

/**
 * Parses the options understood by this clusterer and then passes the
 * options array on to the superclass. Options read here: -N (number of
 * clusters), -I (maximum iterations), -J (repeat times), -A (distance
 * function class with its options, EuclideanDistance when absent) and the
 * flag -s (save cluster result).
 * 
 * @param options
 *            a list of options as an array of strings
 * @throws Exception
 *             if an option is not supported
 */
public void setOptions(String[] options) throws Exception {

	// number of clusters
	String numClustersStr = Utils.getOption('N', options);
	if (!numClustersStr.isEmpty()) {
		setNumClusters(Integer.parseInt(numClustersStr));
	}

	// maximum number of iterations
	String maxIterStr = Utils.getOption("I", options);
	if (!maxIterStr.isEmpty()) {
		setMaxIterations(Integer.parseInt(maxIterStr));
	}

	// repeat times
	String repeatStr = Utils.getOption("J", options);
	if (!repeatStr.isEmpty()) {
		setRepeatTimes(Integer.parseInt(repeatStr));
	}

	// distance function
	String distFuncSpec = Utils.getOption('A', options);
	if (distFuncSpec.isEmpty()) {
		setDistanceFunction(new EuclideanDistance());
	} else {
		String[] specParts = Utils.splitOptions(distFuncSpec);
		if (specParts.length == 0) {
			throw new Exception("Invalid DistanceFunction specification string.");
		}
		// the first element is the class name; blank it so only options remain
		String className = specParts[0];
		specParts[0] = "";

		DistanceFunction distFunc = (DistanceFunction) Utils.forName(DistanceFunction.class, className,
				specParts);
		setDistanceFunction(distFunc);
	}

	// whether to output the cluster result
	m_SaveClusterResult = Utils.getFlag("s", options);

	// everything else is handled by the superclass
	super.setOptions(options);
}

Source File: ClusterEvaluationTest.java From AILibs with GNU Affero General Public License v3.0

4 votes

/**
 * Downloads the segment dataset, clusters one stratified split with a
 * SimpleKMeans wrapped in a FilteredClusterer (class attribute removed
 * beforehand), evaluates the clustering against the class labels and asserts
 * that the resulting accuracy is greater than zero.
 *
 * @throws Exception if loading the data, clustering or evaluation fails
 */
@Test
public void evaluateTest() throws Exception {
	logger.info("Starting cluster evaluation test...");

	/* load dataset and create a train-test-split */
	OpenmlConnector connector = new OpenmlConnector();
	DataSetDescription ds = connector.dataGet(DataSetUtils.SEGMENT_ID);
	File file = connector.datasetGet(ds);
	Instances data;
	// close the file handle once the instances have been read
	try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
		data = new Instances(reader);
	}
	data.setClassIndex(data.numAttributes() - 1);
	List<Instances> split = StratifyUtil.stratifiedSplit(data, 42, .25);

	Instances insts = split.get(0);

	long timeStart = System.currentTimeMillis();

	FilteredClusterer clusterer = new FilteredClusterer();

	/* remove the class attribute before clustering (Remove uses 1-based indices) */
	Remove filter = new Remove();
	filter.setAttributeIndices("" + (insts.classIndex() + 1));
	filter.setInputFormat(insts);
	Instances removedClassInstances = Filter.useFilter(insts, filter);

	((SimpleKMeans) clusterer.getClusterer()).setOptions(new String[] { "-num-slots", String.valueOf(Runtime.getRuntime().availableProcessors()), "-N", String.valueOf(insts.classAttribute().numValues()) });
	SimpleKMeans kMeans = (SimpleKMeans) clusterer.getClusterer();
	kMeans.setDistanceFunction(new EuclideanDistance());

	clusterer.buildClusterer(removedClassInstances);

	long timeStartEval = System.currentTimeMillis();

	ClusterEvaluation clusterEval = new ClusterEvaluation();
	clusterEval.setClusterer(clusterer);
	clusterEval.evaluateClusterer(insts);

	long timeEnd = System.currentTimeMillis();
	// clustering time ends where evaluation starts, so it no longer includes
	// the evaluation itself
	long timeTaken = timeStartEval - timeStart;
	long timeTakenEval = timeEnd - timeStartEval;

	logger.debug("ClusterEvaluator results: " + clusterEval.clusterResultsToString());

	double acc = EvaluationUtils.predictAccuracy(insts, clusterEval.getClassesToClusters(), clusterEval.getClusterAssignments());
	Assert.assertTrue(acc > 0);
	logger.info("Acc: " + acc);
	logger.debug("Clustering took " + (timeTaken / 1000) + " s.");
	logger.debug("Clustering eval took " + (timeTakenEval / 1000) + " s.");
}

Source File: ED1NN.java From tsml with GNU General Public License v3.0

4 votes

/**
 * Ad-hoc comparison of two Euclidean-distance nearest neighbour classifiers
 * on one dataset loaded from a hard-coded local directory: a kNN configured
 * with a non-normalising weka EuclideanDistance ("old") against ED1NN
 * ("new"). Both are trained on the train split; the test-set accuracy and the
 * elapsed classification time (nanoseconds) of each are printed, followed by
 * the new/old time ratio.
 *
 * @throws Exception if building a classifier or classifying an instance fails
 */
public static void runComparison() throws Exception {
    String tscProbDir = "C:/users/sjx07ngu/Dropbox/TSC Problems/";

    // alternative datasets: ItalyPowerDemand, GunPoint, Beef, Coffee
    String datasetName = "SonyAiboRobotSurface1";

    Instances train = DatasetLoading.loadDataNullable(tscProbDir + datasetName + "/" + datasetName + "_TRAIN");
    Instances test = DatasetLoading.loadDataNullable(tscProbDir + datasetName + "/" + datasetName + "_TEST");

    // old version
    kNN knn = new kNN(); // defaults to k = 1 without any normalisation
    EuclideanDistance oldED = new EuclideanDistance();
    oldED.setDontNormalize(true);
    knn.setDistanceFunction(oldED);
    knn.buildClassifier(train);

    // new version
    ED1NN edNew = new ED1NN();
    edNew.buildClassifier(train);

    // classification with weka's EuclideanDistance and kNN
    long start = System.nanoTime();
    int correctOld = 0;
    for (int i = 0; i < test.numInstances(); i++) {
        double pred = knn.classifyInstance(test.instance(i));
        if (pred == test.instance(i).classValue()) {
            correctOld++;
        }
    }
    long oldTime = System.nanoTime() - start;

    // classification with ED1NN
    start = System.nanoTime();
    int correctNew = 0;
    for (int i = 0; i < test.numInstances(); i++) {
        double pred = edNew.classifyInstance(test.instance(i));
        if (pred == test.instance(i).classValue()) {
            correctNew++;
        }
    }
    long newTime = System.nanoTime() - start;

    // header corrected: this compares Euclidean distance classifiers, not MSM
    System.out.println("Comparison of ED: " + datasetName);
    System.out.println("==========================================");
    System.out.println("Old acc:    " + ((double) correctOld / test.numInstances()));
    System.out.println("New acc:    " + ((double) correctNew / test.numInstances()));
    System.out.println("Old timing: " + oldTime);
    System.out.println("New timing: " + newTime);
    System.out.println("Relative Performance: " + ((double) newTime / oldTime));
}

Source File: CoverTree.java From tsml with GNU General Public License v3.0

3 votes

/**
 * Sets the distance function to use for nearest neighbour search.
 * Only EuclideanDistance instances are accepted; the instance is stored in
 * both m_EuclideanDistance and m_DistanceFunction.
 * 
 * @param df 		the distance function to use 
 * @throws Exception 	if not EuclideanDistance
 */
public void setDistanceFunction(DistanceFunction df) throws Exception {
  if (!(df instanceof EuclideanDistance)) {
    throw new Exception("CoverTree currently only works with "
 + "EuclideanDistanceFunction.");
  }
  EuclideanDistance euclidean = (EuclideanDistance) df;
  m_EuclideanDistance = euclidean;
  m_DistanceFunction = euclidean;
}

Source File: SimpleKMeansWithSilhouette.java From apogen with Apache License 2.0

3 votes

/**
 * Sets the distance function to use for instance comparison. Only
 * EuclideanDistance and ManhattanDistance instances are accepted.
 * 
 * @param df
 *            the new distance function to use
 * @throws Exception
 *             if df is neither a EuclideanDistance nor a ManhattanDistance
 */
public void setDistanceFunction(DistanceFunction df) throws Exception {
	boolean supported = (df instanceof EuclideanDistance) || (df instanceof ManhattanDistance);
	if (!supported) {
		throw new Exception("SimpleKMeans currently only supports the Euclidean and Manhattan distances.");
	}
	m_DistanceFunction = df;
}

Source File: KMedoids.java From apogen with Apache License 2.0

3 votes

/**
 * Sets the distance function to use for instance comparison. Anything other
 * than a EuclideanDistance or ManhattanDistance is rejected and leaves the
 * current distance function unchanged.
 * 
 * @param df
 *            the new distance function to use
 * @throws Exception
 *             if df is not EuclideanDistance or ManhattanDistance
 */
public void setDistanceFunction(DistanceFunction df) throws Exception {
	// guard clause: reject unsupported distance functions up front
	if (!(df instanceof EuclideanDistance) && !(df instanceof ManhattanDistance)) {
		throw new Exception("MyPAM only support the Euclidean or Manhattan distance.");
	}
	m_DistanceFunction = df;
}

Source File: KDTreeNodeSplitter.java From tsml with GNU General Public License v3.0

2 votes

/**
 * Stores the EuclideanDistance object that this splitter uses for
 * splitting nodes.
 *
 * @param func The EuclideanDistance object.
 */
public void setEuclideanDistanceFunction(EuclideanDistance func) {
  this.m_EuclideanDistance = func;
}

Source File: KDTreeNodeSplitter.java From tsml with GNU General Public License v3.0

2 votes

/**
 * Creates a new instance of KDTreeNodeSplitter and stores the three
 * supplied references as given (no copies are made).
 *
 * @param instList Reference of the master index array.
 * @param insts The set of training instances on which
 * the tree is built.
 * @param e The EuclideanDistance object that is used
 * in tree construction.
 */
public KDTreeNodeSplitter(int[] instList, Instances insts, EuclideanDistance e) {
  this.m_EuclideanDistance = e;
  this.m_Instances = insts;
  this.m_InstList = instList;
}

Source File: BallTreeConstructor.java From tsml with GNU General Public License v3.0

2 votes

/**
 * Stores the distance function to use to build the tree in
 * m_DistanceFunction.
 *
 * @param func The distance function.
 */
public void setEuclideanDistanceFunction(EuclideanDistance func) {
  this.m_DistanceFunction = func;
}

Source File: MedianOfWidestDimension.java From tsml with GNU General Public License v3.0

2 votes

/**
 * Constructor. Passes all three arguments straight to the superclass
 * constructor.
 *
 * @param instList The master index array.
 * @param insts The instances on which the tree
 * is (or is to be) built.
 * @param e The Euclidean distance function to
 * use for splitting.
 */
public MedianOfWidestDimension(int[] instList, Instances insts, EuclideanDistance e) {
  super(instList, insts, e);
}

Source File: BallSplitter.java From tsml with GNU General Public License v3.0

2 votes

/**
 * Stores the distance function used to (or to be used to) build the tree
 * in m_DistanceFunction.
 *
 * @param func The distance function.
 */
public void setEuclideanDistanceFunction(EuclideanDistance func) {
  this.m_DistanceFunction = func;
}

Source File: BallSplitter.java From tsml with GNU General Public License v3.0

2 votes

/**
 * Creates a new instance of BallSplitter and stores the three supplied
 * references as given (no copies are made).
 *
 * @param instList The master index array.
 * @param insts The instances on which the tree
 * is (or is to be) built.
 * @param e The Euclidean distance function to
 * use for splitting.
 */
public BallSplitter(int[] instList, Instances insts, EuclideanDistance e) {
  this.m_DistanceFunction = e;
  this.m_Instances = insts;
  this.m_Instlist = instList;
}

weka.core.EuclideanDistance Java Examples