org.apache.commons.math3.ml.distance.EuclideanDistance Java Examples

Source File: EuclideanDistanceEvaluator.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Computes the Euclidean distance between two numeric vectors.
 *
 * @param first the first vector; must be a non-null {@code List} of {@code Number} values
 * @param second the second vector; must be a non-null {@code List} of {@code Number} values
 * @return the distance produced by {@code EuclideanDistance.compute}, boxed as an {@code Object}
 * @throws IOException if either argument is {@code null} or is not a {@code List}
 */
@Override
@SuppressWarnings({"unchecked"})
public Object doWork(Object first, Object second) throws IOException{
  if(null == first){
    throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - null found for the first value",toExpression(constructingFactory)));
  }
  if(null == second){
    throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - null found for the second value",toExpression(constructingFactory)));
  }
  if(!(first instanceof List<?>)){
    throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - found type %s for the first value, expecting a list of numbers",toExpression(constructingFactory), first.getClass().getSimpleName()));
  }
  if(!(second instanceof List<?>)){
    // Report the type of the offending argument (second), not of the first one.
    throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - found type %s for the second value, expecting a list of numbers",toExpression(constructingFactory), second.getClass().getSimpleName()));
  }

  EuclideanDistance distance = new EuclideanDistance();
  // Cast to Number rather than BigDecimal: BigDecimal elements behave exactly as before,
  // and other numeric element types no longer fail with a ClassCastException.
  return distance.compute(
    ((List)first).stream().mapToDouble(value -> ((Number)value).doubleValue()).toArray(),
    ((List)second).stream().mapToDouble(value -> ((Number)value).doubleValue()).toArray()
  );
}

Source File: WeightVectorNeighborhood.java From jMetal with MIT License

6 votes

/**
 * Fills {@code neighborhood[i]} for every weight vector {@code i}: the Euclidean distance
 * from vector {@code i} to every weight vector is computed, the candidates are ordered by
 * {@code minFastSort}, and the first {@code neighborhoodSize} indices are copied into row
 * {@code i} of {@code neighborhood}.
 */
private void initializeNeighborhood() {
  final EuclideanDistance metric = new EuclideanDistance();
  // Scratch buffers, reused for every weight vector.
  final double[] distances = new double[numberOfWeightVectors];
  final int[] order = new int[numberOfWeightVectors];

  for (int current = 0; current < numberOfWeightVectors; current++) {
    // Distance from the current weight vector to each weight vector (itself included).
    for (int other = 0; other < numberOfWeightVectors; other++) {
      order[other] = other;
      distances[other] = metric.compute(weightVector[current], weightVector[other]);
    }

    // Bring the 'niche' nearest neighboring subproblems to the front of the index buffer.
    minFastSort(distances, order, numberOfWeightVectors, neighborhoodSize);

    System.arraycopy(order, 0, neighborhood[current], 0, neighborhoodSize);
  }
}

Source File: HierarchicalClustering.java From HMMRATAC with GNU General Public License v3.0

6 votes

/**
 * For each cluster, scans every other cluster and tracks the one whose key has the
 * smallest Euclidean distance to this cluster's key.
 * <p>
 * NOTE(review): the nearest index ({@code best}) and its distance ({@code min}) are
 * computed but never stored or acted upon, so the only visible effects are the calls
 * to {@code getKey()} and {@code compute(...)}. This looks unfinished - confirm
 * against the full source.
 */
private void iterate(){
	EuclideanDistance ed = new EuclideanDistance();
	for (int i = 0; i < clusters.size();i++){
		double min = Double.POSITIVE_INFINITY;
		int best = -1; // stays -1 when there is no other cluster closer than infinity
		for (int a = 0; a < clusters.size();a++){
			// Skip comparing a cluster with itself.
			if (i != a){
				double dis = ed.compute(clusters.get(i).getKey(), clusters.get(a).getKey());
				if (dis < min){
					min = dis;
					best = a;
				}
			}
		}
	}
}

Source File: KMeansPlusPlusClustererTest.java From astor with GNU General Public License v2.0

6 votes

/**
 * Requesting three clusters from a data set of only two points must be rejected
 * with a {@link NumberIsTooSmallException}. See issue MATH-436.
 */
@Test(expected=NumberIsTooSmallException.class)
public void testPerformClusterAnalysisToManyClusters() {
    // Only two observations are available...
    final DoublePoint[] observations = {
        new DoublePoint(new int[] { 1959, 325100 }),
        new DoublePoint(new int[] { 1960, 373200 })
    };

    // ...but the clusterer is asked for three clusters (one iteration at most).
    final KMeansPlusPlusClusterer<DoublePoint> clusterer =
        new KMeansPlusPlusClusterer<DoublePoint>(3, 1, new EuclideanDistance(), random);

    clusterer.cluster(Arrays.asList(observations));
}

Source File: KMeansPlusPlusClustererTest.java From astor with GNU General Public License v2.0

6 votes

/**
 * Requesting three clusters from a data set of only two points must be rejected
 * with a {@link NumberIsTooSmallException}. See issue MATH-436.
 */
@Test(expected=NumberIsTooSmallException.class)
public void testPerformClusterAnalysisToManyClusters() {
    // Only two observations are available...
    final DoublePoint[] observations = {
        new DoublePoint(new int[] { 1959, 325100 }),
        new DoublePoint(new int[] { 1960, 373200 })
    };

    // ...but the clusterer is asked for three clusters (one iteration at most).
    final KMeansPlusPlusClusterer<DoublePoint> clusterer =
        new KMeansPlusPlusClusterer<DoublePoint>(3, 1, new EuclideanDistance(), random);

    clusterer.cluster(Arrays.asList(observations));
}

Source File: KMeansPlusPlusClustererTest.java From astor with GNU General Public License v2.0

6 votes

/**
 * Requesting three clusters from a data set of only two points must be rejected
 * with a {@link NumberIsTooSmallException}. See issue MATH-436.
 */
@Test(expected=NumberIsTooSmallException.class)
public void testPerformClusterAnalysisToManyClusters() {
    // Only two observations are available...
    final DoublePoint[] observations = {
        new DoublePoint(new int[] { 1959, 325100 }),
        new DoublePoint(new int[] { 1960, 373200 })
    };

    // ...but the clusterer is asked for three clusters (one iteration at most).
    final KMeansPlusPlusClusterer<DoublePoint> clusterer =
        new KMeansPlusPlusClusterer<DoublePoint>(3, 1, new EuclideanDistance(), random);

    clusterer.cluster(Arrays.asList(observations));
}

Source File: KMeansPlusPlusClustererTest.java From astor with GNU General Public License v2.0

6 votes

/**
 * Requesting three clusters from a data set of only two points must be rejected
 * with a {@link NumberIsTooSmallException}. See issue MATH-436.
 */
@Test(expected=NumberIsTooSmallException.class)
public void testPerformClusterAnalysisToManyClusters() {
    // Only two observations are available...
    final DoublePoint[] observations = {
        new DoublePoint(new int[] { 1959, 325100 }),
        new DoublePoint(new int[] { 1960, 373200 })
    };

    // ...but the clusterer is asked for three clusters (one iteration at most).
    final KMeansPlusPlusClusterer<DoublePoint> clusterer =
        new KMeansPlusPlusClusterer<DoublePoint>(3, 1, new EuclideanDistance(), random);

    clusterer.cluster(Arrays.asList(observations));
}

Source File: DBScanModel.java From egads with GNU General Public License v3.0

5 votes

/**
 * Derives the DBSCAN parameters from the error series and builds the clusterer.
 * <p>
 * One point is built per time index, holding the error values returned by
 * {@code aes.initAnomalyErrors}. {@code eps} is set to {@code sDAutoSensitivity} times
 * the mean Euclidean distance over all ordered pairs of points (each point paired with
 * itself included), and {@code minPoints} to the ceiling of {@code amntAutoSensitivity}
 * times the number of observations.
 *
 * @param observedSeries the observed series; its size fixes the number of points
 * @param expectedSeries the expected series, passed to {@code aes.initAnomalyErrors}
 * @throws Exception declared by the signature; propagated from the calls made here
 */
@Override
public void tune(DataSequence observedSeries,
                 DataSequence expectedSeries) throws Exception {
    // Error time-series, keyed by error name.
    final HashMap<String, ArrayList<Float>> errorSeries = aes.initAnomalyErrors(observedSeries, expectedSeries);
    final List<IdentifiedDoublePoint> samples = new ArrayList<IdentifiedDoublePoint>();
    final EuclideanDistance metric = new EuclideanDistance();
    final int n = observedSeries.size();

    // One point per time index, one coordinate per error type.
    for (int t = 0; t < n; t++) {
        final double[] coordinates = new double[(aes.getIndexToError().keySet()).size()];

        for (int dim = 0; dim < (aes.getIndexToError().keySet()).size(); dim++) {
            coordinates[dim] = errorSeries.get(aes.getIndexToError().get(dim)).get(t);
        }
        samples.add(new IdentifiedDoublePoint(coordinates, t));
    }

    // Accumulate the distance over every ordered pair of points.
    double total = 0.0;
    double pairs = 0.0;
    for (IdentifiedDoublePoint from : samples) {
        for (IdentifiedDoublePoint to : samples) {
            total += metric.compute(from.getPoint(), to.getPoint());
            pairs++;
        }
    }

    final double meanDistance = total / pairs;
    eps = ((double) this.sDAutoSensitivity) * meanDistance;
    minPoints = ((int) Math.ceil(((double) this.amntAutoSensitivity) * ((double) n)));
    dbscan = new DBSCANClusterer<IdentifiedDoublePoint>(eps, minPoints);
}

Source File: FuzzyKmeansEvaluator.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Runs fuzzy k-means (Euclidean distance) over the rows of an observation matrix.
 *
 * @param value1 the observation matrix; must be a {@code Matrix}
 * @param value2 the number of clusters {@code k}; must be a {@code Number}
 * @return a {@code KmeansEvaluator.ClusterTuple} holding the run parameters, the clusters,
 *         the column labels of the input matrix and the labelled membership matrix
 * @throws IOException if {@code value1} is not a {@code Matrix} or {@code value2} is not a {@code Number}
 */
@Override
@SuppressWarnings({"unchecked"})
public Object doWork(Object value1, Object value2) throws IOException {

  Matrix matrix = null;
  int k = 0;

  if(value1 instanceof Matrix) {
    matrix = (Matrix)value1;
  } else {
    throw new IOException("The first parameter for fuzzyKmeans should be the observation matrix.");
  }

  if(value2 instanceof Number) {
    k = ((Number)value2).intValue();
  } else {
    throw new IOException("The second parameter for fuzzyKmeans should be k.");
  }

  @SuppressWarnings({"rawtypes"})
  FuzzyKMeansClusterer<KmeansEvaluator.ClusterPoint> kmeans = new FuzzyKMeansClusterer(k,
                                                                                       fuzziness,
                                                                                       maxIterations,
                                                                                       new EuclideanDistance());
  // Wrap every matrix row as a cluster point identified by its row label.
  List<KmeansEvaluator.ClusterPoint> points = new ArrayList<>();
  double[][] data = matrix.getData();

  List<String> ids = matrix.getRowLabels();

  for(int i=0; i<data.length; i++) {
    double[] vec = data[i];
    points.add(new KmeansEvaluator.ClusterPoint(ids.get(i), vec));
  }

  // Parameters of this run, reported back alongside the clusters.
  @SuppressWarnings({"rawtypes"})
  Map fields = new HashMap();

  fields.put("k", k);
  fields.put("fuzziness", fuzziness);
  fields.put("distance", "euclidean");
  fields.put("maxIterations", maxIterations);

  List<CentroidCluster<KmeansEvaluator.ClusterPoint>> clusters = kmeans.cluster(points);
  RealMatrix realMatrix = kmeans.getMembershipMatrix();
  double[][] mmData = realMatrix.getData();
  Matrix mmMatrix = new Matrix(mmData);
  // The membership matrix has one row per data point and one column per cluster.
  mmMatrix.setRowLabels(matrix.getRowLabels());
  List<String> clusterCols = new ArrayList<>();
  for(int i=0; i<clusters.size(); i++) {
    clusterCols.add("cluster"+ ZplotStream.pad(Integer.toString(i), clusters.size()));
  }
  // Cluster names label the columns; setting them as row labels would overwrite
  // the point labels assigned just above.
  mmMatrix.setColumnLabels(clusterCols);
  return new KmeansEvaluator.ClusterTuple(fields, clusters, matrix.getColumnLabels(),mmMatrix);
}

Source File: SumOfClusterVariancesTest.java From astor with GNU General Public License v2.0

4 votes

/** Creates the evaluator under test, backed by a Euclidean distance measure. */
@Before
public void setUp() {
    final EuclideanDistance measure = new EuclideanDistance();
    evaluator = new SumOfClusterVariances<DoublePoint>(measure);
}

Source File: SumOfClusterVariancesTest.java From astor with GNU General Public License v2.0

4 votes

/** Creates the evaluator under test, backed by a Euclidean distance measure. */
@Before
public void setUp() {
    final EuclideanDistance measure = new EuclideanDistance();
    evaluator = new SumOfClusterVariances<DoublePoint>(measure);
}

Source File: EuclideanEvaluator.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Supplies the Euclidean distance measure.
 *
 * @param tuple not read by this implementation
 * @return a newly created {@code EuclideanDistance} on every call
 * @throws IOException declared by the signature; nothing in this body throws it
 */
@Override
public Object evaluate(Tuple tuple) throws IOException {
  return new EuclideanDistance();
}

Source File: Stats.java From gama with GNU General Public License v3.0

4 votes

/**
 * Clusters the rows of {@code data} with k-means++ (Euclidean distance) and returns, for
 * each cluster, the list of indices of the rows assigned to it.
 *
 * @param scope the current scope; supplies the random seed and is used for value casting
 * @param data a list of lists, each inner list being one instance's coordinates
 * @param k the number of clusters
 * @param maxIt the maximum number of iterations passed to the clusterer
 * @return one list of instance indices per cluster
 * @throws GamaRuntimeException declared by the signature
 */
@operator (
		value = "kmeans",
		can_be_const = false,
		type = IType.LIST,
		category = { IOperatorCategory.STATISTICAL },
		concept = { IConcept.STATISTIC, IConcept.CLUSTERING })
@doc (
		value = "returns the list of clusters (list of instance indices) computed with the kmeans++ "
				+ "algorithm from the first operand data according to the number of clusters to split"
				+ " the data into (k) and the maximum number of iterations to run the algorithm for "
				+ "(If negative, no maximum will be used) (maxIt). Usage: kmeans(data,k,maxit)",
		special_cases = "if the lengths of two vectors in the right-hand aren't equal, returns 0",
		examples = { @example (
				value = "kmeans ([[2,4,5], [3,8,2], [1,1,3], [4,3,4]],2,10)",
				equals = "[[0,2,3],[1]]") })
public static IList<IList> KMeansPlusplusApache(final IScope scope, final IList data, final Integer k,
		final Integer maxIt) throws GamaRuntimeException {
	// Seed the generator from the scope's random source.
	final MersenneTwister generator = new MersenneTwister(scope.getRandom().getSeed().longValue());

	// Convert every input row into an Instance that remembers its row index.
	final List<DoublePoint> samples = new ArrayList<>();
	for (int row = 0; row < data.size(); row++) {
		final IList values = (IList) data.get(row);
		final double[] coordinates = new double[values.size()];
		for (int col = 0; col < values.size(); col++) {
			coordinates[col] = Cast.asFloat(scope, values.get(col));
		}
		samples.add(new Instance(row, coordinates));
	}

	final KMeansPlusPlusClusterer<DoublePoint> clusterer =
			new KMeansPlusPlusClusterer<>(k, maxIt, new EuclideanDistance(), generator);
	final List<CentroidCluster<DoublePoint>> found = clusterer.cluster(samples);

	// Map each cluster back to the indices of the rows it contains.
	try (final Collector.AsList results = Collector.getList()) {
		for (final Cluster<DoublePoint> cluster : found) {
			final IList memberIds = GamaListFactory.create();
			for (final DoublePoint member : cluster.getPoints()) {
				memberIds.addValue(scope, ((Instance) member).getId());
			}
			results.add(memberIds);
		}
		return results.items();
	}
}

Source File: FuzzyKMeansClusterer.java From astor with GNU General Public License v2.0

2 votes

/**
 * Creates a new instance of a FuzzyKMeansClusterer.
 * <p>
 * The euclidean distance will be used as default distance measure.
 * <p>
 * Delegates to the four-argument constructor, passing {@code -1} as its third argument
 * and a new {@link EuclideanDistance} as its fourth.
 * NOTE(review): {@code -1} presumably means "no iteration limit" - confirm against
 * the four-argument constructor.
 *
 * @param k the number of clusters to split the data into
 * @param fuzziness the fuzziness factor, must be &gt; 1.0
 * @throws NumberIsTooSmallException if {@code fuzziness <= 1.0}
 */
public FuzzyKMeansClusterer(final int k, final double fuzziness) throws NumberIsTooSmallException {
    this(k, fuzziness, -1, new EuclideanDistance());
}

Source File: DBSCANClusterer.java From egads with GNU General Public License v3.0

2 votes

/**
 * Creates a new instance of a DBSCANClusterer.
 * <p>
 * The euclidean distance will be used as default distance measure.
 * <p>
 * Delegates to the three-argument constructor with a new {@link EuclideanDistance}.
 *
 * @param eps maximum radius of the neighborhood to be considered
 * @param minPts minimum number of points needed for a cluster
 * @throws NotPositiveException if {@code eps < 0.0} or {@code minPts < 0}
 */
public DBSCANClusterer(final double eps, final int minPts)
    throws NotPositiveException {
    this(eps, minPts, new EuclideanDistance());
}

Source File: FuzzyKMeansClusterer.java From astor with GNU General Public License v2.0

2 votes

/**
 * Creates a new instance of a FuzzyKMeansClusterer.
 * <p>
 * The euclidean distance will be used as default distance measure.
 * <p>
 * Delegates to the four-argument constructor, passing {@code -1} as its third argument
 * and a new {@link EuclideanDistance} as its fourth.
 * NOTE(review): {@code -1} presumably means "no iteration limit" - confirm against
 * the four-argument constructor.
 *
 * @param k the number of clusters to split the data into
 * @param fuzziness the fuzziness factor, must be &gt; 1.0
 * @throws NumberIsTooSmallException if {@code fuzziness <= 1.0}
 */
public FuzzyKMeansClusterer(final int k, final double fuzziness) throws NumberIsTooSmallException {
    this(k, fuzziness, -1, new EuclideanDistance());
}

Source File: KMeansPlusPlusClusterer.java From astor with GNU General Public License v2.0

2 votes

/** Build a clusterer.
 * <p>
 * The default strategy for handling empty clusters that may appear during
 * algorithm iterations is to split the cluster with largest distance variance.
 * <p>
 * The euclidean distance will be used as default distance measure.
 * <p>
 * Delegates to the three-argument constructor with a new {@link EuclideanDistance}.
 *
 * @param k the number of clusters to split the data into
 * @param maxIterations the maximum number of iterations to run the algorithm for.
 *   If negative, no maximum will be used.
 */
public KMeansPlusPlusClusterer(final int k, final int maxIterations) {
    this(k, maxIterations, new EuclideanDistance());
}

Source File: DBSCANClusterer.java From astor with GNU General Public License v2.0

2 votes

/**
 * Creates a new instance of a DBSCANClusterer.
 * <p>
 * The euclidean distance will be used as default distance measure.
 * <p>
 * Delegates to the three-argument constructor with a new {@link EuclideanDistance}.
 *
 * @param eps maximum radius of the neighborhood to be considered
 * @param minPts minimum number of points needed for a cluster
 * @throws NotPositiveException if {@code eps < 0.0} or {@code minPts < 0}
 */
public DBSCANClusterer(final double eps, final int minPts)
    throws NotPositiveException {
    this(eps, minPts, new EuclideanDistance());
}

Source File: ClusterEvaluator.java From astor with GNU General Public License v2.0

2 votes

/**
 * Creates a new cluster evaluator with an {@link EuclideanDistance}
 * as distance measure.
 * <p>
 * Delegates to the single-argument constructor with a new {@link EuclideanDistance}.
 */
public ClusterEvaluator() {
    this(new EuclideanDistance());
}

Source File: ClusterEvaluator.java From astor with GNU General Public License v2.0

2 votes

/**
 * Creates a new cluster evaluator with an {@link EuclideanDistance}
 * as distance measure.
 * <p>
 * Delegates to the single-argument constructor with a new {@link EuclideanDistance}.
 */
public ClusterEvaluator() {
    this(new EuclideanDistance());
}

Source File: FuzzyKMeansClusterer.java From astor with GNU General Public License v2.0

2 votes

/**
 * Creates a new instance of a FuzzyKMeansClusterer.
 * <p>
 * The euclidean distance will be used as default distance measure.
 * <p>
 * Delegates to the four-argument constructor, passing {@code -1} as its third argument
 * and a new {@link EuclideanDistance} as its fourth.
 * NOTE(review): {@code -1} presumably means "no iteration limit" - confirm against
 * the four-argument constructor.
 *
 * @param k the number of clusters to split the data into
 * @param fuzziness the fuzziness factor, must be &gt; 1.0
 * @throws NumberIsTooSmallException if {@code fuzziness <= 1.0}
 */
public FuzzyKMeansClusterer(final int k, final double fuzziness) throws NumberIsTooSmallException {
    this(k, fuzziness, -1, new EuclideanDistance());
}

Source File: KMeansPlusPlusClusterer.java From astor with GNU General Public License v2.0

2 votes

/** Build a clusterer.
 * <p>
 * The default strategy for handling empty clusters that may appear during
 * algorithm iterations is to split the cluster with largest distance variance.
 * <p>
 * The euclidean distance will be used as default distance measure.
 * <p>
 * Delegates to the three-argument constructor with a new {@link EuclideanDistance}.
 *
 * @param k the number of clusters to split the data into
 * @param maxIterations the maximum number of iterations to run the algorithm for.
 *   If negative, no maximum will be used.
 */
public KMeansPlusPlusClusterer(final int k, final int maxIterations) {
    this(k, maxIterations, new EuclideanDistance());
}

Source File: DBSCANClusterer.java From astor with GNU General Public License v2.0

2 votes

/**
 * Creates a new instance of a DBSCANClusterer.
 * <p>
 * The euclidean distance will be used as default distance measure.
 * <p>
 * Delegates to the three-argument constructor with a new {@link EuclideanDistance}.
 *
 * @param eps maximum radius of the neighborhood to be considered
 * @param minPts minimum number of points needed for a cluster
 * @throws NotPositiveException if {@code eps < 0.0} or {@code minPts < 0}
 */
public DBSCANClusterer(final double eps, final int minPts)
    throws NotPositiveException {
    this(eps, minPts, new EuclideanDistance());
}

Source File: DBSCANClusterer.java From astor with GNU General Public License v2.0

2 votes

/**
 * Creates a new instance of a DBSCANClusterer.
 * <p>
 * The euclidean distance will be used as default distance measure.
 * <p>
 * Delegates to the three-argument constructor with a new {@link EuclideanDistance}.
 *
 * @param eps maximum radius of the neighborhood to be considered
 * @param minPts minimum number of points needed for a cluster
 * @throws NotPositiveException if {@code eps < 0.0} or {@code minPts < 0}
 */
public DBSCANClusterer(final double eps, final int minPts)
    throws NotPositiveException {
    this(eps, minPts, new EuclideanDistance());
}

Source File: FuzzyKMeansClusterer.java From astor with GNU General Public License v2.0

2 votes

/**
 * Creates a new instance of a FuzzyKMeansClusterer.
 * <p>
 * The euclidean distance will be used as default distance measure.
 * <p>
 * Delegates to the four-argument constructor, passing {@code -1} as its third argument
 * and a new {@link EuclideanDistance} as its fourth.
 * NOTE(review): {@code -1} presumably means "no iteration limit" - confirm against
 * the four-argument constructor.
 *
 * @param k the number of clusters to split the data into
 * @param fuzziness the fuzziness factor, must be &gt; 1.0
 * @throws NumberIsTooSmallException if {@code fuzziness <= 1.0}
 */
public FuzzyKMeansClusterer(final int k, final double fuzziness) throws NumberIsTooSmallException {
    this(k, fuzziness, -1, new EuclideanDistance());
}

Source File: KMeansPlusPlusClusterer.java From astor with GNU General Public License v2.0

2 votes

/** Build a clusterer.
 * <p>
 * The default strategy for handling empty clusters that may appear during
 * algorithm iterations is to split the cluster with largest distance variance.
 * <p>
 * The euclidean distance will be used as default distance measure.
 * <p>
 * Delegates to the three-argument constructor with a new {@link EuclideanDistance}.
 *
 * @param k the number of clusters to split the data into
 * @param maxIterations the maximum number of iterations to run the algorithm for.
 *   If negative, no maximum will be used.
 */
public KMeansPlusPlusClusterer(final int k, final int maxIterations) {
    this(k, maxIterations, new EuclideanDistance());
}

Source File: DBSCANClusterer.java From astor with GNU General Public License v2.0

2 votes

/**
 * Creates a new instance of a DBSCANClusterer.
 * <p>
 * The euclidean distance will be used as default distance measure.
 * <p>
 * Delegates to the three-argument constructor with a new {@link EuclideanDistance}.
 *
 * @param eps maximum radius of the neighborhood to be considered
 * @param minPts minimum number of points needed for a cluster
 * @throws NotPositiveException if {@code eps < 0.0} or {@code minPts < 0}
 */
public DBSCANClusterer(final double eps, final int minPts)
    throws NotPositiveException {
    this(eps, minPts, new EuclideanDistance());
}

Source File: KMeansPlusPlusClusterer.java From astor with GNU General Public License v2.0

2 votes

/** Build a clusterer.
 * <p>
 * The default strategy for handling empty clusters that may appear during
 * algorithm iterations is to split the cluster with largest distance variance.
 * <p>
 * The euclidean distance will be used as default distance measure.
 * <p>
 * Delegates to the three-argument constructor with a new {@link EuclideanDistance}.
 *
 * @param k the number of clusters to split the data into
 * @param maxIterations the maximum number of iterations to run the algorithm for.
 *   If negative, no maximum will be used.
 */
public KMeansPlusPlusClusterer(final int k, final int maxIterations) {
    this(k, maxIterations, new EuclideanDistance());
}

org.apache.commons.math3.ml.distance.EuclideanDistance Java Examples