Python scipy.spatial.distance.mahalanobis() Examples
The following are 14 code examples of scipy.spatial.distance.mahalanobis(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.spatial.distance, or try the search function.
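For orientation before the examples: scipy.spatial.distance.mahalanobis(u, v, VI) takes two 1-D vectors and the inverse VI of the covariance matrix, and returns sqrt((u-v)^T VI (u-v)). A minimal sketch with made-up data:

import numpy as np
from scipy.spatial.distance import mahalanobis

u = np.array([2.0, 0.0, 0.0])
v = np.array([0.0, 1.0, 0.0])
VI = np.linalg.inv(np.diag([1.0, 4.0, 9.0]))  # inverse covariance matrix
d = mahalanobis(u, v, VI)  # sqrt((u-v)^T VI (u-v)), here ~2.06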
Example #1
Source File: diagnostics.py From pliers with BSD 3-Clause "New" or "Revised" License (6 votes)

def mahalanobis_distances(df, axis=0):
    '''
    Returns a pandas Series with Mahalanobis distances for each sample on the
    axis.

    Note: does not work well when # of observations < # of dimensions.
    Will either return NaN in the answer or (in the extreme case) fail
    with a Singular Matrix LinAlgError.

    Args:
        df: pandas DataFrame with columns to run diagnostics on
        axis: 0 to find outlier rows, 1 to find outlier columns
    '''
    df = df.transpose() if axis == 1 else df
    means = df.mean()
    try:
        inv_cov = np.linalg.inv(df.cov())
    except LinAlgError:
        return pd.Series([np.nan] * len(df.index), df.index,
                         name='Mahalanobis')
    dists = []
    for i, sample in df.iterrows():
        dists.append(mahalanobis(sample, means, inv_cov))

    return pd.Series(dists, df.index, name='Mahalanobis')
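A hypothetical usage sketch for the helper above (the DataFrame, seed, and planted outlier are made up; the imports match what the source file relies on):

import numpy as np
import pandas as pd
from numpy.linalg import LinAlgError
from scipy.spatial.distance import mahalanobis

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.normal(size=(50, 3)), columns=['a', 'b', 'c'])
df.iloc[0] = [10.0, 10.0, 10.0]   # plant an obvious outlier

scores = mahalanobis_distances(df, axis=0)
print(scores.nlargest(3))         # the planted row should rank first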
Example #2
Source File: test_kf.py From filterpy with MIT License (6 votes)

def test_steadystate():
    dim = 7
    cv = kinematic_kf(dim=dim, order=5)
    print(cv)
    cv.x[1] = 1.0
    for i in range(100):
        cv.predict()
        cv.update([i])

    for i in range(100):
        cv.predict_steadystate()
        cv.update_steadystate([i])

    # test mahalanobis
    a = np.zeros(cv.y.shape)
    maha = scipy_mahalanobis(a, cv.y, cv.SI)
    assert cv.mahalanobis == approx(maha)
Example #3
Source File: sample_distortion_metric.py From AIF360 with Apache License 2.0 (6 votes)

def mahalanobis_distance(self, privileged=None, returned=False):
    """Compute the average Mahalanobis distance between the samples from
    the two datasets.
    """
    condition = self._to_condition(privileged)
    X_orig = self.dataset.features
    X_distort = self.distorted_dataset.features
    dist_fun = partial(scdist.mahalanobis,
        VI=np.linalg.inv(np.cov(np.vstack([X_orig, X_distort]).T)).T)
    distance, mask = utils.compute_distance(X_orig, X_distort,
        self.dataset.protected_attributes,
        self.dataset.protected_attribute_names,
        dist_fun=dist_fun, condition=condition)
    if returned:
        return distance, self.dataset.instance_weights[mask]
    return distance
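A side note on the dist_fun construction above: functools.partial freezes the shared inverse covariance VI so the same metric is applied to every sample pair, and the trailing .T is a no-op because the inverse of a symmetric covariance matrix is itself symmetric. A self-contained sketch of the same pattern with made-up data:

import numpy as np
from functools import partial
from scipy.spatial import distance as scdist

X_orig = np.random.randn(20, 3)
X_distort = X_orig + 0.1 * np.random.randn(20, 3)

# one inverse covariance estimated over both datasets, frozen into the metric
VI = np.linalg.inv(np.cov(np.vstack([X_orig, X_distort]).T))
dist_fun = partial(scdist.mahalanobis, VI=VI)

d = dist_fun(X_orig[0], X_distort[0])   # distance for one sample pair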
Example #4
Source File: test_measures.py From Stone-Soup with MIT License (5 votes)

def test_mahalanobis():
    measure = measures.Mahalanobis()
    assert measure(state_u, state_v) == \
        distance.mahalanobis(u, v, np.linalg.inv(ui))
Example #5
Source File: test_measures.py From Stone-Soup with MIT License (5 votes)

def test_mahalanobis_full_mapping():
    mapping = np.arange(len(u))
    measure = measures.Mahalanobis(mapping=mapping)
    assert measure(state_u, state_v) == \
        distance.mahalanobis(u, v, np.linalg.inv(ui))
Example #6
Source File: test_measures.py From Stone-Soup with MIT License (5 votes)

def test_mahalanobis_partial_mapping():
    mapping = np.array([0, 1])
    measure = measures.Mahalanobis(mapping=mapping)
    reduced_ui = CovarianceMatrix(np.diag([100, 10]))
    assert measure(state_u, state_v) == \
        distance.mahalanobis([[10], [1]], [[11], [10]],
                             np.linalg.inv(reduced_ui))

    mapping = np.array([0, 3])
    reduced_ui = CovarianceMatrix(np.diag([100, 10]))
    measure = measures.Mahalanobis(mapping=mapping)
    assert measure(state_u, state_v) == \
        distance.mahalanobis([[10], [1]], [[11], [2]],
                             np.linalg.inv(reduced_ui))
Example #7
Source File: measures.py From Stone-Soup with MIT License (5 votes)

def __call__(self, state1, state2):
    r"""Calculate the Mahalanobis distance between a pair of state objects

    Parameters
    ----------
    state1 : :class:`~.State`
    state2 : :class:`~.State`

    Returns
    -------
    float
        Mahalanobis distance between a pair of input :class:`~.State`
        objects
    """
    if self.mapping is not None:
        u = state1.state_vector[self.mapping]
        v = state2.state_vector[self.mapping]
        # extract the mapped covariance data
        rows = np.array(self.mapping, dtype=np.intp)
        columns = np.array(self.mapping, dtype=np.intp)
        cov = state1.covar[rows[:, np.newaxis], columns]
    else:
        u = state1.state_vector
        v = state2.state_vector
        cov = state1.covar

    vi = np.linalg.inv(cov)

    return distance.mahalanobis(u, v, vi)
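The mapped-covariance extraction above uses broadcasted integer indexing to pull out the square submatrix selected by the mapping. A small, self-contained illustration (the matrix here is made up); np.ix_ is an equivalent spelling of the same index:

import numpy as np

covar = np.diag([100.0, 10.0, 1.0, 0.1])
mapping = np.array([0, 3], dtype=np.intp)

# broadcast indexing, as in the source above
sub1 = covar[mapping[:, np.newaxis], mapping]
# equivalent form using np.ix_
sub2 = covar[np.ix_(mapping, mapping)]
assert np.array_equal(sub1, sub2)   # both are the 2x2 [[100, 0], [0, 0.1]]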
Example #8
Source File: SOMClustering.py From susi with BSD 3-Clause "New" or "Revised" License (4 votes)

def get_node_distance_matrix(self, datapoint, som_array):
    """Get distance of datapoint and node using the configured metric.

    Parameters
    ----------
    datapoint : np.array, shape=(X.shape[1])
        Datapoint = one row of the dataset `X`
    som_array : np.array
        Weight vectors of the SOM,
        shape = (self.n_rows, self.n_columns, X.shape[1])

    Returns
    -------
    distmat : np.array of float
        Distance between datapoint and each SOM node
    """
    # algorithms on the full matrix
    if self.distance_metric == "euclidean":
        return np.linalg.norm(som_array - datapoint, axis=2)

    # node-by-node algorithms
    distmat = np.zeros((self.n_rows, self.n_columns))
    if self.distance_metric == "manhattan":
        for node in self.node_list_:
            distmat[node] = dist.cityblock(
                som_array[node[0], node[1]], datapoint)

    elif self.distance_metric == "mahalanobis":
        for node in self.node_list_:
            som_node = som_array[node[0], node[1]]
            cov = np.cov(np.stack((datapoint, som_node), axis=0),
                         rowvar=False)
            cov_pinv = np.linalg.pinv(cov)  # pseudo-inverse
            distmat[node] = dist.mahalanobis(
                datapoint, som_node, cov_pinv)

    elif self.distance_metric == "tanimoto":
        # Note that this is a binary distance measure.
        # Therefore, the vectors have to be converted.
        # Source: Melssen 2006, Supervised Kohonen networks for
        #         classification problems
        # VERY SLOW ALGORITHM!!!
        threshold = 0.5
        for node in self.node_list_:
            som_node = som_array[node[0], node[1]]
            distmat[node] = dist.rogerstanimoto(
                binarize(datapoint.reshape(1, -1), threshold, copy=True),
                binarize(som_node.reshape(1, -1), threshold, copy=True))

    return distmat
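One detail worth pausing on in the "mahalanobis" branch: the covariance of only two stacked vectors has rank 1, so it is singular and np.linalg.inv would be unreliable, which is why the code falls back to the Moore-Penrose pseudo-inverse. A short sketch with made-up vectors:

import numpy as np
from scipy.spatial import distance as dist

datapoint = np.array([1.0, 2.0, 3.0])
som_node = np.array([1.5, 2.5, 2.0])

cov = np.cov(np.stack((datapoint, som_node), axis=0), rowvar=False)
print(np.linalg.matrix_rank(cov))   # 1 -> singular for 3 variables

cov_pinv = np.linalg.pinv(cov)      # pseudo-inverse always exists
d = dist.mahalanobis(datapoint, som_node, cov_pinv)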
Example #9
Source File: test_stats.py From filterpy with MIT License (4 votes)

def test_mahalanobis():
    global a, b, S

    # int test
    a, b, S = 3, 1, 2
    assert abs(mahalanobis(a, b, S) - scipy_mahalanobis(a, b, 1/S)) < 1.e-12

    # int list
    assert abs(mahalanobis([a], [b], [S]) - scipy_mahalanobis(a, b, 1/S)) < 1.e-12
    assert abs(mahalanobis([a], b, S) - scipy_mahalanobis(a, b, 1/S)) < 1.e-12

    # float
    a, b, S = 3.123, 3.235235, .01234
    assert abs(mahalanobis(a, b, S) - scipy_mahalanobis(a, b, 1/S)) < 1.e-12
    assert abs(mahalanobis([a], [b], [S]) - scipy_mahalanobis(a, b, 1/S)) < 1.e-12
    assert abs(mahalanobis([a], b, S) - scipy_mahalanobis(a, b, 1/S)) < 1.e-12

    # float array
    assert abs(mahalanobis(np.array([a]), b, S) - scipy_mahalanobis(a, b, 1/S)) < 1.e-12

    # 1d array
    a = np.array([1., 2.])
    b = np.array([1.4, 1.2])
    S = np.array([[1., 2.], [2., 4.001]])
    assert abs(mahalanobis(a, b, S) - scipy_mahalanobis(a, b, inv(S))) < 1.e-12

    # 2d array
    a = np.array([[1., 2.]])
    b = np.array([[1.4, 1.2]])
    S = np.array([[1., 2.], [2., 4.001]])
    assert abs(mahalanobis(a, b, S) - scipy_mahalanobis(a, b, inv(S))) < 1.e-12
    assert abs(mahalanobis(a.T, b, S) - scipy_mahalanobis(a, b, inv(S))) < 1.e-12
    assert abs(mahalanobis(a, b.T, S) - scipy_mahalanobis(a, b, inv(S))) < 1.e-12
    assert abs(mahalanobis(a.T, b.T, S) - scipy_mahalanobis(a, b, inv(S))) < 1.e-12

    try:
        # mismatched shapes should raise ValueError
        mahalanobis([1], b, S)
        assert False, "didn't catch vectors of different lengths"
    except ValueError:
        pass
    except AssertionError:
        raise
    except:
        assert False, "raised exception other than ValueError"

    # okay, now check for numerical accuracy
    for _ in range(ITERS):
        N = np.random.randint(1, 20)
        a = np.random.randn(N)
        b = np.random.randn(N)
        S = np.random.randn(N, N)
        S = np.dot(S, S.T)  # ensure positive semi-definite
        assert abs(mahalanobis(a, b, S) - scipy_mahalanobis(a, b, inv(S))) < 1.e-12
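The convention difference this test exercises is easy to miss: filterpy's mahalanobis() takes the covariance S itself, while SciPy's takes its inverse VI, so the two agree when VI = inv(S). A condensed, self-contained version of the 1-D array case above:

import numpy as np
from numpy.linalg import inv
from filterpy.stats import mahalanobis
from scipy.spatial.distance import mahalanobis as scipy_mahalanobis

a = np.array([1.0, 2.0])
b = np.array([1.4, 1.2])
S = np.array([[1.0, 2.0],
              [2.0, 4.001]])
# filterpy gets S, SciPy gets inv(S): same distance either way
assert abs(mahalanobis(a, b, S) - scipy_mahalanobis(a, b, inv(S))) < 1e-12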
Example #10
Source File: test_kf.py From filterpy with MIT License (4 votes)

def test_noisy_1d():
    f = KalmanFilter(dim_x=2, dim_z=1)

    f.x = np.array([[2.],
                    [0.]])       # initial state (location and velocity)

    f.F = np.array([[1., 1.],
                    [0., 1.]])   # state transition matrix

    f.H = np.array([[1., 0.]])   # Measurement function
    f.P *= 1000.                 # covariance matrix
    f.R = 5                      # state uncertainty
    f.Q = 0.0001                 # process uncertainty

    measurements = []
    results = []

    zs = []
    for t in range(100):
        # create measurement = t plus white noise
        z = t + random.randn() * 20
        zs.append(z)

        # perform kalman filtering
        f.update(z)
        f.predict()

        # save data
        results.append(f.x[0, 0])
        measurements.append(z)

    # test mahalanobis
    a = np.zeros(f.y.shape)
    maha = scipy_mahalanobis(a, f.y, f.SI)
    assert f.mahalanobis == approx(maha)

    # now do a batch run with the stored z values so we can test that
    # it is working the same as the recursive implementation.
    # give slightly different P so result is slightly different
    f.x = np.array([[2., 0]]).T
    f.P = np.eye(2) * 100.
    s = Saver(f)
    m, c, _, _ = f.batch_filter(zs, update_first=False, saver=s)
    s.to_array()
    assert len(s.x) == len(zs)
    assert len(s.x) == len(s)

    # plot data
    if DO_PLOT:
        p1, = plt.plot(measurements, 'r', alpha=0.5)
        p2, = plt.plot(results, 'b')
        p4, = plt.plot(m[:, 0], 'm')
        p3, = plt.plot([0, 100], [0, 100], 'g')  # perfect result
        plt.legend([p1, p2, p3, p4],
                   ["noisy measurement", "KF output", "ideal", "batch"],
                   loc=4)
        plt.show()
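For context on the Mahalanobis assertion above: after an update, filterpy exposes the innovation (residual) as f.y and the inverse innovation covariance as f.SI, and f.mahalanobis is the residual's distance from zero under that covariance. A sketch of the identity being checked, with made-up numbers for a 1-D measurement:

import numpy as np
from scipy.spatial.distance import mahalanobis as scipy_mahalanobis

y = np.array([0.7])             # innovation (residual), 1-D for simplicity
SI = np.array([[0.04]])         # inverse innovation covariance
maha = scipy_mahalanobis(np.zeros_like(y), y, SI)
assert np.isclose(maha, np.sqrt(y @ SI @ y))   # sqrt(y^T SI y)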
Example #11
Source File: test_kf.py From filterpy with MIT License (4 votes)

def test_noisy_11d():
    f = KalmanFilter(dim_x=2, dim_z=1)

    f.x = np.array([2., 0])      # initial state (location and velocity)

    f.F = np.array([[1., 1.],
                    [0., 1.]])   # state transition matrix

    f.H = np.array([[1., 0.]])   # Measurement function
    f.P *= 1000.                 # covariance matrix
    f.R = 5                      # state uncertainty
    f.Q = 0.0001                 # process uncertainty

    measurements = []
    results = []

    zs = []
    for t in range(100):
        # create measurement = t plus white noise
        z = t + random.randn() * 20
        zs.append(z)

        # perform kalman filtering
        f.update(z)
        f.predict()

        # save data
        results.append(f.x[0])
        measurements.append(z)

    # test mahalanobis
    a = np.zeros(f.y.shape)
    maha = scipy_mahalanobis(a, f.y, f.SI)
    assert f.mahalanobis == approx(maha)

    # now do a batch run with the stored z values so we can test that
    # it is working the same as the recursive implementation.
    # give slightly different P so result is slightly different
    f.x = np.array([[2., 0]]).T
    f.P = np.eye(2) * 100.
    m, c, _, _ = f.batch_filter(zs, update_first=False)

    # plot data
    if DO_PLOT:
        p1, = plt.plot(measurements, 'r', alpha=0.5)
        p2, = plt.plot(results, 'b')
        p4, = plt.plot(m[:, 0], 'm')
        p3, = plt.plot([0, 100], [0, 100], 'g')  # perfect result
        plt.legend([p1, p2, p3, p4],
                   ["noisy measurement", "KF output", "ideal", "batch"],
                   loc=4)
        plt.show()
Example #12
Source File: test_ukf.py From filterpy with MIT License (4 votes)

def test_radar():
    def fx(x, dt):
        A = np.eye(3) + dt * np.array([[0, 1, 0],
                                       [0, 0, 0],
                                       [0, 0, 0]])
        return A.dot(x)

    def hx(x):
        return [np.sqrt(x[0]**2 + x[2]**2)]

    dt = 0.05

    sp = JulierSigmaPoints(n=3, kappa=0.)
    kf = UnscentedKalmanFilter(3, 1, dt, fx=fx, hx=hx, points=sp)
    assert np.allclose(kf.x, kf.x_prior)
    assert np.allclose(kf.P, kf.P_prior)

    # test __repr__ doesn't crash
    str(kf)

    kf.Q *= 0.01
    kf.R = 10
    kf.x = np.array([0., 90., 1100.])
    kf.P *= 100.

    radar = RadarSim(dt)

    t = np.arange(0, 20 + dt, dt)
    n = len(t)
    xs = np.zeros((n, 3))

    random.seed(200)
    rs = []
    for i in range(len(t)):
        r = radar.get_range()
        kf.predict()
        kf.update(z=[r])

        xs[i, :] = kf.x
        rs.append(r)

    # test mahalanobis
    a = np.zeros(kf.y.shape)
    maha = scipy_mahalanobis(a, kf.y, kf.SI)
    assert kf.mahalanobis == approx(maha)

    if DO_PLOT:
        print(xs[:, 0].shape)
        plt.figure()
        plt.subplot(311)
        plt.plot(t, xs[:, 0])
        plt.subplot(312)
        plt.plot(t, xs[:, 1])
        plt.subplot(313)
        plt.plot(t, xs[:, 2])
Example #13
Source File: test_ckf.py From filterpy with MIT License (4 votes)

def test_1d():
    def fx(x, dt):
        F = np.array([[1., dt],
                      [0, 1]])
        return np.dot(F, x)

    def hx(x):
        return x[0:1]

    ckf = CKF(dim_x=2, dim_z=1, dt=0.1, hx=hx, fx=fx)

    ckf.x = np.array([[1.], [2.]])
    ckf.P = np.array([[1, 1.1],
                      [1.1, 3]])

    ckf.R = np.eye(1) * .05
    ckf.Q = np.array([[0., 0], [0., .001]])

    dt = 0.1
    points = MerweScaledSigmaPoints(2, .1, 2., -1)
    kf = UKF(dim_x=2, dim_z=1, dt=dt, fx=fx, hx=hx, points=points)

    kf.x = np.array([1, 2])
    kf.P = np.array([[1, 1.1],
                     [1.1, 3]])
    kf.R *= 0.05
    kf.Q = np.array([[0., 0], [0., .001]])

    s = Saver(kf)
    for i in range(50):
        z = np.array([[i + randn() * 0.1]])
        ckf.predict()
        ckf.update(z)
        kf.predict()
        kf.update(z[0])
        assert abs(ckf.x[0] - kf.x[0]) < 1e-10
        assert abs(ckf.x[1] - kf.x[1]) < 1e-10
        s.save()

        # test mahalanobis
        a = np.zeros(kf.y.shape)
        maha = scipy_mahalanobis(a, kf.y, kf.SI)
        assert kf.mahalanobis == approx(maha)

    s.to_array()
Example #14
Source File: test_fm.py From filterpy with MIT License (4 votes)

def test_noisy_1d():
    f = FadingKalmanFilter(3., dim_x=2, dim_z=1)

    f.x = np.array([[2.],
                    [0.]])       # initial state (location and velocity)

    f.F = np.array([[1., 1.],
                    [0., 1.]])   # state transition matrix

    f.H = np.array([[1., 0.]])   # Measurement function
    f.P *= 1000.                 # covariance matrix
    f.R = 5.**2                  # state uncertainty
    f.Q = np.array([[0, 0],
                    [0, 0.0001]])  # process uncertainty

    measurements = []
    results = []

    zs = []
    for t in range(100):
        # create measurement = t plus white noise
        z = t + random.randn() * np.sqrt(f.R)
        zs.append(z)

        # perform kalman filtering
        f.update(z)
        f.predict()

        # save data
        results.append(f.x[0, 0])
        measurements.append(z)

        # test mahalanobis
        a = np.zeros(f.y.shape)
        maha = scipy_mahalanobis(a, f.y, f.SI)
        assert f.mahalanobis == approx(maha)
        print(z, maha, f.y, f.S)
        assert maha < 4

    # now do a batch run with the stored z values so we can test that
    # it is working the same as the recursive implementation.
    # give slightly different P so result is slightly different
    f.x = np.array([[2., 0]]).T
    f.P = np.eye(2) * 100.
    m, c, _, _ = f.batch_filter(zs, update_first=False)

    # plot data
    if DO_PLOT:
        p1, = plt.plot(measurements, 'r', alpha=0.5)
        p2, = plt.plot(results, 'b')
        p4, = plt.plot(m[:, 0], 'm')
        p3, = plt.plot([0, 100], [0, 100], 'g')  # perfect result
        plt.legend([p1, p2, p3, p4],
                   ["noisy measurement", "KF output", "ideal", "batch"],
                   loc=4)
        plt.show()
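The maha < 4 assertion above is effectively a crude validation gate. A common refinement (not from the source; a sketch using scipy.stats) derives the gate from a chi-square quantile, since the squared Mahalanobis distance of a Gaussian residual follows a chi-square distribution with dim_z degrees of freedom:

import numpy as np
from scipy.stats import chi2

dim_z = 1                                   # measurement dimension, as above
gate = np.sqrt(chi2.ppf(0.997, df=dim_z))   # ~2.97 for a 1-D measurement
maha = 2.1                                  # hypothetical distance from a filter step
accept = maha < gate                        # True -> measurement passes the gate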