Python scipy.spatial.distance.mahalanobis() Examples

The following are 14 code examples of scipy.spatial.distance.mahalanobis(), drawn from open-source projects. You can go to the original project or source file by following the link above each example, or browse the other available functions and classes of the scipy.spatial.distance module.
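For orientation before the project examples, here is a minimal standalone sketch of the call signature (the vectors and covariance matrix below are illustrative only): mahalanobis(u, v, VI) takes two 1-D arrays and VI, the inverse of the covariance matrix, and returns sqrt((u - v)^T VI (u - v)).

from scipy.spatial import distance
import numpy as np

# Illustrative data: two observations and a hand-picked covariance matrix.
u = np.array([1.0, 0.0, 0.0])
v = np.array([0.0, 1.0, 0.0])
cov = np.array([[1.0, 0.5, 0.5],
                [0.5, 1.0, 0.5],
                [0.5, 0.5, 1.0]])
VI = np.linalg.inv(cov)  # the function expects the *inverse* covariance

d = distance.mahalanobis(u, v, VI)  # sqrt((u - v)^T @ VI @ (u - v))
print(d)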
Example #1
Source File: diagnostics.py    From pliers with BSD 3-Clause "New" or "Revised" License
import numpy as np
import pandas as pd
from numpy.linalg import LinAlgError
from scipy.spatial.distance import mahalanobis


def mahalanobis_distances(df, axis=0):
    '''
    Returns a pandas Series with Mahalanobis distances for each sample on the
    axis.

    Note: this does not work well when the number of observations is smaller
    than the number of dimensions. It will either return NaN values or, in
    the extreme case, fail with a singular-matrix LinAlgError.

    Args:
        df: pandas DataFrame with columns to run diagnostics on
        axis: 0 to find outlier rows, 1 to find outlier columns
    '''
    df = df.transpose() if axis == 1 else df
    means = df.mean()
    try:
        inv_cov = np.linalg.inv(df.cov())
    except LinAlgError:
        return pd.Series([np.nan] * len(df.index), df.index,
                         name='Mahalanobis')
    dists = []
    for i, sample in df.iterrows():
        dists.append(mahalanobis(sample, means, inv_cov))

    return pd.Series(dists, df.index, name='Mahalanobis') 
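A hypothetical usage sketch for the helper above (the DataFrame and its column names are invented for illustration):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.normal(size=(50, 3)), columns=['a', 'b', 'c'])

# Largest distances first: the most outlying rows.
print(mahalanobis_distances(df).sort_values(ascending=False).head())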
Example #2
Source File: test_kf.py    From filterpy with MIT License
import numpy as np
from pytest import approx
from scipy.spatial.distance import mahalanobis as scipy_mahalanobis
from filterpy.common import kinematic_kf


def test_steadystate():

    dim = 7

    cv = kinematic_kf(dim=dim, order=5)
    print(cv)

    cv.x[1] = 1.0

    for i in range(100):
        cv.predict()
        cv.update([i])

    for i in range(100):
        cv.predict_steadystate()
        cv.update_steadystate([i])
        # test mahalanobis
        a = np.zeros(cv.y.shape)
        maha = scipy_mahalanobis(a, cv.y, cv.SI)
        assert cv.mahalanobis == approx(maha) 
Example #3
Source File: sample_distortion_metric.py    From AIF360 with Apache License 2.0
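# Note: partial comes from functools, scdist is scipy.spatial.distance, and
# utils refers to AIF360's metric utilities.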
def mahalanobis_distance(self, privileged=None, returned=False):
    """Compute the average Mahalanobis distance between the samples from the
    two datasets.
    """
    condition = self._to_condition(privileged)
    X_orig = self.dataset.features
    X_distort = self.distorted_dataset.features
    dist_fun = partial(scdist.mahalanobis,
        VI=np.linalg.inv(np.cov(np.vstack([X_orig, X_distort]).T)).T)
    distance, mask = utils.compute_distance(X_orig, X_distort,
        self.dataset.protected_attributes,
        self.dataset.protected_attribute_names, dist_fun=dist_fun,
        condition=condition)
    if returned:
        return distance, self.dataset.instance_weights[mask]
    return distance
Example #4
Source File: test_measures.py    From Stone-Soup with MIT License
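# Note: u, v, state_u, state_v and the covariance matrix ui are module-level
# fixtures defined elsewhere in Stone-Soup's test_measures.py (this also
# applies to Examples #5 and #6 below).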
def test_mahalanobis():
    measure = measures.Mahalanobis()
    assert measure(state_u, state_v) == distance.mahalanobis(u,
                                                             v,
                                                             np.linalg.inv(ui)) 
Example #5
Source File: test_measures.py    From Stone-Soup with MIT License
def test_mahalanobis_full_mapping():
    mapping = np.arange(len(u))
    measure = measures.Mahalanobis(mapping=mapping)
    assert measure(state_u, state_v) == distance.mahalanobis(u,
                                                             v,
                                                             np.linalg.inv(ui)) 
Example #6
Source File: test_measures.py    From Stone-Soup with MIT License
def test_mahalanobis_partial_mapping():
    mapping = np.array([0, 1])
    measure = measures.Mahalanobis(mapping=mapping)
    reduced_ui = CovarianceMatrix(np.diag([100, 10]))
    assert measure(state_u, state_v) == \
        distance.mahalanobis([[10], [1]],
                             [[11], [10]], np.linalg.inv(reduced_ui))
    mapping = np.array([0, 3])
    reduced_ui = CovarianceMatrix(np.diag([100, 10]))
    measure = measures.Mahalanobis(mapping=mapping)
    assert measure(state_u, state_v) == \
        distance.mahalanobis([[10], [1]],
                             [[11], [2]], np.linalg.inv(reduced_ui)) 
Example #7
Source File: measures.py    From Stone-Soup with MIT License
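# Note: distance refers to scipy.spatial.distance; self.mapping selects which
# state-vector indices are compared.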
def __call__(self, state1, state2):
    r"""Calculate the Mahalanobis distance between a pair of state objects

    Parameters
    ----------
    state1 : :class:`~.State`
    state2 : :class:`~.State`

    Returns
    -------
    float
        Mahalanobis distance between a pair of input :class:`~.State`
        objects

    """
    if self.mapping is not None:
        u = state1.state_vector[self.mapping]
        v = state2.state_vector[self.mapping]
        # extract the mapped covariance data
        rows = np.array(self.mapping, dtype=np.intp)
        columns = np.array(self.mapping, dtype=np.intp)
        cov = state1.covar[rows[:, np.newaxis], columns]
    else:
        u = state1.state_vector
        v = state2.state_vector
        cov = state1.covar

    vi = np.linalg.inv(cov)

    return distance.mahalanobis(u, v, vi)
Example #8
Source File: SOMClustering.py    From susi with BSD 3-Clause "New" or "Revised" License
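# Note: dist is scipy.spatial.distance and binarize comes from
# sklearn.preprocessing.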
def get_node_distance_matrix(self, datapoint, som_array):
    """Get the distance between the datapoint and each SOM node, using
    the configured distance metric.

    Parameters
    ----------
    datapoint : np.array, shape=(X.shape[1])
        Datapoint = one row of the dataset `X`
    som_array : np.array
        Weight vectors of the SOM,
        shape = (self.n_rows, self.n_columns, X.shape[1])

    Returns
    -------
    distmat : np.array of float
        Distance between datapoint and each SOM node

    """
    # algorithms on the full matrix
    if self.distance_metric == "euclidean":
        return np.linalg.norm(som_array - datapoint, axis=2)

    # node-by-node algorithms
    distmat = np.zeros((self.n_rows, self.n_columns))
    if self.distance_metric == "manhattan":
        for node in self.node_list_:
            distmat[node] = dist.cityblock(
                som_array[node[0], node[1]], datapoint)

    elif self.distance_metric == "mahalanobis":
        for node in self.node_list_:
            som_node = som_array[node[0], node[1]]
            cov = np.cov(np.stack((datapoint, som_node), axis=0),
                         rowvar=False)
            cov_pinv = np.linalg.pinv(cov)   # pseudo-inverse
            distmat[node] = dist.mahalanobis(
                datapoint, som_node, cov_pinv)

    elif self.distance_metric == "tanimoto":
        # Note that this is a binary distance measure, so the vectors
        # have to be binarized first.
        # Source: Melssen 2006, Supervised Kohonen networks for
        #         classification problems
        # This algorithm is very slow.
        threshold = 0.5
        for node in self.node_list_:
            som_node = som_array[node[0], node[1]]
            distmat[node] = dist.rogerstanimoto(
                binarize(datapoint.reshape(1, -1), threshold, copy=True),
                binarize(som_node.reshape(1, -1), threshold, copy=True))

    return distmat
Example #9
Source File: test_stats.py    From filterpy with MIT License
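# Note: mahalanobis is filterpy.stats.mahalanobis, scipy_mahalanobis is an
# alias for scipy.spatial.distance.mahalanobis, inv is numpy.linalg.inv, and
# ITERS is a module-level constant in this test file.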
def test_mahalanobis():
    global a, b, S
    # int test
    a, b, S = 3, 1, 2
    assert abs(mahalanobis(a, b, S) - scipy_mahalanobis(a, b, 1/S)) < 1.e-12


    # int list
    assert abs(mahalanobis([a], [b], [S]) - scipy_mahalanobis(a, b, 1/S)) < 1.e-12
    assert abs(mahalanobis([a], b, S) - scipy_mahalanobis(a, b, 1/S)) < 1.e-12


    # float
    a, b, S = 3.123, 3.235235, .01234
    assert abs(mahalanobis(a, b, S) - scipy_mahalanobis(a, b, 1/S)) < 1.e-12
    assert abs(mahalanobis([a], [b], [S]) - scipy_mahalanobis(a, b, 1/S)) < 1.e-12
    assert abs(mahalanobis([a], b, S) - scipy_mahalanobis(a, b, 1/S)) < 1.e-12

    #float array
    assert abs(mahalanobis(np.array([a]), b, S) - scipy_mahalanobis(a, b, 1/S)) < 1.e-12

    #1d array
    a = np.array([1., 2.])
    b = np.array([1.4, 1.2])
    S = np.array([[1., 2.], [2., 4.001]])

    assert abs(mahalanobis(a, b, S) - scipy_mahalanobis(a, b, inv(S))) < 1.e-12

    #2d array
    a = np.array([[1., 2.]])
    b = np.array([[1.4, 1.2]])
    S = np.array([[1., 2.], [2., 4.001]])

    assert abs(mahalanobis(a, b, S) - scipy_mahalanobis(a, b, inv(S))) < 1.e-12
    assert abs(mahalanobis(a.T, b, S) - scipy_mahalanobis(a, b, inv(S))) < 1.e-12
    assert abs(mahalanobis(a, b.T, S) - scipy_mahalanobis(a, b, inv(S))) < 1.e-12
    assert abs(mahalanobis(a.T, b.T, S) - scipy_mahalanobis(a, b, inv(S))) < 1.e-12

    try:
        # mismatched shapes must raise ValueError; any other exception
        # (including the AssertionError below) propagates and fails the test
        mahalanobis([1], b, S)
        raise AssertionError("didn't catch vectors of different lengths")
    except ValueError:
        pass

    # okay, now check for numerical accuracy
    for _ in range(ITERS):
        N = np.random.randint(1, 20)
        a = np.random.randn(N)
        b = np.random.randn(N)
        S = np.random.randn(N, N)
        S = np.dot(S, S.T)  # make S symmetric positive definite (almost surely)
        assert abs(mahalanobis(a, b, S) - scipy_mahalanobis(a, b, inv(S))) < 1.e-12 
Example #10
Source File: test_kf.py    From filterpy with MIT License
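# Note: KalmanFilter comes from filterpy.kalman, Saver from filterpy.common,
# random refers to numpy.random, and DO_PLOT is a module-level plotting flag
# (the same fixtures apply to Example #11 below).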
def test_noisy_1d():
    f = KalmanFilter(dim_x=2, dim_z=1)

    f.x = np.array([[2.],
                    [0.]])       # initial state (location and velocity)

    f.F = np.array([[1., 1.],
                    [0., 1.]])    # state transition matrix

    f.H = np.array([[1., 0.]])    # measurement function
    f.P *= 1000.                  # state covariance
    f.R = 5                       # measurement noise variance
    f.Q = 0.0001                  # process noise variance

    measurements = []
    results = []

    zs = []
    for t in range(100):
        # create measurement = t plus white noise
        z = t + random.randn()*20
        zs.append(z)

        # perform kalman filtering
        f.update(z)
        f.predict()

        # save data
        results.append(f.x[0, 0])
        measurements.append(z)

        # test mahalanobis
        a = np.zeros(f.y.shape)
        maha = scipy_mahalanobis(a, f.y, f.SI)
        assert f.mahalanobis == approx(maha)


    # now do a batch run with the stored z values so we can test that
    # it is working the same as the recursive implementation.
    # give slightly different P so result is slightly different
    f.x = np.array([[2., 0]]).T
    f.P = np.eye(2) * 100.
    s = Saver(f)
    m, c, _, _ = f.batch_filter(zs, update_first=False, saver=s)
    s.to_array()
    assert len(s.x) == len(zs)
    assert len(s.x) == len(s)

    # plot data
    if DO_PLOT:
        p1, = plt.plot(measurements, 'r', alpha=0.5)
        p2, = plt.plot(results, 'b')
        p4, = plt.plot(m[:, 0], 'm')
        p3, = plt.plot([0, 100], [0, 100], 'g')  # perfect result
        plt.legend([p1, p2, p3, p4],
                   ["noisy measurement", "KF output", "ideal", "batch"], loc=4)
        plt.show() 
Example #11
Source File: test_kf.py    From filterpy with MIT License
def test_noisy_11d():
    f = KalmanFilter(dim_x=2, dim_z=1)

    f.x = np.array([2., 0])      # initial state (location and velocity)

    f.F = np.array([[1., 1.],
                    [0., 1.]])    # state transition matrix

    f.H = np.array([[1., 0.]])    # measurement function
    f.P *= 1000.                  # state covariance
    f.R = 5                       # measurement noise variance
    f.Q = 0.0001                  # process noise variance

    measurements = []
    results = []

    zs = []
    for t in range(100):
        # create measurement = t plus white noise
        z = t + random.randn()*20
        zs.append(z)

        # perform kalman filtering
        f.update(z)
        f.predict()

        # save data
        results.append(f.x[0])
        measurements.append(z)

        # test mahalanobis
        a = np.zeros(f.y.shape)
        maha = scipy_mahalanobis(a, f.y, f.SI)
        assert f.mahalanobis == approx(maha)

    # now do a batch run with the stored z values so we can test that
    # it is working the same as the recursive implementation.
    # give slightly different P so result is slightly different
    f.x = np.array([[2., 0]]).T
    f.P = np.eye(2) * 100.
    m, c, _, _ = f.batch_filter(zs, update_first=False)

    # plot data
    if DO_PLOT:
        p1, = plt.plot(measurements, 'r', alpha=0.5)
        p2, = plt.plot(results, 'b')
        p4, = plt.plot(m[:, 0], 'm')
        p3, = plt.plot([0, 100], [0, 100], 'g')  # perfect result
        plt.legend([p1, p2, p3, p4],
                   ["noisy measurement", "KF output", "ideal", "batch"], loc=4)

        plt.show() 
Example #12
Source File: test_ukf.py    From filterpy with MIT License
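# Note: RadarSim is a simulation helper defined elsewhere in this test file;
# JulierSigmaPoints and UnscentedKalmanFilter come from filterpy.kalman.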
def test_radar():
    def fx(x, dt):
        A = np.eye(3) + dt * np.array([[0, 1, 0],
                                       [0, 0, 0],
                                       [0, 0, 0]])
        return A.dot(x)

    def hx(x):
        return [np.sqrt(x[0]**2 + x[2]**2)]

    dt = 0.05

    sp = JulierSigmaPoints(n=3, kappa=0.)
    kf = UnscentedKalmanFilter(3, 1, dt, fx=fx, hx=hx, points=sp)
    assert np.allclose(kf.x, kf.x_prior)
    assert np.allclose(kf.P, kf.P_prior)

    # test __repr__ doesn't crash
    str(kf)

    kf.Q *= 0.01
    kf.R = 10
    kf.x = np.array([0., 90., 1100.])
    kf.P *= 100.
    radar = RadarSim(dt)

    t = np.arange(0, 20+dt, dt)
    n = len(t)
    xs = np.zeros((n, 3))

    random.seed(200)
    rs = []
    for i in range(len(t)):
        r = radar.get_range()
        kf.predict()
        kf.update(z=[r])

        xs[i, :] = kf.x
        rs.append(r)

        # test mahalanobis
        a = np.zeros(kf.y.shape)
        maha = scipy_mahalanobis(a, kf.y, kf.SI)
        assert kf.mahalanobis == approx(maha)

    if DO_PLOT:
        print(xs[:, 0].shape)

        plt.figure()
        plt.subplot(311)
        plt.plot(t, xs[:, 0])
        plt.subplot(312)
        plt.plot(t, xs[:, 1])
        plt.subplot(313)
        plt.plot(t, xs[:, 2]) 
Example #13
Source File: test_ckf.py    From filterpy with MIT License
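# Note: CKF and UKF are aliases for filterpy.kalman's CubatureKalmanFilter
# and UnscentedKalmanFilter; Saver comes from filterpy.common and randn from
# numpy.random.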
def test_1d():
    def fx(x, dt):
        F = np.array([[1., dt],
                      [0,  1]])

        return np.dot(F, x)

    def hx(x):
        return x[0:1]

    ckf = CKF(dim_x=2, dim_z=1, dt=0.1, hx=hx, fx=fx)

    ckf.x = np.array([[1.], [2.]])
    ckf.P = np.array([[1, 1.1],
                      [1.1, 3]])

    ckf.R = np.eye(1) * .05
    ckf.Q = np.array([[0., 0], [0., .001]])

    dt = 0.1
    points = MerweScaledSigmaPoints(2, .1, 2., -1)
    kf = UKF(dim_x=2, dim_z=1, dt=dt, fx=fx, hx=hx, points=points)

    kf.x = np.array([1, 2])
    kf.P = np.array([[1, 1.1],
                     [1.1, 3]])
    kf.R *= 0.05
    kf.Q = np.array([[0., 0], [0., .001]])

    s = Saver(kf)
    for i in range(50):
        z = np.array([[i+randn()*0.1]])
        ckf.predict()
        ckf.update(z)
        kf.predict()
        kf.update(z[0])
        assert abs(ckf.x[0] - kf.x[0]) < 1e-10
        assert abs(ckf.x[1] - kf.x[1]) < 1e-10
        s.save()

        # test mahalanobis
        a = np.zeros(kf.y.shape)
        maha = scipy_mahalanobis(a, kf.y, kf.SI)
        assert kf.mahalanobis == approx(maha)

    s.to_array() 
Example #14
Source File: test_fm.py    From filterpy with MIT License
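# Note: FadingKalmanFilter is filterpy's fading-memory variant of the Kalman
# filter; random refers to numpy.random and DO_PLOT is a module-level flag.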
def test_noisy_1d():
    f = FadingKalmanFilter(3., dim_x=2, dim_z=1)

    f.x = np.array([[2.],
                    [0.]])       # initial state (location and velocity)

    f.F = np.array([[1., 1.],
                    [0., 1.]])    # state transition matrix

    f.H = np.array([[1., 0.]])    # measurement function
    f.P *= 1000.                  # state covariance
    f.R = 5.**2                   # measurement noise variance
    f.Q = np.array([[0, 0],
                    [0, 0.0001]]) # process noise covariance

    measurements = []
    results = []

    zs = []
    for t in range(100):
        # create measurement = t plus white noise
        z = t + random.randn() * np.sqrt(f.R)
        zs.append(z)

        # perform kalman filtering
        f.update(z)
        f.predict()

        # save data
        results.append(f.x[0, 0])
        measurements.append(z)

        # test mahalanobis
        a = np.zeros(f.y.shape)
        maha = scipy_mahalanobis(a, f.y, f.SI)
        assert f.mahalanobis == approx(maha)
        print(z, maha, f.y, f.S)
        assert maha < 4


    # now do a batch run with the stored z values so we can test that
    # it is working the same as the recursive implementation.
    # give slightly different P so result is slightly different
    f.x = np.array([[2., 0]]).T
    f.P = np.eye(2) * 100.
    m, c, _, _ = f.batch_filter(zs, update_first=False)

    # plot data
    if DO_PLOT:
        p1, = plt.plot(measurements, 'r', alpha=0.5)
        p2, = plt.plot(results, 'b')
        p4, = plt.plot(m[:, 0], 'm')
        p3, = plt.plot([0, 100], [0, 100], 'g')  # perfect result
        plt.legend([p1, p2, p3, p4],
                   ["noisy measurement", "KF output", "ideal", "batch"], loc=4)


        plt.show()