Python Examples of sklearn.covariance.MinCovDet

Source File: getFilteredSkels.py From tierpsy-tracker with MIT License

6 votes

def _h_getMahalanobisRobust(dat, critical_alpha=0.01, good_rows=np.zeros(0)):
    '''Calculate the Mahalanobis distance from the sample vector.'''
    if good_rows.size == 0:
        good_rows = np.any(~np.isnan(dat), axis=1)

    try:
        dat2fit = dat[good_rows]
        assert not np.any(np.isnan(dat2fit))

        robust_cov = MinCovDet().fit(dat2fit)
        mahalanobis_dist = np.sqrt(robust_cov.mahalanobis(dat))
    except ValueError:
        # this step will fail if the covariance matrix is not singular. This happens if the data is not
        # a unimodal symetric distribution. For example there is too many small noisy particles. Therefore
        # I will take a safe option and return zeros in the mahalanobis
        # distance if this is the case.
        mahalanobis_dist = np.zeros(dat.shape[0])

    # critial distance of the maholanobis distance using the chi-square distirbution
    # https://en.wikiversity.org/wiki/Mahalanobis%27_distance
    # http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.chi2.html
    maha_lim = chi2.ppf(1 - critical_alpha, dat.shape[1])
    outliers = mahalanobis_dist > maha_lim

    return mahalanobis_dist, outliers, maha_lim

Source File: mcd.py From pyod with BSD 2-Clause "Simplified" License

5 votes

def fit(self, X, y=None):
        """Fit detector. y is ignored in unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        # Validate inputs X and y (optional)
        X = check_array(X)
        self._set_n_classes(y)

        self.detector_ = MinCovDet(store_precision=self.store_precision,
                                   assume_centered=self.assume_centered,
                                   support_fraction=self.support_fraction,
                                   random_state=self.random_state)
        self.detector_.fit(X=X, y=y)

        # Use mahalanabis distance as the outlier score
        self.decision_scores_ = self.detector_.dist_
        self._process_decision_scores()
        return self

Source File: mcd.py From pyod with BSD 2-Clause "Simplified" License

5 votes

def raw_location_(self):
        """The raw robust estimated location before correction and
        re-weighting.

        Decorator for scikit-learn MinCovDet attributes.
        """
        return self.detector_.raw_location_

Source File: mcd.py From pyod with BSD 2-Clause "Simplified" License

5 votes

def raw_covariance_(self):
        """The raw robust estimated location before correction and
        re-weighting.

        Decorator for scikit-learn MinCovDet attributes.
        """
        return self.detector_.raw_covariance_

Source File: mcd.py From pyod with BSD 2-Clause "Simplified" License

5 votes

def raw_support_(self):
        """A mask of the observations that have been used to compute
        the raw robust estimates of location and shape, before correction
        and re-weighting.

        Decorator for scikit-learn MinCovDet attributes.
        """
        return self.detector_.raw_support_

Source File: mcd.py From pyod with BSD 2-Clause "Simplified" License

5 votes

def location_(self):
        """Estimated robust location.

        Decorator for scikit-learn MinCovDet attributes.
        """
        return self.detector_.location_

Source File: mcd.py From pyod with BSD 2-Clause "Simplified" License

5 votes

def precision_(self):
        """ Estimated pseudo inverse matrix.
        (stored only if store_precision is True)

        Decorator for scikit-learn MinCovDet attributes.
        """
        return self.detector_.precision_

Source File: mcd.py From pyod with BSD 2-Clause "Simplified" License

5 votes

def support_(self):
        """A mask of the observations that have been used to compute
        the robust estimates of location and shape.

        Decorator for scikit-learn MinCovDet attributes.
        """
        return self.detector_.support_

Source File: test_robust_covariance.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_mcd_class_on_invalid_input():
    X = np.arange(100)
    mcd = MinCovDet()
    assert_raise_message(ValueError, 'Expected 2D array, got 1D array instead',
                         mcd.fit, X)

Source File: test_robust_covariance.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def launch_mcd_on_dataset(n_samples, n_features, n_outliers, tol_loc, tol_cov,
                          tol_support):

    rand_gen = np.random.RandomState(0)
    data = rand_gen.randn(n_samples, n_features)
    # add some outliers
    outliers_index = rand_gen.permutation(n_samples)[:n_outliers]
    outliers_offset = 10. * \
        (rand_gen.randint(2, size=(n_outliers, n_features)) - 0.5)
    data[outliers_index] += outliers_offset
    inliers_mask = np.ones(n_samples).astype(bool)
    inliers_mask[outliers_index] = False

    pure_data = data[inliers_mask]
    # compute MCD by fitting an object
    mcd_fit = MinCovDet(random_state=rand_gen).fit(data)
    T = mcd_fit.location_
    S = mcd_fit.covariance_
    H = mcd_fit.support_
    # compare with the estimates learnt from the inliers
    error_location = np.mean((pure_data.mean(0) - T) ** 2)
    assert(error_location < tol_loc)
    error_cov = np.mean((empirical_covariance(pure_data) - S) ** 2)
    assert(error_cov < tol_cov)
    assert(np.sum(H) >= tol_support)
    assert_array_almost_equal(mcd_fit.mahalanobis(data), mcd_fit.dist_)

Source File: test_robust_covariance.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_mcd_issue1127():
    # Check that the code does not break with X.shape = (3, 1)
    # (i.e. n_support = n_samples)
    rnd = np.random.RandomState(0)
    X = rnd.normal(size=(3, 1))
    mcd = MinCovDet()
    mcd.fit(X)

Source File: test_robust_covariance.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_mcd_issue3367():
    # Check that MCD completes when the covariance matrix is singular
    # i.e. one of the rows and columns are all zeros
    rand_gen = np.random.RandomState(0)

    # Think of these as the values for X and Y -> 10 values between -5 and 5
    data_values = np.linspace(-5, 5, 10).tolist()
    # Get the cartesian product of all possible coordinate pairs from above set
    data = np.array(list(itertools.product(data_values, data_values)))

    # Add a third column that's all zeros to make our data a set of point
    # within a plane, which means that the covariance matrix will be singular
    data = np.hstack((data, np.zeros((data.shape[0], 1))))

    # The below line of code should raise an exception if the covariance matrix
    # is singular. As a further test, since we have points in XYZ, the
    # principle components (Eigenvectors) of these directly relate to the
    # geometry of the points. Since it's a plane, we should be able to test
    # that the Eigenvector that corresponds to the smallest Eigenvalue is the
    # plane normal, specifically [0, 0, 1], since everything is in the XY plane
    # (as I've set it up above). To do this one would start by:
    #
    #     evals, evecs = np.linalg.eigh(mcd_fit.covariance_)
    #     normal = evecs[:, np.argmin(evals)]
    #
    # After which we need to assert that our `normal` is equal to [0, 0, 1].
    # Do note that there is floating point error associated with this, so it's
    # best to subtract the two and then compare some small tolerance (e.g.
    # 1e-12).
    MinCovDet(random_state=rand_gen).fit(data)

Source File: test_robust_covariance.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_mcd_support_covariance_is_zero():
    # Check that MCD returns a ValueError with informative message when the
    # covariance of the support data is equal to 0.
    X_1 = np.array([0.5, 0.1, 0.1, 0.1, 0.957, 0.1, 0.1, 0.1, 0.4285, 0.1])
    X_1 = X_1.reshape(-1, 1)
    X_2 = np.array([0.5, 0.3, 0.3, 0.3, 0.957, 0.3, 0.3, 0.3, 0.4285, 0.3])
    X_2 = X_2.reshape(-1, 1)
    msg = ('The covariance matrix of the support data is equal to 0, try to '
           'increase support_fraction')
    for X in [X_1, X_2]:
        assert_raise_message(ValueError, msg, MinCovDet().fit, X)

Source File: stats_of_cloth.py From TextileDefectDetection with GNU Affero General Public License v3.0

5 votes

def compute_MCD_weft(weftsPickled, target_path):

    weft_points_list = floatPointList()
    for pickled_path in weftsPickled:
        weft_points_list.extend(pickle.load(open(pickled_path, "rb" )))

    x_vals = [fp.x for fp in weft_points_list]
    y_vals = [fp.y for fp in weft_points_list]

    mean_hor_dist = weft_points_list.getMedianWeftDist()

    min_x = min(x_vals) + 1.5 * mean_hor_dist
    max_x = max(x_vals) - 1.5 * mean_hor_dist
    min_y = min(y_vals) + 1.5 * mean_hor_dist
    max_y = max(y_vals) - 1.5 * mean_hor_dist

    inner_points = floatPointList()
    for pt in weft_points_list:
        if min_x < pt.x < max_x and min_y < pt.y < max_y:
            inner_points.append(pt)

    X = np.zeros([len(inner_points), 3])

    for idx, pt in enumerate(inner_points):
        X[idx,0] = pt.area
        X[idx,1] = pt.right_dist
        X[idx,2] = pt.left_dist

    Y = X[~(X<=0).any(axis=1)]

    robust_cov = MinCovDet(support_fraction=0.8).fit(Y)
    pickle.dump(robust_cov, open(target_path, "wb"))

Source File: stats_of_cloth.py From TextileDefectDetection with GNU Affero General Public License v3.0

5 votes

def compute_MCD_warp(warpsPickled, target_path):

    warp_points_list = floatPointList()
    for pickled_path in warpsPickled:
        warp_points_list.extend(pickle.load(open(pickled_path, "rb" )))

    x_vals = [fp.x for fp in warp_points_list]
    y_vals = [fp.y for fp in warp_points_list]

    mean_ver_dist = warp_points_list.getMedianWarpDist()

    min_x = min(x_vals) + 1.5 * mean_ver_dist
    max_x = max(x_vals) - 1.5 * mean_ver_dist
    min_y = min(y_vals) + 1.5 * mean_ver_dist
    max_y = max(y_vals) - 1.5 * mean_ver_dist

    inner_points = floatPointList()
    for pt in warp_points_list:
        if min_x < pt.x < max_x and min_y < pt.y < max_y:
            inner_points.append(pt)


    #####CHANGED
    #print("attention, only 2D!!!!!")
    X = np.zeros([len(inner_points), 3])

    for idx, pt in enumerate(inner_points):
        X[idx,0] = pt.area
        X[idx,1] = pt.lower_dist
        X[idx,2] = pt.upper_dist


    Y = X[~(X<=0).any(axis=1)]

    robust_cov = MinCovDet(support_fraction=0.8).fit(Y)
    pickle.dump(robust_cov, open(target_path, "wb"))

Source File: test_covariance.py From pandas-ml with BSD 3-Clause "New" or "Revised" License

5 votes

def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.covariance.EmpiricalCovariance, covariance.EmpiricalCovariance)
        self.assertIs(df.covariance.EllipticEnvelope, covariance.EllipticEnvelope)
        self.assertIs(df.covariance.GraphLasso, covariance.GraphLasso)
        self.assertIs(df.covariance.GraphLassoCV, covariance.GraphLassoCV)
        self.assertIs(df.covariance.LedoitWolf, covariance.LedoitWolf)
        self.assertIs(df.covariance.MinCovDet, covariance.MinCovDet)
        self.assertIs(df.covariance.OAS, covariance.OAS)
        self.assertIs(df.covariance.ShrunkCovariance, covariance.ShrunkCovariance)

        self.assertIs(df.covariance.shrunk_covariance, covariance.shrunk_covariance)
        self.assertIs(df.covariance.graph_lasso, covariance.graph_lasso)

Python sklearn.covariance.MinCovDet() Examples