Python sklearn.covariance.MinCovDet() Examples
The following are 16 code examples of sklearn.covariance.MinCovDet().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.covariance, or try the search function.
Example #1
Source File: getFilteredSkels.py From tierpsy-tracker with MIT License | 6 votes |
def _h_getMahalanobisRobust(dat, critical_alpha=0.01, good_rows=np.zeros(0)):
    """Calculate a robust Mahalanobis distance for every row of ``dat``.

    A ``MinCovDet`` estimator is fitted on the rows selected by ``good_rows``
    and then used to score all rows of ``dat``.

    Parameters
    ----------
    dat : numpy array, one sample per row
        Data to score. Rows may contain NaN.
    critical_alpha : float
        Upper-tail probability of the chi-square distribution used to derive
        the outlier threshold.
    good_rows : numpy array (mask or indices), optional
        Rows of ``dat`` used to fit the estimator. When empty (the default)
        or None, every row that is completely free of NaN is used.

    Returns
    -------
    mahalanobis_dist : numpy array
        One distance per row of ``dat``; all zeros if the fit raised a
        ValueError.
    outliers : numpy array of bool
        True where the distance exceeds ``maha_lim``.
    maha_lim : float
        Chi-square quantile used as the outlier threshold.
    """
    if good_rows is None or good_rows.size == 0:
        # A row is usable only if *all* of its values are valid. Using
        # np.any here would keep partially-NaN rows and trip the assertion
        # below.
        good_rows = np.all(~np.isnan(dat), axis=1)

    try:
        dat2fit = dat[good_rows]
        assert not np.any(np.isnan(dat2fit)), \
            'rows selected for the fit must not contain NaN'

        robust_cov = MinCovDet().fit(dat2fit)
        mahalanobis_dist = np.sqrt(robust_cov.mahalanobis(dat))
    except ValueError:
        # The fit can fail, presumably when the covariance matrix is singular
        # (the original comment said "not singular"). This happens if the
        # data is not a unimodal symmetric distribution, for example when
        # there are too many small noisy particles. Take the safe option and
        # return zeros as the Mahalanobis distance in that case.
        mahalanobis_dist = np.zeros(dat.shape[0])

    # Critical value of the Mahalanobis distance from the chi-square
    # distribution.
    # https://en.wikiversity.org/wiki/Mahalanobis%27_distance
    # http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.chi2.html
    # NOTE(review): the distances above are square-rooted while the chi-square
    # quantile is not; confirm the threshold is intended for unsquared
    # distances.
    maha_lim = chi2.ppf(1 - critical_alpha, dat.shape[1])
    outliers = mahalanobis_dist > maha_lim

    return mahalanobis_dist, outliers, maha_lim
Example #2
Source File: mcd.py From pyod with BSD 2-Clause "Simplified" License | 5 votes |
def fit(self, X, y=None):
    """Fit the detector on ``X``.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    y : Ignored
        Not used for the estimate itself; present for API consistency by
        convention. It is only forwarded to ``_set_n_classes`` and to the
        underlying estimator's ``fit``.

    Returns
    -------
    self : object
        Fitted estimator.
    """
    # Validate the input samples before anything else touches them.
    X = check_array(X)
    self._set_n_classes(y)

    # Forward this wrapper's hyper-parameters to scikit-learn's MinCovDet.
    mcd_params = dict(
        store_precision=self.store_precision,
        assume_centered=self.assume_centered,
        support_fraction=self.support_fraction,
        random_state=self.random_state,
    )
    self.detector_ = MinCovDet(**mcd_params)
    self.detector_.fit(X=X, y=y)

    # The distances stored by the fitted estimator serve as outlier scores.
    self.decision_scores_ = self.detector_.dist_
    self._process_decision_scores()
    return self
Example #3
Source File: mcd.py From pyod with BSD 2-Clause "Simplified" License | 5 votes |
def raw_location_(self):
    """Return the raw robust location estimate of the fitted detector.

    This is the estimate before correction and re-weighting, read directly
    from the wrapped scikit-learn MinCovDet instance.
    """
    fitted_detector = self.detector_
    return fitted_detector.raw_location_
Example #4
Source File: mcd.py From pyod with BSD 2-Clause "Simplified" License | 5 votes |
def raw_covariance_(self):
    """The raw robust estimated covariance before correction and
    re-weighting.

    Read directly from the wrapped scikit-learn MinCovDet instance.
    """
    return self.detector_.raw_covariance_
Example #5
Source File: mcd.py From pyod with BSD 2-Clause "Simplified" License | 5 votes |
def raw_support_(self):
    """Return the mask of observations behind the raw robust estimates.

    The mask marks the observations used to compute the raw estimates of
    location and shape, before correction and re-weighting. It is read
    directly from the wrapped scikit-learn MinCovDet instance.
    """
    fitted_detector = self.detector_
    return fitted_detector.raw_support_
Example #6
Source File: mcd.py From pyod with BSD 2-Clause "Simplified" License | 5 votes |
def location_(self):
    """Return the estimated robust location.

    Read directly from the wrapped scikit-learn MinCovDet instance.
    """
    fitted_detector = self.detector_
    return fitted_detector.location_
Example #7
Source File: mcd.py From pyod with BSD 2-Clause "Simplified" License | 5 votes |
def precision_(self):
    """Return the estimated pseudo inverse matrix.

    It is stored only if ``store_precision`` is True. The value is read
    directly from the wrapped scikit-learn MinCovDet instance.
    """
    fitted_detector = self.detector_
    return fitted_detector.precision_
Example #8
Source File: mcd.py From pyod with BSD 2-Clause "Simplified" License | 5 votes |
def support_(self):
    """Return the mask of observations behind the robust estimates.

    The mask marks the observations used to compute the robust estimates of
    location and shape. It is read directly from the wrapped scikit-learn
    MinCovDet instance.
    """
    fitted_detector = self.detector_
    return fitted_detector.support_
Example #9
Source File: test_robust_covariance.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_mcd_class_on_invalid_input():
    # Fitting MinCovDet on a 1D array must raise a ValueError carrying the
    # expected message.
    one_dim_input = np.arange(100)
    estimator = MinCovDet()
    expected_msg = 'Expected 2D array, got 1D array instead'
    assert_raise_message(ValueError, expected_msg,
                         estimator.fit, one_dim_input)
Example #10
Source File: test_robust_covariance.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def launch_mcd_on_dataset(n_samples, n_features, n_outliers, tol_loc, tol_cov,
                          tol_support):
    # Fit MinCovDet on Gaussian data contaminated with shifted outliers and
    # check the estimates against those computed from the inliers alone.
    rand_gen = np.random.RandomState(0)
    data = rand_gen.randn(n_samples, n_features)

    # Shift a random subset of samples by +/-5 in every feature.
    outliers_index = rand_gen.permutation(n_samples)[:n_outliers]
    half_signs = rand_gen.randint(2, size=(n_outliers, n_features)) - 0.5
    data[outliers_index] += 10. * half_signs

    inliers_mask = np.ones(n_samples, dtype=bool)
    inliers_mask[outliers_index] = False
    pure_data = data[inliers_mask]

    # Compute MCD by fitting an estimator object.
    mcd_fit = MinCovDet(random_state=rand_gen).fit(data)
    robust_location = mcd_fit.location_
    robust_covariance = mcd_fit.covariance_
    support_mask = mcd_fit.support_

    # Compare with the estimates learnt from the inliers.
    location_gap = pure_data.mean(0) - robust_location
    error_location = np.mean(location_gap ** 2)
    assert error_location < tol_loc

    covariance_gap = empirical_covariance(pure_data) - robust_covariance
    error_cov = np.mean(covariance_gap ** 2)
    assert error_cov < tol_cov

    assert np.sum(support_mask) >= tol_support
    assert_array_almost_equal(mcd_fit.mahalanobis(data), mcd_fit.dist_)
Example #11
Source File: test_robust_covariance.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_mcd_issue1127():
    # The fit must not break when X.shape = (3, 1),
    # i.e. when n_support = n_samples.
    rng = np.random.RandomState(0)
    tiny_sample = rng.normal(size=(3, 1))
    MinCovDet().fit(tiny_sample)
Example #12
Source File: test_robust_covariance.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_mcd_issue3367():
    # Check that MCD completes when the covariance matrix is singular,
    # i.e. one of the rows and columns are all zeros.
    rand_gen = np.random.RandomState(0)

    # 10 values between -5 and 5, used for both the X and the Y coordinate.
    axis_values = np.linspace(-5, 5, 10).tolist()

    # Every (x, y) pair: the cartesian product of the values with themselves.
    grid_xy = np.array(list(itertools.product(axis_values, repeat=2)))

    # Append an all-zero third column so that every point lies in a plane,
    # which makes the covariance matrix singular.
    zero_column = np.zeros((grid_xy.shape[0], 1))
    data = np.hstack((grid_xy, zero_column))

    # The fit below would raise if a singular covariance matrix were not
    # handled. As a further test, since the points are in XYZ, the principal
    # components (eigenvectors) relate directly to the geometry of the points.
    # Because they form a plane, the eigenvector belonging to the smallest
    # eigenvalue should be the plane normal, specifically [0, 0, 1], since
    # everything lies in the XY plane. To check this one would start with:
    #
    #     evals, evecs = np.linalg.eigh(mcd_fit.covariance_)
    #     normal = evecs[:, np.argmin(evals)]
    #
    # and then assert that `normal` equals [0, 0, 1]. Floating point error is
    # involved, so subtract the two and compare against a small tolerance
    # (e.g. 1e-12).
    MinCovDet(random_state=rand_gen).fit(data)
Example #13
Source File: test_robust_covariance.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_mcd_support_covariance_is_zero():
    # MCD must raise a ValueError with an informative message when the
    # covariance of the support data is equal to 0.
    X_1 = np.array(
        [0.5, 0.1, 0.1, 0.1, 0.957, 0.1, 0.1, 0.1, 0.4285, 0.1]
    ).reshape(-1, 1)
    X_2 = np.array(
        [0.5, 0.3, 0.3, 0.3, 0.957, 0.3, 0.3, 0.3, 0.4285, 0.3]
    ).reshape(-1, 1)

    msg = ('The covariance matrix of the support data is equal to 0, try to '
           'increase support_fraction')

    for degenerate_sample in (X_1, X_2):
        assert_raise_message(ValueError, msg,
                             MinCovDet().fit, degenerate_sample)
Example #14
Source File: stats_of_cloth.py From TextileDefectDetection with GNU Affero General Public License v3.0 | 5 votes |
def compute_MCD_weft(weftsPickled, target_path):
    """Fit a robust covariance on weft point features and pickle it.

    Parameters
    ----------
    weftsPickled : iterable of paths
        Pickle files, each holding a collection of weft points.
    target_path : path
        File the fitted ``MinCovDet`` estimator is pickled to.
    """
    weft_points_list = floatPointList()
    for pickled_path in weftsPickled:
        # NOTE(review): unpickling can execute arbitrary code; only feed this
        # function pickle files from a trusted source.
        with open(pickled_path, "rb") as pickled_file:
            weft_points_list.extend(pickle.load(pickled_file))

    x_vals = [fp.x for fp in weft_points_list]
    y_vals = [fp.y for fp in weft_points_list]

    # The margin is 1.5 times the *median* weft distance; the same margin is
    # applied on both axes.
    margin = 1.5 * weft_points_list.getMedianWeftDist()
    min_x = min(x_vals) + margin
    max_x = max(x_vals) - margin
    min_y = min(y_vals) + margin
    max_y = max(y_vals) - margin

    # One feature row per point lying strictly inside the margin.
    features = [
        (pt.area, pt.right_dist, pt.left_dist)
        for pt in weft_points_list
        if min_x < pt.x < max_x and min_y < pt.y < max_y
    ]
    # reshape keeps the (n, 3) layout even when no point qualifies.
    X = np.array(features, dtype=float).reshape(-1, 3)

    # Drop rows containing any non-positive feature before fitting.
    Y = X[~(X <= 0).any(axis=1)]
    robust_cov = MinCovDet(support_fraction=0.8).fit(Y)

    # Close the output file deterministically so the pickle is fully flushed.
    with open(target_path, "wb") as target_file:
        pickle.dump(robust_cov, target_file)
Example #15
Source File: stats_of_cloth.py From TextileDefectDetection with GNU Affero General Public License v3.0 | 5 votes |
def compute_MCD_warp(warpsPickled, target_path):
    """Fit a robust covariance on warp point features and pickle it.

    Parameters
    ----------
    warpsPickled : iterable of paths
        Pickle files, each holding a collection of warp points.
    target_path : path
        File the fitted ``MinCovDet`` estimator is pickled to.
    """
    warp_points_list = floatPointList()
    for pickled_path in warpsPickled:
        # NOTE(review): unpickling can execute arbitrary code; only feed this
        # function pickle files from a trusted source.
        with open(pickled_path, "rb") as pickled_file:
            warp_points_list.extend(pickle.load(pickled_file))

    x_vals = [fp.x for fp in warp_points_list]
    y_vals = [fp.y for fp in warp_points_list]

    # The margin is 1.5 times the *median* warp distance; the same margin is
    # applied on both axes.
    margin = 1.5 * warp_points_list.getMedianWarpDist()
    min_x = min(x_vals) + margin
    max_x = max(x_vals) - margin
    min_y = min(y_vals) + margin
    max_y = max(y_vals) - margin

    # One feature row per point lying strictly inside the margin.
    features = [
        (pt.area, pt.lower_dist, pt.upper_dist)
        for pt in warp_points_list
        if min_x < pt.x < max_x and min_y < pt.y < max_y
    ]
    # reshape keeps the (n, 3) layout even when no point qualifies.
    X = np.array(features, dtype=float).reshape(-1, 3)

    # Drop rows containing any non-positive feature before fitting.
    Y = X[~(X <= 0).any(axis=1)]
    robust_cov = MinCovDet(support_fraction=0.8).fit(Y)

    # Close the output file deterministically so the pickle is fully flushed.
    with open(target_path, "wb") as target_file:
        pickle.dump(robust_cov, target_file)
Example #16
Source File: test_covariance.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_objectmapper(self):
    # Every covariance estimator/function reachable through the ModelFrame
    # accessor must be the very same object as the one exposed by the
    # ``covariance`` module.
    df = pdml.ModelFrame([])
    mapped_names = (
        'EmpiricalCovariance',
        'EllipticEnvelope',
        'GraphLasso',
        'GraphLassoCV',
        'LedoitWolf',
        'MinCovDet',
        'OAS',
        'ShrunkCovariance',
        'shrunk_covariance',
        'graph_lasso',
    )
    for name in mapped_names:
        self.assertIs(getattr(df.covariance, name),
                      getattr(covariance, name))