Python sklearn.utils.extmath.row_norms() Examples
The following are 17 code examples of sklearn.utils.extmath.row_norms(), collected from open-source projects; the source file, project, and license are noted above each example.
You may also want to check out the other available functions and classes of the sklearn.utils.extmath module.
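Before the project examples, here is a minimal usage sketch (not taken from any of the projects below) of what row_norms computes: the Euclidean norm of each row, optionally squared, for dense arrays and sparse CSR matrices alike.

import numpy as np
from scipy import sparse
from sklearn.utils.extmath import row_norms

X = np.array([[3.0, 4.0],
              [0.0, 2.0]])

row_norms(X)                  # array([5., 2.])  per-row Euclidean norms
row_norms(X, squared=True)    # array([25., 4.]) squared norms, sqrt skipped

# The same call accepts sparse input without densifying it.
row_norms(sparse.csr_matrix(X), squared=True)   # array([25., 4.])

The squared form is the one most of the examples below rely on: k-means label assignment and SAG step-size estimation both work with squared row norms.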
Example #1
Source File: test_extmath.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_row_norms():
    X = np.random.RandomState(42).randn(100, 100)
    for dtype in (np.float32, np.float64):
        if dtype is np.float32:
            precision = 4
        else:
            precision = 5

        X = X.astype(dtype)
        sq_norm = (X ** 2).sum(axis=1)

        assert_array_almost_equal(sq_norm, row_norms(X, squared=True),
                                  precision)
        assert_array_almost_equal(np.sqrt(sq_norm), row_norms(X), precision)

        Xcsr = sparse.csr_matrix(X, dtype=dtype)
        assert_array_almost_equal(sq_norm, row_norms(Xcsr, squared=True),
                                  precision)
        assert_array_almost_equal(np.sqrt(sq_norm), row_norms(Xcsr),
                                  precision)
Example #2
Source File: equal_groups.py From Same-Size-K-Means with BSD 3-Clause "New" or "Revised" License | 6 votes |
def predict(self, X):
    """Predict the closest cluster each sample in X belongs to.

    In the vector quantization literature, `cluster_centers_` is called
    the code book and each value returned by `predict` is the index of
    the closest code in the code book.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        New data to predict.

    Returns
    -------
    labels : array, shape [n_samples,]
        Index of the cluster each sample belongs to.
    """
    check_is_fitted(self, 'cluster_centers_')

    X = self._check_test_data(X)
    x_squared_norms = row_norms(X, squared=True)
    return _labels_inertia(X, x_squared_norms, self.cluster_centers_)[0]
Example #3
Source File: equal_groups.py From Same-Size-K-Means with BSD 3-Clause "New" or "Revised" License | 6 votes |
def score(self, X, y=None):
    """Opposite of the value of X on the K-means objective.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        New data.

    Returns
    -------
    score : float
        Opposite of the value of X on the K-means objective.
    """
    check_is_fitted(self, 'cluster_centers_')

    X = self._check_test_data(X)
    x_squared_norms = row_norms(X, squared=True)
    return -_labels_inertia(X, x_squared_norms, self.cluster_centers_)[1]
Example #4
Source File: _k_means_0_22.py From daal4py with Apache License 2.0 | 5 votes |
def predict(self, X, sample_weight=None):
    """Predict the closest cluster each sample in X belongs to.

    In the vector quantization literature, `cluster_centers_` is called
    the code book and each value returned by `predict` is the index of
    the closest code in the code book.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        New data to predict.

    sample_weight : array-like, shape (n_samples,), optional
        The weights for each observation in X. If None, all observations
        are assigned equal weight (default: None)

    Returns
    -------
    labels : array, shape [n_samples,]
        Index of the cluster each sample belongs to.
    """
    check_is_fitted(self)

    X = self._check_test_data(X)

    daal_ready = sample_weight is None and hasattr(X, '__array__')  # or sp.isspmatrix_csr(X)

    if daal_ready:
        logging.info("sklearn.cluster.KMeans.predict: " + method_uses_daal)
        return _daal4py_k_means_predict(
            X, self.n_clusters, self.cluster_centers_)[0]
    else:
        logging.info("sklearn.cluster.KMeans.predict: " + method_uses_sklearn)
        x_squared_norms = row_norms(X, squared=True)
        return _labels_inertia(X, sample_weight, x_squared_norms,
                               self.cluster_centers_)[0]
Example #5
Source File: test_k_means.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_labels_assignment_and_inertia():
    # pure numpy implementation as easily auditable reference gold
    # implementation
    rng = np.random.RandomState(42)
    noisy_centers = centers + rng.normal(size=centers.shape)
    labels_gold = - np.ones(n_samples, dtype=np.int)
    mindist = np.empty(n_samples)
    mindist.fill(np.infty)
    for center_id in range(n_clusters):
        dist = np.sum((X - noisy_centers[center_id]) ** 2, axis=1)
        labels_gold[dist < mindist] = center_id
        mindist = np.minimum(dist, mindist)
    inertia_gold = mindist.sum()
    assert_true((mindist >= 0.0).all())
    assert_true((labels_gold != -1).all())

    # perform label assignment using the dense array input
    x_squared_norms = (X ** 2).sum(axis=1)
    labels_array, inertia_array = _labels_inertia(
        X, x_squared_norms, noisy_centers)
    assert_array_almost_equal(inertia_array, inertia_gold)
    assert_array_equal(labels_array, labels_gold)

    # perform label assignment using the sparse CSR input
    x_squared_norms_from_csr = row_norms(X_csr, squared=True)
    labels_csr, inertia_csr = _labels_inertia(
        X_csr, x_squared_norms_from_csr, noisy_centers)
    assert_array_almost_equal(inertia_csr, inertia_gold)
    assert_array_equal(labels_csr, labels_gold)
Example #6
Source File: utils.py From dask-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def row_norms(X, squared=False):
    if isinstance(X, np.ndarray):
        return skm.row_norms(X, squared=squared)
    return X.map_blocks(
        skm.row_norms, chunks=(X.chunks[0],), drop_axis=1, squared=squared
    )
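As a usage note (not part of the dask-ml source), the wrapper above sends plain NumPy arrays straight to scikit-learn and maps skm.row_norms over the row-blocks of a dask array, returning a lazy one-dimensional result. A hypothetical sketch, assuming dask is installed and the wrapper (with its skm alias for sklearn.utils.extmath) is in scope:

import dask.array as da
import numpy as np

X_np = np.random.rand(8, 3)
X_da = da.from_array(X_np, chunks=(4, 3))   # two row-blocks of four rows each

norms = row_norms(X_da, squared=True)       # lazy dask array of shape (8,)
norms.compute()                             # matches (X_np ** 2).sum(axis=1)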
Example #7
Source File: _k_means_0_21.py From daal4py with Apache License 2.0 | 5 votes |
def predict(self, X, sample_weight=None):
    """Predict the closest cluster each sample in X belongs to.

    In the vector quantization literature, `cluster_centers_` is called
    the code book and each value returned by `predict` is the index of
    the closest code in the code book.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        New data to predict.

    sample_weight : array-like, shape (n_samples,), optional
        The weights for each observation in X. If None, all observations
        are assigned equal weight (default: None)

    Returns
    -------
    labels : array, shape [n_samples,]
        Index of the cluster each sample belongs to.
    """
    check_is_fitted(self, 'cluster_centers_')

    X = self._check_test_data(X)

    daal_ready = sample_weight is None and hasattr(X, '__array__')  # or sp.isspmatrix_csr(X)

    if daal_ready:
        logging.info("sklearn.cluster.KMeans.predict: " + method_uses_daal)
        return _daal4py_k_means_predict(
            X, self.n_clusters, self.cluster_centers_)[0]
    else:
        logging.info("sklearn.cluster.KMeans.predict: " + method_uses_sklearn)
        x_squared_norms = row_norms(X, squared=True)
        return _labels_inertia(X, sample_weight, x_squared_norms,
                               self.cluster_centers_)[0]
Example #8
Source File: test_extmath.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_row_norms(dtype):
    X = np.random.RandomState(42).randn(100, 100)
    if dtype is np.float32:
        precision = 4
    else:
        precision = 5

    X = X.astype(dtype, copy=False)
    sq_norm = (X ** 2).sum(axis=1)

    assert_array_almost_equal(sq_norm, row_norms(X, squared=True),
                              precision)
    assert_array_almost_equal(np.sqrt(sq_norm), row_norms(X), precision)

    for csr_index_dtype in [np.int32, np.int64]:
        Xcsr = sparse.csr_matrix(X, dtype=dtype)
        # csr_matrix will use int32 indices by default,
        # up-casting those to int64 when necessary
        if csr_index_dtype is np.int64:
            Xcsr.indptr = Xcsr.indptr.astype(csr_index_dtype, copy=False)
            Xcsr.indices = Xcsr.indices.astype(csr_index_dtype, copy=False)
        assert Xcsr.indices.dtype == csr_index_dtype
        assert Xcsr.indptr.dtype == csr_index_dtype
        assert_array_almost_equal(sq_norm, row_norms(Xcsr, squared=True),
                                  precision)
        assert_array_almost_equal(np.sqrt(sq_norm), row_norms(Xcsr),
                                  precision)
Example #9
Source File: _k_means_0_23.py From daal4py with Apache License 2.0 | 5 votes |
def predict(self, X, sample_weight=None):
    """Predict the closest cluster each sample in X belongs to.

    In the vector quantization literature, `cluster_centers_` is called
    the code book and each value returned by `predict` is the index of
    the closest code in the code book.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        New data to predict.

    sample_weight : array-like, shape (n_samples,), optional
        The weights for each observation in X. If None, all observations
        are assigned equal weight (default: None)

    Returns
    -------
    labels : array, shape [n_samples,]
        Index of the cluster each sample belongs to.
    """
    check_is_fitted(self)

    X = self._check_test_data(X)

    daal_ready = sample_weight is None and hasattr(X, '__array__')  # or sp.isspmatrix_csr(X)

    if daal_ready:
        logging.info("sklearn.cluster.KMeans.predict: " + method_uses_daal)
        return _daal4py_k_means_predict(
            X, self.n_clusters, self.cluster_centers_)[0]
    else:
        logging.info("sklearn.cluster.KMeans.predict: " + method_uses_sklearn)
        x_squared_norms = row_norms(X, squared=True)
        return _labels_inertia(X, sample_weight, x_squared_norms,
                               self.cluster_centers_)[0]
Example #10
Source File: test_k_means.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_labels_assignment_and_inertia():
    # pure numpy implementation as easily auditable reference gold
    # implementation
    rng = np.random.RandomState(42)
    noisy_centers = centers + rng.normal(size=centers.shape)
    labels_gold = np.full(n_samples, -1, dtype=np.int)
    mindist = np.empty(n_samples)
    mindist.fill(np.infty)
    for center_id in range(n_clusters):
        dist = np.sum((X - noisy_centers[center_id]) ** 2, axis=1)
        labels_gold[dist < mindist] = center_id
        mindist = np.minimum(dist, mindist)
    inertia_gold = mindist.sum()
    assert (mindist >= 0.0).all()
    assert (labels_gold != -1).all()

    sample_weight = None

    # perform label assignment using the dense array input
    x_squared_norms = (X ** 2).sum(axis=1)
    labels_array, inertia_array = _labels_inertia(
        X, sample_weight, x_squared_norms, noisy_centers)
    assert_array_almost_equal(inertia_array, inertia_gold)
    assert_array_equal(labels_array, labels_gold)

    # perform label assignment using the sparse CSR input
    x_squared_norms_from_csr = row_norms(X_csr, squared=True)
    labels_csr, inertia_csr = _labels_inertia(
        X_csr, sample_weight, x_squared_norms_from_csr, noisy_centers)
    assert_array_almost_equal(inertia_csr, inertia_gold)
    assert_array_equal(labels_csr, labels_gold)
Example #11
Source File: _coordinate_descent_0_21.py From daal4py with Apache License 2.0 | 4 votes |
def _daal4py_check(self, X, y, check_input):
    _fptype = getFPType(X)

    # check alpha
    if self.alpha == 0:
        warnings.warn("With alpha=0, this algorithm does not converge "
                      "well. You are advised to use the LinearRegression "
                      "estimator", stacklevel=2)

    # check precompute
    if isinstance(self.precompute, np.ndarray):
        if check_input:
            check_array(self.precompute, dtype=_fptype)
        self.precompute = make2d(self.precompute)

        # only for compliance with Sklearn
        if self.fit_intercept:
            X_offset = np.average(X, axis=0, weights=None)
            if self.normalize:
                X_scale = row_norms(X)
                if np.isscalar(X_scale):
                    if X_scale == .0:
                        X_scale = 1.
                elif isinstance(X_scale, np.ndarray):
                    X_scale[X_scale == 0.0] = 1.0
            else:
                X_scale = np.ones(X.shape[1], dtype=_fptype)
        else:
            X_offset = np.zeros(X.shape[1], dtype=_fptype)
            X_scale = np.ones(X.shape[1], dtype=_fptype)

        if (self.fit_intercept and
                not np.allclose(X_offset, np.zeros(X.shape[1])) or
                self.normalize and
                not np.allclose(X_scale, np.ones(X.shape[1]))):
            warnings.warn("Gram matrix was provided but X was centered"
                          " to fit intercept, "
                          "or X was normalized : recomputing Gram matrix.",
                          UserWarning)
    else:
        if self.precompute not in [False, True, 'auto']:
            raise ValueError("precompute should be one of True, False, "
                             "'auto' or array-like. Got %r" % self.precompute)

    # check selection
    if self.selection not in ['random', 'cyclic']:
        raise ValueError("selection should be either random or cyclic.")
Example #12
Source File: dis_sim.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License | 4 votes |
def fit(self, neigh_dist: np.ndarray, neigh_ind: np.ndarray, X: np.ndarray,
        assume_sorted: bool = True, *args, **kwargs) -> DisSimLocal:
    """ Fit the model using X, neigh_dist, and neigh_ind as training data.

    Parameters
    ----------
    neigh_dist: np.ndarray, shape (n_samples, n_neighbors)
        Distance matrix of training objects (rows) against their
        individual k nearest neighbors (columns).

    neigh_ind: np.ndarray, shape (n_samples, n_neighbors)
        Neighbor indices corresponding to the values in neigh_dist.

    X: np.ndarray, shape (n_samples, n_features)
        Training data, where n_samples is the number of vectors,
        and n_features their dimensionality (number of features).

    assume_sorted: bool, default = True
        Assume input matrices are sorted according to neigh_dist.
        If False, these are sorted here.
    """
    # Check equal number of rows and columns
    check_consistent_length(neigh_ind, neigh_dist)
    check_consistent_length(neigh_ind.T, neigh_dist.T)
    X = check_array(X)
    try:
        if self.k <= 0:
            raise ValueError(f"Expected k > 0. Got {self.k}")
    except TypeError:
        raise TypeError(f'Expected k: int > 0. Got {self.k}')

    k = self.k
    if k > neigh_ind.shape[1]:
        warnings.warn(f'Neighborhood parameter k larger than provided neighbors in neigh_dist, neigh_ind. '
                      f'Will reduce to k={neigh_ind.shape[1]}.')
        k = neigh_ind.shape[1]

    # Calculate local neighborhood centroids among the training points
    if assume_sorted:
        knn = neigh_ind[:, :k]
    else:
        mask = np.argpartition(neigh_dist, kth=k-1)[:, :k]
        knn = np.take_along_axis(neigh_ind, mask, axis=1)
    centroids = X[knn].mean(axis=1)
    dist_to_cent = row_norms(X - centroids, squared=True)

    self.X_train_ = X
    self.X_train_centroids_ = centroids
    self.X_train_dist_to_centroids_ = dist_to_cent

    return self
Example #13
Source File: _coordinate_descent_0_23.py From daal4py with Apache License 2.0 | 4 votes |
def _daal4py_check(self, X, y, check_input):
    _fptype = getFPType(X)

    # check alpha
    if self.alpha == 0:
        warnings.warn("With alpha=0, this algorithm does not converge "
                      "well. You are advised to use the LinearRegression "
                      "estimator", stacklevel=2)

    # check precompute
    if isinstance(self.precompute, np.ndarray):
        if check_input:
            check_array(self.precompute, dtype=_fptype)
        self.precompute = make2d(self.precompute)

        # only for compliance with Sklearn
        if self.fit_intercept:
            X_offset = np.average(X, axis=0, weights=None)
            if self.normalize:
                X_scale = row_norms(X)
                if np.isscalar(X_scale):
                    if X_scale == .0:
                        X_scale = 1.
                elif isinstance(X_scale, np.ndarray):
                    X_scale[X_scale == 0.0] = 1.0
            else:
                X_scale = np.ones(X.shape[1], dtype=_fptype)
        else:
            X_offset = np.zeros(X.shape[1], dtype=_fptype)
            X_scale = np.ones(X.shape[1], dtype=_fptype)

        if (self.fit_intercept and
                not np.allclose(X_offset, np.zeros(X.shape[1])) or
                self.normalize and
                not np.allclose(X_scale, np.ones(X.shape[1]))):
            warnings.warn("Gram matrix was provided but X was centered"
                          " to fit intercept, "
                          "or X was normalized : recomputing Gram matrix.",
                          UserWarning)
    else:
        if self.precompute not in [False, True, 'auto']:
            raise ValueError("precompute should be one of True, False, "
                             "'auto' or array-like. Got %r" % self.precompute)

    # check selection
    if self.selection not in ['random', 'cyclic']:
        raise ValueError("selection should be either random or cyclic.")
Example #14
Source File: factorization_machine.py From polylearn with BSD 2-Clause "Simplified" License | 4 votes |
def fit(self, X, y):
    """Fit factorization machine to training data.

    Parameters
    ----------
    X : array-like or sparse, shape = [n_samples, n_features]
        Training vectors, where n_samples is the number of samples
        and n_features is the number of features.

    y : array-like, shape = [n_samples]
        Target values.

    Returns
    -------
    self : Estimator
        Returns self.
    """
    if self.degree > 3:
        raise ValueError("FMs with degree >3 not yet supported.")

    X, y = self._check_X_y(X, y)
    X = self._augment(X)
    n_features = X.shape[1]  # augmented
    X_col_norms = row_norms(X.T, squared=True)
    dataset = get_dataset(X, order="fortran")
    rng = check_random_state(self.random_state)
    loss_obj = self._get_loss(self.loss)

    if not (self.warm_start and hasattr(self, 'w_')):
        self.w_ = np.zeros(n_features, dtype=np.double)

    if self.fit_lower == 'explicit':
        n_orders = self.degree - 1
    else:
        n_orders = 1

    if not (self.warm_start and hasattr(self, 'P_')):
        self.P_ = 0.01 * rng.randn(n_orders, self.n_components, n_features)

    if not (self.warm_start and hasattr(self, 'lams_')):
        if self.init_lambdas == 'ones':
            self.lams_ = np.ones(self.n_components)
        elif self.init_lambdas == 'random_signs':
            self.lams_ = np.sign(rng.randn(self.n_components))
        else:
            raise ValueError("Lambdas must be initialized as ones "
                             "(init_lambdas='ones') or as random "
                             "+/- 1 (init_lambdas='random_signs').")

    y_pred = self._get_output(X)

    converged, self.n_iter_ = _cd_direct_ho(
        self.P_, self.w_, dataset, X_col_norms, y, y_pred,
        self.lams_, self.degree, self.alpha, self.beta, self.fit_linear,
        self.fit_lower == 'explicit', loss_obj, self.max_iter,
        self.tol, self.verbose)

    if not converged:
        warnings.warn("Objective did not converge. Increase max_iter.")

    return self
Example #15
Source File: test_algebra_onnx_operators.py From sklearn-onnx with MIT License | 4 votes |
def test_sub_kmeans(self):

    def conv(scope, operator, container):
        X = operator.inputs[0]
        out = operator.outputs
        op = operator.raw_operator

        C = op.cluster_centers_
        C2 = row_norms(C, squared=True).astype(container.dtype)
        C = C.astype(container.dtype)

        rs = OnnxReduceSumSquare(
            X, axes=[1], keepdims=1,
            op_version=container.target_opset)

        N = X.type.shape[0]
        if isinstance(N, int):
            zeros = np.zeros((N, ))
        else:
            zeros = OnnxMul(
                rs, np.array([0], dtype=np.float32),
                op_version=container.target_opset)

        z = OnnxAdd(
            rs,
            OnnxGemm(
                X, C, zeros, alpha=-2., transB=1,
                op_version=container.target_opset),
            op_version=container.target_opset)

        y2 = OnnxAdd(C2, z, op_version=container.target_opset)
        lo = OnnxArgMin(
            y2, axis=1, keepdims=0, output_names=out[:1],
            op_version=container.target_opset)
        y2s = OnnxSqrt(
            y2, output_names=out[1:],
            op_version=container.target_opset)

        lo.add_to(scope, container)
        y2s.add_to(scope, container)

    data = load_iris()
    X = data.data
    model = KMeans(n_clusters=3)
    model.fit(X)
    model_onnx = convert_sklearn(
        model, 'a-kmeans',
        [('input', FloatTensorType([None, X.shape[1]]))],
        custom_conversion_functions={KMeans: conv},
        target_opset=TARGET_OPSET)

    dump_data_and_model(X.astype(np.float32)[40:60], model, model_onnx,
                        basename="SklearnKMeansCustom-Dec4")
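As an editorial note (not part of the original test), the custom converter above is simply the expanded squared-distance formula written with ONNX operators:

    ||x - c||^2 = ||x||^2 - 2 * (x . c) + ||c||^2

OnnxReduceSumSquare contributes ||x||^2 per sample, OnnxGemm with alpha=-2. and transB=1 contributes the -2 * (x . c) cross term against every center, and C2 = row_norms(C, squared=True) contributes ||c||^2 for each center. OnnxArgMin over the resulting matrix picks the nearest center, mirroring KMeans.predict, while OnnxSqrt exposes the distances themselves.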
Example #16
Source File: test_sag.py From twitter-stock-recommendation with MIT License | 4 votes |
def test_get_auto_step_size():
    X = np.array([[1, 2, 3], [2, 3, 4], [2, 3, 2]], dtype=np.float64)
    alpha = 1.2
    fit_intercept = False
    # sum the squares of the second sample because that's the largest
    max_squared_sum = 4 + 9 + 16
    max_squared_sum_ = row_norms(X, squared=True).max()
    n_samples = X.shape[0]
    assert_almost_equal(max_squared_sum, max_squared_sum_, decimal=4)

    for saga in [True, False]:
        for fit_intercept in (True, False):
            if saga:
                L_sqr = (max_squared_sum + alpha + int(fit_intercept))
                L_log = (max_squared_sum + 4.0 * alpha +
                         int(fit_intercept)) / 4.0
                mun_sqr = min(2 * n_samples * alpha, L_sqr)
                mun_log = min(2 * n_samples * alpha, L_log)
                step_size_sqr = 1 / (2 * L_sqr + mun_sqr)
                step_size_log = 1 / (2 * L_log + mun_log)
            else:
                step_size_sqr = 1.0 / (max_squared_sum +
                                       alpha + int(fit_intercept))
                step_size_log = 4.0 / (max_squared_sum + 4.0 * alpha +
                                       int(fit_intercept))

            step_size_sqr_ = get_auto_step_size(max_squared_sum_, alpha,
                                                "squared", fit_intercept,
                                                n_samples=n_samples,
                                                is_saga=saga)
            step_size_log_ = get_auto_step_size(max_squared_sum_, alpha,
                                                "log", fit_intercept,
                                                n_samples=n_samples,
                                                is_saga=saga)

            assert_almost_equal(step_size_sqr, step_size_sqr_, decimal=4)
            assert_almost_equal(step_size_log, step_size_log_, decimal=4)

    msg = 'Unknown loss function for SAG solver, got wrong instead of'
    assert_raise_message(ValueError, msg, get_auto_step_size,
                         max_squared_sum_, alpha, "wrong", fit_intercept)
Example #17
Source File: test_sag.py From Mastering-Elasticsearch-7.0 with MIT License | 4 votes |
def test_get_auto_step_size():
    X = np.array([[1, 2, 3], [2, 3, 4], [2, 3, 2]], dtype=np.float64)
    alpha = 1.2
    fit_intercept = False
    # sum the squares of the second sample because that's the largest
    max_squared_sum = 4 + 9 + 16
    max_squared_sum_ = row_norms(X, squared=True).max()
    n_samples = X.shape[0]
    assert_almost_equal(max_squared_sum, max_squared_sum_, decimal=4)

    for saga in [True, False]:
        for fit_intercept in (True, False):
            if saga:
                L_sqr = (max_squared_sum + alpha + int(fit_intercept))
                L_log = (max_squared_sum + 4.0 * alpha +
                         int(fit_intercept)) / 4.0
                mun_sqr = min(2 * n_samples * alpha, L_sqr)
                mun_log = min(2 * n_samples * alpha, L_log)
                step_size_sqr = 1 / (2 * L_sqr + mun_sqr)
                step_size_log = 1 / (2 * L_log + mun_log)
            else:
                step_size_sqr = 1.0 / (max_squared_sum +
                                       alpha + int(fit_intercept))
                step_size_log = 4.0 / (max_squared_sum + 4.0 * alpha +
                                       int(fit_intercept))

            step_size_sqr_ = get_auto_step_size(max_squared_sum_, alpha,
                                                "squared", fit_intercept,
                                                n_samples=n_samples,
                                                is_saga=saga)
            step_size_log_ = get_auto_step_size(max_squared_sum_, alpha,
                                                "log", fit_intercept,
                                                n_samples=n_samples,
                                                is_saga=saga)

            assert_almost_equal(step_size_sqr, step_size_sqr_, decimal=4)
            assert_almost_equal(step_size_log, step_size_log_, decimal=4)

    msg = 'Unknown loss function for SAG solver, got wrong instead of'
    assert_raise_message(ValueError, msg, get_auto_step_size,
                         max_squared_sum_, alpha, "wrong", fit_intercept)