Python sklearn.utils.extmath.stable_cumsum() Examples
The following are 11 code examples of sklearn.utils.extmath.stable_cumsum(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the module sklearn.utils.extmath, or try the search function.
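Before the project examples, here is a minimal standalone sketch (not taken from any of the projects below) of how stable_cumsum is called. It mirrors np.cumsum, but accumulates in float64 and emits a RuntimeWarning when the final cumulative value does not match the sum within rtol/atol.

import numpy as np
from sklearn.utils.extmath import stable_cumsum

x = np.array([1e-10, 1.0, 2.0])
print(stable_cumsum(x))          # same values as np.cumsum(x)

A = np.arange(12).reshape(3, 4)
print(stable_cumsum(A, axis=1))  # the axis argument works as in np.cumsum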
Example #1
Source File: test_extmath.py, from Mastering-Elasticsearch-7.0 (MIT License) | 5 votes

def test_stable_cumsum():
    assert_array_equal(stable_cumsum([1, 2, 3]), np.cumsum([1, 2, 3]))
    r = np.random.RandomState(0).rand(100000)
    assert_warns(RuntimeWarning, stable_cumsum, r, rtol=0, atol=0)

    # test axis parameter
    A = np.random.RandomState(36).randint(1000, size=(5, 5, 5))
    assert_array_equal(stable_cumsum(A, axis=0), np.cumsum(A, axis=0))
    assert_array_equal(stable_cumsum(A, axis=1), np.cumsum(A, axis=1))
    assert_array_equal(stable_cumsum(A, axis=2), np.cumsum(A, axis=2))
Example #2
Source File: stats.py, from Splunking-Crime (GNU Affero General Public License v3.0) | 5 votes

def _weighted_percentile(array, sample_weight, percentile=50):
    """
    Compute the weighted ``percentile`` of ``array`` with ``sample_weight``.
    """
    sorted_idx = np.argsort(array)

    # Find index of median prediction for each sample
    weight_cdf = stable_cumsum(sample_weight[sorted_idx])
    percentile_idx = np.searchsorted(
        weight_cdf, (percentile / 100.) * weight_cdf[-1])
    return array[sorted_idx[percentile_idx]]
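As a quick, self-contained illustration of the idea above (toy numbers, with the logic inlined rather than importing the private helper): stable_cumsum turns the sorted sample weights into a CDF, and searchsorted finds where the requested percentile falls.

import numpy as np
from sklearn.utils.extmath import stable_cumsum

values = np.array([3.0, 1.0, 4.0, 1.0, 5.0])
weights = np.array([1.0, 1.0, 1.0, 1.0, 6.0])

sorted_idx = np.argsort(values)
weight_cdf = stable_cumsum(weights[sorted_idx])          # [1, 2, 3, 4, 10]
idx = np.searchsorted(weight_cdf, 0.5 * weight_cdf[-1])  # 50th percentile
print(values[sorted_idx[idx]])  # 5.0: the heavy weight pulls the median up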
Example #3
Source File: _pca_0_22.py, from daal4py (Apache License 2.0) | 5 votes

def _n_components_from_fraction(explained_variance_ratio, frac):
    # number of components for which the cumulated explained
    # variance percentage is superior to the desired threshold
    # side='right' ensures that number of features selected
    # their variance is always greater than n_components float
    # passed. More discussion in issue: #15669
    ratio_cumsum = stable_cumsum(explained_variance_ratio)
    n_components = np.searchsorted(ratio_cumsum, frac,
                                   side='right') + 1
    return n_components
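A small standalone illustration, with made-up ratios, of what this helper computes: the cumulative explained-variance ratio is searched for the first position that exceeds the requested fraction.

import numpy as np
from sklearn.utils.extmath import stable_cumsum

explained_variance_ratio = np.array([0.6, 0.25, 0.1, 0.05])
ratio_cumsum = stable_cumsum(explained_variance_ratio)  # [0.6, 0.85, 0.95, 1.0]
frac = 0.9
n_components = np.searchsorted(ratio_cumsum, frac, side='right') + 1
print(n_components)  # 3: the smallest count whose cumulative ratio exceeds 0.9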
Example #4
Source File: _pca_0_21.py, from daal4py (Apache License 2.0) | 5 votes

def _fit_full_daal4py(self, X, n_components):
    n_samples, n_features = X.shape

    # due to need to flip components, need to do full decomposition
    self._fit_daal4py(X, min(n_samples, n_features))
    U = self._transform_daal4py(X, whiten=True, check_X=False,
                                scale_eigenvalues=True)
    V = self.components_
    U, V = svd_flip(U, V)
    U = U.copy()
    V = V.copy()
    S = self.singular_values_.copy()

    if n_components == 'mle':
        n_components = \
            _infer_dimension_(self.explained_variance_, n_samples, n_features)
    elif 0 < n_components < 1.0:
        # number of components for which the cumulated explained
        # variance percentage is superior to the desired threshold
        ratio_cumsum = stable_cumsum(self.explained_variance_ratio_)
        n_components = np.searchsorted(ratio_cumsum, n_components) + 1

    # Compute noise covariance using Probabilistic PCA model
    # The sigma2 maximum likelihood (cf. eq. 12.46)
    if n_components < min(n_features, n_samples):
        self.noise_variance_ = self.explained_variance_[n_components:].mean()
    else:
        self.noise_variance_ = 0.

    self.n_samples_, self.n_features_ = n_samples, n_features
    self.components_ = self.components_[:n_components]
    self.n_components_ = n_components
    self.explained_variance_ = self.explained_variance_[:n_components]
    self.explained_variance_ratio_ = \
        self.explained_variance_ratio_[:n_components]
    self.singular_values_ = self.singular_values_[:n_components]

    return U, S, V
Example #5
Source File: _pca_0_23.py, from daal4py (Apache License 2.0) | 5 votes

def _n_components_from_fraction(explained_variance_ratio, frac):
    # number of components for which the cumulated explained
    # variance percentage is superior to the desired threshold
    # side='right' ensures that number of features selected
    # their variance is always greater than n_components float
    # passed. More discussion in issue: #15669
    ratio_cumsum = stable_cumsum(explained_variance_ratio)
    n_components = np.searchsorted(ratio_cumsum, frac,
                                   side='right') + 1
    return n_components
Example #6
Source File: stats.py, from twitter-stock-recommendation (MIT License) | 5 votes

def _weighted_percentile(array, sample_weight, percentile=50):
    """
    Compute the weighted ``percentile`` of ``array`` with ``sample_weight``.
    """
    sorted_idx = np.argsort(array)

    # Find index of median prediction for each sample
    weight_cdf = stable_cumsum(sample_weight[sorted_idx])
    percentile_idx = np.searchsorted(
        weight_cdf, (percentile / 100.) * weight_cdf[-1])
    return array[sorted_idx[percentile_idx]]
Example #7
Source File: test_extmath.py, from twitter-stock-recommendation (MIT License) | 5 votes

def test_stable_cumsum():
    if np_version < (1, 9):
        raise SkipTest("Sum is as unstable as cumsum for numpy < 1.9")
    assert_array_equal(stable_cumsum([1, 2, 3]), np.cumsum([1, 2, 3]))
    r = np.random.RandomState(0).rand(100000)
    assert_warns(RuntimeWarning, stable_cumsum, r, rtol=0, atol=0)

    # test axis parameter
    A = np.random.RandomState(36).randint(1000, size=(5, 5, 5))
    assert_array_equal(stable_cumsum(A, axis=0), np.cumsum(A, axis=0))
    assert_array_equal(stable_cumsum(A, axis=1), np.cumsum(A, axis=1))
    assert_array_equal(stable_cumsum(A, axis=2), np.cumsum(A, axis=2))
Example #8
Source File: metrics.py, from scikit-uplift (MIT License) | 4 votes

def uplift_curve(y_true, uplift, treatment):
    """Compute Uplift curve.

    For computing the area under the Uplift Curve, see :func:`.uplift_auc_score`.

    Args:
        y_true (1d array-like): Correct (true) target values.
        uplift (1d array-like): Predicted uplift, as returned by a model.
        treatment (1d array-like): Treatment labels.

    Returns:
        array (shape = [>2]), array (shape = [>2]): Points on a curve.

    See also:
        :func:`.uplift_auc_score`: Compute normalized Area Under the Uplift curve from prediction scores.

        :func:`.perfect_uplift_curve`: Compute the perfect Uplift curve.

        :func:`.plot_uplift_curve`: Plot Uplift curves from predictions.

        :func:`.qini_curve`: Compute Qini curve.

    References:
        Devriendt, F., Guns, T., & Verbeke, W. (2020). Learning to rank for uplift modeling. ArXiv, abs/2002.05897.
    """
    # TODO: check the treatment is binary
    y_true, uplift, treatment = np.array(y_true), np.array(uplift), np.array(treatment)

    desc_score_indices = np.argsort(uplift, kind="mergesort")[::-1]
    y_true, uplift, treatment = y_true[desc_score_indices], uplift[desc_score_indices], treatment[desc_score_indices]

    y_true_ctrl, y_true_trmnt = y_true.copy(), y_true.copy()
    y_true_ctrl[treatment == 1] = 0
    y_true_trmnt[treatment == 0] = 0

    distinct_value_indices = np.where(np.diff(uplift))[0]
    threshold_indices = np.r_[distinct_value_indices, uplift.size - 1]

    num_trmnt = stable_cumsum(treatment)[threshold_indices]
    y_trmnt = stable_cumsum(y_true_trmnt)[threshold_indices]

    num_all = threshold_indices + 1
    num_ctrl = num_all - num_trmnt
    y_ctrl = stable_cumsum(y_true_ctrl)[threshold_indices]

    curve_values = (np.divide(y_trmnt, num_trmnt, out=np.zeros_like(y_trmnt), where=num_trmnt != 0) -
                    np.divide(y_ctrl, num_ctrl, out=np.zeros_like(y_ctrl), where=num_ctrl != 0)) * num_all

    if num_all.size == 0 or curve_values[0] != 0 or num_all[0] != 0:
        # Add an extra threshold position if necessary
        # to make sure that the curve starts at (0, 0)
        num_all = np.r_[0, num_all]
        curve_values = np.r_[0, curve_values]

    return num_all, curve_values
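A hedged usage sketch with toy inputs; the import path below (sklift.metrics) is assumed from the scikit-uplift package this example belongs to.

import numpy as np
from sklift.metrics import uplift_curve

y_true    = np.array([1, 0, 1, 0, 1, 0])               # observed outcomes
uplift    = np.array([0.9, 0.8, 0.6, 0.4, 0.3, 0.1])   # model scores, all distinct
treatment = np.array([1, 0, 1, 0, 1, 0])               # 1 = treated, 0 = control

x, y = uplift_curve(y_true, uplift, treatment)
print(x)  # numbers of targeted samples, starting at 0
print(y)  # cumulative uplift at each threshold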
Example #9
Source File: metrics.py, from scikit-uplift (MIT License) | 4 votes

def qini_curve(y_true, uplift, treatment):
    """Compute Qini curve.

    For computing the area under the Qini Curve, see :func:`.qini_auc_score`.

    Args:
        y_true (1d array-like): Correct (true) target values.
        uplift (1d array-like): Predicted uplift, as returned by a model.
        treatment (1d array-like): Treatment labels.

    Returns:
        array (shape = [>2]), array (shape = [>2]): Points on a curve.

    See also:
        :func:`.qini_auc_score`: Compute the area under the Qini curve.

        :func:`.perfect_qini_curve`: Compute the perfect Qini curve.

        :func:`.plot_qini_curves`: Plot Qini curves from predictions.

        :func:`.uplift_curve`: Compute Uplift curve.

    References:
        Nicholas J Radcliffe. (2007). Using control groups to target on predicted lift: Building and assessing uplift model. Direct Marketing Analytics Journal, (3):14-21, 2007.

        Devriendt, F., Guns, T., & Verbeke, W. (2020). Learning to rank for uplift modeling. ArXiv, abs/2002.05897.
    """
    # TODO: check the treatment is binary
    y_true, uplift, treatment = np.array(y_true), np.array(uplift), np.array(treatment)

    desc_score_indices = np.argsort(uplift, kind="mergesort")[::-1]
    y_true = y_true[desc_score_indices]
    treatment = treatment[desc_score_indices]
    uplift = uplift[desc_score_indices]

    y_true_ctrl, y_true_trmnt = y_true.copy(), y_true.copy()
    y_true_ctrl[treatment == 1] = 0
    y_true_trmnt[treatment == 0] = 0

    distinct_value_indices = np.where(np.diff(uplift))[0]
    threshold_indices = np.r_[distinct_value_indices, uplift.size - 1]

    num_trmnt = stable_cumsum(treatment)[threshold_indices]
    y_trmnt = stable_cumsum(y_true_trmnt)[threshold_indices]

    num_all = threshold_indices + 1
    num_ctrl = num_all - num_trmnt
    y_ctrl = stable_cumsum(y_true_ctrl)[threshold_indices]

    curve_values = y_trmnt - y_ctrl * np.divide(num_trmnt, num_ctrl, out=np.zeros_like(num_trmnt), where=num_ctrl != 0)

    if num_all.size == 0 or curve_values[0] != 0 or num_all[0] != 0:
        # Add an extra threshold position if necessary
        # to make sure that the curve starts at (0, 0)
        num_all = np.r_[0, num_all]
        curve_values = np.r_[0, curve_values]

    return num_all, curve_values
Example #10
Source File: _pca_0_21.py, from daal4py (Apache License 2.0) | 4 votes

def _fit_daal4py(self, X, n_components):
    n_samples, n_features = X.shape
    n_sf_min = min(n_samples, n_features)

    _validate_n_components(n_components, n_samples, n_features)

    if n_components == 'mle':
        daal_n_components = n_features
    elif n_components < 1:
        daal_n_components = n_sf_min
    else:
        daal_n_components = n_components

    fpType = getFPType(X)
    centering_algo = daal4py.normalization_zscore(
        fptype=fpType, doScale=False)
    pca_alg = daal4py.pca(
        fptype=fpType,
        method='svdDense',
        normalization=centering_algo,
        resultsToCompute='mean|variance|eigenvalue',
        isDeterministic=True,
        nComponents=daal_n_components
    )
    pca_res = pca_alg.compute(X)

    self.mean_ = pca_res.means.ravel()
    variances_ = pca_res.variances.ravel()
    components_ = pca_res.eigenvectors
    explained_variance_ = pca_res.eigenvalues.ravel()
    tot_var = explained_variance_.sum()
    explained_variance_ratio_ = explained_variance_ / tot_var

    if n_components == 'mle':
        n_components = \
            _infer_dimension_(explained_variance_, n_samples, n_features)
    elif 0 < n_components < 1.0:
        # number of components for which the cumulated explained
        # variance percentage is superior to the desired threshold
        ratio_cumsum = stable_cumsum(explained_variance_ratio_)
        n_components = np.searchsorted(ratio_cumsum, n_components) + 1

    # Compute noise covariance using Probabilistic PCA model
    # The sigma2 maximum likelihood (cf. eq. 12.46)
    if n_components < n_sf_min:
        if explained_variance_.shape[0] == n_sf_min:
            self.noise_variance_ = explained_variance_[n_components:].mean()
        else:
            resid_var_ = variances_.sum()
            resid_var_ -= explained_variance_[:n_components].sum()
            self.noise_variance_ = resid_var_ / (n_sf_min - n_components)
    else:
        self.noise_variance_ = 0.

    self.n_samples_, self.n_features_ = n_samples, n_features
    self.components_ = components_[:n_components]
    self.n_components_ = n_components
    self.explained_variance_ = explained_variance_[:n_components]
    self.explained_variance_ratio_ = \
        explained_variance_ratio_[:n_components]
    self.singular_values_ = np.sqrt((n_samples - 1) * self.explained_variance_)
Example #11
Source File: _pca_0_21.py, from daal4py (Apache License 2.0) | 4 votes

def _fit_full_vanilla(self, X, n_components):
    """Fit the model by computing full SVD on X"""
    n_samples, n_features = X.shape

    # Center data
    self.mean_ = np.mean(X, axis=0)
    X -= self.mean_

    U, S, V = np.linalg.svd(X, full_matrices=False)
    # flip eigenvectors' sign to enforce deterministic output
    U, V = svd_flip(U, V)

    components_ = V

    # Get variance explained by singular values
    explained_variance_ = (S ** 2) / (n_samples - 1)
    total_var = explained_variance_.sum()
    explained_variance_ratio_ = explained_variance_ / total_var

    # Postprocess the number of components required
    if n_components == 'mle':
        n_components = \
            _infer_dimension_(explained_variance_, n_samples, n_features)
    elif 0 < n_components < 1.0:
        # number of components for which the cumulated explained
        # variance percentage is superior to the desired threshold
        ratio_cumsum = stable_cumsum(explained_variance_ratio_)
        n_components = np.searchsorted(ratio_cumsum, n_components) + 1

    # Compute noise covariance using Probabilistic PCA model
    # The sigma2 maximum likelihood (cf. eq. 12.46)
    if n_components < min(n_features, n_samples):
        self.noise_variance_ = explained_variance_[n_components:].mean()
    else:
        self.noise_variance_ = 0.

    self.n_samples_, self.n_features_ = n_samples, n_features
    self.components_ = components_[:n_components]
    self.n_components_ = n_components
    self.explained_variance_ = explained_variance_[:n_components]
    self.explained_variance_ratio_ = \
        explained_variance_ratio_[:n_components]
    self.singular_values_ = S[:n_components]

    return U, S, V
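For context, the same fraction-of-variance path can be exercised from the public scikit-learn API; a minimal sketch with toy data (nothing here comes from daal4py):

import numpy as np
from sklearn.decomposition import PCA

X = np.random.RandomState(0).rand(100, 10)
pca = PCA(n_components=0.9, svd_solver='full').fit(X)
print(pca.n_components_)                       # chosen so the cumulative ratio exceeds 0.9
print(pca.explained_variance_ratio_.cumsum())  # compare against the 0.9 threshold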