Python sklearn.utils.compute_class_weight() Examples
The following are 13 code examples of sklearn.utils.compute_class_weight().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.utils, or try the search function.
Example #1
Source File: test_estimator_checks.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def fit(self, X, y):
    """Fit by storing the per-class weights computed from ``y`` as ``coef_``.

    When ``self.class_weight == "balanced"`` the computed weights are
    deliberately shifted by one to simulate a buggy estimator.

    Returns
    -------
    self
    """
    from sklearn.preprocessing import LabelEncoder
    from sklearn.utils import compute_class_weight

    label_encoder = LabelEncoder().fit(y)
    classes = label_encoder.classes_
    # ``classes`` and ``y`` are keyword-only in recent scikit-learn releases;
    # passing them by name also works on older releases.
    class_weight = compute_class_weight(
        self.class_weight, classes=classes, y=y)

    # Intentionally modify the balanced class_weight
    # to simulate a bug and raise an exception
    if self.class_weight == "balanced":
        class_weight += 1.

    # Simply assigning coef_ to the class_weight
    self.coef_ = class_weight
    return self
Example #2
Source File: test_logistic.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def _compute_class_weight_dictionary(y):
    """Return the "balanced" class weights of ``y`` as a dictionary.

    Helper for returning a ``{class_label: weight}`` dictionary instead of
    the array produced by ``compute_class_weight``.
    """
    classes = np.unique(y)
    # ``classes`` and ``y`` are keyword-only in recent scikit-learn releases.
    class_weight = compute_class_weight("balanced", classes=classes, y=y)
    return dict(zip(classes, class_weight))
Example #3
Source File: struct_models.py From marseille with BSD 3-Clause "New" or "Revised" License | 6 votes |
def initialize_labels(self, Y):
    """Fit the node/link label encoders and set the class weights.

    Flattens the ``nodes`` and ``links`` labels of every element of ``Y``,
    fits one ``LabelEncoder`` per label kind, records the number of states
    of each, and stores the class weights in ``prop_cw_`` (all ones) and
    ``link_cw_`` (from ``self.class_weight``, rescaled so that the smallest
    weight is 1).
    """
    y_nodes_flat = [y_val for y in Y for y_val in y.nodes]
    y_links_flat = [y_val for y in Y for y_val in y.links]

    self.prop_encoder_ = LabelEncoder().fit(y_nodes_flat)
    self.link_encoder_ = LabelEncoder().fit(y_links_flat)

    self.n_prop_states = len(self.prop_encoder_.classes_)
    self.n_link_states = len(self.link_encoder_.classes_)

    # Node classes are all weighted equally.
    self.prop_cw_ = np.ones_like(self.prop_encoder_.classes_,
                                 dtype=np.double)

    # ``classes`` and ``y`` are keyword-only in recent scikit-learn releases.
    self.link_cw_ = compute_class_weight(
        self.class_weight,
        classes=self.link_encoder_.classes_,
        y=y_links_flat)

    # Rescale so that the smallest link weight equals 1.
    self.link_cw_ /= self.link_cw_.min()

    logging.info('Setting node class weights {}'.format(", ".join(
        "{}: {}".format(lbl, cw) for lbl, cw in zip(
            self.prop_encoder_.classes_, self.prop_cw_))))

    logging.info('Setting link class weights {}'.format(", ".join(
        "{}: {}".format(lbl, cw) for lbl, cw in zip(
            self.link_encoder_.classes_, self.link_cw_))))
Example #4
Source File: test_svm.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_auto_weight():
    """Check that ``class_weight='balanced'`` does not hurt macro F1."""
    # Test class weights for imbalanced data
    from sklearn.linear_model import LogisticRegression
    # We take as dataset the two-dimensional projection of iris so
    # that it is not separable and remove half of predictors from
    # class 1.
    # We add one to the targets as a non-regression test:
    # class_weight="balanced" used to work only when the labels were
    # a range [0..K).
    from sklearn.utils import compute_class_weight

    X, y = iris.data[:, :2], iris.target + 1
    # Drop every other sample whose (shifted) label is greater than 2.
    unbalanced = np.delete(np.arange(y.size), np.where(y > 2)[0][::2])

    classes = np.unique(y[unbalanced])
    # ``classes`` and ``y`` are keyword-only in recent scikit-learn releases.
    class_weights = compute_class_weight(
        'balanced', classes=classes, y=y[unbalanced])
    # The under-sampled class must receive the largest weight.
    assert np.argmax(class_weights) == 2

    for clf in (svm.SVC(kernel='linear'), svm.LinearSVC(random_state=0),
                LogisticRegression()):
        # check that score is better when class='balanced' is set.
        y_pred = clf.fit(X[unbalanced], y[unbalanced]).predict(X)
        clf.set_params(class_weight='balanced')
        y_pred_balanced = clf.fit(X[unbalanced], y[unbalanced]).predict(X)
        assert (metrics.f1_score(y, y_pred, average='macro')
                <= metrics.f1_score(y, y_pred_balanced, average='macro'))
Example #5
Source File: test_svm.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_auto_weight():
    """Check that ``class_weight='balanced'`` does not hurt macro F1."""
    # Test class weights for imbalanced data
    from sklearn.linear_model import LogisticRegression
    # We take as dataset the two-dimensional projection of iris so
    # that it is not separable and remove half of predictors from
    # class 1.
    # We add one to the targets as a non-regression test:
    # class_weight="balanced"
    # used to work only when the labels were a range [0..K).
    from sklearn.utils import compute_class_weight

    X, y = iris.data[:, :2], iris.target + 1
    # Drop every other sample whose (shifted) label is greater than 2.
    unbalanced = np.delete(np.arange(y.size), np.where(y > 2)[0][::2])

    classes = np.unique(y[unbalanced])
    # ``classes`` and ``y`` are keyword-only in recent scikit-learn releases.
    class_weights = compute_class_weight(
        'balanced', classes=classes, y=y[unbalanced])
    # The under-sampled class must receive the largest weight.
    assert np.argmax(class_weights) == 2

    for clf in (svm.SVC(kernel='linear'), svm.LinearSVC(random_state=0),
                LogisticRegression()):
        # check that score is better when class='balanced' is set.
        y_pred = clf.fit(X[unbalanced], y[unbalanced]).predict(X)
        clf.set_params(class_weight='balanced')
        y_pred_balanced = clf.fit(X[unbalanced], y[unbalanced]).predict(X)
        assert (metrics.f1_score(y, y_pred, average='macro')
                <= metrics.f1_score(y, y_pred_balanced, average='macro'))
Example #6
Source File: data_preparation.py From keras-utilities with MIT License | 5 votes |
def get_class_weights(y):
    """
    Returns the normalized weights for each class
    based on the frequencies of the samples

    :param y: list of true labels (the labels must be hashable)
    :return: dictionary with the weight for each class
    """
    # Compute the distinct labels once; they are needed both for the
    # weight computation and as the keys of the returned dictionary.
    classes = numpy.unique(y)
    # ``classes`` and ``y`` are keyword-only in recent scikit-learn releases.
    weights = compute_class_weight('balanced', classes=classes, y=y)
    return dict(zip(classes, weights))
Example #7
Source File: training.py From ntua-slp-semeval2018 with MIT License | 5 votes |
def get_class_weights(y):
    """
    Returns the normalized weights for each class
    based on the frequencies of the samples

    :param y: list of true labels (the labels must be hashable)
    :return: dictionary with the weight for each class
    """
    # Compute the distinct labels once; they are needed both for the
    # weight computation and as the keys of the returned dictionary.
    classes = numpy.unique(y)
    # ``classes`` and ``y`` are keyword-only in recent scikit-learn releases.
    weights = compute_class_weight('balanced', classes=classes, y=y)
    return dict(zip(classes, weights))
Example #8
Source File: test_logistic.py From twitter-stock-recommendation with MIT License | 5 votes |
def _compute_class_weight_dictionary(y):
    """Return the "balanced" class weights of ``y`` as a dictionary.

    Helper for returning a ``{class_label: weight}`` dictionary instead of
    the array produced by ``compute_class_weight``.
    """
    classes = np.unique(y)
    # ``classes`` and ``y`` are keyword-only in recent scikit-learn releases.
    class_weight = compute_class_weight("balanced", classes=classes, y=y)
    return dict(zip(classes, class_weight))
Example #9
Source File: test_sag.py From Mastering-Elasticsearch-7.0 with MIT License | 4 votes |
def test_binary_classifier_class_weight():
    """tests binary classifier with classweights for each class"""
    alpha = .1
    n_samples = 50
    n_iter = 20
    tol = .00001
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=10,
                      cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)

    # Re-encode the two blob labels as -1 / +1.
    classes = np.unique(y)
    y_tmp = np.ones(n_samples)
    y_tmp[y != classes[1]] = -1
    y = y_tmp

    class_weight = {1: .45, -1: .55}
    clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                              max_iter=n_iter, tol=tol, random_state=77,
                              fit_intercept=fit_intercept, multi_class='ovr',
                              class_weight=class_weight)
    clf2 = clone(clf1)

    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)

    # Expand the per-class weights into per-sample weights for the
    # reference implementation.
    le = LabelEncoder()
    # ``classes`` and ``y`` are keyword-only in recent scikit-learn releases.
    class_weight_ = compute_class_weight(
        class_weight, classes=np.unique(y), y=y)
    sample_weight = class_weight_[le.fit_transform(y)]

    spweights, spintercept = sag_sparse(X, y, step_size, alpha,
                                        n_iter=n_iter, dloss=log_dloss,
                                        sample_weight=sample_weight,
                                        fit_intercept=fit_intercept)
    spweights2, spintercept2 = sag_sparse(X, y, step_size, alpha,
                                          n_iter=n_iter, dloss=log_dloss,
                                          sparse=True,
                                          sample_weight=sample_weight,
                                          fit_intercept=fit_intercept)

    assert_array_almost_equal(clf1.coef_.ravel(),
                              spweights.ravel(),
                              decimal=2)
    assert_almost_equal(clf1.intercept_, spintercept, decimal=1)

    assert_array_almost_equal(clf2.coef_.ravel(),
                              spweights2.ravel(),
                              decimal=2)
    assert_almost_equal(clf2.intercept_, spintercept2, decimal=1)
Example #10
Source File: test_sag.py From Mastering-Elasticsearch-7.0 with MIT License | 4 votes |
def test_multiclass_classifier_class_weight():
    """tests multiclass with classweights for each class"""
    alpha = .1
    n_samples = 20
    tol = .00001
    max_iter = 50
    class_weight = {0: .45, 1: .55, 2: .75}
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=3, random_state=0,
                      cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)
    classes = np.unique(y)

    clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                              max_iter=max_iter, tol=tol, random_state=77,
                              fit_intercept=fit_intercept, multi_class='ovr',
                              class_weight=class_weight)
    clf2 = clone(clf1)
    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)

    # Expand the per-class weights into per-sample weights for the
    # reference implementation.
    le = LabelEncoder()
    # ``classes`` and ``y`` are keyword-only in recent scikit-learn releases;
    # ``y`` is unchanged since ``classes`` was computed, so it is reused.
    class_weight_ = compute_class_weight(class_weight, classes=classes, y=y)
    sample_weight = class_weight_[le.fit_transform(y)]

    coef1 = []
    intercept1 = []
    coef2 = []
    intercept2 = []
    # One-vs-rest: fit the reference solver once per class on -1 / +1 labels.
    for cl in classes:
        y_encoded = np.ones(n_samples)
        y_encoded[y != cl] = -1

        spweights1, spintercept1 = sag_sparse(X, y_encoded, step_size, alpha,
                                              n_iter=max_iter,
                                              dloss=log_dloss,
                                              sample_weight=sample_weight)
        spweights2, spintercept2 = sag_sparse(X, y_encoded, step_size, alpha,
                                              n_iter=max_iter,
                                              dloss=log_dloss,
                                              sample_weight=sample_weight,
                                              sparse=True)
        coef1.append(spweights1)
        intercept1.append(spintercept1)
        coef2.append(spweights2)
        intercept2.append(spintercept2)

    coef1 = np.vstack(coef1)
    intercept1 = np.array(intercept1)
    coef2 = np.vstack(coef2)
    intercept2 = np.array(intercept2)

    for i, _ in enumerate(classes):
        assert_array_almost_equal(clf1.coef_[i].ravel(),
                                  coef1[i].ravel(),
                                  decimal=2)
        assert_almost_equal(clf1.intercept_[i], intercept1[i], decimal=1)

        assert_array_almost_equal(clf2.coef_[i].ravel(),
                                  coef2[i].ravel(),
                                  decimal=2)
        assert_almost_equal(clf2.intercept_[i], intercept2[i], decimal=1)
Example #11
Source File: log_reg_classifier.py From snips-nlu with Apache License 2.0 | 4 votes |
def fit(self, dataset):
    """Fits the intent classifier with a valid Snips dataset

    Returns:
        :class:`LogRegIntentClassifier`: The same instance, trained
    """
    import numpy as np
    from sklearn.linear_model import SGDClassifier
    from sklearn.utils import compute_class_weight

    logger.info("Fitting LogRegIntentClassifier...")
    dataset = validate_and_format_dataset(dataset)
    self.load_resources_if_needed(dataset[LANGUAGE])
    self.fit_builtin_entity_parser_if_needed(dataset)
    self.fit_custom_entity_parser_if_needed(dataset)
    language = dataset[LANGUAGE]

    data_augmentation_config = self.config.data_augmentation_config
    utterances, classes, intent_list = build_training_data(
        dataset, language, data_augmentation_config, self.resources,
        self.random_state)

    self.intent_list = intent_list
    # Nothing to train when there is at most one intent.
    if len(self.intent_list) <= 1:
        return self

    self.featurizer = Featurizer(
        config=self.config.featurizer_config,
        builtin_entity_parser=self.builtin_entity_parser,
        custom_entity_parser=self.custom_entity_parser,
        resources=self.resources,
        random_state=self.random_state,
    )
    self.featurizer.language = language

    none_class = max(classes)
    try:
        x = self.featurizer.fit_transform(
            dataset, utterances, classes, none_class)
    except _EmptyDatasetUtterancesError:
        logger.warning("No (non-empty) utterances found in dataset")
        self.featurizer = None
        return self

    alpha = get_regularization_factor(dataset)

    # ``classes`` and ``y`` are keyword-only in recent scikit-learn
    # releases, which also validate ``classes`` as an ndarray, hence
    # ``np.arange`` rather than ``range``.
    class_weights_arr = compute_class_weight(
        "balanced", classes=np.arange(none_class + 1), y=classes)
    # Re-weight the noise class
    class_weights_arr[-1] *= self.config.noise_reweight_factor
    class_weight = dict(enumerate(class_weights_arr))

    self.classifier = SGDClassifier(
        random_state=self.random_state, alpha=alpha,
        class_weight=class_weight, **LOG_REG_ARGS)
    self.classifier.fit(x, classes)
    logger.debug("%s", DifferedLoggingMessage(self.log_best_features))
    return self
Example #12
Source File: test_sag.py From twitter-stock-recommendation with MIT License | 4 votes |
def test_binary_classifier_class_weight():
    """tests binary classifier with classweights for each class"""
    alpha = .1
    n_samples = 50
    n_iter = 20
    tol = .00001
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=10,
                      cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)

    # Re-encode the two blob labels as -1 / +1.
    classes = np.unique(y)
    y_tmp = np.ones(n_samples)
    y_tmp[y != classes[1]] = -1
    y = y_tmp

    class_weight = {1: .45, -1: .55}
    clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                              max_iter=n_iter, tol=tol, random_state=77,
                              fit_intercept=fit_intercept,
                              class_weight=class_weight)
    clf2 = clone(clf1)

    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)

    # Expand the per-class weights into per-sample weights for the
    # reference implementation.
    le = LabelEncoder()
    # ``classes`` and ``y`` are keyword-only in recent scikit-learn releases.
    class_weight_ = compute_class_weight(
        class_weight, classes=np.unique(y), y=y)
    sample_weight = class_weight_[le.fit_transform(y)]

    spweights, spintercept = sag_sparse(X, y, step_size, alpha,
                                        n_iter=n_iter, dloss=log_dloss,
                                        sample_weight=sample_weight,
                                        fit_intercept=fit_intercept)
    spweights2, spintercept2 = sag_sparse(X, y, step_size, alpha,
                                          n_iter=n_iter, dloss=log_dloss,
                                          sparse=True,
                                          sample_weight=sample_weight,
                                          fit_intercept=fit_intercept)

    assert_array_almost_equal(clf1.coef_.ravel(),
                              spweights.ravel(),
                              decimal=2)
    assert_almost_equal(clf1.intercept_, spintercept, decimal=1)

    assert_array_almost_equal(clf2.coef_.ravel(),
                              spweights2.ravel(),
                              decimal=2)
    assert_almost_equal(clf2.intercept_, spintercept2, decimal=1)
Example #13
Source File: test_sag.py From twitter-stock-recommendation with MIT License | 4 votes |
def test_multiclass_classifier_class_weight():
    """tests multiclass with classweights for each class"""
    alpha = .1
    n_samples = 20
    tol = .00001
    max_iter = 50
    class_weight = {0: .45, 1: .55, 2: .75}
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=3, random_state=0,
                      cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)
    classes = np.unique(y)

    clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                              max_iter=max_iter, tol=tol, random_state=77,
                              fit_intercept=fit_intercept,
                              class_weight=class_weight)
    clf2 = clone(clf1)
    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)

    # Expand the per-class weights into per-sample weights for the
    # reference implementation.
    le = LabelEncoder()
    # ``classes`` and ``y`` are keyword-only in recent scikit-learn releases;
    # ``y`` is unchanged since ``classes`` was computed, so it is reused.
    class_weight_ = compute_class_weight(class_weight, classes=classes, y=y)
    sample_weight = class_weight_[le.fit_transform(y)]

    coef1 = []
    intercept1 = []
    coef2 = []
    intercept2 = []
    # One-vs-rest: fit the reference solver once per class on -1 / +1 labels.
    for cl in classes:
        y_encoded = np.ones(n_samples)
        y_encoded[y != cl] = -1

        spweights1, spintercept1 = sag_sparse(X, y_encoded, step_size, alpha,
                                              n_iter=max_iter,
                                              dloss=log_dloss,
                                              sample_weight=sample_weight)
        spweights2, spintercept2 = sag_sparse(X, y_encoded, step_size, alpha,
                                              n_iter=max_iter,
                                              dloss=log_dloss,
                                              sample_weight=sample_weight,
                                              sparse=True)
        coef1.append(spweights1)
        intercept1.append(spintercept1)
        coef2.append(spweights2)
        intercept2.append(spintercept2)

    coef1 = np.vstack(coef1)
    intercept1 = np.array(intercept1)
    coef2 = np.vstack(coef2)
    intercept2 = np.array(intercept2)

    for i, _ in enumerate(classes):
        assert_array_almost_equal(clf1.coef_[i].ravel(),
                                  coef1[i].ravel(),
                                  decimal=2)
        assert_almost_equal(clf1.intercept_[i], intercept1[i], decimal=1)

        assert_array_almost_equal(clf2.coef_[i].ravel(),
                                  coef2[i].ravel(),
                                  decimal=2)
        assert_almost_equal(clf2.intercept_[i], intercept2[i], decimal=1)