Python Examples of sklearn.utils.compute_class

Source File: test_estimator_checks.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def fit(self, X, y):
        from sklearn.preprocessing import LabelEncoder
        from sklearn.utils import compute_class_weight

        label_encoder = LabelEncoder().fit(y)
        classes = label_encoder.classes_
        class_weight = compute_class_weight(self.class_weight, classes, y)

        # Intentionally modify the balanced class_weight
        # to simulate a bug and raise an exception
        if self.class_weight == "balanced":
            class_weight += 1.

        # Simply assigning coef_ to the class_weight
        self.coef_ = class_weight
        return self

Source File: test_logistic.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def _compute_class_weight_dictionary(y):
    # helper for returning a dictionary instead of an array
    classes = np.unique(y)
    class_weight = compute_class_weight("balanced", classes, y)
    class_weight_dict = dict(zip(classes, class_weight))
    return class_weight_dict

Source File: struct_models.py From marseille with BSD 3-Clause "New" or "Revised" License

6 votes

def initialize_labels(self, Y):

        y_nodes_flat = [y_val for y in Y for y_val in y.nodes]
        y_links_flat = [y_val for y in Y for y_val in y.links]
        self.prop_encoder_ = LabelEncoder().fit(y_nodes_flat)
        self.link_encoder_ = LabelEncoder().fit(y_links_flat)

        self.n_prop_states = len(self.prop_encoder_.classes_)
        self.n_link_states = len(self.link_encoder_.classes_)

        self.prop_cw_ = np.ones_like(self.prop_encoder_.classes_,
                                     dtype=np.double)
        self.link_cw_ = compute_class_weight(self.class_weight,
                                             self.link_encoder_.classes_,
                                             y_links_flat)

        self.link_cw_ /= self.link_cw_.min()

        logging.info('Setting node class weights {}'.format(", ".join(
            "{}: {}".format(lbl, cw) for lbl, cw in zip(
                self.prop_encoder_.classes_, self.prop_cw_))))

        logging.info('Setting link class weights {}'.format(", ".join(
            "{}: {}".format(lbl, cw) for lbl, cw in zip(
                self.link_encoder_.classes_, self.link_cw_))))

Source File: test_svm.py From twitter-stock-recommendation with MIT License

6 votes

def test_auto_weight():
    # Test class weights for imbalanced data
    from sklearn.linear_model import LogisticRegression
    # We take as dataset the two-dimensional projection of iris so
    # that it is not separable and remove half of predictors from
    # class 1.
    # We add one to the targets as a non-regression test: class_weight="balanced"
    # used to work only when the labels where a range [0..K).
    from sklearn.utils import compute_class_weight
    X, y = iris.data[:, :2], iris.target + 1
    unbalanced = np.delete(np.arange(y.size), np.where(y > 2)[0][::2])

    classes = np.unique(y[unbalanced])
    class_weights = compute_class_weight('balanced', classes, y[unbalanced])
    assert_true(np.argmax(class_weights) == 2)

    for clf in (svm.SVC(kernel='linear'), svm.LinearSVC(random_state=0),
                LogisticRegression()):
        # check that score is better when class='balanced' is set.
        y_pred = clf.fit(X[unbalanced], y[unbalanced]).predict(X)
        clf.set_params(class_weight='balanced')
        y_pred_balanced = clf.fit(X[unbalanced], y[unbalanced],).predict(X)
        assert_true(metrics.f1_score(y, y_pred, average='macro')
                    <= metrics.f1_score(y, y_pred_balanced,
                                        average='macro'))

Source File: test_svm.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_auto_weight():
    # Test class weights for imbalanced data
    from sklearn.linear_model import LogisticRegression
    # We take as dataset the two-dimensional projection of iris so
    # that it is not separable and remove half of predictors from
    # class 1.
    # We add one to the targets as a non-regression test:
    # class_weight="balanced"
    # used to work only when the labels where a range [0..K).
    from sklearn.utils import compute_class_weight
    X, y = iris.data[:, :2], iris.target + 1
    unbalanced = np.delete(np.arange(y.size), np.where(y > 2)[0][::2])

    classes = np.unique(y[unbalanced])
    class_weights = compute_class_weight('balanced', classes, y[unbalanced])
    assert np.argmax(class_weights) == 2

    for clf in (svm.SVC(kernel='linear'), svm.LinearSVC(random_state=0),
                LogisticRegression()):
        # check that score is better when class='balanced' is set.
        y_pred = clf.fit(X[unbalanced], y[unbalanced]).predict(X)
        clf.set_params(class_weight='balanced')
        y_pred_balanced = clf.fit(X[unbalanced], y[unbalanced],).predict(X)
        assert (metrics.f1_score(y, y_pred, average='macro')
                <= metrics.f1_score(y, y_pred_balanced,
                                    average='macro'))

Source File: data_preparation.py From keras-utilities with MIT License

5 votes

def get_class_weights(y):
    """
    Returns the normalized weights for each class based on the frequencies of the samples
    :param y: list of true labels (the labels must be hashable)
    :return: dictionary with the weight for each class
    """

    weights = compute_class_weight('balanced', numpy.unique(y), y)

    d = {c: w for c, w in zip(numpy.unique(y), weights)}

    return d

Source File: training.py From ntua-slp-semeval2018 with MIT License

5 votes

def get_class_weights(y):
    """
    Returns the normalized weights for each class
    based on the frequencies of the samples
    :param y: list of true labels (the labels must be hashable)
    :return: dictionary with the weight for each class
    """

    weights = compute_class_weight('balanced', numpy.unique(y), y)

    d = {c: w for c, w in zip(numpy.unique(y), weights)}

    return d

Source File: test_logistic.py From twitter-stock-recommendation with MIT License

5 votes

def _compute_class_weight_dictionary(y):
    # helper for returning a dictionary instead of an array
    classes = np.unique(y)
    class_weight = compute_class_weight("balanced", classes, y)
    class_weight_dict = dict(zip(classes, class_weight))
    return class_weight_dict

Source File: test_sag.py From Mastering-Elasticsearch-7.0 with MIT License

4 votes

def test_binary_classifier_class_weight():
    """tests binary classifier with classweights for each class"""
    alpha = .1
    n_samples = 50
    n_iter = 20
    tol = .00001
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=10,
                      cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)
    classes = np.unique(y)
    y_tmp = np.ones(n_samples)
    y_tmp[y != classes[1]] = -1
    y = y_tmp

    class_weight = {1: .45, -1: .55}
    clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                              max_iter=n_iter, tol=tol, random_state=77,
                              fit_intercept=fit_intercept, multi_class='ovr',
                              class_weight=class_weight)
    clf2 = clone(clf1)

    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)

    le = LabelEncoder()
    class_weight_ = compute_class_weight(class_weight, np.unique(y), y)
    sample_weight = class_weight_[le.fit_transform(y)]
    spweights, spintercept = sag_sparse(X, y, step_size, alpha, n_iter=n_iter,
                                        dloss=log_dloss,
                                        sample_weight=sample_weight,
                                        fit_intercept=fit_intercept)
    spweights2, spintercept2 = sag_sparse(X, y, step_size, alpha,
                                          n_iter=n_iter,
                                          dloss=log_dloss, sparse=True,
                                          sample_weight=sample_weight,
                                          fit_intercept=fit_intercept)

    assert_array_almost_equal(clf1.coef_.ravel(),
                              spweights.ravel(),
                              decimal=2)
    assert_almost_equal(clf1.intercept_, spintercept, decimal=1)

    assert_array_almost_equal(clf2.coef_.ravel(),
                              spweights2.ravel(),
                              decimal=2)
    assert_almost_equal(clf2.intercept_, spintercept2, decimal=1)

Source File: test_sag.py From Mastering-Elasticsearch-7.0 with MIT License

4 votes

def test_multiclass_classifier_class_weight():
    """tests multiclass with classweights for each class"""
    alpha = .1
    n_samples = 20
    tol = .00001
    max_iter = 50
    class_weight = {0: .45, 1: .55, 2: .75}
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=3, random_state=0,
                      cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)
    classes = np.unique(y)

    clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                              max_iter=max_iter, tol=tol, random_state=77,
                              fit_intercept=fit_intercept, multi_class='ovr',
                              class_weight=class_weight)
    clf2 = clone(clf1)
    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)

    le = LabelEncoder()
    class_weight_ = compute_class_weight(class_weight, np.unique(y), y)
    sample_weight = class_weight_[le.fit_transform(y)]

    coef1 = []
    intercept1 = []
    coef2 = []
    intercept2 = []
    for cl in classes:
        y_encoded = np.ones(n_samples)
        y_encoded[y != cl] = -1

        spweights1, spintercept1 = sag_sparse(X, y_encoded, step_size, alpha,
                                              n_iter=max_iter, dloss=log_dloss,
                                              sample_weight=sample_weight)
        spweights2, spintercept2 = sag_sparse(X, y_encoded, step_size, alpha,
                                              n_iter=max_iter, dloss=log_dloss,
                                              sample_weight=sample_weight,
                                              sparse=True)
        coef1.append(spweights1)
        intercept1.append(spintercept1)
        coef2.append(spweights2)
        intercept2.append(spintercept2)

    coef1 = np.vstack(coef1)
    intercept1 = np.array(intercept1)
    coef2 = np.vstack(coef2)
    intercept2 = np.array(intercept2)

    for i, cl in enumerate(classes):
        assert_array_almost_equal(clf1.coef_[i].ravel(),
                                  coef1[i].ravel(),
                                  decimal=2)
        assert_almost_equal(clf1.intercept_[i], intercept1[i], decimal=1)

        assert_array_almost_equal(clf2.coef_[i].ravel(),
                                  coef2[i].ravel(),
                                  decimal=2)
        assert_almost_equal(clf2.intercept_[i], intercept2[i], decimal=1)

Source File: log_reg_classifier.py From snips-nlu with Apache License 2.0

4 votes

def fit(self, dataset):
        """Fits the intent classifier with a valid Snips dataset

        Returns:
            :class:`LogRegIntentClassifier`: The same instance, trained
        """
        from sklearn.linear_model import SGDClassifier
        from sklearn.utils import compute_class_weight

        logger.info("Fitting LogRegIntentClassifier...")
        dataset = validate_and_format_dataset(dataset)
        self.load_resources_if_needed(dataset[LANGUAGE])
        self.fit_builtin_entity_parser_if_needed(dataset)
        self.fit_custom_entity_parser_if_needed(dataset)
        language = dataset[LANGUAGE]

        data_augmentation_config = self.config.data_augmentation_config
        utterances, classes, intent_list = build_training_data(
            dataset, language, data_augmentation_config, self.resources,
            self.random_state)

        self.intent_list = intent_list
        if len(self.intent_list) <= 1:
            return self

        self.featurizer = Featurizer(
            config=self.config.featurizer_config,
            builtin_entity_parser=self.builtin_entity_parser,
            custom_entity_parser=self.custom_entity_parser,
            resources=self.resources,
            random_state=self.random_state,
        )
        self.featurizer.language = language

        none_class = max(classes)
        try:
            x = self.featurizer.fit_transform(
                dataset, utterances, classes, none_class)
        except _EmptyDatasetUtterancesError:
            logger.warning("No (non-empty) utterances found in dataset")
            self.featurizer = None
            return self

        alpha = get_regularization_factor(dataset)

        class_weights_arr = compute_class_weight(
            "balanced", range(none_class + 1), classes)
        # Re-weight the noise class
        class_weights_arr[-1] *= self.config.noise_reweight_factor
        class_weight = {idx: w for idx, w in enumerate(class_weights_arr)}

        self.classifier = SGDClassifier(
            random_state=self.random_state, alpha=alpha,
            class_weight=class_weight, **LOG_REG_ARGS)
        self.classifier.fit(x, classes)
        logger.debug("%s", DifferedLoggingMessage(self.log_best_features))
        return self

Source File: test_sag.py From twitter-stock-recommendation with MIT License

4 votes

def test_binary_classifier_class_weight():
    """tests binary classifier with classweights for each class"""
    alpha = .1
    n_samples = 50
    n_iter = 20
    tol = .00001
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=10,
                      cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)
    classes = np.unique(y)
    y_tmp = np.ones(n_samples)
    y_tmp[y != classes[1]] = -1
    y = y_tmp

    class_weight = {1: .45, -1: .55}
    clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                              max_iter=n_iter, tol=tol, random_state=77,
                              fit_intercept=fit_intercept,
                              class_weight=class_weight)
    clf2 = clone(clf1)

    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)

    le = LabelEncoder()
    class_weight_ = compute_class_weight(class_weight, np.unique(y), y)
    sample_weight = class_weight_[le.fit_transform(y)]
    spweights, spintercept = sag_sparse(X, y, step_size, alpha, n_iter=n_iter,
                                        dloss=log_dloss,
                                        sample_weight=sample_weight,
                                        fit_intercept=fit_intercept)
    spweights2, spintercept2 = sag_sparse(X, y, step_size, alpha,
                                          n_iter=n_iter,
                                          dloss=log_dloss, sparse=True,
                                          sample_weight=sample_weight,
                                          fit_intercept=fit_intercept)

    assert_array_almost_equal(clf1.coef_.ravel(),
                              spweights.ravel(),
                              decimal=2)
    assert_almost_equal(clf1.intercept_, spintercept, decimal=1)

    assert_array_almost_equal(clf2.coef_.ravel(),
                              spweights2.ravel(),
                              decimal=2)
    assert_almost_equal(clf2.intercept_, spintercept2, decimal=1)

Source File: test_sag.py From twitter-stock-recommendation with MIT License

4 votes

def test_multiclass_classifier_class_weight():
    """tests multiclass with classweights for each class"""
    alpha = .1
    n_samples = 20
    tol = .00001
    max_iter = 50
    class_weight = {0: .45, 1: .55, 2: .75}
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=3, random_state=0,
                      cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)
    classes = np.unique(y)

    clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                              max_iter=max_iter, tol=tol, random_state=77,
                              fit_intercept=fit_intercept,
                              class_weight=class_weight)
    clf2 = clone(clf1)
    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)

    le = LabelEncoder()
    class_weight_ = compute_class_weight(class_weight, np.unique(y), y)
    sample_weight = class_weight_[le.fit_transform(y)]

    coef1 = []
    intercept1 = []
    coef2 = []
    intercept2 = []
    for cl in classes:
        y_encoded = np.ones(n_samples)
        y_encoded[y != cl] = -1

        spweights1, spintercept1 = sag_sparse(X, y_encoded, step_size, alpha,
                                              n_iter=max_iter, dloss=log_dloss,
                                              sample_weight=sample_weight)
        spweights2, spintercept2 = sag_sparse(X, y_encoded, step_size, alpha,
                                              n_iter=max_iter, dloss=log_dloss,
                                              sample_weight=sample_weight,
                                              sparse=True)
        coef1.append(spweights1)
        intercept1.append(spintercept1)
        coef2.append(spweights2)
        intercept2.append(spintercept2)

    coef1 = np.vstack(coef1)
    intercept1 = np.array(intercept1)
    coef2 = np.vstack(coef2)
    intercept2 = np.array(intercept2)

    for i, cl in enumerate(classes):
        assert_array_almost_equal(clf1.coef_[i].ravel(),
                                  coef1[i].ravel(),
                                  decimal=2)
        assert_almost_equal(clf1.intercept_[i], intercept1[i], decimal=1)

        assert_array_almost_equal(clf2.coef_[i].ravel(),
                                  coef2[i].ravel(),
                                  decimal=2)
        assert_almost_equal(clf2.intercept_[i], intercept2[i], decimal=1)

Python sklearn.utils.compute_class_weight() Examples