Python sklearn.multioutput.MultiOutputClassifier() Examples
The following are 21 code examples of sklearn.multioutput.MultiOutputClassifier(), drawn from open-source projects. Each example is listed with its original project and source file.
Example #1
Source File: test_multioutput.py From twitter-stock-recommendation with MIT License | 7 votes |
def test_multi_output_classification_sample_weights():
    # weighted classifier
    Xw = [[1, 2, 3], [4, 5, 6]]
    yw = [[3, 2], [2, 3]]
    w = np.asarray([2., 1.])
    forest = RandomForestClassifier(n_estimators=10, random_state=1)
    clf_w = MultiOutputClassifier(forest)
    clf_w.fit(Xw, yw, w)

    # unweighted, but with repeated samples
    X = [[1, 2, 3], [1, 2, 3], [4, 5, 6]]
    y = [[3, 2], [3, 2], [2, 3]]
    forest = RandomForestClassifier(n_estimators=10, random_state=1)
    clf = MultiOutputClassifier(forest)
    clf.fit(X, y)

    X_test = [[1.5, 2.5, 3.5], [3.5, 4.5, 5.5]]
    assert_almost_equal(clf.predict(X_test), clf_w.predict(X_test))
Example #2
Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_multi_output_predict_proba():
    sgd_linear_clf = SGDClassifier(random_state=1, max_iter=5, tol=1e-3)
    param = {'loss': ('hinge', 'log', 'modified_huber')}

    # inner function for custom scoring
    def custom_scorer(estimator, X, y):
        if hasattr(estimator, "predict_proba"):
            return 1.0
        else:
            return 0.0

    grid_clf = GridSearchCV(sgd_linear_clf, param_grid=param,
                            scoring=custom_scorer, cv=3, error_score=np.nan)
    multi_target_linear = MultiOutputClassifier(grid_clf)
    multi_target_linear.fit(X, y)

    multi_target_linear.predict_proba(X)

    # SGDClassifier defaults to loss='hinge' which is not a probabilistic
    # loss function; therefore it does not expose a predict_proba method
    sgd_linear_clf = SGDClassifier(random_state=1, max_iter=5, tol=1e-3)
    multi_target_linear = MultiOutputClassifier(sgd_linear_clf)
    multi_target_linear.fit(X, y)
    err_msg = "The base estimator should implement predict_proba method"
    with pytest.raises(ValueError, match=err_msg):
        multi_target_linear.predict_proba(X)
Example #3
Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_multi_output_classification_partial_fit_parallelism():
    sgd_linear_clf = SGDClassifier(loss='log', random_state=1, max_iter=5)
    mor = MultiOutputClassifier(sgd_linear_clf, n_jobs=4)
    mor.partial_fit(X, y, classes)
    est1 = mor.estimators_[0]
    mor.partial_fit(X, y)
    est2 = mor.estimators_[0]
    if cpu_count() > 1:
        # parallelism requires this to be the case for a sane implementation
        assert est1 is not est2
Example #4
Source File: test_multioutput.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_multi_output_classification_partial_fit_sample_weights():
    # weighted classifier
    Xw = [[1, 2, 3], [4, 5, 6], [1.5, 2.5, 3.5]]
    yw = [[3, 2], [2, 3], [3, 2]]
    w = np.asarray([2., 1., 1.])
    sgd_linear_clf = SGDClassifier(random_state=1, max_iter=5)
    clf_w = MultiOutputClassifier(sgd_linear_clf)
    clf_w.fit(Xw, yw, w)

    # unweighted, but with repeated samples
    X = [[1, 2, 3], [1, 2, 3], [4, 5, 6], [1.5, 2.5, 3.5]]
    y = [[3, 2], [3, 2], [2, 3], [3, 2]]
    sgd_linear_clf = SGDClassifier(random_state=1, max_iter=5)
    clf = MultiOutputClassifier(sgd_linear_clf)
    clf.fit(X, y)

    X_test = [[1.5, 2.5, 3.5]]
    assert_array_almost_equal(clf.predict(X_test), clf_w.predict(X_test))
Example #5
Source File: test_multioutput.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_multiclass_multioutput_estimator():
    # test to check meta of meta estimators
    svc = LinearSVC(random_state=0)
    multi_class_svc = OneVsRestClassifier(svc)
    multi_target_svc = MultiOutputClassifier(multi_class_svc)

    multi_target_svc.fit(X, y)
    predictions = multi_target_svc.predict(X)
    assert_equal((n_samples, n_outputs), predictions.shape)

    # train the svc with each column and assert that predictions are equal
    for i in range(3):
        multi_class_svc_ = clone(multi_class_svc)  # create a clone
        multi_class_svc_.fit(X, y[:, i])
        assert_equal(list(multi_class_svc_.predict(X)),
                     list(predictions[:, i]))
Example #6
Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_multi_output_classification_sample_weights():
    # weighted classifier
    Xw = [[1, 2, 3], [4, 5, 6]]
    yw = [[3, 2], [2, 3]]
    w = np.asarray([2., 1.])
    forest = RandomForestClassifier(n_estimators=10, random_state=1)
    clf_w = MultiOutputClassifier(forest)
    clf_w.fit(Xw, yw, w)

    # unweighted, but with repeated samples
    X = [[1, 2, 3], [1, 2, 3], [4, 5, 6]]
    y = [[3, 2], [3, 2], [2, 3]]
    forest = RandomForestClassifier(n_estimators=10, random_state=1)
    clf = MultiOutputClassifier(forest)
    clf.fit(X, y)

    X_test = [[1.5, 2.5, 3.5], [3.5, 4.5, 5.5]]
    assert_almost_equal(clf.predict(X_test), clf_w.predict(X_test))
Example #7
Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_multi_output_classification_partial_fit_sample_weights():
    # weighted classifier
    Xw = [[1, 2, 3], [4, 5, 6], [1.5, 2.5, 3.5]]
    yw = [[3, 2], [2, 3], [3, 2]]
    w = np.asarray([2., 1., 1.])
    sgd_linear_clf = SGDClassifier(random_state=1, max_iter=20)
    clf_w = MultiOutputClassifier(sgd_linear_clf)
    clf_w.fit(Xw, yw, w)

    # unweighted, but with repeated samples
    X = [[1, 2, 3], [1, 2, 3], [4, 5, 6], [1.5, 2.5, 3.5]]
    y = [[3, 2], [3, 2], [2, 3], [3, 2]]
    sgd_linear_clf = SGDClassifier(random_state=1, max_iter=20)
    clf = MultiOutputClassifier(sgd_linear_clf)
    clf.fit(X, y)

    X_test = [[1.5, 2.5, 3.5]]
    assert_array_almost_equal(clf.predict(X_test), clf_w.predict(X_test))
Example #8
Source File: test_multioutput.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_multi_output_exceptions():
    # NotFittedError when fit is not done but score, predict and
    # predict_proba are called
    moc = MultiOutputClassifier(LinearSVC(random_state=0))
    assert_raises(NotFittedError, moc.predict, y)
    assert_raises(NotFittedError, moc.predict_proba, y)
    assert_raises(NotFittedError, moc.score, X, y)

    # ValueError when number of outputs is different
    # for fit and score
    y_new = np.column_stack((y1, y2))
    moc.fit(X, y)
    assert_raises(ValueError, moc.score, X, y_new)

    # ValueError when y is continuous
    assert_raise_message(ValueError, "Unknown label type",
                         moc.fit, X, X[:, 1])
Example #9
Source File: test_multioutput.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_multi_output_classification():
    # test if multi_target initializes correctly with base estimator and fit
    # assert predictions work as expected for predict, predict_proba and score
    forest = RandomForestClassifier(n_estimators=10, random_state=1)
    multi_target_forest = MultiOutputClassifier(forest)

    # train the multi_target_forest and also get the predictions.
    multi_target_forest.fit(X, y)
    predictions = multi_target_forest.predict(X)
    assert_equal((n_samples, n_outputs), predictions.shape)

    predict_proba = multi_target_forest.predict_proba(X)

    assert len(predict_proba) == n_outputs
    for class_probabilities in predict_proba:
        assert_equal((n_samples, n_classes), class_probabilities.shape)

    assert_array_equal(np.argmax(np.dstack(predict_proba), axis=1),
                       predictions)

    # train the forest with each column and assert that predictions are equal
    for i in range(3):
        forest_ = clone(forest)  # create a clone with the same state
        forest_.fit(X, y[:, i])
        assert_equal(list(forest_.predict(X)), list(predictions[:, i]))
        assert_array_equal(list(forest_.predict_proba(X)),
                           list(predict_proba[i]))
Example #10
Source File: test_multioutput.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_multi_output_classification_partial_fit_no_first_classes_exception():
    sgd_linear_clf = SGDClassifier(loss='log', random_state=1, max_iter=5)
    multi_target_linear = MultiOutputClassifier(sgd_linear_clf)
    assert_raises_regex(ValueError, "classes must be passed on the first call "
                                    "to partial_fit.",
                        multi_target_linear.partial_fit, X, y)
Example #11
Source File: test_multioutput.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_multi_output_classification_partial_fit():
    # test if multi_target initializes correctly with base estimator and fit
    # assert predictions work as expected for predict
    sgd_linear_clf = SGDClassifier(loss='log', random_state=1, max_iter=5)
    multi_target_linear = MultiOutputClassifier(sgd_linear_clf)

    # train the multi_target_linear and also get the predictions.
    half_index = X.shape[0] // 2
    multi_target_linear.partial_fit(
        X[:half_index], y[:half_index], classes=classes)

    first_predictions = multi_target_linear.predict(X)
    assert_equal((n_samples, n_outputs), first_predictions.shape)

    multi_target_linear.partial_fit(X[half_index:], y[half_index:])
    second_predictions = multi_target_linear.predict(X)
    assert_equal((n_samples, n_outputs), second_predictions.shape)

    # train the linear classification with each column and assert that
    # predictions are equal after first partial_fit and second partial_fit
    for i in range(3):
        # create a clone with the same state
        sgd_linear_clf = clone(sgd_linear_clf)
        sgd_linear_clf.partial_fit(
            X[:half_index], y[:half_index, i], classes=classes[i])
        assert_array_equal(sgd_linear_clf.predict(X), first_predictions[:, i])
        sgd_linear_clf.partial_fit(X[half_index:], y[half_index:, i])
        assert_array_equal(sgd_linear_clf.predict(X), second_predictions[:, i])
Example #12
Source File: test_multioutput.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_multi_output_classification_partial_fit_parallelism():
    sgd_linear_clf = SGDClassifier(loss='log', random_state=1, max_iter=5)
    mor = MultiOutputClassifier(sgd_linear_clf, n_jobs=-1)
    mor.partial_fit(X, y, classes)
    est1 = mor.estimators_[0]
    mor.partial_fit(X, y)
    est2 = mor.estimators_[0]
    if cpu_count() > 1:
        # parallelism requires this to be the case for a sane implementation
        assert_false(est1 is est2)
Example #13
Source File: test_multioutput.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.multioutput.MultiOutputRegressor,
                  multioutput.MultiOutputRegressor)
    self.assertIs(df.multioutput.MultiOutputClassifier,
                  multioutput.MultiOutputClassifier)
Example #14
Source File: test_optimization.py From sports-betting with MIT License | 5 votes |
def test_multi_bettor_predict_proba():
    """Test predict probabilities method of multi-bettor."""
    multi_bettor = MultiBettor(multi_classifier=MultiOutputClassifier(DummyClassifier()),
                               meta_classifier=DummyClassifier(),
                               targets=['D', 'H']).fit(X, score1, score2, odds)
    assert multi_bettor.predict_proba(X).shape[1] == len(np.array(['-', 'D', 'H']))

    multi_bettor = MultiBettor(multi_classifier=MultiOutputClassifier(DummyClassifier()),
                               meta_classifier=DummyClassifier(),
                               targets=['over_2.5', 'under_2.5']).fit(X, score1, score2, odds)
    assert multi_bettor.predict_proba(X).shape[1] == len(np.array(['over_2.5', 'under_2.5']))

    multi_bettor = MultiBettor(multi_classifier=MultiOutputClassifier(DummyClassifier()),
                               meta_classifier=DummyClassifier(),
                               targets=['over_2.5', 'A']).fit(X, score1, score2, odds)
    assert multi_bettor.predict_proba(X).shape[1] == len(np.array(['A', 'over_2.5']))
Example #15
Source File: test_optimization.py From sports-betting with MIT License | 5 votes |
def test_multi_bettor_predict():
    """Test predict method of multi-bettor."""
    multi_bettor = MultiBettor(multi_classifier=MultiOutputClassifier(DummyClassifier()),
                               meta_classifier=DummyClassifier(),
                               targets=['D', 'H']).fit(X, score1, score2, odds)
    np.testing.assert_array_equal(np.unique(multi_bettor.predict(X)),
                                  np.array(['-', 'D', 'H']))

    multi_bettor = MultiBettor(multi_classifier=MultiOutputClassifier(DummyClassifier()),
                               meta_classifier=DummyClassifier(),
                               targets=['over_2.5', 'under_2.5']).fit(X, score1, score2, odds)
    np.testing.assert_array_equal(np.unique(multi_bettor.predict(X)),
                                  np.array(['over_2.5', 'under_2.5']))

    multi_bettor = MultiBettor(multi_classifier=MultiOutputClassifier(DummyClassifier()),
                               meta_classifier=DummyClassifier(),
                               targets=['over_2.5', 'A']).fit(X, score1, score2, odds)
    np.testing.assert_array_equal(np.unique(multi_bettor.predict(X)),
                                  np.array(['A', 'over_2.5']))
Example #16
Source File: test_optimization.py From sports-betting with MIT License | 5 votes |
def test_multi_bettor_fit():
    """Test fit method of multi-bettor."""
    multi_bettor = MultiBettor(multi_classifier=MultiOutputClassifier(DummyClassifier()),
                               meta_classifier=DummyClassifier(),
                               targets=['D', 'H']).fit(X, score1, score2, odds)
    assert len(multi_bettor.multi_classifier_.estimators_) == len(multi_bettor.targets_)
    np.testing.assert_array_equal(np.unique(multi_bettor.meta_classifier_.classes_),
                                  np.array(['-', 'D', 'H']))

    multi_bettor = MultiBettor(multi_classifier=MultiOutputClassifier(DummyClassifier()),
                               meta_classifier=DummyClassifier(),
                               targets=['over_2.5', 'under_2.5']).fit(X, score1, score2, odds)
    assert len(multi_bettor.multi_classifier_.estimators_) == len(multi_bettor.targets_)
    np.testing.assert_array_equal(np.unique(multi_bettor.meta_classifier_.classes_),
                                  np.array(['over_2.5', 'under_2.5']))

    multi_bettor = MultiBettor(multi_classifier=MultiOutputClassifier(DummyClassifier()),
                               meta_classifier=DummyClassifier(),
                               targets=['over_2.5', 'A']).fit(X, score1, score2, odds)
    assert len(multi_bettor.multi_classifier_.estimators_) == len(multi_bettor.targets_)
    np.testing.assert_array_equal(np.unique(multi_bettor.meta_classifier_.classes_),
                                  np.array(['A', 'over_2.5']))
Example #17
Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_multi_output_exceptions():
    # NotFittedError when fit is not done but score, predict and
    # predict_proba are called
    moc = MultiOutputClassifier(LinearSVC(random_state=0))
    assert_raises(NotFittedError, moc.predict, y)
    assert_raises(NotFittedError, moc.predict_proba, y)
    assert_raises(NotFittedError, moc.score, X, y)

    # ValueError when number of outputs is different
    # for fit and score
    y_new = np.column_stack((y1, y2))
    moc.fit(X, y)
    assert_raises(ValueError, moc.score, X, y_new)

    # ValueError when y is continuous
    assert_raise_message(ValueError, "Unknown label type",
                         moc.fit, X, X[:, 1])
Example #18
Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_multi_output_classification():
    # test if multi_target initializes correctly with base estimator and fit
    # assert predictions work as expected for predict, predict_proba and score
    forest = RandomForestClassifier(n_estimators=10, random_state=1)
    multi_target_forest = MultiOutputClassifier(forest)

    # train the multi_target_forest and also get the predictions.
    multi_target_forest.fit(X, y)
    predictions = multi_target_forest.predict(X)
    assert_equal((n_samples, n_outputs), predictions.shape)

    predict_proba = multi_target_forest.predict_proba(X)

    assert len(predict_proba) == n_outputs
    for class_probabilities in predict_proba:
        assert_equal((n_samples, n_classes), class_probabilities.shape)

    assert_array_equal(np.argmax(np.dstack(predict_proba), axis=1),
                       predictions)

    # train the forest with each column and assert that predictions are equal
    for i in range(3):
        forest_ = clone(forest)  # create a clone with the same state
        forest_.fit(X, y[:, i])
        assert_equal(list(forest_.predict(X)), list(predictions[:, i]))
        assert_array_equal(list(forest_.predict_proba(X)),
                           list(predict_proba[i]))
Example #19
Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_multi_output_classification_partial_fit_no_first_classes_exception():
    sgd_linear_clf = SGDClassifier(loss='log', random_state=1, max_iter=5)
    multi_target_linear = MultiOutputClassifier(sgd_linear_clf)
    assert_raises_regex(ValueError, "classes must be passed on the first call "
                                    "to partial_fit.",
                        multi_target_linear.partial_fit, X, y)
Example #20
Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_multi_output_classification_partial_fit():
    # test if multi_target initializes correctly with base estimator and fit
    # assert predictions work as expected for predict
    sgd_linear_clf = SGDClassifier(loss='log', random_state=1, max_iter=5)
    multi_target_linear = MultiOutputClassifier(sgd_linear_clf)

    # train the multi_target_linear and also get the predictions.
    half_index = X.shape[0] // 2
    multi_target_linear.partial_fit(
        X[:half_index], y[:half_index], classes=classes)

    first_predictions = multi_target_linear.predict(X)
    assert_equal((n_samples, n_outputs), first_predictions.shape)

    multi_target_linear.partial_fit(X[half_index:], y[half_index:])
    second_predictions = multi_target_linear.predict(X)
    assert_equal((n_samples, n_outputs), second_predictions.shape)

    # train the linear classification with each column and assert that
    # predictions are equal after first partial_fit and second partial_fit
    for i in range(3):
        # create a clone with the same state
        sgd_linear_clf = clone(sgd_linear_clf)
        sgd_linear_clf.partial_fit(
            X[:half_index], y[:half_index, i], classes=classes[i])
        assert_array_equal(sgd_linear_clf.predict(X), first_predictions[:, i])
        sgd_linear_clf.partial_fit(X[half_index:], y[half_index:, i])
        assert_array_equal(sgd_linear_clf.predict(X), second_predictions[:, i])
Example #21
Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License | 4 votes |
def test_multiclass_multioutput_estimator_predict_proba():
    seed = 542

    # make test deterministic
    rng = np.random.RandomState(seed)

    # random features
    X = rng.normal(size=(5, 5))

    # random labels
    y1 = np.array(['b', 'a', 'a', 'b', 'a']).reshape(5, 1)  # 2 classes
    y2 = np.array(['d', 'e', 'f', 'e', 'd']).reshape(5, 1)  # 3 classes

    Y = np.concatenate([y1, y2], axis=1)

    clf = MultiOutputClassifier(LogisticRegression(
        multi_class='ovr', solver='liblinear', random_state=seed))

    clf.fit(X, Y)

    y_result = clf.predict_proba(X)
    y_actual = [np.array([[0.23481764, 0.76518236],
                          [0.67196072, 0.32803928],
                          [0.54681448, 0.45318552],
                          [0.34883923, 0.65116077],
                          [0.73687069, 0.26312931]]),
                np.array([[0.5171785, 0.23878628, 0.24403522],
                          [0.22141451, 0.64102704, 0.13755846],
                          [0.16751315, 0.18256843, 0.64991843],
                          [0.27357372, 0.55201592, 0.17441036],
                          [0.65745193, 0.26062899, 0.08191907]])]

    for i in range(len(y_actual)):
        assert_almost_equal(y_result[i], y_actual[i])