Python Examples of sklearn.dummy.DummyRegressor

Source File: test_voting.py From Mastering-Elasticsearch-7.0 with MIT License

7 votes

def test_notfitted():
    """Unfitted voting ensembles must raise ``NotFittedError`` on inference.

    Each of ``predict``/``predict_proba``/``transform`` (classifier) and
    ``predict``/``transform`` (regressor) is called without a prior ``fit``
    and the exact error message is checked.
    """
    template = ("This %s instance is not fitted yet. Call \'fit\'"
                " with appropriate arguments before using this method.")

    classifier = VotingClassifier(
        estimators=[('lr1', LogisticRegression()),
                    ('lr2', LogisticRegression())],
        voting='soft')
    regressor = VotingRegressor([('dr', DummyRegressor())])

    # Methods are looked up lazily, one at a time, right before each check.
    for method_name in ('predict', 'predict_proba', 'transform'):
        assert_raise_message(NotFittedError, template % 'VotingClassifier',
                             getattr(classifier, method_name), X)

    for method_name in ('predict', 'transform'):
        assert_raise_message(NotFittedError, template % 'VotingRegressor',
                             getattr(regressor, method_name), X_r)

Source File: test_grouped_model.py From scikit-lego with MIT License

6 votes

def test_unseen_groups_shrinkage(shrinkage_data):
    """Predicting for a group that was not part of ``fit`` must raise ``ValueError``.

    Fits a constant-shrinkage ``GroupedEstimator`` on the fixture data and
    then predicts on rows for Earth/DE/Hamburg, which the test treats as an
    unseen group. The error message is expected to mention "found a group".
    """
    df, _ = shrinkage_data

    X, y = df.drop(columns="Target"), df["Target"]

    shrink_est = GroupedEstimator(
        DummyRegressor(), ["Planet", "Country", "City"], shrinkage="constant", alpha=0.1
    )

    shrink_est.fit(X, y)

    unseen_group = pd.DataFrame(
        {"Planet": ["Earth"], "Country": ["DE"], "City": ["Hamburg"]}
    )

    with pytest.raises(ValueError) as e:
        shrink_est.predict(X=pd.concat([unseen_group] * 4, axis=0))

    # The message check must live outside the ``with`` block: anything placed
    # after the raising call inside the block is never executed.
    assert "found a group" in str(e.value)

Source File: test_dummy.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_dummy_regressor_sample_weight(n_samples=10):
    """Fitted constants must take ``sample_weight`` into account.

    Checks the ``mean``, ``median`` and ``quantile`` (0.95) strategies against
    a weighted average and weighted percentiles of the targets.
    """
    rng = np.random.RandomState(seed=1)

    features = [[0]] * n_samples
    # Draw order matters for reproducibility: targets first, then weights.
    targets = rng.rand(n_samples)
    weights = rng.rand(n_samples)

    mean_reg = DummyRegressor(strategy="mean").fit(features, targets, weights)
    weighted_mean = np.average(targets, weights=weights)
    assert_equal(mean_reg.constant_, weighted_mean)

    median_reg = DummyRegressor(strategy="median").fit(features, targets, weights)
    weighted_median = _weighted_percentile(targets, weights, 50.)
    assert_equal(median_reg.constant_, weighted_median)

    quantile_reg = DummyRegressor(strategy="quantile", quantile=.95).fit(
        features, targets, weights)
    weighted_q95 = _weighted_percentile(targets, weights, 95.)
    assert_equal(quantile_reg.constant_, weighted_q95)

Source File: test_grouped_model.py From scikit-lego with MIT License

6 votes

def test_constant_shrinkage(shrinkage_data):
    """Constant shrinkage should blend Planet/Country/City means with fixed weights.

    The expected prediction for each row is the dot product of the three
    hierarchy-level means with the weights ``[0.01, 0.09, 0.9]``.
    """
    frame, level_means = shrinkage_data

    X, y = frame.drop(columns="Target"), frame["Target"]

    model = GroupedEstimator(
        DummyRegressor(),
        ["Planet", "Country", "City"],
        shrinkage="constant",
        use_global_model=False,
        alpha=0.1,
    )

    weights = np.array([0.01, 0.09, 0.9])

    model.fit(X, y)

    # (country, city) of each row, in the order the expected values are listed.
    rows = [
        ("NL", "Amsterdam"),
        ("NL", "Rotterdam"),
        ("BE", "Antwerp"),
        ("BE", "Brussels"),
    ]
    expected = [
        np.array([level_means["Earth"], level_means[country], level_means[city]])
        @ weights
        for country, city in rows
    ]

    assert expected == model.predict(X).tolist()

Source File: test_grouped_model.py From scikit-lego with MIT License

6 votes

def test_relative_shrinkage(shrinkage_data):
    """Relative shrinkage should weight Planet/Country/City means as 4:2:1.

    The expected prediction for each row is the dot product of the three
    hierarchy-level means with ``[4, 2, 1] / 7``.
    """
    frame, level_means = shrinkage_data

    X, y = frame.drop(columns="Target"), frame["Target"]

    model = GroupedEstimator(
        DummyRegressor(),
        ["Planet", "Country", "City"],
        shrinkage="relative",
        use_global_model=False,
    )

    weights = np.array([4, 2, 1]) / 7

    model.fit(X, y)

    def blended(country, city):
        """Weighted combination of the means along one Planet/Country/City path."""
        path_means = np.array(
            [level_means["Earth"], level_means[country], level_means[city]]
        )
        return path_means @ weights

    expected = [
        blended("NL", "Amsterdam"),
        blended("NL", "Rotterdam"),
        blended("BE", "Antwerp"),
        blended("BE", "Brussels"),
    ]

    assert expected == model.predict(X).tolist()

Source File: test_grouped_model.py From scikit-lego with MIT License

6 votes

def test_min_n_obs_shrinkage(shrinkage_data):
    """With ``min_n_obs=2`` the predictions are expected to equal the country means."""
    frame, level_means = shrinkage_data

    X, y = frame.drop(columns="Target"), frame["Target"]

    model = GroupedEstimator(
        DummyRegressor(),
        ["Planet", "Country", "City"],
        shrinkage="min_n_obs",
        use_global_model=False,
        min_n_obs=2,
    )
    model.fit(X, y)

    # One expected value per row: two NL rows followed by two BE rows.
    expected = [level_means[country] for country in ("NL", "NL", "BE", "BE")]

    assert expected == model.predict(X).tolist()

Source File: test_grouped_model.py From scikit-lego with MIT License

6 votes

def test_min_n_obs_shrinkage_too_little_obs(shrinkage_data):
    """``fit`` must raise ``ValueError`` when no group can reach ``min_n_obs``.

    ``min_n_obs`` is set to one more than the number of rows, so no group can
    contain that many observations.
    """
    df, _ = shrinkage_data

    X, y = df.drop(columns="Target"), df["Target"]

    too_big_n_obs = X.shape[0] + 1

    shrink_est = GroupedEstimator(
        DummyRegressor(),
        ["Planet", "Country", "City"],
        shrinkage="min_n_obs",
        use_global_model=False,
        min_n_obs=too_big_n_obs,
    )

    with pytest.raises(ValueError) as e:
        shrink_est.fit(X, y)

    # The message check must live outside the ``with`` block: anything placed
    # after the raising call inside the block is never executed.
    assert (
        f"There is no group with size greater than or equal to {too_big_n_obs}"
        in str(e.value)
    )

Source File: test_grouped_model.py From scikit-lego with MIT License

6 votes

def test_custom_shrinkage_wrong_length(shrinkage_data):
    """A custom shrinkage function returning the wrong number of factors must fail.

    The helper returns ``n + 1`` factors for ``n`` group sizes; constructing
    and fitting the estimator with it is expected to raise ``ValueError``
    mentioning the expected shape.
    """
    df, _ = shrinkage_data

    X, y = df.drop(columns="Target"), df["Target"]

    def shrinkage_func(group_sizes):
        # Deliberately one element too long.
        n = len(group_sizes)
        return np.repeat(1 / n, n + 1)

    with pytest.raises(ValueError) as e:
        shrink_est = GroupedEstimator(
            DummyRegressor(),
            ["Planet", "Country", "City"],
            shrinkage=shrinkage_func,
            use_global_model=False,
        )

        shrink_est.fit(X, y)

    # The message check must live outside the ``with`` block: anything placed
    # after the raising call inside the block is never executed.
    assert ".shape should be " in str(e.value)

Source File: test_grouped_model.py From scikit-lego with MIT License

6 votes

def test_invalid_shrinkage(shrinkage_data, wrong_func):
    """An invalid ``shrinkage`` argument must raise ``ValueError``.

    ``wrong_func`` is supplied by the caller (presumably a parametrized
    fixture of invalid values; verify against the test module).
    """
    df, _ = shrinkage_data

    X, y = df.drop(columns="Target"), df["Target"]

    with pytest.raises(ValueError) as e:
        shrink_est = GroupedEstimator(
            DummyRegressor(),
            ["Planet", "Country", "City"],
            shrinkage=wrong_func,
            use_global_model=False,
        )

        shrink_est.fit(X, y)

    # The message check must live outside the ``with`` block: anything placed
    # after the raising call inside the block is never executed.
    assert "Invalid shrinkage specified." in str(e.value)

Source File: test_grouped_model.py From scikit-lego with MIT License

6 votes

def test_unexisting_shrinkage_func(shrinkage_data):
    """Naming a shrinkage function that does not exist must raise ``ValueError``."""
    df, _ = shrinkage_data

    X, y = df.drop(columns="Target"), df["Target"]

    unexisting_func = "some_highly_unlikely_function_name"

    with pytest.raises(ValueError) as e:
        shrink_est = GroupedEstimator(
            estimator=DummyRegressor(),
            groups=["Planet", "Country"],
            shrinkage=unexisting_func,
        )

        shrink_est.fit(X, y)

    # The message check must live outside the ``with`` block: anything placed
    # after the raising call inside the block is never executed.
    assert "shrinkage function" in str(e.value)

Source File: test_grouped_model.py From scikit-lego with MIT License

6 votes

def test_custom_shrinkage_raises_error(shrinkage_data):
    """An exception inside a custom shrinkage function must surface as ``ValueError``.

    The helper raises ``KeyError``; the test expects a ``ValueError`` whose
    message includes both the original error text and a note that it happened
    while checking the shrinkage function.
    """
    df, _ = shrinkage_data

    X, y = df.drop(columns="Target"), df["Target"]

    def shrinkage_func(group_sizes):
        raise KeyError("This function is bad and you should feel bad")

    with pytest.raises(ValueError) as e:
        shrink_est = GroupedEstimator(
            DummyRegressor(),
            ["Planet", "Country", "City"],
            shrinkage=shrinkage_func,
            use_global_model=False,
        )

        shrink_est.fit(X, y)

    # The message checks must live outside the ``with`` block: anything placed
    # after the raising call inside the block is never executed.
    message = str(e.value)
    assert "you should feel bad" in message
    assert "while checking the shrinkage function" in message

Source File: test_grouped_model.py From scikit-lego with MIT License

6 votes

def test_predict_missing_group_column(shrinkage_data):
    """``predict`` must raise ``ValueError`` when a grouping column is missing.

    The estimator is fitted on Planet/Country/City and then asked to predict
    on data with the ``Country`` column dropped.
    """
    df, _ = shrinkage_data

    X, y = df.drop(columns="Target"), df["Target"]

    shrink_est = GroupedEstimator(
        DummyRegressor(),
        ["Planet", "Country", "City"],
        shrinkage="constant",
        use_global_model=False,
        alpha=0.1,
    )

    shrink_est.fit(X, y)

    with pytest.raises(ValueError) as e:
        shrink_est.predict(X.drop(columns=["Country"]))

    # The message check must live outside the ``with`` block: anything placed
    # after the raising call inside the block is never executed.
    assert "group columns" in str(e.value)

Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0

6 votes

def test_fit_verify(self):
		"""Exercise ``PMMLPipeline.fit`` and ``PMMLPipeline.verify`` bookkeeping.

		Asserts that ``active_fields``/``target_fields`` only appear after
		``fit``, follow the column names of the most recent ``fit``, and that
		``verify`` stores verification data and rejects reordered columns.
		"""
		pipeline = PMMLPipeline([("estimator", DummyRegressor())])
		# Before fitting, neither field attribute is expected to exist.
		self.assertFalse(hasattr(pipeline, "active_fields"))
		self.assertFalse(hasattr(pipeline, "target_fields"))
		X = DataFrame([[1, 0], [2, 0], [3, 0]], columns = ["X1", "X2"])
		y = Series([0.5, 1.0, 1.5], name = "y")
		pipeline.fit(X, y)
		self.assertEqual(["X1", "X2"], pipeline.active_fields.tolist())
		self.assertEqual("y", pipeline.target_fields.tolist())
		# Refit with renamed columns: the recorded field names should follow.
		X.columns = ["x1", "x2"]
		pipeline.fit(X, y)
		self.assertEqual(["x1", "x2"], pipeline.active_fields.tolist())
		self.assertEqual("y", pipeline.target_fields.tolist())
		# Verification data is expected to be absent until verify() is called.
		self.assertFalse(hasattr(pipeline, "verification"))
		pipeline.verify(X.sample(2))
		self.assertEqual(2, len(pipeline.verification.active_values))
		self.assertEqual(2, len(pipeline.verification.target_values))
		# Swapping the column order after fitting is expected to be rejected.
		X.columns = ["x2", "x1"]
		with self.assertRaises(ValueError):
			pipeline.verify(X.sample(2))

Source File: test_dummy.py From twitter-stock-recommendation with MIT License

6 votes

def test_mean_strategy_multioutput_regressor():
    """Default ``DummyRegressor`` on 5-output targets should predict the column means."""
    rng = np.random.RandomState(seed=1)

    # The draw order (train X, train y, test X, test y) fixes the data.
    train_X = rng.randn(10, 10)
    train_y = rng.randn(10, 5)

    column_means = np.mean(train_y, axis=0).reshape((1, -1))

    test_X = rng.randn(20, 10)
    test_y = rng.randn(20, 5)

    # Fit once, then predict on both the training and the held-out inputs.
    regressor = DummyRegressor()
    regressor.fit(train_X, train_y)
    train_pred = regressor.predict(train_X)
    test_pred = regressor.predict(test_X)

    _check_equality_regressor(
        column_means, train_y, train_pred, test_y, test_pred)
    _check_behavior_2d(regressor)

Source File: test_dummy.py From twitter-stock-recommendation with MIT License

6 votes

def test_quantile_strategy_regressor():
    """The ``quantile`` strategy should match NumPy's median/min/max/percentile.

    Quantiles 0.5, 0, 1 and 0.3 are compared with ``np.median``, ``np.min``,
    ``np.max`` and ``np.percentile(q=30)`` of the targets respectively.
    """
    rng = np.random.RandomState(seed=1)

    X = [[0]] * 5  # feature values are irrelevant to the dummy model
    y = rng.randn(5)

    # (quantile, function computing the reference value from y)
    cases = [
        (0.5, np.median),
        (0, np.min),
        (1, np.max),
        (0.3, lambda values: np.percentile(values, q=30)),
    ]

    for quantile, reference in cases:
        reg = DummyRegressor(strategy="quantile", quantile=quantile)
        reg.fit(X, y)
        assert_array_equal(reg.predict(X), [reference(y)] * len(X))

Source File: test_dummy.py From twitter-stock-recommendation with MIT License

6 votes

def test_quantile_invalid():
    """Invalid ``quantile`` settings must make ``fit`` raise.

    A missing, ``None``, list-valued or out-of-range quantile is expected to
    raise ``ValueError``; a string quantile is expected to raise ``TypeError``.
    """
    X = [[0]] * 5  # content is irrelevant, fit should fail first
    y = [0] * 5  # content is irrelevant, fit should fail first

    # Keyword sets that should all be rejected with ValueError; the empty
    # dict exercises the estimator's default quantile.
    value_error_kwargs = (
        {},
        {"quantile": None},
        {"quantile": [0]},
        {"quantile": -0.1},
        {"quantile": 1.1},
    )
    for kwargs in value_error_kwargs:
        est = DummyRegressor(strategy="quantile", **kwargs)
        assert_raises(ValueError, est.fit, X, y)

    est = DummyRegressor(strategy="quantile", quantile='abc')
    assert_raises(TypeError, est.fit, X, y)

Source File: test_dummy.py From twitter-stock-recommendation with MIT License

6 votes

def test_dummy_regressor_sample_weight(n_samples=10):
    """Weighted fitting: ``constant_`` must equal the weighted statistic.

    Covers the ``mean`` strategy (weighted average) and the ``median`` and
    0.95 ``quantile`` strategies (weighted 50th and 95th percentiles).
    """
    rng = np.random.RandomState(seed=1)

    X = [[0]] * n_samples
    y = rng.rand(n_samples)
    w = rng.rand(n_samples)

    # (constructor keyword arguments, function giving the reference value)
    scenarios = [
        ({"strategy": "mean"},
         lambda: np.average(y, weights=w)),
        ({"strategy": "median"},
         lambda: _weighted_percentile(y, w, 50.)),
        ({"strategy": "quantile", "quantile": .95},
         lambda: _weighted_percentile(y, w, 95.)),
    ]

    for params, reference in scenarios:
        est = DummyRegressor(**params).fit(X, y, w)
        assert_equal(est.constant_, reference())

Source File: test_weight_boosting.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_multidimensional_X():
    """Smoke-test AdaBoost estimators on a 3-dimensional data array.

    Both the classifier and the regressor are fitted on an input of shape
    ``(50, 3, 3)`` with dummy base estimators and then asked to predict.
    """
    from sklearn.dummy import DummyClassifier, DummyRegressor

    rng = np.random.RandomState(0)

    # Draw order: features, class labels, regression targets.
    features = rng.randn(50, 3, 3)
    labels = rng.choice([0, 1], 50)
    targets = rng.randn(50)

    classifier = AdaBoostClassifier(DummyClassifier(strategy='most_frequent'))
    classifier.fit(features, labels)
    classifier.predict(features)
    classifier.predict_proba(features)

    regressor = AdaBoostRegressor(DummyRegressor())
    regressor.fit(features, targets)
    regressor.predict(features)

Source File: test_dummy.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_median_strategy_multioutput_regressor():
    """``strategy="median"`` on 5-output targets should predict the column medians."""
    rng = np.random.RandomState(seed=1)

    # The draw order (train X, train y, test X, test y) fixes the data.
    train_X = rng.randn(10, 10)
    train_y = rng.randn(10, 5)

    column_medians = np.median(train_y, axis=0).reshape((1, -1))

    test_X = rng.randn(20, 10)
    test_y = rng.randn(20, 5)

    # Fit once, then predict on both the training and the held-out inputs.
    regressor = DummyRegressor(strategy="median")
    regressor.fit(train_X, train_y)
    train_pred = regressor.predict(train_X)
    test_pred = regressor.predict(test_X)

    _check_equality_regressor(
        column_medians, train_y, train_pred, test_y, test_pred)
    _check_behavior_2d(regressor)

Source File: test_dummy.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_constant_strategy_multioutput_regressor():
    """``strategy="constant"`` on 5-output targets should predict the given constants."""
    rng = np.random.RandomState(seed=1)

    # The draw order (train X, train y, constants, test X, test y) fixes the data.
    train_X = rng.randn(10, 10)
    train_y = rng.randn(10, 5)

    # One constant per output column of the 2d target.
    fixed_values = rng.randn(5)

    test_X = rng.randn(20, 10)
    test_y = rng.randn(20, 5)

    # Fit once, then predict on both the training and the held-out inputs.
    regressor = DummyRegressor(strategy="constant", constant=fixed_values)
    regressor.fit(train_X, train_y)
    train_pred = regressor.predict(train_X)
    test_pred = regressor.predict(test_X)

    _check_equality_regressor(
        fixed_values, train_y, train_pred, test_y, test_pred)
    _check_behavior_2d_for_constant(regressor)

Source File: test_dummy.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_quantile_strategy_regressor():
    """Quantile predictions must agree with the matching NumPy statistic.

    Checks quantile 0.5 against the median, 0 against the minimum, 1 against
    the maximum and 0.3 against the 30th percentile of the targets.
    """
    rng = np.random.RandomState(seed=1)

    X = [[0]] * 5  # feature values are irrelevant to the dummy model
    y = rng.randn(5)

    def predictions_for(quantile):
        """Fit a fresh quantile regressor on (X, y) and return its predictions on X."""
        model = DummyRegressor(strategy="quantile", quantile=quantile)
        model.fit(X, y)
        return model.predict(X)

    assert_array_equal(predictions_for(0.5), [np.median(y)] * len(X))
    assert_array_equal(predictions_for(0), [np.min(y)] * len(X))
    assert_array_equal(predictions_for(1), [np.max(y)] * len(X))
    assert_array_equal(predictions_for(0.3), [np.percentile(y, q=30)] * len(X))

Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0

5 votes

def test_make_pmml_pipeline(self):
		"""``make_pmml_pipeline`` should return a ``PMMLPipeline`` for an estimator or a ``Pipeline``."""
		regressor = DummyRegressor()
		# Case 1: a bare estimator.
		from_estimator = make_pmml_pipeline(regressor)
		self.assertTrue(isinstance(from_estimator, PMMLPipeline))
		# Case 2: the same estimator wrapped in a plain Pipeline.
		steps = [("estimator", regressor)]
		plain_pipeline = Pipeline(steps)
		from_pipeline = make_pmml_pipeline(plain_pipeline)
		self.assertTrue(isinstance(from_pipeline, PMMLPipeline))

Source File: test_dummy.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_regressor_exceptions():
    """Calling ``predict`` on an unfitted ``DummyRegressor`` must raise ``NotFittedError``."""
    unfitted = DummyRegressor()
    empty_input = []
    assert_raises(NotFittedError, unfitted.predict, empty_input)

Source File: test_dummy.py From pandas-ml with BSD 3-Clause "New" or "Revised" License

5 votes

def test_objectmapper(self):
        """The ``dummy`` accessor of a ``ModelFrame`` should expose sklearn's dummy estimators."""
        frame = pdml.ModelFrame([])
        accessor_classifier = frame.dummy.DummyClassifier
        self.assertIs(accessor_classifier, dummy.DummyClassifier)
        accessor_regressor = frame.dummy.DummyRegressor
        self.assertIs(accessor_regressor, dummy.DummyRegressor)

Source File: test_pipeline.py From twitter-stock-recommendation with MIT License

5 votes

def test_set_params_nested_pipeline():
    """Smoke-test ``set_params`` on a pipeline nested inside another pipeline.

    In both calls a step (or the step list) is replaced in the same call that
    sets a parameter on the replacement; the test only requires that neither
    call raises.
    """
    inner = Pipeline([('b', DummyRegressor())])
    outer = Pipeline([('a', inner)])

    # Swap step 'b' for a Lasso and set its alpha in the same call.
    outer.set_params(a__b__alpha=0.001, a__b=Lasso())
    # Replace the inner step list and set C on the new step in the same call.
    outer.set_params(a__steps=[('b', LogisticRegression())], a__b__C=5)

Source File: test_dummy.py From twitter-stock-recommendation with MIT License

5 votes

def test_mean_strategy_regressor():
    """Default ``DummyRegressor`` should predict the mean of the training targets."""
    rng = np.random.RandomState(seed=1)

    features = [[0]] * 4  # feature values are irrelevant to the dummy model
    targets = rng.randn(4)

    model = DummyRegressor()
    model.fit(features, targets)

    expected = [np.mean(targets)] * len(features)
    assert_array_equal(model.predict(features), expected)

Source File: test_dummy.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_mean_strategy_regressor():
    """A ``DummyRegressor`` with default settings predicts the target mean for every row."""
    generator = np.random.RandomState(seed=1)

    n_rows = 4
    X = [[0]] * n_rows  # feature values are irrelevant to the dummy model
    y = generator.randn(n_rows)

    dummy_model = DummyRegressor()
    dummy_model.fit(X, y)
    predicted = dummy_model.predict(X)

    assert_array_equal(predicted, [np.mean(y)] * len(X))

Source File: test_dummy.py From twitter-stock-recommendation with MIT License

5 votes

def test_regressor_exceptions():
    """Calling ``predict`` on an unfitted ``DummyRegressor`` must raise ``ValueError``."""
    unfitted = DummyRegressor()
    empty_input = []
    assert_raises(ValueError, unfitted.predict, empty_input)

Source File: test_dummy.py From twitter-stock-recommendation with MIT License

5 votes

def test_median_strategy_regressor():
    """``strategy="median"`` should predict the median of the training targets."""
    rng = np.random.RandomState(seed=1)

    features = [[0]] * 5  # feature values are irrelevant to the dummy model
    targets = rng.randn(5)

    model = DummyRegressor(strategy="median")
    model.fit(features, targets)

    expected = [np.median(targets)] * len(features)
    assert_array_equal(model.predict(features), expected)

Source File: test_pipeline.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_set_params_nested_pipeline():
    """``set_params`` must accept a replacement step together with parameters for it.

    There are no assertions; the test passes as long as both calls on the
    nested pipeline complete without raising.
    """
    nested_steps = [('b', DummyRegressor())]
    estimator = Pipeline([('a', Pipeline(nested_steps))])

    # First call: swap 'b' for a Lasso and set its alpha.
    lasso_update = {'a__b__alpha': 0.001, 'a__b': Lasso()}
    estimator.set_params(**lasso_update)

    # Second call: replace the inner step list and set C on the new step.
    logistic_update = {'a__steps': [('b', LogisticRegression())], 'a__b__C': 5}
    estimator.set_params(**logistic_update)

Python sklearn.dummy.DummyRegressor() Examples