Python Examples of sklearn.multioutput.MultiOutputRegressor

Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License

8 votes

def test_multi_target_regression_partial_fit():
    X, y = datasets.make_regression(n_targets=3)
    X_train, y_train = X[:50], y[:50]
    X_test, y_test = X[50:], y[50:]

    references = np.zeros_like(y_test)
    half_index = 25
    for n in range(3):
        sgr = SGDRegressor(random_state=0, max_iter=5)
        sgr.partial_fit(X_train[:half_index], y_train[:half_index, n])
        sgr.partial_fit(X_train[half_index:], y_train[half_index:, n])
        references[:, n] = sgr.predict(X_test)

    sgr = MultiOutputRegressor(SGDRegressor(random_state=0, max_iter=5))

    sgr.partial_fit(X_train[:half_index], y_train[:half_index])
    sgr.partial_fit(X_train[half_index:], y_train[half_index:])

    y_pred = sgr.predict(X_test)
    assert_almost_equal(references, y_pred)
    assert not hasattr(MultiOutputRegressor(Lasso), 'partial_fit')

Source File: test_multioutput.py From twitter-stock-recommendation with MIT License

6 votes

def test_multi_target_sample_weights():
    # weighted regressor
    Xw = [[1, 2, 3], [4, 5, 6]]
    yw = [[3.141, 2.718], [2.718, 3.141]]
    w = [2., 1.]
    rgr_w = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr_w.fit(Xw, yw, w)

    # unweighted, but with repeated samples
    X = [[1, 2, 3], [1, 2, 3], [4, 5, 6]]
    y = [[3.141, 2.718], [3.141, 2.718], [2.718, 3.141]]
    rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr.fit(X, y)

    X_test = [[1.5, 2.5, 3.5], [3.5, 4.5, 5.5]]
    assert_almost_equal(rgr.predict(X_test), rgr_w.predict(X_test))


# Import the data

Source File: test_anomaly_detectors.py From gordo with GNU Affero General Public License v3.0

6 votes

def test_diff_detector_require_thresholds(require_threshold: bool):
    """
    Should fail if requiring thresholds, but not calling cross_validate
    """
    X = pd.DataFrame(np.random.random((100, 5)))
    y = pd.DataFrame(np.random.random((100, 2)))

    model = DiffBasedAnomalyDetector(
        base_estimator=MultiOutputRegressor(LinearRegression()),
        require_thresholds=require_threshold,
    )

    model.fit(X, y)

    if require_threshold:
        # FAIL: Forgot to call .cross_validate to calculate thresholds.
        with pytest.raises(AttributeError):
            model.anomaly(X, y)

        model.cross_validate(X=X, y=y)
        model.anomaly(X, y)
    else:
        # thresholds not required
        model.anomaly(X, y)

Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_multi_target_regression():
    X, y = datasets.make_regression(n_targets=3)
    X_train, y_train = X[:50], y[:50]
    X_test, y_test = X[50:], y[50:]

    references = np.zeros_like(y_test)
    for n in range(3):
        rgr = GradientBoostingRegressor(random_state=0)
        rgr.fit(X_train, y_train[:, n])
        references[:, n] = rgr.predict(X_test)

    rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr.fit(X_train, y_train)
    y_pred = rgr.predict(X_test)

    assert_almost_equal(references, y_pred)


# 0.23. warning about tol not having its correct default value.

Source File: test_multioutput.py From twitter-stock-recommendation with MIT License

6 votes

def test_multi_target_regression_partial_fit():
    X, y = datasets.make_regression(n_targets=3)
    X_train, y_train = X[:50], y[:50]
    X_test, y_test = X[50:], y[50:]

    references = np.zeros_like(y_test)
    half_index = 25
    for n in range(3):
        sgr = SGDRegressor(random_state=0, max_iter=5)
        sgr.partial_fit(X_train[:half_index], y_train[:half_index, n])
        sgr.partial_fit(X_train[half_index:], y_train[half_index:, n])
        references[:, n] = sgr.predict(X_test)

    sgr = MultiOutputRegressor(SGDRegressor(random_state=0, max_iter=5))

    sgr.partial_fit(X_train[:half_index], y_train[:half_index])
    sgr.partial_fit(X_train[half_index:], y_train[half_index:])

    y_pred = sgr.predict(X_test)
    assert_almost_equal(references, y_pred)
    assert_false(hasattr(MultiOutputRegressor(Lasso), 'partial_fit'))

Source File: test_multioutput.py From twitter-stock-recommendation with MIT License

6 votes

def test_multi_target_regression():
    X, y = datasets.make_regression(n_targets=3)
    X_train, y_train = X[:50], y[:50]
    X_test, y_test = X[50:], y[50:]

    references = np.zeros_like(y_test)
    for n in range(3):
        rgr = GradientBoostingRegressor(random_state=0)
        rgr.fit(X_train, y_train[:, n])
        references[:, n] = rgr.predict(X_test)

    rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr.fit(X_train, y_train)
    y_pred = rgr.predict(X_test)

    assert_almost_equal(references, y_pred)

Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_multi_target_sample_weights():
    # weighted regressor
    Xw = [[1, 2, 3], [4, 5, 6]]
    yw = [[3.141, 2.718], [2.718, 3.141]]
    w = [2., 1.]
    rgr_w = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr_w.fit(Xw, yw, w)

    # unweighted, but with repeated samples
    X = [[1, 2, 3], [1, 2, 3], [4, 5, 6]]
    y = [[3.141, 2.718], [3.141, 2.718], [2.718, 3.141]]
    rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr.fit(X, y)

    X_test = [[1.5, 2.5, 3.5], [3.5, 4.5, 5.5]]
    assert_almost_equal(rgr.predict(X_test), rgr_w.predict(X_test))


# Import the data

Source File: engine.py From hyperparameter_hunter with MIT License

6 votes

def base_estimator(self, value):
        # Build `base_estimator` if string given
        if isinstance(value, str):
            value = cook_estimator(
                value, space=self.space, random_state=self.rng.randint(0, np.iinfo(np.int32).max)
            )

        # Check if regressor
        if not is_regressor(value) and value is not None:
            raise ValueError(f"`base_estimator` must be a regressor. Got {value}")

        # Treat per second acquisition function specially
        is_multi_regressor = isinstance(value, MultiOutputRegressor)
        if self.acq_func.endswith("ps") and not is_multi_regressor:
            value = MultiOutputRegressor(value)

        self._base_estimator = value

Source File: sindy.py From sparsereg with MIT License

6 votes

def fit(self, x, y=None):
        if y is not None:
            xdot = y
        else:
            xdot = self.derivative.transform(x)

        if self.operators is not None:
            feature_transformer = SymbolicFeatures(
                exponents=np.linspace(1, self.degree, self.degree), operators=self.operators
            )
        else:
            feature_transformer = PolynomialFeatures(degree=self.degree, include_bias=False)

        steps = [
            ("features", feature_transformer),
            ("model", STRidge(alpha=self.alpha, threshold=self.threshold, **self.kw)),
        ]
        self.model = MultiOutputRegressor(Pipeline(steps), n_jobs=self.n_jobs)
        self.model.fit(x, xdot)

        self.n_input_features_ = self.model.estimators_[0].steps[0][1].n_input_features_
        self.n_output_features_ = self.model.estimators_[0].steps[0][1].n_output_features_
        return self

Source File: test_optimizer.py From scikit-optimize with BSD 3-Clause "New" or "Revised" License

5 votes

def test_optimizer_copy(acq_func):
    # Checks that the base estimator, the objective and target values
    # are copied correctly.

    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer([(-2.0, 2.0)], base_estimator, acq_func=acq_func,
                    n_initial_points=1, acq_optimizer="sampling")

    # run three iterations so that we have some points and objective values
    if "ps" in acq_func:
        opt.run(bench1_with_time, n_iter=3)
    else:
        opt.run(bench1, n_iter=3)

    opt_copy = opt.copy()

    copied_estimator = opt_copy.base_estimator_

    if "ps" in acq_func:
        assert isinstance(copied_estimator, MultiOutputRegressor)
        # check that the base_estimator is not wrapped multiple times
        is_multi = isinstance(copied_estimator.estimator,
                              MultiOutputRegressor)
        assert not is_multi
    else:
        assert not isinstance(copied_estimator, MultiOutputRegressor)

    assert_array_equal(opt_copy.Xi, opt.Xi)
    assert_array_equal(opt_copy.yi, opt.yi)

Source File: test_multioutput.py From twitter-stock-recommendation with MIT License

5 votes

def test_multi_target_sample_weights_api():
    X = [[1, 2, 3], [4, 5, 6]]
    y = [[3.141, 2.718], [2.718, 3.141]]
    w = [0.8, 0.6]

    rgr = MultiOutputRegressor(Lasso())
    assert_raises_regex(ValueError, "does not support sample weights",
                        rgr.fit, X, y, w)

    # no exception should be raised if the base estimator supports weights
    rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr.fit(X, y, w)

Source File: test_multioutput.py From twitter-stock-recommendation with MIT License

5 votes

def test_multi_target_sparse_regression():
    X, y = datasets.make_regression(n_targets=3)
    X_train, y_train = X[:50], y[:50]
    X_test = X[50:]

    for sparse in [sp.csr_matrix, sp.csc_matrix, sp.coo_matrix, sp.dok_matrix,
                   sp.lil_matrix]:
        rgr = MultiOutputRegressor(Lasso(random_state=0))
        rgr_sparse = MultiOutputRegressor(Lasso(random_state=0))

        rgr.fit(X_train, y_train)
        rgr_sparse.fit(sparse(X_train), y_train)

        assert_almost_equal(rgr.predict(X_test),
                            rgr_sparse.predict(sparse(X_test)))

Source File: test_multioutput.py From twitter-stock-recommendation with MIT License

5 votes

def test_multi_target_regression_one_target():
    # Test multi target regression raises
    X, y = datasets.make_regression(n_targets=1)
    rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    assert_raises(ValueError, rgr.fit, X, y)

Source File: test_multioutput.py From pandas-ml with BSD 3-Clause "New" or "Revised" License

5 votes

def test_multioutput(self):

        # http://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html#sphx-glr-auto-examples-ensemble-plot-random-forest-regression-multioutput-py

        from sklearn.multioutput import MultiOutputRegressor
        from sklearn.ensemble import RandomForestRegressor

        # Create a random dataset
        rng = np.random.RandomState(1)
        X = np.sort(200 * rng.rand(600, 1) - 100, axis=0)
        y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T
        y += (0.5 - rng.rand(*y.shape))

        df = pdml.ModelFrame(X, target=y)

        max_depth = 30

        rf1 = df.ensemble.RandomForestRegressor(max_depth=max_depth,
                                                random_state=self.random_state)
        reg1 = df.multioutput.MultiOutputRegressor(rf1)

        rf2 = RandomForestRegressor(max_depth=max_depth,
                                    random_state=self.random_state)
        reg2 = MultiOutputRegressor(rf2)

        df.fit(reg1)
        reg2.fit(X, y)

        result = df.predict(reg2)
        expected = pd.DataFrame(reg2.predict(X))
        tm.assert_frame_equal(result, expected)

Source File: test_multioutput.py From pandas-ml with BSD 3-Clause "New" or "Revised" License

5 votes

def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.multioutput.MultiOutputRegressor,
                      multioutput.MultiOutputRegressor)
        self.assertIs(df.multioutput.MultiOutputClassifier,
                      multioutput.MultiOutputClassifier)

Source File: test_engine.py From hyperparameter_hunter with MIT License

5 votes

def test_optimizer_copy(acq_func):
    """Check that base estimator, objective and target values are copied correctly"""
    # TODO: Refactor - Use PyTest

    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer(
        [(-2.0, 2.0)],
        base_estimator,
        acq_func=acq_func,
        n_initial_points=1,
        acq_optimizer="sampling",
    )

    # Run three iterations so that we have some points and objective values
    if "ps" in acq_func:
        opt.run(bench1_with_time, n_iter=3)
    else:
        opt.run(bench1, n_iter=3)

    opt_copy = opt.copy()
    copied_estimator = opt_copy.base_estimator

    if "ps" in acq_func:
        assert isinstance(copied_estimator, MultiOutputRegressor)
        # Check that `base_estimator` is not wrapped multiple times
        assert not isinstance(copied_estimator.estimator, MultiOutputRegressor)
    else:
        assert not isinstance(copied_estimator, MultiOutputRegressor)

    assert_array_equal(opt_copy.Xi, opt.Xi)
    assert_array_equal(opt_copy.yi, opt.yi)

Source File: test_acquisition.py From scikit-optimize with BSD 3-Clause "New" or "Revised" License

5 votes

def test_acquisition_per_second_gradient(acq_func):
    rng = np.random.RandomState(0)
    X = rng.randn(20, 10)
    # Make the second component large, so that mean_grad and std_grad
    # do not become zero.
    y = np.vstack((X[:, 0], np.abs(X[:, 0])**3)).T

    for X_new in [rng.randn(10), rng.randn(10)]:
        gpr = cook_estimator("GP", Space(((-5.0, 5.0),)), random_state=0)
        mor = MultiOutputRegressor(gpr)
        mor.fit(X, y)
        check_gradient_correctness(X_new, mor, acq_func, 1.5)

Source File: test_MultiOutput.py From susi with BSD 3-Clause "New" or "Revised" License

5 votes

def test_MultiOutputRegressor():
    mor = MultiOutputRegressor(
        estimator=susi.SOMRegressor(n_jobs=2),
        n_jobs=2
    )
    mor.fit(X, y)

Source File: estimators.py From MOS-X with MIT License

5 votes

def _get_tree_rain_prediction(self, X):
        # Get predictions from individual trees.
        num_samples = X.shape[0]
        if self._imputer is not None:
            X = self._imputer.transform(X)
        if isinstance(self._forest, MultiOutputRegressor):
            num_trees = len(self._forest.estimators_[0].estimators_)
            predicted_rain = np.zeros((num_samples, num_trees))
            for s in range(num_samples):
                Xs = X[s].reshape(1, -1)
                for t in range(num_trees):
                    try:
                        predicted_rain[s, t] = self._forest.estimators_[3].estimators_[t].predict(Xs)
                    except AttributeError:
                        # Work around the 2-D array of estimators for GBTrees
                        predicted_rain[s, t] = self._forest.estimators_[3].estimators_[t][0].predict(Xs)
        else:
            num_trees = len(self._forest.estimators_)
            predicted_rain = np.zeros((num_samples, num_trees))
            for s in range(num_samples):
                Xs = X[s].reshape(1, -1)
                for t in range(num_trees):
                    try:
                        predicted_rain[s, t] = self._forest.estimators_[t].predict(Xs)
                    except AttributeError:
                        # Work around an error in sklearn where GBTrees have length-1 ndarrays...
                        predicted_rain[s, t] = self._forest.estimators_[t][0].predict(Xs)
        return predicted_rain

Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_multi_target_sample_weights_api():
    X = [[1, 2, 3], [4, 5, 6]]
    y = [[3.141, 2.718], [2.718, 3.141]]
    w = [0.8, 0.6]

    rgr = MultiOutputRegressor(Lasso())
    assert_raises_regex(ValueError, "does not support sample weights",
                        rgr.fit, X, y, w)

    # no exception should be raised if the base estimator supports weights
    rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr.fit(X, y, w)


# 0.23. warning about tol not having its correct default value.

Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_multi_target_sparse_regression():
    X, y = datasets.make_regression(n_targets=3)
    X_train, y_train = X[:50], y[:50]
    X_test = X[50:]

    for sparse in [sp.csr_matrix, sp.csc_matrix, sp.coo_matrix, sp.dok_matrix,
                   sp.lil_matrix]:
        rgr = MultiOutputRegressor(Lasso(random_state=0))
        rgr_sparse = MultiOutputRegressor(Lasso(random_state=0))

        rgr.fit(X_train, y_train)
        rgr_sparse.fit(sparse(X_train), y_train)

        assert_almost_equal(rgr.predict(X_test),
                            rgr_sparse.predict(sparse(X_test)))

Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_multi_target_regression_one_target():
    # Test multi target regression raises
    X, y = datasets.make_regression(n_targets=1)
    rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    assert_raises(ValueError, rgr.fit, X, y)

Source File: test_builder.py From gordo with GNU Affero General Public License v3.0

5 votes

def test_n_splits_from_config(mocked_pipeline_from_definition, cv):
    """
    Test that we can set arbitrary splitters and parameters in the config file which is called by the serializer.
    """
    data_config = get_random_data()
    evaluation_config = {"cv_mode": "full_build"}
    if cv:
        evaluation_config["cv"] = cv

    model_config = {
        "sklearn.multioutput.MultiOutputRegressor": {
            "estimator": "sklearn.ensemble.forest.RandomForestRegressor"
        }
    }

    machine = Machine(
        name="model-name",
        dataset=data_config,
        model=model_config,
        evaluation=evaluation_config,
        project_name="test",
    )

    ModelBuilder(machine).build()

    if cv:
        mocked_pipeline_from_definition.assert_called_with(cv)
    else:
        mocked_pipeline_from_definition.assert_called_with(
            {"sklearn.model_selection.TimeSeriesSplit": {"n_splits": 3}}
        )

Source File: test_builder.py From gordo with GNU Affero General Public License v3.0

5 votes

def test_model_builder_metrics_list(metrics_: Optional[List[str]]):
    model_config = {
        "sklearn.multioutput.MultiOutputRegressor": {
            "estimator": "sklearn.linear_model.LinearRegression"
        }
    }
    data_config = get_random_data()

    evaluation_config: Dict[str, Any] = {"cv_mode": "full_build"}
    if metrics_:
        evaluation_config.update({"metrics": metrics_})

    machine = Machine(
        name="model-name",
        dataset=data_config,
        model=model_config,
        evaluation=evaluation_config,
        project_name="test",
    )
    _model, machine = ModelBuilder(machine).build()

    expected_metrics = metrics_ or [
        "sklearn.metrics.explained_variance_score",
        "sklearn.metrics.r2_score",
        "sklearn.metrics.mean_squared_error",
        "sklearn.metrics.mean_absolute_error",
    ]

    assert all(
        metric.split(".")[-1].replace("_", "-")
        in machine.metadata.build_metadata.model.cross_validation.scores
        for metric in expected_metrics
    )

Source File: test_serializer_from_definition.py From gordo with GNU Affero General Public License v3.0

5 votes

def test_load_from_definition(definition):
    """
    Ensure serializer can load models which take other models as parameters.
    """
    X, y = np.random.random((10, 10)), np.random.random((10, 2))
    definition = yaml.load(definition, Loader=yaml.SafeLoader)
    model = serializer.from_definition(definition)
    assert isinstance(model, MultiOutputRegressor)
    model.fit(X, y)
    model.predict(X)

Source File: test_anomaly_detectors.py From gordo with GNU Affero General Public License v3.0

4 votes

def test_diff_detector_threshold(n_features_y: int, n_features_x: int):
    """
    Basic construction logic of thresholds_ attribute in the
    DiffBasedAnomalyDetector
    """
    X = np.random.random((100, n_features_x))
    y = np.random.random((100, n_features_y))

    model = DiffBasedAnomalyDetector(
        base_estimator=MultiOutputRegressor(estimator=LinearRegression())
    )

    # Model has own implementation of cross_validate
    assert hasattr(model, "cross_validate")

    # When initialized it should not have a threshold calculated.
    assert not hasattr(model, "feature_thresholds_")
    assert not hasattr(model, "aggregate_threshold_")
    assert not hasattr(model, "feature_thresholds_per_fold_")
    assert not hasattr(model, "aggregate_thresholds_per_fold_")

    model.fit(X, y)

    # Until it has done cross validation, it has no threshold.
    assert not hasattr(model, "feature_thresholds_")
    assert not hasattr(model, "aggregate_threshold_")
    assert not hasattr(model, "feature_thresholds_per_fold_")
    assert not hasattr(model, "aggregate_thresholds_per_fold_")

    # Calling cross validate should set the threshold for it.
    model.cross_validate(X=X, y=y)

    # Now we have calculated thresholds based on cross validation folds
    assert hasattr(model, "feature_thresholds_")
    assert hasattr(model, "aggregate_threshold_")
    assert hasattr(model, "feature_thresholds_per_fold_")
    assert hasattr(model, "aggregate_thresholds_per_fold_")
    assert isinstance(model.feature_thresholds_, pd.Series)
    assert len(model.feature_thresholds_) == y.shape[1]
    assert all(model.feature_thresholds_.notna())
    assert isinstance(model.feature_thresholds_per_fold_, pd.DataFrame)
    assert isinstance(model.aggregate_thresholds_per_fold_, dict)

Python sklearn.multioutput.MultiOutputRegressor() Examples