Python sklearn.metrics.mean_squared_error() Examples

The following are 30 code examples of sklearn.metrics.mean_squared_error(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.metrics, or try the search function.
Example #1
Source File: XGBoost_Regression_pm25.py    From Machine-Learning-for-Beginner-by-Python3 with MIT License 13 votes vote down vote up
def Train(data, modelcount, censhu, yanzhgdata):
    """Fit an ``xgb.XGBRegressor`` and report training and validation MSE.

    The last column of ``data`` / ``yanzhgdata`` is used as the target and
    every preceding column as a feature.

    Args:
        data: Training rows, indexed as ``data[:, :-1]`` / ``data[:, -1]``.
        modelcount: Forwarded to the regressor as ``n_estimators``.
        censhu: Forwarded to the regressor as ``max_depth``.
        yanzhgdata: Validation rows with the same column layout as ``data``.

    Returns:
        Tuple ``(train_mse, add_mse)``: the MSE on the training rows followed
        by the MSE on the validation rows. Both values are also printed.
    """
    regressor = xgb.XGBRegressor(
        max_depth=censhu,
        learning_rate=0.1,
        n_estimators=modelcount,
        silent=True,
        objective='reg:gamma',
    )

    train_x, train_y = data[:, :-1], data[:, -1]
    regressor.fit(train_x, train_y)

    # MSE of the fitted model on the rows it was trained on.
    train_mse = mse(train_y, regressor.predict(train_x))

    # MSE on the validation rows.
    valid_prediction = regressor.predict(yanzhgdata[:, :-1])
    add_mse = mse(yanzhgdata[:, -1], valid_prediction)

    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that determines the final combination
Example #2
Source File: test.py    From malss with MIT License 7 votes vote down vote up
def test_regression_small():
    """Fit MALSS on a 2000-sample synthetic regression set and check its algorithm list.

    The training MSE is printed; the assertions require exactly four
    algorithms and a non-``None`` best score on the first one.
    """
    from sklearn.metrics import mean_squared_error

    raw_features, raw_target = make_regression(
        n_samples=2000,
        n_features=10,
        n_informative=5,
        noise=30.0,
        random_state=0,
    )
    features = pd.DataFrame(raw_features)
    target = pd.Series(raw_target)

    analyzer = MALSS('regression').fit(features, target, 'test_regression_small')
    analyzer.generate_module_sample()

    # Score the fitted model on its own training data.
    print(mean_squared_error(target, analyzer.predict(features)))

    assert len(analyzer.algorithms) == 4
    assert analyzer.algorithms[0].best_score is not None
Example #3
Source File: LightGBM_Regression_pm25.py    From Machine-Learning-for-Beginner-by-Python3 with MIT License 7 votes vote down vote up
def Train(data, modelcount, censhu, yanzhgdata):
    """Fit an ``lgbm.LGBMRegressor`` and report training and validation MSE.

    The last column of ``data`` / ``yanzhgdata`` is used as the target and
    every preceding column as a feature.

    Args:
        data: Training rows, indexed as ``data[:, :-1]`` / ``data[:, -1]``.
        modelcount: Forwarded to the regressor as ``n_estimators``.
        censhu: Forwarded to the regressor as ``max_depth``.
        yanzhgdata: Validation rows with the same column layout as ``data``.

    Returns:
        Tuple ``(train_mse, add_mse)``: the MSE on the training rows followed
        by the MSE on the validation rows. Both values are also printed.
    """
    regressor = lgbm.LGBMRegressor(
        boosting_type='gbdt',
        objective='regression',
        num_leaves=1200,
        learning_rate=0.17,
        n_estimators=modelcount,
        max_depth=censhu,
        metric='rmse',
        bagging_fraction=0.8,
        feature_fraction=0.8,
        reg_lambda=0.9,
    )

    train_x, train_y = data[:, :-1], data[:, -1]
    regressor.fit(train_x, train_y)

    # MSE of the fitted model on the rows it was trained on.
    train_mse = mse(train_y, regressor.predict(train_x))

    # MSE on the validation rows.
    valid_prediction = regressor.predict(yanzhgdata[:, :-1])
    add_mse = mse(yanzhgdata[:, -1], valid_prediction)

    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that determines the final combination
Example #4
Source File: mean_squared_error.py    From emmental with MIT License 7 votes vote down vote up
def mean_squared_error_scorer(
    golds: ndarray,
    probs: ndarray,
    preds: Optional[ndarray],
    uids: Optional[List[str]] = None,
) -> Dict[str, float]:
    """Compute the mean squared error of ``probs`` against ``golds``.

    Args:
      golds: Ground truth values.
      probs: Model outputs that are scored against ``golds``.
      preds: Predicted values; accepted but not read by this scorer.
      uids: Unique ids, defaults to None; accepted but not read by this scorer.

    Returns:
      A dict with the single key ``"mean_squared_error"`` holding the loss
      converted to a Python float.
    """
    loss = mean_squared_error(golds, probs)
    return {"mean_squared_error": float(loss)}
Example #5
Source File: test_averaging.py    From nyaggle with MIT License 6 votes vote down vote up
def test_averaging_opt_minimize():
    """Optimized averaging must score no worse than any single model or a plain average.

    Scores are mean squared errors, so lower is better.
    """
    X, y = make_regression_df(n_samples=1024)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    oof, test = _make_1st_stage_preds(X_train, y_train, X_test)

    # Best (lowest) out-of-fold MSE among the three first-stage models.
    single_model_scores = [mean_squared_error(y_train, oof[idx]) for idx in range(3)]
    best_single_model = min(single_model_scores)

    optimized = averaging_opt(test, oof, y_train, mean_squared_error, higher_is_better=False)
    assert optimized.score <= best_single_model

    plain_average = averaging(test, oof, y_train, eval_func=mean_squared_error)
    assert optimized.score <= plain_average.score
Example #6
Source File: domainAdaptation.py    From dzetsaka with GNU General Public License v3.0 6 votes vote down vote up
def __init__(self, transportAlgorithm="MappingTransport",
                 scaler=False, params=None, feedback=True):
        """Store the transport configuration and optionally create scalers.

        Parameters
        ----------
        transportAlgorithm : stored as ``self.transportAlgorithm``.
        scaler : if truthy, ``self.scaler`` and ``self.scalerTarget`` are each
            set to a new ``MinMaxScaler(feature_range=(-1, 1))``; otherwise
            the given value is stored as ``self.scaler`` and
            ``self.scalerTarget`` is left unset.
        params : stored as ``self.params_``.
        feedback : stored as ``self.feedback``.

        Raises
        ------
        ImportError
            If the dependency check at the top of the constructor fails.
        """
        # The imported names are not used here; the imports only verify that
        # the dependencies are available before any state is set up.
        try:
            from sklearn.metrics import mean_squared_error
            from itertools import product
            from sklearn.metrics import (
                f1_score, cohen_kappa_score, accuracy_score)
        except Exception:
            # Catch Exception rather than BaseException so that
            # KeyboardInterrupt / SystemExit are not reported as a missing
            # dependency.
            raise ImportError('Please install itertools and scikit-learn')

        self.transportAlgorithm = transportAlgorithm
        self.feedback = feedback

        self.params_ = params

        if scaler:
            from sklearn.preprocessing import MinMaxScaler
            self.scaler = MinMaxScaler(feature_range=(-1, 1))
            self.scalerTarget = MinMaxScaler(feature_range=(-1, 1))
        else:
            self.scaler = scaler
Example #7
Source File: test.py    From malss with MIT License 6 votes vote down vote up
def test_regression_medium():
    """Fit MALSS on a 20000-sample synthetic regression set and check its algorithm list.

    The training MSE is printed; the assertions require exactly two
    algorithms and a non-``None`` best score on the first one.
    """
    from sklearn.metrics import mean_squared_error

    raw_features, raw_target = make_regression(
        n_samples=20000,
        n_features=10,
        n_informative=5,
        noise=30.0,
        random_state=0,
    )
    features = pd.DataFrame(raw_features)
    target = pd.Series(raw_target)

    analyzer = MALSS('regression').fit(features, target, 'test_regression_medium')
    analyzer.generate_module_sample()

    # Score the fitted model on its own training data.
    print(mean_squared_error(target, analyzer.predict(features)))

    assert len(analyzer.algorithms) == 2
    assert analyzer.algorithms[0].best_score is not None
Example #8
Source File: regression_tests.py    From drifter_ml with MIT License 6 votes vote down vote up
def mse_cv(self, cv):
        """
        Cross-validate ``self.reg`` on ``self.X`` / ``self.y`` using a
        mean-squared-error scorer.

        Parameters
        ----------
        * cv : integer
          The number of cross validation folds to perform

        Returns
        -------
        Whatever ``self.get_test_score`` extracts from the
        ``cross_validate`` result.
        """
        mse_scorer = metrics.make_scorer(metrics.mean_squared_error)
        cv_result = cross_validate(self.reg, self.X, self.y,
                                   cv=cv, scoring=mse_scorer)
        return self.get_test_score(cv_result)
Example #9
Source File: test.py    From malss with MIT License 6 votes vote down vote up
def test_regression_big():
    """Fit MALSS on a 200000-sample synthetic regression set and check its algorithm list.

    The training MSE is printed; the assertions require exactly one
    algorithm and a non-``None`` best score on it.
    """
    from sklearn.metrics import mean_squared_error

    raw_features, raw_target = make_regression(
        n_samples=200000,
        n_features=10,
        n_informative=5,
        noise=30.0,
        random_state=0,
    )
    features = pd.DataFrame(raw_features)
    target = pd.Series(raw_target)

    analyzer = MALSS('regression').fit(features, target, 'test_regression_big')
    analyzer.generate_module_sample()

    # Score the fitted model on its own training data.
    print(mean_squared_error(target, analyzer.predict(features)))

    assert len(analyzer.algorithms) == 1
    assert analyzer.algorithms[0].best_score is not None
Example #10
Source File: model_eval.py    From healthcareai-py with MIT License 6 votes vote down vote up
def calculate_regression_metrics(trained_sklearn_estimator, x_test, y_test):
    """
    Score a fitted estimator on a test set with MSE and MAE.

    Args:
        trained_sklearn_estimator (sklearn.base.BaseEstimator): a scikit-learn estimator that has been `.fit()`
        x_test (numpy.ndarray): A 2d numpy array of the x_test set (features), passed to `.predict()`
        y_test (numpy.ndarray): A 1d numpy array of the y_test set that the predictions are compared against

    Returns:
        dict: Metric values keyed by 'mean_squared_error' and 'mean_absolute_error'
    """
    y_predicted = trained_sklearn_estimator.predict(x_test)

    return {
        'mean_squared_error': skmetrics.mean_squared_error(y_test, y_predicted),
        'mean_absolute_error': skmetrics.mean_absolute_error(y_test, y_predicted),
    }
Example #11
Source File: test_multioutput.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_base_chain_crossval_fit_and_predict():
    """Chains fitted with ``cv=3`` must predict differently from a plain fit yet still score well.

    Classifier chains are judged by sample-averaged Jaccard score,
    regressor chains by mean squared error.
    """
    X, Y = generate_multilabel_dataset_with_correlations()
    candidates = (ClassifierChain(LogisticRegression()),
                  RegressorChain(Ridge()))

    for base_chain in candidates:
        base_chain.fit(X, Y)
        cv_chain = clone(base_chain).set_params(cv=3)
        cv_chain.fit(X, Y)
        cv_predictions = cv_chain.predict(X)
        plain_predictions = base_chain.predict(X)

        assert cv_predictions.shape == plain_predictions.shape
        # The two fitting procedures should not yield identical output.
        assert not np.all(plain_predictions == cv_predictions)

        if not isinstance(base_chain, ClassifierChain):
            assert mean_squared_error(Y, cv_predictions) < .25
        else:
            assert jaccard_score(Y, cv_predictions, average='samples') > .4
Example #12
Source File: test_regression.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_regression_custom_weights():
    """Check regression metrics when ``multioutput`` is a list of per-output weights."""
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

    weighted_metrics = (mean_squared_error, mean_absolute_error,
                        r2_score, explained_variance_score)
    # Compute every score first, then compare, as (expected, decimal) pairs.
    scores = [metric(y_true, y_pred, multioutput=[0.4, 0.6])
              for metric in weighted_metrics]
    expectations = ((0.39, 2), (0.475, 3), (0.94, 2), (0.94, 2))
    for actual, (expected, places) in zip(scores, expectations):
        assert_almost_equal(actual, expected, decimal=places)

    # MSLE is checked on its own data because it rejects negative inputs.
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    msle = mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7])
    log_true = np.log(1 + y_true)
    log_pred = np.log(1 + y_pred)
    msle2 = mean_squared_error(log_true, log_pred, multioutput=[0.3, 0.7])
    assert_almost_equal(msle, msle2, decimal=2)
Example #13
Source File: pm25_RF_Regression.py    From Machine-Learning-for-Beginner-by-Python3 with MIT License 6 votes vote down vote up
def Train(data, treecount, tezh, yanzhgdata):
    """Fit an ``RF`` model and report training and validation MSE.

    The last column of ``data`` / ``yanzhgdata`` is used as the target and
    every preceding column as a feature.

    Args:
        data: Training rows, indexed as ``data[:, :-1]`` / ``data[:, -1]``.
        treecount: Forwarded to the model as ``n_estimators``.
        tezh: Forwarded to the model as ``max_features``.
        yanzhgdata: Validation rows with the same column layout as ``data``.

    Returns:
        Tuple ``(train_mse, add_mse)``: the MSE on the training rows followed
        by the MSE on the validation rows. Both values are also printed.
    """
    forest = RF(n_estimators=treecount, max_features=tezh)

    train_x, train_y = data[:, :-1], data[:, -1]
    forest.fit(train_x, train_y)

    # MSE of the fitted model on the rows it was trained on.
    train_mse = mse(train_y, forest.predict(train_x))

    # MSE on the validation rows.
    valid_prediction = forest.predict(yanzhgdata[:, :-1])
    add_mse = mse(yanzhgdata[:, -1], valid_prediction)

    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that determines the final combination
Example #14
Source File: AdaBoost_Regression.py    From Machine-Learning-for-Beginner-by-Python3 with MIT License 6 votes vote down vote up
def Train(data, modelcount, censhu, yanzhgdata):
    """Fit an ``AdaBoostRegressor`` over decision trees and report train/validation MSE.

    The last column of ``data`` / ``yanzhgdata`` is used as the target and
    every preceding column as a feature.

    Args:
        data: Training rows, indexed as ``data[:, :-1]`` / ``data[:, -1]``.
        modelcount: Forwarded to ``AdaBoostRegressor`` as ``n_estimators``.
        censhu: Forwarded to the base ``DecisionTreeRegressor`` as ``max_depth``.
        yanzhgdata: Validation rows with the same column layout as ``data``.

    Returns:
        Tuple ``(train_mse, add_mse)``: the MSE on the training rows followed
        by the MSE on the validation rows. Both values are also printed.
    """
    base_tree = DecisionTreeRegressor(max_depth=censhu)
    booster = AdaBoostRegressor(base_tree, n_estimators=modelcount, learning_rate=0.8)

    train_x, train_y = data[:, :-1], data[:, -1]
    booster.fit(train_x, train_y)

    # MSE of the fitted model on the rows it was trained on.
    train_mse = mse(train_y, booster.predict(train_x))

    # MSE on the validation rows.
    valid_prediction = booster.predict(yanzhgdata[:, :-1])
    add_mse = mse(yanzhgdata[:, -1], valid_prediction)

    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that determines the final combination
Example #15
Source File: test_tree.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_boston():
    """Check training-set MSE of every regression tree / criterion pair on the boston data."""
    failure_template = "Failed with {0}, criterion = {1} and score = {2}"

    # Each entry: extra constructor kwargs and the MSE the fit must stay below.
    # Restricting the tree to six features reduces its learning ability (and
    # training time), so that variant gets the looser bound.
    variants = (
        ({}, 1),
        ({"max_features": 6}, 2),
    )

    for (name, Tree), criterion in product(REG_TREES.items(), REG_CRITERIONS):
        for extra_kwargs, bound in variants:
            reg = Tree(criterion=criterion, random_state=0, **extra_kwargs)
            reg.fit(boston.data, boston.target)
            score = mean_squared_error(boston.target, reg.predict(boston.data))
            assert_less(score, bound,
                        failure_template.format(name, criterion, score))
Example #16
Source File: test_utils.py    From gordo with GNU Affero General Public License v3.0 6 votes vote down vote up
def test_metrics_wrapper():
    """``metric_wrapper`` should make MSE independent of column scale only when given a scaler."""
    # Two columns on different scales: the second is 100x the first.
    y = np.array([[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]) * [1, 100]
    first_column_off = y * [0.8, 1]
    second_column_off = y * [1, 0.8]

    # Without a scaler, which column carries the 80% error changes the score.
    unscaled_metric = model_utils.metric_wrapper(mean_squared_error)
    unscaled_first = unscaled_metric(y, first_column_off)
    unscaled_second = unscaled_metric(y, second_column_off)
    assert not np.isclose(unscaled_first, unscaled_second)

    # With a scaler, both cases must give the same score.
    fitted_scaler = MinMaxScaler().fit(y)
    scaled_metric = model_utils.metric_wrapper(mean_squared_error, scaler=fitted_scaler)
    scaled_first = scaled_metric(y, first_column_off)
    scaled_second = scaled_metric(y, second_column_off)
    assert np.isclose(scaled_first, scaled_second)
Example #17
Source File: test_builder.py    From gordo with GNU Affero General Public License v3.0 6 votes vote down vote up
def test_get_metrics_dict_scaler(scaler, mock):
    """The "mean-squared-error" metric should be scale-independent only when a scaler is supplied."""

    def shrink_first(_y):
        return _y * [0.8, 1]

    def shrink_second(_y):
        return _y * [1, 0.8]

    model = mock
    # Two columns on different scales: the second is 100x the first.
    values = np.array([[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]) * [1, 100]
    y = pd.DataFrame(values, columns=["Tag 1", "Tag 2"])

    metrics_dict = ModelBuilder.build_metrics_dict(
        [sklearn.metrics.mean_squared_error], y, scaler=scaler
    )
    metric_func = metrics_dict["mean-squared-error"]

    model.predict = shrink_first
    score_first_wrong = metric_func(model, y, y)
    model.predict = shrink_second
    score_second_wrong = metric_func(model, y, y)

    scores_match = np.isclose(score_first_wrong, score_second_wrong)
    if not scaler:
        assert not scores_match
    else:
        assert scores_match
Example #18
Source File: test_builder.py    From gordo with GNU Affero General Public License v3.0 6 votes vote down vote up
def test_metrics_from_list():
    """
    Check resolving metric functions, both the defaults and explicit dotted names
    """
    default = ModelBuilder.metrics_from_list()
    expected_default = [
        metrics.explained_variance_score,
        metrics.r2_score,
        metrics.mean_squared_error,
        metrics.mean_absolute_error,
    ]
    assert default == expected_default

    requested_names = [
        "sklearn.metrics.adjusted_mutual_info_score",
        "sklearn.metrics.r2_score",
    ]
    specifics = ModelBuilder.metrics_from_list(requested_names)
    assert specifics == [metrics.adjusted_mutual_info_score, metrics.r2_score]
Example #19
Source File: test_run.py    From nyaggle with MIT License 6 votes vote down vote up
def test_experiment_lgb_regressor(tmpdir_name):
    """Run a regression experiment with the default algorithm and check predictions, metric and output files."""
    X, y = make_regression_df(
        n_samples=1024,
        n_num_features=10,
        n_cat_features=2,
        random_state=0,
        id_column='user_id',
    )
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=0
    )

    params = dict(objective='regression', max_depth=8)
    result = run_experiment(params, X_train, y_train, X_test, tmpdir_name)

    # Predictions must take more than five distinct values, i.e. not be binarized.
    for predictions in (result.oof_prediction, result.test_prediction):
        assert len(np.unique(predictions)) > 5

    oof_mse = mean_squared_error(y_train, result.oof_prediction)
    assert oof_mse == result.metrics[-1]

    _check_file_exists(tmpdir_name)
Example #20
Source File: score_dataset.py    From snape with Apache License 2.0 6 votes vote down vote up
def score_regression(y, y_hat, report=True):
    """
    Compute R2, RMSE and MAE for a set of predictions and build a text report
    :param y: reference values, passed first to each metric
    :param y_hat: predicted values, passed second to each metric
    :param report: when truthy, the report is printed
    :return: tuple of (mae, report_string)
    """
    r2 = r2_score(y, y_hat)
    rmse = sqrt(mean_squared_error(y, y_hat))
    mae = mean_absolute_error(y, y_hat)

    report_lines = [
        "---Regression Score--- ",
        "R2 = " + str(r2),
        "RMSE = " + str(rmse),
        "MAE = " + str(mae),
    ]
    # Every line, including the last, ends with a newline.
    report_string = "\n".join(report_lines) + "\n"

    if report:
        print(report_string)

    return mae, report_string
Example #21
Source File: test_regression.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_regression_metrics_at_limits():
    """Check regression metrics on perfect predictions and MSLE's rejection of negative values."""
    # Error metrics must be zero for a perfect single-sample prediction.
    zero_error_metrics = (mean_squared_error, mean_squared_log_error,
                          mean_absolute_error, median_absolute_error,
                          max_error)
    for metric in zero_error_metrics:
        assert_almost_equal(metric([0.], [0.]), 0.00, 2)

    # Score metrics must be one for perfect predictions.
    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)

    negative_message = ("Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.")
    negative_cases = (
        ([-1.], [-1.]),
        ([1., 2., 3.], [1., -2., 3.]),
        ([1., -2., 3.], [1., 2., 3.]),
    )
    for y_true, y_pred in negative_cases:
        assert_raises_regex(ValueError, negative_message,
                            mean_squared_log_error, y_true, y_pred)
Example #22
Source File: test_run.py    From nyaggle with MIT License 5 votes vote down vote up
def test_experiment_cat_regressor(tmpdir_name):
    """Run a regression experiment with ``algorithm_type='cat'`` and check the metric and output files."""
    X, y = make_regression_df(
        n_samples=1024,
        n_num_features=10,
        n_cat_features=2,
        random_state=0,
        id_column='user_id',
    )
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=0
    )

    params = dict(max_depth=8, num_boost_round=100)
    result = run_experiment(params, X_train, y_train, X_test, tmpdir_name,
                            algorithm_type='cat')

    oof_mse = mean_squared_error(y_train, result.oof_prediction)
    assert oof_mse == result.metrics[-1]
    _check_file_exists(tmpdir_name)
Example #23
Source File: reddit_output_att.py    From causal-text-embeddings with MIT License 5 votes vote down vote up
def fit_conditional_expected_outcomes(outcomes, features):
	"""Fit a ``Ridge`` model of ``outcomes`` on ``features`` and return it.

	When the module-level ``verbose`` flag is truthy, the training MSE is printed.
	"""
	ridge = Ridge()
	ridge.fit(features, outcomes)
	fitted_values = ridge.predict(features)
	if verbose:
		training_error = mse(outcomes, fitted_values)
		print("Training MSE:", training_error)
	return ridge
Example #24
Source File: test_run.py    From nyaggle with MIT License 5 votes vote down vote up
def test_experiment_xgb_regressor(tmpdir_name):
    """Run a regression experiment with ``algorithm_type='xgb'`` and check the metric and output files."""
    X, y = make_regression_df(
        n_samples=1024,
        n_num_features=10,
        n_cat_features=2,
        random_state=0,
        id_column='user_id',
    )
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=0
    )

    params = dict(max_depth=8, num_boost_round=100)
    result = run_experiment(params, X_train, y_train, X_test, tmpdir_name,
                            algorithm_type='xgb', with_auto_prep=True)

    oof_mse = mean_squared_error(y_train, result.oof_prediction)
    assert oof_mse == result.metrics[-1]
    _check_file_exists(tmpdir_name)
Example #25
Source File: test_nn.py    From numpy-ml with GNU General Public License v3.0 5 votes vote down vote up
def test_squared_error(N=15):
    """Compare ``SquaredError.loss`` with a reference built on ``mean_squared_error``.

    One check uses identical arrays; then ``N - 1`` checks use independently
    drawn arrays. ``N=None`` makes the random checks run indefinitely.
    """
    from numpy_ml.neural_nets.losses import SquaredError

    np.random.seed(12345)

    max_trials = np.inf if N is None else N

    mine = SquaredError()

    def gold(y, y_pred):
        # Reference loss: MSE times element count times one half.
        return (
            mean_squared_error(y, y_pred)
            * y_pred.shape[0]
            * y_pred.shape[1]
            * 0.5
        )

    def draw_shape():
        # Draw the feature count first, then the example count, so the random
        # stream is consumed in a fixed order.
        n_dims = np.random.randint(2, 100)
        n_examples = np.random.randint(1, 1000)
        return (n_examples, n_dims)

    # ensure we get 0 when the two arrays are equal
    same = random_tensor(draw_shape())
    assert_almost_equal(mine.loss(same, same), gold(same, same))
    print("PASSED")

    trial = 1
    while trial < max_trials:
        shape = draw_shape()
        y = random_tensor(shape)
        y_pred = random_tensor(shape)
        assert_almost_equal(mine.loss(y, y_pred), gold(y, y_pred), decimal=5)
        print("PASSED")
        trial += 1
Example #26
Source File: util.py    From Recommender-Systems-Samples with MIT License 5 votes vote down vote up
def mse(y_true, y_pred):
    '''
    Return ``mean_squared_error(y_true, y_pred)`` after checking both
    inputs have the same length.

    y_true : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Estimated target values.

    Raises AssertionError when the two lengths differ.
    '''
    n_true = len(y_true)
    n_pred = len(y_pred)
    assert n_true == n_pred
    return mean_squared_error(y_true, y_pred)
Example #27
Source File: test_gradient_boosting.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def check_boston(presort, loss, subsample):
    """Fit a gradient boosting regressor on the boston data under several sample weightings.

    For no weights, unit weights and doubled unit weights, the fit must give
    a training MSE below 6.0 and predictions almost equal to those of the
    previous weighting.
    """
    unit_weights = np.ones(len(boston.target))
    previous_pred = None

    for weights in (None, unit_weights, 2 * unit_weights):
        reg = GradientBoostingRegressor(
            n_estimators=100,
            loss=loss,
            max_depth=4,
            subsample=subsample,
            min_samples_split=2,
            random_state=1,
            presort=presort,
        )

        # Predicting before fitting must fail.
        assert_raises(ValueError, reg.predict, boston.data)

        reg.fit(boston.data, boston.target, sample_weight=weights)
        leaf_indices = reg.apply(boston.data)
        assert_equal(leaf_indices.shape, (506, 100))

        current_pred = reg.predict(boston.data)
        assert_less(mean_squared_error(boston.target, current_pred), 6.0)

        if previous_pred is not None:
            assert_array_almost_equal(previous_pred, current_pred)

        previous_pred = current_pred
Example #28
Source File: test_gradient_boosting.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_zero_estimator_reg():
    """``init='zero'`` must work for regression; an unknown ``init`` string must fail at fit time."""
    shared_kwargs = dict(n_estimators=20, max_depth=1, random_state=1)

    zero_init = GradientBoostingRegressor(init='zero', **shared_kwargs)
    zero_init.fit(boston.data, boston.target)
    training_mse = mean_squared_error(boston.target,
                                      zero_init.predict(boston.data))
    assert_almost_equal(training_mse, 33.0, decimal=0)

    bad_init = GradientBoostingRegressor(init='foobar', **shared_kwargs)
    assert_raises(ValueError, bad_init.fit, boston.data, boston.target)
Example #29
Source File: lstm.py    From user-behavior-anomaly-detector with MIT License 5 votes vote down vote up
def calculate_score(self, real, predicted, score):
        """Return ``rmse * score / 100`` rounded to two decimals.

        ``rmse`` is the square root of ``mean_squared_error(real, predicted)``.
        """
        root_mse = math.sqrt(mean_squared_error(real, predicted))
        # Earlier formula, kept for reference:
        # total = helpers.sigmoid(1/rmse * scores[i]) * 100
        return round(root_mse * score / 100, 2)
Example #30
Source File: train.py    From KDD2018_MPCN with GNU General Public License v3.0 5 votes vote down vote up
def _majority_baseline(self, labels):
        """Print the MSE of predicting the most frequent label bucket for every item.

        Each label is bucketed as ``abs(math.floor(label))``; the most common
        bucket, as a float, is used as the constant prediction. The bucket
        counts and the resulting MSE are printed between two separator lines.
        """
        separator = "============================================"
        print(separator)
        print("Running Majority Baseline...")

        bucket_counts = Counter([abs(math.floor(label)) for label in labels])
        print(bucket_counts)

        top_bucket = bucket_counts.most_common(5)[0][0]
        constant_prediction = [float(top_bucket)] * len(labels)

        print('MSE={}'.format(mean_squared_error(constant_prediction, labels)))
        print(separator)