Python Examples of sklearn.metrics.mean_squared_error

Source File: XGBoost_Regression_pm25.py From Machine-Learning-for-Beginner-by-Python3 with MIT License

13 votes

def Train(data, modelcount, censhu, yanzhgdata):
    """Fit an XGBoost regressor and report its training/validation MSE.

    In both arrays the last column is used as the target and every
    preceding column as a feature.

    Args:
        data: 2-D array the model is fitted on.
        modelcount: forwarded to the regressor as ``n_estimators``.
        censhu: forwarded to the regressor as ``max_depth``.
        yanzhgdata: 2-D array used for validation.

    Returns:
        Tuple ``(train_mse, add_mse)`` with the MSE on ``data`` and on
        ``yanzhgdata``; both values are also printed.
    """
    model = xgb.XGBRegressor(
        max_depth=censhu,
        learning_rate=0.1,
        n_estimators=modelcount,
        silent=True,
        objective='reg:gamma',
    )

    train_x, train_y = data[:, :-1], data[:, -1]
    model.fit(train_x, train_y)

    # MSE of the predictions on the training data
    train_mse = mse(train_y, model.predict(train_x))

    # MSE of the predictions on the validation data
    valid_pred = model.predict(yanzhgdata[:, :-1])
    add_mse = mse(yanzhgdata[:, -1], valid_pred)

    print(train_mse, add_mse)
    return train_mse, add_mse

# 最终确定组合的函数

Source File: test.py From malss with MIT License

7 votes

def test_regression_small():
    """Run MALSS on a 2000-sample synthetic regression problem.

    Prints the MSE of the fitted object's predictions on the training data
    and asserts that ``algorithms`` holds four entries, the first of which
    has a ``best_score``.
    """
    raw_X, raw_y = make_regression(
        n_samples=2000,
        n_features=10,
        n_informative=5,
        noise=30.0,
        random_state=0,
    )
    X = pd.DataFrame(raw_X)
    y = pd.Series(raw_y)

    cls = MALSS('regression').fit(X, y, 'test_regression_small')
    cls.generate_module_sample()

    from sklearn.metrics import mean_squared_error
    train_error = mean_squared_error(y, cls.predict(X))
    print(train_error)

    assert len(cls.algorithms) == 4
    assert cls.algorithms[0].best_score is not None

Source File: LightGBM_Regression_pm25.py From Machine-Learning-for-Beginner-by-Python3 with MIT License

7 votes

def Train(data, modelcount, censhu, yanzhgdata):
    """Fit a LightGBM regressor and report its training/validation MSE.

    In both arrays the last column is used as the target and every
    preceding column as a feature.

    Args:
        data: 2-D array the model is fitted on.
        modelcount: forwarded to the regressor as ``n_estimators``.
        censhu: forwarded to the regressor as ``max_depth``.
        yanzhgdata: 2-D array used for validation.

    Returns:
        Tuple ``(train_mse, add_mse)`` with the MSE on ``data`` and on
        ``yanzhgdata``; both values are also printed.
    """
    model = lgbm.LGBMRegressor(
        boosting_type='gbdt',
        objective='regression',
        num_leaves=1200,
        learning_rate=0.17,
        n_estimators=modelcount,
        max_depth=censhu,
        metric='rmse',
        bagging_fraction=0.8,
        feature_fraction=0.8,
        reg_lambda=0.9,
    )
    model.fit(data[:, :-1], data[:, -1])

    # Predict on the training data, then score it
    fitted_values = model.predict(data[:, :-1])
    train_mse = mse(data[:, -1], fitted_values)

    # Predict on the validation data, then score it
    validation_values = model.predict(yanzhgdata[:, :-1])
    add_mse = mse(yanzhgdata[:, -1], validation_values)

    print(train_mse, add_mse)
    return train_mse, add_mse

# 最终确定组合的函数

Source File: mean_squared_error.py From emmental with MIT License

7 votes

def mean_squared_error_scorer(
    golds: ndarray,
    probs: ndarray,
    preds: Optional[ndarray],
    uids: Optional[List[str]] = None,
) -> Dict[str, float]:
    """Score ``probs`` against ``golds`` with the mean squared error.

    Args:
      golds: Ground truth values.
      probs: Values compared against ``golds``.
      preds: Not read by this scorer.
      uids: Not read by this scorer, defaults to None.

    Returns:
      Dict with the single key ``"mean_squared_error"`` holding the error
      as a float.
    """
    score = float(mean_squared_error(golds, probs))
    return {"mean_squared_error": score}

Source File: test_averaging.py From nyaggle with MIT License

6 votes

def test_averaging_opt_minimize():
    """Optimized averaging (minimizing MSE) must score no worse than the best
    of ``oof[0..2]`` taken alone, and no worse than plain averaging."""
    X, y = make_regression_df(n_samples=1024)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    oof, test = _make_1st_stage_preds(X_train, y_train, X_test)

    # MSE of each of the first three first-stage predictions on its own
    single_scores = [mean_squared_error(y_train, oof[idx]) for idx in range(3)]
    best_single_model = min(single_scores)

    result = averaging_opt(test, oof, y_train, mean_squared_error, higher_is_better=False)
    assert result.score <= best_single_model

    result_simple_avg = averaging(test, oof, y_train, eval_func=mean_squared_error)
    assert result.score <= result_simple_avg.score

Source File: domainAdaptation.py From dzetsaka with GNU General Public License v3.0

6 votes

def __init__(self, transportAlgorithm="MappingTransport",
                 scaler=False, params=None, feedback=True):
        """
        Store the configuration and, if requested, create the scalers.

        Parameters
        ----------
        transportAlgorithm : stored as ``self.transportAlgorithm``
            (default "MappingTransport").
        scaler : if truthy, ``self.scaler`` and ``self.scalerTarget`` are each
            set to a ``MinMaxScaler(feature_range=(-1, 1))``; otherwise
            ``self.scaler`` is set to the given value and
            ``self.scalerTarget`` is not set here.
        params : stored as ``self.params_``.
        feedback : stored as ``self.feedback``.

        Raises
        ------
        ImportError
            If importing the scikit-learn / itertools names below fails.
        """
        # Dependency check only: the imported names are not used in this
        # method.
        try:
            from sklearn.metrics import mean_squared_error
            from itertools import product
            from sklearn.metrics import (
                f1_score, cohen_kappa_score, accuracy_score)
        except Exception:
            # Catch Exception rather than BaseException so that
            # KeyboardInterrupt / SystemExit are not turned into an
            # ImportError.
            raise ImportError('Please install itertools and scikit-learn')

        self.transportAlgorithm = transportAlgorithm
        self.feedback = feedback

        self.params_ = params

        if scaler:
            from sklearn.preprocessing import MinMaxScaler
            self.scaler = MinMaxScaler(feature_range=(-1, 1))
            self.scalerTarget = MinMaxScaler(feature_range=(-1, 1))
        else:
            self.scaler = scaler

Source File: test.py From malss with MIT License

6 votes

def test_regression_medium():
    """Run MALSS on a 20000-sample synthetic regression problem.

    Prints the MSE of the fitted object's predictions on the training data
    and asserts that ``algorithms`` holds two entries, the first of which
    has a ``best_score``.
    """
    raw_X, raw_y = make_regression(
        n_samples=20000,
        n_features=10,
        n_informative=5,
        noise=30.0,
        random_state=0,
    )
    X = pd.DataFrame(raw_X)
    y = pd.Series(raw_y)

    cls = MALSS('regression').fit(X, y, 'test_regression_medium')
    cls.generate_module_sample()

    from sklearn.metrics import mean_squared_error
    train_error = mean_squared_error(y, cls.predict(X))
    print(train_error)

    assert len(cls.algorithms) == 2
    assert cls.algorithms[0].best_score is not None

Source File: regression_tests.py From drifter_ml with MIT License

6 votes

def mse_cv(self, cv):
        """
        Cross-validate ``self.reg`` on ``self.X`` / ``self.y`` with mean
        squared error as the scoring function.

        Parameters
        ----------
        * cv : integer
          The number of cross validation folds to perform

        Returns
        -------
        Whatever ``self.get_test_score`` extracts from the
        ``cross_validate`` result.
        """
        scorer = metrics.make_scorer(metrics.mean_squared_error)
        cv_result = cross_validate(
            self.reg,
            self.X,
            self.y,
            cv=cv,
            scoring=scorer,
        )
        return self.get_test_score(cv_result)

Source File: test.py From malss with MIT License

6 votes

def test_regression_big():
    """Run MALSS on a 200000-sample synthetic regression problem.

    Prints the MSE of the fitted object's predictions on the training data
    and asserts that ``algorithms`` holds one entry which has a
    ``best_score``.
    """
    raw_X, raw_y = make_regression(
        n_samples=200000,
        n_features=10,
        n_informative=5,
        noise=30.0,
        random_state=0,
    )
    X = pd.DataFrame(raw_X)
    y = pd.Series(raw_y)

    cls = MALSS('regression').fit(X, y, 'test_regression_big')
    cls.generate_module_sample()

    from sklearn.metrics import mean_squared_error
    train_error = mean_squared_error(y, cls.predict(X))
    print(train_error)

    assert len(cls.algorithms) == 1
    assert cls.algorithms[0].best_score is not None

Source File: model_eval.py From healthcareai-py with MIT License

6 votes

def calculate_regression_metrics(trained_sklearn_estimator, x_test, y_test):
    """
    Score a fitted estimator's predictions on a test set.

    Args:
        trained_sklearn_estimator (sklearn.base.BaseEstimator): a scikit-learn estimator that has been `.fit()`
        x_test (numpy.ndarray): features handed to the estimator's `.predict()`
        y_test (numpy.ndarray): values the predictions are scored against

    Returns:
        dict: `mean_squared_error` and `mean_absolute_error` of the predictions
    """
    y_hat = trained_sklearn_estimator.predict(x_test)

    return {
        'mean_squared_error': skmetrics.mean_squared_error(y_test, y_hat),
        'mean_absolute_error': skmetrics.mean_absolute_error(y_test, y_hat),
    }

Source File: test_multioutput.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_base_chain_crossval_fit_and_predict():
    # A chain fitted with cv=3 must predict in the same shape as the plain
    # chain, differ from it somewhere, and still score reasonably.
    X, Y = generate_multilabel_dataset_with_correlations()

    chains = (ClassifierChain(LogisticRegression()), RegressorChain(Ridge()))
    for chain in chains:
        chain.fit(X, Y)
        chain_cv = clone(chain).set_params(cv=3)
        chain_cv.fit(X, Y)
        Y_pred_cv = chain_cv.predict(X)
        Y_pred = chain.predict(X)

        assert Y_pred_cv.shape == Y_pred.shape
        assert not np.all(Y_pred == Y_pred_cv)

        # the regressor chain is scored with MSE, the classifier chain with
        # the sample-averaged Jaccard score
        if not isinstance(chain, ClassifierChain):
            assert mean_squared_error(Y, Y_pred_cv) < .25
        else:
            assert jaccard_score(Y, Y_pred_cv, average='samples') > .4

Source File: test_regression.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_regression_custom_weights():
    # Regression metrics called with explicit per-output weights.
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]
    weights = [0.4, 0.6]

    msew = mean_squared_error(y_true, y_pred, multioutput=weights)
    maew = mean_absolute_error(y_true, y_pred, multioutput=weights)
    rw = r2_score(y_true, y_pred, multioutput=weights)
    evsw = explained_variance_score(y_true, y_pred, multioutput=weights)

    expectations = (
        (msew, 0.39, 2),
        (maew, 0.475, 3),
        (rw, 0.94, 2),
        (evsw, 0.94, 2),
    )
    for actual, expected, places in expectations:
        assert_almost_equal(actual, expected, decimal=places)

    # msle is checked on its own because it rejects negative inputs:
    # it must agree with the MSE of log(1 + y).
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    log_weights = [0.3, 0.7]
    msle = mean_squared_log_error(y_true, y_pred, multioutput=log_weights)
    msle2 = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred),
                               multioutput=log_weights)
    assert_almost_equal(msle, msle2, decimal=2)

Source File: pm25_RF_Regression.py From Machine-Learning-for-Beginner-by-Python3 with MIT License

6 votes

def Train(data, treecount, tezh, yanzhgdata):
    """Fit an ``RF`` model and report its training/validation MSE.

    In both arrays the last column is used as the target and every
    preceding column as a feature.

    Args:
        data: 2-D array the model is fitted on.
        treecount: forwarded to the model as ``n_estimators``.
        tezh: forwarded to the model as ``max_features``.
        yanzhgdata: 2-D array used for validation.

    Returns:
        Tuple ``(train_mse, add_mse)`` with the MSE on ``data`` and on
        ``yanzhgdata``; both values are also printed.
    """
    forest = RF(n_estimators=treecount, max_features=tezh)
    forest.fit(data[:, :-1], data[:, -1])

    # Predictions and MSE for the training data
    train_pred = forest.predict(data[:, :-1])
    train_mse = mse(data[:, -1], train_pred)

    # Predictions and MSE for the validation data
    valid_pred = forest.predict(yanzhgdata[:, :-1])
    add_mse = mse(yanzhgdata[:, -1], valid_pred)

    print(train_mse, add_mse)
    return train_mse, add_mse

# 最终确定组合的函数

Source File: AdaBoost_Regression.py From Machine-Learning-for-Beginner-by-Python3 with MIT License

6 votes

def Train(data, modelcount, censhu, yanzhgdata):
    """Fit an AdaBoost regressor over decision trees and report its MSE.

    In both arrays the last column is used as the target and every
    preceding column as a feature.

    Args:
        data: 2-D array the model is fitted on.
        modelcount: forwarded to ``AdaBoostRegressor`` as ``n_estimators``.
        censhu: ``max_depth`` of the ``DecisionTreeRegressor`` base model.
        yanzhgdata: 2-D array used for validation.

    Returns:
        Tuple ``(train_mse, add_mse)`` with the MSE on ``data`` and on
        ``yanzhgdata``; both values are also printed.
    """
    base_tree = DecisionTreeRegressor(max_depth=censhu)
    model = AdaBoostRegressor(base_tree,
                              n_estimators=modelcount, learning_rate=0.8)

    train_x, train_y = data[:, :-1], data[:, -1]
    model.fit(train_x, train_y)

    # Predictions and MSE for the training data
    train_mse = mse(train_y, model.predict(train_x))

    # Predictions and MSE for the validation data
    valid_pred = model.predict(yanzhgdata[:, :-1])
    add_mse = mse(yanzhgdata[:, -1], valid_pred)

    print(train_mse, add_mse)
    return train_mse, add_mse

# 最终确定组合的函数

Source File: test_tree.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_boston():
    # Every regression tree / criterion pair must fit the boston house
    # prices data closely.

    # First the unrestricted tree (training MSE below 1), then a tree limited
    # to 6 features: this reduces its learning ability but also the training
    # time, so the bound is relaxed to 2.
    variants = (({}, 1), ({'max_features': 6}, 2))

    for (name, Tree), criterion in product(REG_TREES.items(), REG_CRITERIONS):
        for extra_params, bound in variants:
            reg = Tree(criterion=criterion, random_state=0, **extra_params)
            reg.fit(boston.data, boston.target)
            score = mean_squared_error(boston.target, reg.predict(boston.data))
            assert_less(score, bound,
                        "Failed with {0}, criterion = {1} and score = {2}"
                        "".format(name, criterion, score))

Source File: test_utils.py From gordo with GNU Affero General Public License v3.0

6 votes

def test_metrics_wrapper():
    # two target columns on different scales (the second is multiplied by 100)
    y = np.array([[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]) * [1, 100]

    def _errors(metric_func):
        # score with the first column at 80%, then with the second at 80%
        first_wrong = metric_func(y, y * [0.8, 1])
        second_wrong = metric_func(y, y * [1, 0.8])
        return first_wrong, second_wrong

    # Without a scaler it matters which of the two series carries the error
    unscaled_metric = model_utils.metric_wrapper(mean_squared_error)
    mse_feature_one_wrong, mse_feature_two_wrong = _errors(unscaled_metric)
    assert not np.isclose(mse_feature_one_wrong, mse_feature_two_wrong)

    # With a scaler fitted on y it no longer matters which series carries
    # the error
    scaler = MinMaxScaler().fit(y)
    scaled_metric = model_utils.metric_wrapper(mean_squared_error, scaler=scaler)
    mse_feature_one_wrong, mse_feature_two_wrong = _errors(scaled_metric)
    assert np.isclose(mse_feature_one_wrong, mse_feature_two_wrong)

Source File: test_builder.py From gordo with GNU Affero General Public License v3.0

6 votes

def test_get_metrics_dict_scaler(scaler, mock):
    """The "mean-squared-error" metric built by ``ModelBuilder`` must give
    equal scores for an error in either column only when a scaler is given."""
    mock_model = mock
    # two target columns on different scales (the second is multiplied by 100)
    values = np.array([[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]) * [1, 100]
    y = pd.DataFrame(values, columns=["Tag 1", "Tag 2"])

    metrics_dict = ModelBuilder.build_metrics_dict(
        [sklearn.metrics.mean_squared_error], y, scaler=scaler
    )
    metric_func = metrics_dict["mean-squared-error"]

    # the mock "predicts" its input with one column shrunk to 80%
    mock_model.predict = lambda _y: _y * [0.8, 1]
    mse_feature_one_wrong = metric_func(mock_model, y, y)
    mock_model.predict = lambda _y: _y * [1, 0.8]
    mse_feature_two_wrong = metric_func(mock_model, y, y)

    if not scaler:
        assert not np.isclose(mse_feature_one_wrong, mse_feature_two_wrong)
    else:
        assert np.isclose(mse_feature_one_wrong, mse_feature_two_wrong)

Source File: test_builder.py From gordo with GNU Affero General Public License v3.0

6 votes

def test_metrics_from_list():
    """
    Check that metric names are resolved to the matching metric functions
    """
    # with no argument the four default metrics are returned
    expected_defaults = [
        metrics.explained_variance_score,
        metrics.r2_score,
        metrics.mean_squared_error,
        metrics.mean_absolute_error,
    ]
    assert ModelBuilder.metrics_from_list() == expected_defaults

    # dotted names are resolved in the order given
    requested = [
        "sklearn.metrics.adjusted_mutual_info_score",
        "sklearn.metrics.r2_score",
    ]
    expected_specifics = [metrics.adjusted_mutual_info_score, metrics.r2_score]
    assert ModelBuilder.metrics_from_list(requested) == expected_specifics

Source File: test_run.py From nyaggle with MIT License

6 votes

def test_experiment_lgb_regressor(tmpdir_name):
    """``run_experiment`` with regression params: the MSE of the
    ``oof_prediction`` must equal the last entry of ``result.metrics``."""
    X, y = make_regression_df(n_samples=1024, n_num_features=10, n_cat_features=2,
                              random_state=0, id_column='user_id')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = dict(objective='regression', max_depth=8)
    result = run_experiment(params, X_train, y_train, X_test, tmpdir_name)

    # predictions must take many distinct values, i.e. not be binarized
    for prediction in (result.oof_prediction, result.test_prediction):
        assert len(np.unique(prediction)) > 5
    assert mean_squared_error(y_train, result.oof_prediction) == result.metrics[-1]

    _check_file_exists(tmpdir_name)

Source File: score_dataset.py From snape with Apache License 2.0

6 votes

def score_regression(y, y_hat, report=True):
    """
    Compute R2, RMSE and MAE and format them as a text report
    :param y: first argument passed to each metric
    :param y_hat: second argument passed to each metric
    :param report: when truthy, the report is printed
    :return: tuple of the MAE and the report string
    """
    r2 = r2_score(y, y_hat)
    rmse = sqrt(mean_squared_error(y, y_hat))
    mae = mean_absolute_error(y, y_hat)

    report_lines = [
        "---Regression Score--- ",
        "R2 = " + str(r2),
        "RMSE = " + str(rmse),
        "MAE = " + str(mae),
    ]
    # every line, including the last, ends with a newline
    report_string = "\n".join(report_lines) + "\n"

    if report:
        print(report_string)

    return mae, report_string

Source File: test_regression.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_regression_metrics_at_limits():
    # Error metrics are zero for a perfect single-sample prediction.
    zero_error_metrics = (
        mean_squared_error,
        mean_squared_log_error,
        mean_absolute_error,
        median_absolute_error,
        max_error,
    )
    for metric in zero_error_metrics:
        assert_almost_equal(metric([0.], [0.]), 0.00, 2)

    # Score metrics are one for a perfect prediction.
    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)

    # msle must reject negative values in either argument.
    negative_msg = ("Mean Squared Logarithmic Error cannot be "
                    "used when targets contain negative values.")
    negative_inputs = (
        ([-1.], [-1.]),
        ([1., 2., 3.], [1., -2., 3.]),
        ([1., -2., 3.], [1., 2., 3.]),
    )
    for first, second in negative_inputs:
        assert_raises_regex(ValueError, negative_msg,
                            mean_squared_log_error, first, second)

Source File: test_run.py From nyaggle with MIT License

5 votes

def test_experiment_cat_regressor(tmpdir_name):
    """``run_experiment`` with ``algorithm_type='cat'``: the MSE of the
    ``oof_prediction`` must equal the last entry of ``result.metrics``."""
    X, y = make_regression_df(n_samples=1024, n_num_features=10, n_cat_features=2,
                              random_state=0, id_column='user_id')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = dict(max_depth=8, num_boost_round=100)
    result = run_experiment(params, X_train, y_train, X_test, tmpdir_name, algorithm_type='cat')

    oof_mse = mean_squared_error(y_train, result.oof_prediction)
    assert oof_mse == result.metrics[-1]
    _check_file_exists(tmpdir_name)

Source File: reddit_output_att.py From causal-text-embeddings with MIT License

5 votes

def fit_conditional_expected_outcomes(outcomes, features):
	"""Fit a Ridge regression of ``outcomes`` on ``features`` and return it.

	When the externally defined ``verbose`` name is truthy, the MSE of the
	model on the data it was fitted on is printed.
	"""
	ridge = Ridge()
	ridge.fit(features, outcomes)
	fitted = ridge.predict(features)
	if verbose:
		print("Training MSE:", mse(outcomes, fitted))
	return ridge

Source File: test_run.py From nyaggle with MIT License

5 votes

def test_experiment_xgb_regressor(tmpdir_name):
    """``run_experiment`` with ``algorithm_type='xgb'`` and auto prep: the MSE
    of the ``oof_prediction`` must equal the last entry of ``result.metrics``."""
    X, y = make_regression_df(n_samples=1024, n_num_features=10, n_cat_features=2,
                              random_state=0, id_column='user_id')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = dict(max_depth=8, num_boost_round=100)
    result = run_experiment(params, X_train, y_train, X_test, tmpdir_name,
                            algorithm_type='xgb', with_auto_prep=True)

    oof_mse = mean_squared_error(y_train, result.oof_prediction)
    assert oof_mse == result.metrics[-1]
    _check_file_exists(tmpdir_name)

Source File: test_nn.py From numpy-ml with GNU General Public License v3.0

5 votes

def test_squared_error(N=15):
    """Compare ``SquaredError.loss`` with a reference built on sklearn's MSE.

    One check uses identical arrays; it is followed by ``N - 1`` checks on
    independently drawn arrays. With ``N=None`` the random checks never stop.
    """
    from numpy_ml.neural_nets.losses import SquaredError

    np.random.seed(12345)

    N = np.inf if N is None else N

    mine = SquaredError()

    def gold(y, y_pred):
        # reference value: the MSE scaled by rows, then columns, then 0.5
        n_rows, n_cols = y_pred.shape[0], y_pred.shape[1]
        return mean_squared_error(y, y_pred) * n_rows * n_cols * 0.5

    # identical arrays: both implementations are compared on y == y_pred
    n_dims = np.random.randint(2, 100)
    n_examples = np.random.randint(1, 1000)
    y = y_pred = random_tensor((n_examples, n_dims))
    assert_almost_equal(mine.loss(y, y_pred), gold(y, y_pred))
    print("PASSED")

    trial = 1
    while trial < N:
        n_dims = np.random.randint(2, 100)
        n_examples = np.random.randint(1, 1000)
        shape = (n_examples, n_dims)
        y = random_tensor(shape)
        y_pred = random_tensor(shape)
        assert_almost_equal(mine.loss(y, y_pred), gold(y, y_pred), decimal=5)
        print("PASSED")
        trial += 1

Source File: util.py From Recommender-Systems-Samples with MIT License

5 votes

def mse(y_true, y_pred):
    '''
    Mean squared error of two inputs, which are asserted to have equal length.

    y_true : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Estimated target values.
    '''
    n_true, n_pred = len(y_true), len(y_pred)
    assert n_true == n_pred
    return mean_squared_error(y_true, y_pred)

Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def check_boston(presort, loss, subsample):
    # Consistency check on the boston house prices data with least squares
    # and least absolute deviation: fitting with no weights, unit weights
    # and doubled unit weights must give almost the same predictions.
    ones = np.ones(len(boston.target))
    previous_pred = None
    for sample_weight in (None, ones, 2 * ones):
        clf = GradientBoostingRegressor(
            n_estimators=100,
            loss=loss,
            max_depth=4,
            subsample=subsample,
            min_samples_split=2,
            random_state=1,
            presort=presort,
        )

        # predicting before fitting must raise
        assert_raises(ValueError, clf.predict, boston.data)
        clf.fit(boston.data, boston.target,
                sample_weight=sample_weight)
        assert_equal(clf.apply(boston.data).shape, (506, 100))

        y_pred = clf.predict(boston.data)
        assert_less(mean_squared_error(boston.target, y_pred), 6.0)

        if previous_pred is not None:
            assert_array_almost_equal(previous_pred, y_pred)

        previous_pred = y_pred

Source File: test_gradient_boosting.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_zero_estimator_reg():
    # init='zero' must work for regression, while an unknown init string
    # must make fit raise.
    shared_params = dict(n_estimators=20, max_depth=1, random_state=1)

    est = GradientBoostingRegressor(init='zero', **shared_params)
    est.fit(boston.data, boston.target)
    train_error = mean_squared_error(boston.target, est.predict(boston.data))
    assert_almost_equal(train_error, 33.0, decimal=0)

    est = GradientBoostingRegressor(init='foobar', **shared_params)
    assert_raises(ValueError, est.fit, boston.data, boston.target)

Source File: lstm.py From user-behavior-anomaly-detector with MIT License

5 votes

def calculate_score(self, real, predicted, score):
        """Return ``rmse * score / 100`` rounded to two decimals, where
        ``rmse`` is the root mean squared error of ``real`` vs ``predicted``."""
        mean_sq = mean_squared_error(real, predicted)
        rmse = math.sqrt(mean_sq)
        return round(rmse * score / 100, 2)

Source File: train.py From KDD2018_MPCN with GNU General Public License v3.0

5 votes

def _majority_baseline(self, labels):
        """Print the MSE obtained by always predicting the most frequent
        value of ``abs(floor(label))`` over ``labels``."""
        banner = "============================================"
        print(banner)
        print("Running Majority Baseline...")

        # bucket every label by the absolute value of its floor
        buckets = [abs(math.floor(value)) for value in labels]
        count = Counter(buckets)
        print(count)

        max_class = count.most_common(5)[0][0]
        # constant prediction: one copy of the majority value per label
        _majority = [float(max_class)] * len(labels)
        print('MSE={}'.format(mean_squared_error(_majority, labels)))
        print(banner)

Python sklearn.metrics.mean_squared_error() Examples