Python sklearn.metrics.r2_score() Examples

The following are 30 code examples of sklearn.metrics.r2_score(), collected from open source projects. The originating project, source file, and license are noted above each example. You may also want to check out all available functions/classes of the module sklearn.metrics.
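Before diving into the examples, here is a minimal, self-contained sketch of a typical r2_score() call (the arrays are illustrative; they match the canonical scikit-learn docs example):

from sklearn.metrics import r2_score

y_true = [3.0, -0.5, 2.0, 7.0]
y_pred = [2.5, 0.0, 2.0, 8.0]

# R^2 = 1 - SS_res/SS_tot: 1.0 is a perfect fit, 0.0 matches always
# predicting the mean of y_true, and scores can be arbitrarily negative.
print(r2_score(y_true, y_pred))  # ~0.9486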
Example #1
Source File: vanilla_model.py    From OpenChem with MIT License
def fit_model(self, data, cross_val_data, cross_val_labels):
        eval_metrics = []
        for i in range(self.n_ensemble):
            train_sm = np.concatenate(cross_val_data[:i] +
                                      cross_val_data[(i + 1):])
            test_sm = cross_val_data[i]
            train_labels = np.concatenate(cross_val_labels[:i] +
                                          cross_val_labels[(i + 1):])
            test_labels = cross_val_labels[i]
            fp_train = get_fp(train_sm)
            fp_test = get_fp(test_sm)
            self.model[i].fit(fp_train, train_labels.ravel())
            predicted = self.model[i].predict(fp_test)
            if self.model_type == 'classifier':
                fpr, tpr, thresholds = metrics.roc_curve(test_labels, predicted)
                eval_metrics.append(metrics.auc(fpr, tpr))
                metrics_type = 'AUC'
            elif self.model_type == 'regressor':
                r2 = metrics.r2_score(test_labels, predicted)
                eval_metrics.append(r2)
                metrics_type = 'R^2 score'
        return eval_metrics, metrics_type 
Example #2
Source File: GC_script.py    From ClimateVegetationDynamics_GrangerCausality with GNU General Public License v3.0
def crossValidation(X, y, cvFolds, estimator):
    r2 = np.zeros((cvFolds,1))   
    kf = KFold(len(X), n_folds=cvFolds, shuffle=True, random_state = 30)
    cv_j=0    
    for train_index, test_index in kf:
        train_X = X[train_index,:]
        test_X = X[test_index,:]
        train_y = y[train_index]
        test_y = y[test_index]
        estimator.fit(train_X, train_y)
        y_true, y_pred = test_y, estimator.predict(test_X)
        r2[cv_j] = r2_score(y_true, y_pred) 
        cv_j = cv_j + 1
    return r2
    
#parameters: 'X' the predictors, 'y' the target, 'cvFolds' number of folds, 'estimator' machine learning algorithm 
#returns: the R squared for each fold 
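A minimal usage sketch for crossValidation() (hypothetical data; assumes the module-level imports the function relies on, i.e. the pre-0.18 sklearn KFold and r2_score):

import numpy as np
from sklearn.linear_model import Ridge

X = np.random.rand(100, 5)   # 100 samples, 5 predictors
y = np.random.rand(100)      # target
fold_r2 = crossValidation(X, y, cvFolds=5, estimator=Ridge())
print(fold_r2.mean())        # average R^2 over the 5 folds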
Example #3
Source File: regression.py    From KATE with BSD 3-Clause "New" or "Revised" License
def neural_regression(X_train, Y_train, X_val, Y_val, X_test, Y_test, nb_epoch=200, batch_size=10, seed=7):
    reg = neural_network(X_train.shape[1])
    reg.fit(X_train, Y_train,
                        nb_epoch=nb_epoch,
                        batch_size=batch_size,
                        shuffle=True,
                        validation_data=(X_val, Y_val),
                        callbacks=[
                                    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.01),
                                    EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=5, verbose=0, mode='auto'),
                        ]
                        )
    pred = reg.predict(X_test)
    pred = np.reshape(pred, pred.shape[0])
    r2 = r2_score(Y_test, pred)

    return r2 
Example #4
Source File: evaluation.py    From few with GNU General Public License v3.0
def r2_score_vec(y_true,y_pred):
    """ returns non-aggregate version of r2 score.

    based on r2_score() function from sklearn (http://sklearn.org)
    """

    numerator = (y_true - y_pred) ** 2
    denominator = (y_true - np.average(y_true)) ** 2

    nonzero_denominator = denominator != 0
    nonzero_numerator = numerator != 0
    valid_score = nonzero_denominator & nonzero_numerator
    output_scores = np.ones([y_true.shape[0]])
    output_scores[valid_score] = 1 - (numerator[valid_score] /
                                      denominator[valid_score])
    # arbitrarily set to zero to avoid -inf scores; having a constant
    # y_true is not interesting for scoring a regression anyway
    output_scores[nonzero_numerator & ~nonzero_denominator] = 0.

    return output_scores 
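A short sketch of calling the per-sample variant above (illustrative arrays):

import numpy as np

y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.0, 8.0])

# one score per sample: 1 - (y_true_i - y_pred_i)^2 / (y_true_i - mean(y_true))^2,
# with exact matches kept at 1.0
print(r2_score_vec(y_true, y_pred))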
Example #5
Source File: test.py    From TrafficFlowPrediction with MIT License
def eva_regress(y_true, y_pred):
    """Evaluation
    Evaluate the predicted result against the true data.

    # Arguments
        y_true: List/ndarray, true data.
        y_pred: List/ndarray, predicted data.
    """

    mape = MAPE(y_true, y_pred)
    vs = metrics.explained_variance_score(y_true, y_pred)
    mae = metrics.mean_absolute_error(y_true, y_pred)
    mse = metrics.mean_squared_error(y_true, y_pred)
    r2 = metrics.r2_score(y_true, y_pred)
    print('explained_variance_score:%f' % vs)
    print('mape:%f%%' % mape)
    print('mae:%f' % mae)
    print('mse:%f' % mse)
    print('rmse:%f' % math.sqrt(mse))
    print('r2:%f' % r2) 
Example #6
Source File: accuracy.py    From basenji with Apache License 2.0
def r2(self, log=False, pseudocount=1, clip=None):
    """ Compute target R2 vector. """
    r2_vec = np.zeros(self.num_targets)

    for ti in range(self.num_targets):
      if self.targets_na is not None:
        preds_ti = self.preds[~self.targets_na, ti].astype('float64')
        targets_ti = self.targets[~self.targets_na, ti].astype('float64')
      else:
        preds_ti = self.preds[:, :, ti].flatten().astype('float64')
        targets_ti = self.targets[:, :, ti].flatten().astype('float64')

      if clip is not None:
        preds_ti = np.clip(preds_ti, 0, clip)
        targets_ti = np.clip(targets_ti, 0, clip)

      if log:
        preds_ti = np.log2(preds_ti + pseudocount)
        targets_ti = np.log2(targets_ti + pseudocount)

      r2_vec[ti] = metrics.r2_score(targets_ti, preds_ti)

    return r2_vec 
Example #7
Source File: score_dataset.py    From snape with Apache License 2.0
def score_regression(y, y_hat, report=True):
    """
    Create a regression score report.
    :param y: true target values
    :param y_hat: predicted values
    :param report: if True, print the report string
    :return: (mae, report_string)
    """
    r2 = r2_score(y, y_hat)
    rmse = sqrt(mean_squared_error(y, y_hat))
    mae = mean_absolute_error(y, y_hat)

    report_string = "---Regression Score--- \n"
    report_string += "R2 = " + str(r2) + "\n"
    report_string += "RMSE = " + str(rmse) + "\n"
    report_string += "MAE = " + str(mae) + "\n"

    if report:
        print(report_string)

    return mae, report_string 
Example #8
Source File: test_scoring.py    From oddt with BSD 3-Clause "New" or "Revised" License
def test_ensemble_model():
    X = np.vstack((np.arange(30, 10, -2, dtype='float64'),
                   np.arange(100, 90, -1, dtype='float64'))).T

    Y = np.arange(10, dtype='float64')

    rf = regressors.randomforest(random_state=42)
    nn = regressors.neuralnetwork(solver='lbfgs', random_state=42)
    ensemble = ensemble_model((rf, nn))

    # we do not need to fit the underlying models; they are fitted when we fit the ensemble
    ensemble.fit(X, Y)

    pred = ensemble.predict(X)
    mean_pred = np.vstack((rf.predict(X), nn.predict(X))).mean(axis=0)
    assert_array_almost_equal(pred, mean_pred)
    assert_almost_equal(ensemble.score(X, Y), r2_score(Y, pred))

    # ensemble of a single model should behave exactly like this model
    nn = neuralnetwork(solver='lbfgs', random_state=42)
    ensemble = ensemble_model((nn,))
    ensemble.fit(X, Y)
    assert_array_almost_equal(ensemble.predict(X), nn.predict(X))
    assert_almost_equal(ensemble.score(X, Y), nn.score(X, Y)) 
Example #9
Source File: baselines.py    From AirBnbPricePrediction with MIT License
def print_evaluation_metrics(trained_model, trained_model_name, X_test, y_test):
    print('--------- For Model: ', trained_model_name, ' ---------\n')
    predicted_values = trained_model.predict(X_test)
    print("Mean absolute error: ",
          metrics.mean_absolute_error(y_test, predicted_values))
    print("Median absolute error: ",
          metrics.median_absolute_error(y_test, predicted_values))
    print("Mean squared error: ", metrics.mean_squared_error(
        y_test, predicted_values))
    print("R2: ", metrics.r2_score(y_test, predicted_values))
    plt.scatter(y_test, predicted_values, color='black')
    # plt.plot(x, y_pred, color='blue', linewidth=3)
    plt.title(trained_model_name)
    plt.xlabel('$y_{test}$')
    plt.ylabel('$y_{predicted}/y_{test}$')
    plt.savefig('%s.png' %trained_model_name, bbox_inches='tight')
    print("---------------------------------------\n") 
Example #10
Source File: test_few.py    From few with GNU General Public License v3.0
def test_few_fit_shapes():
    """test_few.py: fit and predict return correct shapes """
    np.random.seed(202)
    # load example data
    boston = load_boston()
    d = pd.DataFrame(data=boston.data)
    print("feature shape:",boston.data.shape)

    learner = FEW(generations=1, population_size=5,
                mutation_rate=0.2, crossover_rate=0.8,
                ml = LassoLarsCV(), min_depth = 1, max_depth = 3,
                sel = 'epsilon_lexicase', tourn_size = 2,
                random_state=0, verbosity=0,
                disable_update_check=False, fit_choice = 'mse')

    score = learner.fit(boston.data[:300], boston.target[:300])
    print("learner:",learner._best_estimator)
    yhat_test = learner.predict(boston.data[300:])
    test_score = learner.score(boston.data[300:],boston.target[300:])
    print("train score:",score,"test score:",test_score,
    "test r2:",r2_score(boston.target[300:],yhat_test))
    assert yhat_test.shape == boston.target[300:].shape 
Example #11
Source File: test_builder.py    From gordo with GNU Affero General Public License v3.0
def test_metrics_from_list():
    """
    Check getting functions from a list of metric names
    """
    default = ModelBuilder.metrics_from_list()
    assert default == [
        metrics.explained_variance_score,
        metrics.r2_score,
        metrics.mean_squared_error,
        metrics.mean_absolute_error,
    ]

    specifics = ModelBuilder.metrics_from_list(
        ["sklearn.metrics.adjusted_mutual_info_score", "sklearn.metrics.r2_score"]
    )
    assert specifics == [metrics.adjusted_mutual_info_score, metrics.r2_score] 
Example #12
Source File: deprecated.py    From Neuraxle with Apache License 2.0
def __init__(
            self,
            scoring_function=r2_score,
            k_fold=3,
            joiner=NumpyConcatenateOuterBatch(),
            cache_folder_when_no_handle=None,
            split_data_container_during_fit=True,
            predict_after_fit=True
    ):
        self.k_fold = k_fold
        BaseCrossValidationWrapper.__init__(
            self,
            scoring_function=scoring_function,
            joiner=joiner,
            cache_folder_when_no_handle=cache_folder_when_no_handle,
            split_data_container_during_fit=split_data_container_during_fit,
            predict_after_fit=predict_after_fit
        ) 
Example #13
Source File: test_predictor.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_boston_dataset(max_bins):
    boston = load_boston()
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=42)

    mapper = _BinMapper(max_bins=max_bins, random_state=42)
    X_train_binned = mapper.fit_transform(X_train)

    # Init gradients and hessians to that of least squares loss
    gradients = -y_train.astype(G_H_DTYPE)
    hessians = np.ones(1, dtype=G_H_DTYPE)

    min_samples_leaf = 8
    max_leaf_nodes = 31
    grower = TreeGrower(X_train_binned, gradients, hessians,
                        min_samples_leaf=min_samples_leaf,
                        max_leaf_nodes=max_leaf_nodes, max_bins=max_bins,
                        actual_n_bins=mapper.actual_n_bins_)
    grower.grow()

    predictor = grower.make_predictor(bin_thresholds=mapper.bin_thresholds_)

    assert r2_score(y_train, predictor.predict(X_train)) > 0.85
    assert r2_score(y_test, predictor.predict(X_test)) > 0.70 
Example #14
Source File: random.py    From Neuraxle with Apache License 2.0
def __init__(
            self,
            wrapped: BaseStep = None,
            test_size: float = 0.2,
            scoring_function=r2_score,
            run_validation_split_in_test_mode=True,
            cache_folder_when_no_handle=None
    ):
        """
        :param wrapped: wrapped step
        :param test_size: ratio for test size between 0 and 1
        :param scoring_function: scoring function with two arguments (y_true, y_pred)
        """
        BaseCrossValidationWrapper.__init__(self, wrapped=wrapped, cache_folder_when_no_handle=cache_folder_when_no_handle)

        self.run_validation_split_in_test_mode = run_validation_split_in_test_mode
        self.test_size = test_size
        self.scoring_function = scoring_function 
Example #15
Source File: test_regression.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_multioutput_regression():
    y_true = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
    y_pred = np.array([[0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1]])

    error = mean_squared_error(y_true, y_pred)
    assert_almost_equal(error, (1. / 3 + 2. / 3 + 2. / 3) / 4.)

    error = mean_squared_log_error(y_true, y_pred)
    assert_almost_equal(error, 0.200, decimal=2)

    # mean_absolute_error and mean_squared_error are equal because
    # it is a binary problem.
    error = mean_absolute_error(y_true, y_pred)
    assert_almost_equal(error, (1. / 3 + 2. / 3 + 2. / 3) / 4.)

    error = r2_score(y_true, y_pred, multioutput='variance_weighted')
    assert_almost_equal(error, 1. - 5. / 2)
    error = r2_score(y_true, y_pred, multioutput='uniform_average')
    assert_almost_equal(error, -.875) 
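For reference, multioutput also accepts 'raw_values', which returns one R^2 per output column instead of an aggregate; a brief sketch using the same data as the test above:

import numpy as np
from sklearn.metrics import r2_score

y_true = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
y_pred = np.array([[0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1]])

# array([-3.5, -2. ,  1. ,  1. ]); its mean is the -0.875 checked above
print(r2_score(y_true, y_pred, multioutput='raw_values'))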
Example #16
Source File: test_regression.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_regression_metrics_at_limits():
    assert_almost_equal(mean_squared_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_squared_log_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(median_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(max_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)
    assert_raises_regex(ValueError, "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [-1.], [-1.])
    assert_raises_regex(ValueError, "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [1., 2., 3.], [1., -2., 3.])
    assert_raises_regex(ValueError, "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [1., -2., 3.], [1., 2., 3.]) 
Example #17
Source File: perf_data.py    From AMPL with MIT License
def compute_perf_metrics(self, per_task=False):
        """Returns the R-squared metrics for each task or averaged over tasks based on the accumulated values
        
        Args:
            per_task (bool): True if calculating per-task metrics, False otherwise.
        
        Returns:
            A tuple (r2_score, std): 
                r2_score (np.array): An array of scores for each task, if per_task is True.
                Otherwise, it is a float containing the average R^2 score over tasks.

                std: Always None for this class.
        """
        r2_scores = self.perf_metrics[0]
        if per_task or self.num_tasks == 1:
            return (r2_scores, None)
        else:
            return (r2_scores.mean(), None)

Example #18
Source File: linear_regression.py    From tick with BSD 3-Clause "New" or "Revised" License
def score(self, X, y):
        """Returns the coefficient of determination R^2 of the fitted linear
        regression model, computed on the given features matrix and labels.

        Parameters
        ----------
        X : `np.ndarray` or `scipy.sparse.csr_matrix`, shape=(n_samples, n_features)
            Features matrix.

        y : `np.ndarray`, shape = (n_samples,)
            Labels vector.

        Returns
        -------
        score : `float`
            R^2 of self.predict(X) against y
        """
        from sklearn.metrics import r2_score
        return r2_score(y, self.predict(X)) 
Example #19
Source File: GC_script.py    From ClimateVegetationDynamics_GrangerCausality with GNU General Public License v3.0
def nestedCrossValidation(X, y, cvFolds, estimator):  
    kf = KFold(len(X), n_folds=cvFolds, shuffle=True, random_state = 30)
    cv_j=0
    param_grid = {'alpha': [0.0000001,0.000001,0.00001,0.0001,0.001,0.01,0.1,1,10,100,1000,10000,100000, 1000000, 10000000,1000000000]}
    r2 = np.zeros((cvFolds,1))   
    for train_index, test_index in kf:
        train_X = X[train_index,:]
        test_X = X[test_index,:]
        train_y = y[train_index]
        test_y = y[test_index]
        grid = GridSearchCV(estimator, param_grid=param_grid, verbose=0, cv=cvFolds, scoring='mean_squared_error')
        grid.fit(train_X,train_y)
        y_true, y_pred = test_y,grid.best_estimator_.predict(test_X)
        r2[cv_j] = r2_score(y_true, y_pred) 
        cv_j = cv_j + 1 
    return r2
Example #20
Source File: libscores.py    From automl-phase-2 with MIT License
def r2_score_(solution, prediction):
	return metrics.r2_score(solution, prediction) 
Example #21
Source File: test_group_sklearn_wrappers.py    From fairlearn with MIT License
def test_group_r2_score_multioutput():
    y_t = np.random.rand(len(groups), 2)
    y_p = np.random.rand(len(groups), 2)
    result = metrics.r2_score_group_summary(
        y_t, y_p, sensitive_features=groups, multioutput='raw_values')

    expected_overall = skm.r2_score(y_t, y_p, multioutput='raw_values')

    assert np.array_equal(result.overall, expected_overall)
    for target_group in np.unique(groups):
        mask = np.asarray(groups) == target_group
        expected = skm.r2_score(y_t[mask], y_p[mask], multioutput='raw_values')
        assert np.array_equal(result.by_group[target_group], expected)

Example #22
Source File: util.py    From jh-kaggle-util with Apache License 2.0
def model_score(y_pred,y_valid):
    final_eval = jhkaggle.jhkaggle_config['FINAL_EVAL']
    if final_eval == jhkaggle.const.EVAL_R2:
        return r2_score(y_valid, y_pred)
    elif final_eval == jhkaggle.const.EVAL_LOGLOSS:
        return log_loss(y_valid, y_pred)
    elif final_eval == jhkaggle.const.EVAL_AUC:
        fpr, tpr, thresholds = roc_curve(y_valid, y_pred, pos_label=1)
        return auc(fpr, tpr)
    else:
        raise Exception(f"Unknown FINAL_EVAL: {final_eval}") 
Example #23
Source File: evaluation.py    From cddd with MIT License
def qsar_regression(emb, groups, labels):
    """Helper function that fits and scores a SVM regressor on the extracted molecular
    descriptor in a leave-one-group-out cross-validation manner.

    Args:
        emb: Embedding (molecular descriptor) that is used as input for the SVM
        groups: Array or list with n_samples entries defining the fold membership for the
        crossvalidtion.
        labels: Target values of the of the qsar task.
    Returns:
        The mean accuracy, F1-score, ROC-AUC and prescion-recall-AUC of the cross-validation.
    """
    r2 = []
    r = []
    mse = []
    mae = []
    logo = LeaveOneGroupOut()
    clf = SVR(kernel='rbf', C=5.0)
    for train_index, test_index in logo.split(emb, groups=groups):
        clf.fit(emb[train_index], labels[train_index])
        y_pred = clf.predict(emb[test_index])
        y_true = labels[test_index]
        r2.append(r2_score(y_true, y_pred))
        r.append(spearmanr(y_true, y_pred)[0])
        mse.append(mean_squared_error(y_true, y_pred))
        mae.append(mean_absolute_error(y_true, y_pred))
    return np.mean(r2), np.mean(r), np.mean(mse), np.mean(mae) 
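A minimal usage sketch for qsar_regression() (random, illustrative data; groups define the leave-one-group-out folds):

import numpy as np

emb = np.random.rand(60, 16)          # molecular descriptors
labels = np.random.rand(60)           # regression targets
groups = np.repeat([0, 1, 2], 20)     # three cross-validation groups
mean_r2, mean_r, mean_mse, mean_mae = qsar_regression(emb, groups, labels)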
Example #24
Source File: datasets.py    From MLBlocks with MIT License
def load_boston():
    """Boston House Prices Dataset."""
    dataset = datasets.load_boston()
    return Dataset(load_boston.__doc__, dataset.data, dataset.target, r2_score) 
Example #25
Source File: test_metrics.py    From hyperparameter_hunter with MIT License
def metric_init_params_lookup():
    """Lookup dictionary for `Metric` initialization parameters used in test scenarios. Keys
    correspond to those in `metric_init_final_attributes_lookup`"""
    return dict(
        m_0=("roc_auc_score",),
        m_1=("roc_auc_score", roc_auc_score),
        m_2=("my_f1_score", "f1_score"),
        m_3=("hamming_loss", hamming_loss),
        m_4=("r2_score", r2_score, "min"),
        m_5=("my_r2_score", my_r2_score),
    ) 
Example #26
Source File: pipeline_train.py    From models with Apache License 2.0
def eval_metrics(actual, pred):
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    return rmse, mae, r2 
Example #27
Source File: test_sklearn_metrics.py    From poutyne with GNU Lesser General Public License v3.0
def test_regression(self):
        self._test_regression(r2_score, True)
        self._test_regression(r2_score, False)

        self._test_regression(gini, True)
        self._test_regression(gini, False)

        two_skl_metrics = [r2_score, gini]
        self._test_regression(two_skl_metrics, True)
        self._test_regression(two_skl_metrics, False) 
Example #28
Source File: main.py    From nni with MIT License
def run(X_train, X_test, y_train, y_test, model):
    '''Train model and predict result'''
    model.fit(X_train, y_train)
    predict_y = model.predict(X_test)
    score = r2_score(y_test, predict_y)
    LOG.debug('r2 score: %s' % score)
    nni.report_final_result(score) 
Example #29
Source File: elasticnet_gboost.py    From AmusingPythonCodes with MIT License
def get_score(prediction, labels):
    # sklearn metrics expect (y_true, y_pred), so the labels go first
    print('R2: {}'.format(r2_score(labels, prediction)))
    print('RMSE: {}'.format(np.sqrt(mean_squared_error(labels, prediction))))


Example #30
Source File: base.py    From sktime with BSD 3-Clause "New" or "Revised" License
def score(self, X_test, y_test):
        from sklearn.metrics import r2_score
        return r2_score(y_test, self.predict(X_test))