Python sklearn.cross_validation.cross_val_score() Examples
The following are 30 code examples of sklearn.cross_validation.cross_val_score(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.cross_validation, or try the search function.
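Note: sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in 0.20; the same functionality now lives in sklearn.model_selection, so the examples below only run on old scikit-learn versions. For reference, a minimal sketch of the call in both spellings (the iris data and SVC classifier here are illustrative choices, not taken from any example below):

# Old API (scikit-learn < 0.20), as used in the examples on this page:
#   from sklearn.cross_validation import cross_val_score

# Modern equivalent (scikit-learn >= 0.18):
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

iris = load_iris()
clf = SVC(kernel='linear')
# 5-fold cross-validated accuracy: one score per fold
scores = cross_val_score(clf, iris.data, iris.target, cv=5)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))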
Example #1
Source File: analysis.py From smallrnaseq with GNU General Public License v3.0 | 7 votes |
def classify(X, y, cl, name=''):
    """Classification using gene features"""
    from sklearn.metrics import classification_report, accuracy_score
    np.random.seed()
    ind = np.random.permutation(len(X))
    from sklearn.cross_validation import train_test_split
    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.4)
    #print X
    cl.fit(Xtrain, ytrain)
    ypred = cl.predict(Xtest)
    print(classification_report(ytest, ypred))
    #print accuracy_score(ytest, ypred)
    from sklearn import cross_validation
    yl = pd.Categorical(y).labels
    sc = cross_validation.cross_val_score(cl, X, yl, scoring='roc_auc', cv=5)
    print("AUC: %0.2f (+/- %0.2f)" % (sc.mean(), sc.std() * 2))
    return cl
Example #2
Source File: test_cross_validation.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_cross_val_score_multilabel():
    X = np.array([[-3, 4], [2, 4], [3, 3], [0, 2], [-3, 1],
                  [-2, 1], [0, 0], [-2, -1], [-1, -2], [1, -2]])
    y = np.array([[1, 1], [0, 1], [0, 1], [0, 1], [1, 1],
                  [0, 1], [1, 0], [1, 1], [1, 0], [0, 0]])
    clf = KNeighborsClassifier(n_neighbors=1)
    scoring_micro = make_scorer(precision_score, average='micro')
    scoring_macro = make_scorer(precision_score, average='macro')
    scoring_samples = make_scorer(precision_score, average='samples')
    score_micro = cval.cross_val_score(clf, X, y, scoring=scoring_micro, cv=5)
    score_macro = cval.cross_val_score(clf, X, y, scoring=scoring_macro, cv=5)
    score_samples = cval.cross_val_score(clf, X, y,
                                         scoring=scoring_samples, cv=5)
    assert_almost_equal(score_micro, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 3])
    assert_almost_equal(score_macro, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 4])
    assert_almost_equal(score_samples, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 4])
Example #3
Source File: test_cross_validation.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_cross_val_score_with_score_func_regression():
    X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
                           random_state=0)
    reg = Ridge()

    # Default score of the Ridge regression estimator
    scores = cval.cross_val_score(reg, X, y, cv=5)
    assert_array_almost_equal(scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # R2 score (aka. coefficient of determination) - should be the
    # same as the default estimator score
    r2_scores = cval.cross_val_score(reg, X, y, scoring="r2", cv=5)
    assert_array_almost_equal(r2_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # Mean squared error; this is a loss function, so "scores" are negative
    neg_mse_scores = cval.cross_val_score(reg, X, y, cv=5,
                                          scoring="neg_mean_squared_error")
    expected_neg_mse = np.array([-763.07, -553.16, -274.38, -273.26, -1681.99])
    assert_array_almost_equal(neg_mse_scores, expected_neg_mse, 2)

    # Explained variance
    scoring = make_scorer(explained_variance_score)
    ev_scores = cval.cross_val_score(reg, X, y, cv=5, scoring=scoring)
    assert_array_almost_equal(ev_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)
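Because mean squared error is a loss, scikit-learn negates it so that higher is always better, as the test above asserts. When an RMSE is wanted, the sign has to be flipped back before taking the square root; the same idiom appears, commented out, in Example #5 below. A small sketch under the modern sklearn.model_selection import, with the data setup mirroring the test above:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

X, y = make_regression(n_samples=30, n_features=20,
                       n_informative=5, random_state=0)
# neg_mean_squared_error returns -MSE per fold (higher is better)
neg_mse = cross_val_score(Ridge(), X, y, cv=5,
                          scoring="neg_mean_squared_error")
rmse_per_fold = np.sqrt(-neg_mse)  # flip the sign before the square root
print("RMSE: %0.2f (+/- %0.2f)" % (rmse_per_fold.mean(), rmse_per_fold.std()))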
Example #4
Source File: test_cross_validation.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_cross_val_score_with_score_func_classification():
    iris = load_iris()
    clf = SVC(kernel='linear')

    # Default score (should be the accuracy score)
    scores = cval.cross_val_score(clf, iris.data, iris.target, cv=5)
    assert_array_almost_equal(scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # Correct classification score (aka. zero-one score) - should be the
    # same as the default estimator score
    zo_scores = cval.cross_val_score(clf, iris.data, iris.target,
                                     scoring="accuracy", cv=5)
    assert_array_almost_equal(zo_scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # F1 score (classes are balanced, so f1_score should equal the
    # zero-one score)
    f1_scores = cval.cross_val_score(clf, iris.data, iris.target,
                                     scoring="f1_weighted", cv=5)
    assert_array_almost_equal(f1_scores, [0.97, 1., 0.97, 0.97, 1.], 2)
Example #5
Source File: code.py From The_Ultimate_Student_Hunt with MIT License | 6 votes |
def run_model(model, dtrain, predictor_var, target,
              scoring_method='mean_squared_error'):
    cv_method = KFold(len(dtrain), 5)
    cv_scores = cross_val_score(model, dtrain[predictor_var], dtrain[target],
                                cv=cv_method, scoring=scoring_method)
    #print cv_scores, np.mean(cv_scores), np.sqrt((-1)*np.mean(cv_scores))
    dtrain_for_val = dtrain[dtrain['Year'] < 2000]
    dtest_for_val = dtrain[dtrain['Year'] > 1999]
    #cv_method = KFold(len(dtrain_for_val),5)
    #cv_scores_2 = cross_val_score(model,dtrain_for_val[predictor_var],dtrain_for_val[target],cv=cv_method,scoring=scoring_method)
    #print cv_scores_2, np.mean(cv_scores_2)
    dtrain_for_val_ini = dtrain_for_val[predictor_var]
    dtest_for_val_ini = dtest_for_val[predictor_var]
    model.fit(dtrain_for_val_ini, dtrain_for_val[target])
    pred_for_val = model.predict(dtest_for_val_ini)
    #print math.sqrt(mean_squared_error(dtest_for_val['Footfall'],pred_for_val))
Example #6
Source File: test_cross_validation.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_cross_val_score_precomputed():
    # test for svm with precomputed kernel
    svm = SVC(kernel="precomputed")
    iris = load_iris()
    X, y = iris.data, iris.target
    linear_kernel = np.dot(X, X.T)
    score_precomputed = cval.cross_val_score(svm, linear_kernel, y)
    svm = SVC(kernel="linear")
    score_linear = cval.cross_val_score(svm, X, y)
    assert_array_equal(score_precomputed, score_linear)

    # Error raised for non-square X
    svm = SVC(kernel="precomputed")
    assert_raises(ValueError, cval.cross_val_score, svm, X, y)

    # test error is raised when the precomputed kernel is not array-like
    # or sparse
    assert_raises(ValueError, cval.cross_val_score, svm,
                  linear_kernel.tolist(), y)
Example #7
Source File: test_cross_validation.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_cross_val_score_mask():
    # test that cross_val_score works with boolean masks
    svm = SVC(kernel="linear")
    iris = load_iris()
    X, y = iris.data, iris.target
    cv_indices = cval.KFold(len(y), 5)
    scores_indices = cval.cross_val_score(svm, X, y, cv=cv_indices)
    cv_indices = cval.KFold(len(y), 5)
    cv_masks = []
    for train, test in cv_indices:
        mask_train = np.zeros(len(y), dtype=np.bool)
        mask_test = np.zeros(len(y), dtype=np.bool)
        mask_train[train] = 1
        mask_test[test] = 1
        # append the boolean masks (not the index arrays), so that the
        # masked code path is actually exercised
        cv_masks.append((mask_train, mask_test))
    scores_masks = cval.cross_val_score(svm, X, y, cv=cv_masks)
    assert_array_equal(scores_indices, scores_masks)
Example #8
Source File: OutPutRes.py From ProFET with GNU General Public License v3.0 | 5 votes |
def CV_multi_stats(X, y, model, n=6):
    '''
    http://scikit-learn.org/stable/modules/model_evaluation.html#classification-metrics
    This version uses multiclass (or multilabel) compatible metrics.

    May be expanded to use the cross_val_score helper function:
    http://scikit-learn.org/stable/modules/generated/sklearn.cross_validation.cross_val_score.html
    http://scikit-learn.org/stable/modules/cross_validation.html#computing-cross-validated-metrics
    '''
    scores = cross_val_score(estimator=model, X=X, y=y,
                             cv=StratifiedShuffleSplit(y, n_iter=n, test_size=0.16),
                             n_jobs=-1)  # Accuracy
    scores_f1 = cross_val_score(estimator=model, X=X, y=y,
                                cv=StratifiedShuffleSplit(y, n_iter=n, test_size=0.16),
                                n_jobs=-1, scoring='f1')
    print("Model Accuracy: %0.3f (+- %0.2f)" % (scores.mean(), scores.std() * 2))
    print("Model f1: %0.3f (+- %0.2f)" % (scores_f1.mean(), scores_f1.std() * 2))
    return (scores.mean(), scores.std(), scores_f1.mean(), scores_f1.std())  # Removed * 2 from returned STD .. ?
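In the old API, splitters such as StratifiedShuffleSplit received the labels and n_iter in their constructor; in sklearn.model_selection the constructor takes only n_splits and test_size, and the data is supplied through cross_val_score's cv argument. A hedged sketch of the same accuracy/F1 computation on current scikit-learn: the iris data and logistic-regression estimator are stand-ins, and 'f1' is swapped for 'f1_weighted' because the plain 'f1' scorer is binary-only in current releases:

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedShuffleSplit, cross_val_score

X, y = load_iris(return_X_y=True)           # stand-in data for illustration
model = LogisticRegression(max_iter=1000)   # stand-in estimator
n = 6

# Old API: cv=StratifiedShuffleSplit(y, n_iter=n, test_size=0.16)
cv = StratifiedShuffleSplit(n_splits=n, test_size=0.16, random_state=0)
scores = cross_val_score(model, X, y, cv=cv, n_jobs=-1)  # accuracy by default
scores_f1 = cross_val_score(model, X, y, cv=cv, n_jobs=-1,
                            scoring='f1_weighted')  # weighted F1 for multiclass
print("Accuracy: %0.3f (+- %0.2f)" % (scores.mean(), scores.std() * 2))
print("F1 (weighted): %0.3f (+- %0.2f)" % (scores_f1.mean(), scores_f1.std() * 2))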
Example #9
Source File: OutPutRes.py From ProFET with GNU General Public License v3.0 | 5 votes |
def PlotPerfPercentFeatures(X, y, est=LinearSVC()):
    '''
    Performance of a classifier (default: SVM-Anova), varying the percentile
    of features selected (F-test).
    http://scikit-learn.org/stable/auto_examples/svm/plot_svm_anova.html#example-svm-plot-svm-anova-py
    '''
    transform = SelectPercentile(f_classif)
    clf = Pipeline([('anova', transform), ('est', est)])

    # Plot the cross-validation score as a function of percentile of features
    score_means = list()
    score_stds = list()
    percentiles = (1, 2, 3, 5, 7, 10, 13, 15, 20, 25, 33, 50, 65, 75, 90, 100)
    # percentiles = (1, 5, 10, 25, 50, 75, 90)

    for percentile in percentiles:
        # print(percentile)
        clf.set_params(anova__percentile=percentile)
        this_scores = cross_val_score(clf, X, y,
                                      cv=StratifiedShuffleSplit(y, n_iter=7, test_size=0.3),
                                      n_jobs=-1)
        score_means.append(this_scores.mean())
        score_stds.append(this_scores.std())

    print("Outputting Graph:")
    plt.errorbar(percentiles, score_means, np.array(score_stds))
    plt.title('Predictor Performance, varying percent of features used')
    plt.xlabel('Percentile')
    plt.ylabel('Prediction Performance')
    plt.axis('tight')
    plt.show()
Example #10
Source File: VisualizeBestFeatures.py From ProFET with GNU General Public License v3.0 | 5 votes |
def PlotPerfPercentFeatures(X, y, est=LinearSVC()):
    '''
    Display performance of a classifier (default: SVM), varying the
    percentile of features retained (F-test).
    http://scikit-learn.org/stable/auto_examples/svm/plot_svm_anova.html#example-svm-plot-svm-anova-py
    '''
    transform = SelectPercentile(f_classif)
    clf = Pipeline([('anova', transform), ('est', est)])

    # Plot the cross-validation score as a function of percentile of features
    score_means = list()
    score_stds = list()
    percentiles = (1, 2, 3, 5, 7, 10, 15, 20, 25, 33, 50, 66, 75, 90, 100)
    # percentiles = (1, 5, 10, 25, 50, 75, 90)

    for percentile in percentiles:
        # print(percentile)
        clf.set_params(anova__percentile=percentile)
        this_scores = cross_val_score(clf, X, y,
                                      cv=StratifiedShuffleSplit(y, n_iter=5, test_size=0.4),
                                      n_jobs=-1)
        score_means.append(this_scores.mean())
        score_stds.append(this_scores.std())

    print("Outputting Graph:")
    plt.errorbar(percentiles, score_means, np.array(score_stds))
    plt.title('Predictor Performance, varying percent of features used')
    plt.xlabel('Percentile')
    plt.ylabel('Prediction Performance')
    plt.axis('tight')
    plt.show()
Example #11
Source File: Model_Parameters_CV.py From ProFET with GNU General Public License v3.0 | 5 votes |
def plot_BestKFeatures(X_train, y_train):
    '''
    http://nbviewer.ipython.org/github/gmonce/scikit-learn-book/blob/master/Chapter%204%20-%20Advanced%20Features%20-%20Feature%20Engineering%20and%20Selection.ipynb
    Find the best percentile of features to use, using cross-validation on
    the training set, and get the K best features.
    '''
    from sklearn import cross_validation
    from sklearn import feature_selection
    from sklearn import tree
    dt = tree.DecisionTreeClassifier(criterion='entropy')
    # NOTE: the random forest below overrides the decision tree above
    dt = RandomForestClassifier(n_jobs=2, bootstrap=True, n_estimators=250,
                                criterion='gini')
    dt = dt.fit(X_train, y_train)

    percentiles = range(1, 95, 5)
    results = []
    for i in range(1, 95, 5):
        fs = feature_selection.SelectPercentile(feature_selection.chi2, percentile=i)  # Original
        fs = feature_selection.SelectPercentile(feature_selection.f_classif, percentile=i)  # alt
        X_train_fs = fs.fit_transform(X_train, y_train)
        scores = cross_validation.cross_val_score(dt, X_train_fs, y_train, cv=4)
        # print i, scores.mean()
        results = np.append(results, scores.mean())

    optimal_percentil = np.where(results == results.max())[0][0]  # index of the best percentile
    print("Optimal number of features: {0}".format(percentiles[optimal_percentil]), "\n")

    # Plot number of features VS. cross-validation scores
    import matplotlib.pylab as pl
    pl.figure()
    pl.xlabel("Number of features selected")
    pl.ylabel("Cross-validation accuracy")
    pl.plot(percentiles, results)
    print("Mean scores:", results)
    return
Example #12
Source File: simulation.py From jstsp2015 with MIT License | 5 votes |
def compute_svm_score(K, y, n_folds, scoring='accuracy', random_state=0):
    cv = StratifiedKFold(y, n_folds=n_folds, shuffle=True,
                         random_state=random_state)
    clf = SVC(C=1.0, kernel='precomputed')
    scores = cross_val_score(clf, K, y, scoring=scoring, cv=cv, n_jobs=1)
    score = scores.mean()
    return score
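Examples #12 and #13 pass the precomputed kernel matrix K where X would normally go; cross_val_score slices a square matrix on both axes per fold when the estimator uses kernel='precomputed', which is exactly what Example #6 above tests. A sketch of the same score on current scikit-learn, where StratifiedKFold no longer takes y or n_folds in its constructor; the iris data and linear kernel here are illustrative:

import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
K = np.dot(X, X.T)  # linear kernel matrix, n_samples x n_samples

# Old API: StratifiedKFold(y, n_folds=n_folds, shuffle=True, random_state=0)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
clf = SVC(C=1.0, kernel='precomputed')
scores = cross_val_score(clf, K, y, scoring='accuracy', cv=cv, n_jobs=1)
print("Mean accuracy: %0.3f" % scores.mean())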
Example #13
Source File: classif_and_ktst.py From jstsp2015 with MIT License | 5 votes |
def compute_svm_cv(K, y, C=100.0, n_folds=5, scoring=balanced_accuracy_scoring):
    """Compute cross-validated score of SVM with given precomputed kernel."""
    cv = StratifiedKFold(y, n_folds=n_folds)
    clf = SVC(C=C, kernel='precomputed', class_weight='auto')
    scores = cross_val_score(clf, K, y, scoring=scoring, cv=cv)
    return scores.mean()
Example #14
Source File: test_cross_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_cross_val_score():
    clf = MockClassifier()
    for a in range(-10, 10):
        clf.a = a
        # Smoke test
        scores = cval.cross_val_score(clf, X, y)
        assert_array_equal(scores, clf.score(X, y))

        # test with multioutput y
        scores = cval.cross_val_score(clf, X_sparse, X)
        assert_array_equal(scores, clf.score(X_sparse, X))

        scores = cval.cross_val_score(clf, X_sparse, y)
        assert_array_equal(scores, clf.score(X_sparse, y))

        # test with multioutput y
        scores = cval.cross_val_score(clf, X_sparse, X)
        assert_array_equal(scores, clf.score(X_sparse, X))

    # test with X and y as list
    list_check = lambda x: isinstance(x, list)
    clf = CheckingClassifier(check_X=list_check)
    scores = cval.cross_val_score(clf, X.tolist(), y.tolist())

    clf = CheckingClassifier(check_y=list_check)
    scores = cval.cross_val_score(clf, X, y.tolist())

    assert_raises(ValueError, cval.cross_val_score, clf, X, y,
                  scoring="sklearn")

    # test with 3d X
    X_3d = X[:, :, np.newaxis]
    clf = MockClassifier(allow_nd=True)
    scores = cval.cross_val_score(clf, X_3d, y)

    clf = MockClassifier(allow_nd=False)
    assert_raises(ValueError, cval.cross_val_score, clf, X_3d, y)
Example #15
Source File: test_cross_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_cross_val_score_pandas():
    # check cross_val_score doesn't destroy pandas dataframe
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((Series, DataFrame))
    except ImportError:
        pass
    for TargetType, InputFeatureType in types:
        # X dataframe, y series
        X_df, y_ser = InputFeatureType(X), TargetType(y)
        check_df = lambda x: isinstance(x, InputFeatureType)
        check_series = lambda x: isinstance(x, TargetType)
        clf = CheckingClassifier(check_X=check_df, check_y=check_series)
        cval.cross_val_score(clf, X_df, y_ser)
Example #16
Source File: test_cross_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_cross_val_score_score_func():
    clf = MockClassifier()
    _score_func_args = []

    def score_func(y_test, y_predict):
        _score_func_args.append((y_test, y_predict))
        return 1.0

    with warnings.catch_warnings(record=True):
        scoring = make_scorer(score_func)
        score = cval.cross_val_score(clf, X, y, scoring=scoring)
    assert_array_equal(score, [1.0, 1.0, 1.0])
    assert len(_score_func_args) == 3
Example #17
Source File: test_cross_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_cross_val_score_errors():
    class BrokenEstimator:
        pass
    assert_raises(TypeError, cval.cross_val_score, BrokenEstimator(), X)
Example #18
Source File: test_cross_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def train_test_split_pandas():
    # check train_test_split doesn't destroy pandas dataframe
    types = [MockDataFrame]
    try:
        from pandas import DataFrame
        types.append(DataFrame)
    except ImportError:
        pass
    for InputFeatureType in types:
        # X dataframe
        X_df = InputFeatureType(X)
        X_train, X_test = cval.train_test_split(X_df)
        assert_true(isinstance(X_train, InputFeatureType))
        assert_true(isinstance(X_test, InputFeatureType))
Example #19
Source File: test_cross_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_cross_val_predict_pandas():
    # check cross_val_predict doesn't destroy pandas dataframe
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((Series, DataFrame))
    except ImportError:
        pass
    for TargetType, InputFeatureType in types:
        # X dataframe, y series
        X_df, y_ser = InputFeatureType(X), TargetType(y)
        check_df = lambda x: isinstance(x, InputFeatureType)
        check_series = lambda x: isinstance(x, TargetType)
        clf = CheckingClassifier(check_X=check_df, check_y=check_series)
        cval.cross_val_predict(clf, X_df, y_ser)
Example #20
Source File: test_cross_validation.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_sparse_fit_params():
    iris = load_iris()
    X, y = iris.data, iris.target
    clf = MockClassifier()
    fit_params = {'sparse_sample_weight': coo_matrix(np.eye(X.shape[0]))}
    a = cval.cross_val_score(clf, X, y, fit_params=fit_params)
    assert_array_equal(a, np.ones(3))
Example #21
Source File: solution.py From Kaggle with MIT License | 5 votes |
def optimize_logisticRegression():
    train_data = pd.read_csv(r"data/train.csv")
    print u"Data info:\n", train_data.info()
    print u"Data description:\n", train_data.describe()
    #display_data(train_data)  # simple display of the data info
    #display_with_process(train_data)  # lightly process and display the data to verify assumptions
    process_data = fe_preprocessData(train_data, 'process_train_data')  # preprocess the training data
    train_data = process_data.filter(regex='Survived|Age|SibSp|Parch|Fare|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')  # extract the desired columns with a regex
    train_np = train_data.as_matrix()  # convert to a matrix

    '''Train the model'''
    X = train_np[:, 1:]
    y = train_np[:, 0]
    #=X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)
    #=model = linear_model.LogisticRegression(C=1.0,tol=1e-6).fit(X_train,y_train)
    model = linear_model.LogisticRegression(C=1.0, tol=1e-6).fit(X, y)
    print pd.DataFrame({"columns": list(train_data.columns)[1:], "coef_": list(model.coef_.T)})

    '''Predict on the test set'''
    test_data = pd.read_csv(r"data/test.csv")
    process_test_data = fe_preprocessData(test_data, 'process_test_data')  # preprocess the data
    test_data = process_test_data.filter(regex='Age|SibSp|Parch|Fare|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
    test_np = test_data.as_matrix()
    predict = model.predict(test_np)
    result = pd.DataFrame(data={'PassengerId': process_test_data['PassengerId'].as_matrix(), 'Survived': predict.astype(np.int32)})
    result.to_csv(r'optimize_logisticRegression_result/prediction.csv', index=False)
    #clf = linear_model.LogisticRegression(C=1.0,tol=1e-6)
    #print cross_validation.cross_val_score(clf, X,y,cv=5)

## map pairs of terms to polynomial features
Example #22
Source File: ml.py From EDeN with MIT License | 5 votes |
def estimate_model(positive_data_matrix=None, negative_data_matrix=None,
                   target=None, estimator=None, n_jobs=4):
    """estimate_model."""
    X, y = make_data_matrix(positive_data_matrix=positive_data_matrix,
                            negative_data_matrix=negative_data_matrix,
                            target=target)
    logger.info('Test set')
    logger.info(describe(X))
    logger.info('-' * 80)
    logger.info('Test Estimate')
    predictions = estimator.predict(X)
    margins = estimator.decision_function(X)
    logger.info(classification_report(y, predictions))
    apr = average_precision_score(y, margins)
    logger.info('APR: %.3f' % apr)
    roc = roc_auc_score(y, margins)
    logger.info('ROC: %.3f' % roc)
    logger.info('Cross-validated estimate')
    scoring_strings = ['accuracy', 'precision', 'recall', 'f1',
                       'average_precision', 'roc_auc']
    for scoring in scoring_strings:
        scores = cross_validation.cross_val_score(
            estimator, X, y, cv=5, scoring=scoring, n_jobs=n_jobs)
        logger.info('%20s: %.3f +- %.3f' %
                    (scoring, np.mean(scores), np.std(scores)))
    return roc, apr
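On current scikit-learn, the loop over scoring strings above can be collapsed into a single cross_validate call (added in 0.19), which fits each fold once and evaluates every metric on the fitted model instead of refitting per metric. A minimal sketch; the make_classification dataset and plain SVC are stand-in assumptions, not part of the EDeN code:

from sklearn.datasets import make_classification
from sklearn.model_selection import cross_validate
from sklearn.svm import SVC

X, y = make_classification(n_samples=200, random_state=0)  # stand-in data
scorings = ['accuracy', 'precision', 'recall', 'f1',
            'average_precision', 'roc_auc']
# One fit per fold; every scorer is evaluated on the same fitted model
results = cross_validate(SVC(), X, y, cv=5, scoring=scorings, n_jobs=4)
for scoring in scorings:
    s = results['test_%s' % scoring]
    print('%20s: %.3f +- %.3f' % (scoring, s.mean(), s.std()))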
Example #23
Source File: Train Classifier and Test Video Feed.py From Emotion-Recognition-Using-SVMs with MIT License | 5 votes |
def evaluate_cross_validation(clf, X, y, K):
    # create a k-fold cross-validation iterator
    cv = KFold(len(y), K, shuffle=True, random_state=0)
    # by default the score used is the one returned by the estimator's
    # score method (accuracy)
    scores = cross_val_score(clf, X, y, cv=cv)
    print "Scores: ", scores
    print ("Mean score: {0:.3f} (+/-{1:.3f})".format(np.mean(scores), sem(scores)))

# Confusion Matrix and Results
Example #24
Source File: titanic.py From MachineLearning with Apache License 2.0 | 5 votes |
def compute_score(clf, X, y, scoring='accuracy'):
    xval = cross_val_score(clf, X, y, cv=5, scoring=scoring)
    return np.mean(xval)
Example #25
Source File: image-classification.py From Building-Machine-Learning-Systems-With-Python-Second-Edition with MIT License | 5 votes |
def accuracy(features, labels):
    from sklearn.linear_model import LogisticRegression
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn import cross_validation

    # We use logistic regression because it is very fast.
    # Feel free to experiment with other classifiers
    clf = Pipeline([('preproc', StandardScaler()),
                    ('classifier', LogisticRegression())])
    cv = cross_validation.LeaveOneOut(len(features))
    scores = cross_validation.cross_val_score(clf, features, labels, cv=cv)
    return scores.mean()
Example #26
Source File: rank_tags.py From TGIF-Release with BSD 3-Clause "New" or "Revised" License | 5 votes |
def stump(X, y):
    score = cross_val_score(LinearSVC(), X, y, cv=5, n_jobs=5,
                            scoring='average_precision')
    clf = LinearSVC()
    clf.fit(X, y)
    coef = clf.coef_[0, 0]
    inter = clf.intercept_[0]
    return np.mean(score), np.sign(coef), inter / np.abs(coef)
Example #27
Source File: sandpit.py From automl-phase-2 with MIT License | 5 votes |
def _f(x):
    # iris = load_iris()
    X, y = make_hastie_10_2(random_state=0)
    x = np.ravel(x)
    f = np.zeros(x.shape)
    for i in range(f.size):
        clf = RandomForestClassifier(n_estimators=1,
                                     min_samples_leaf=int(np.round(x[i])),
                                     random_state=0)
        # scores = cross_val_score(clf, iris.data, iris.target)
        scores = cross_val_score(clf, X, y, cv=5)
        f[i] = -scores.mean()
    return f.ravel()
Example #28
Source File: scorer.py From scan with GNU Affero General Public License v3.0 | 5 votes |
def train(self):
    feats = self.get_features()
    scores = np.array(self.scores)

    # Compute error metrics for the estimator.
    self.cv_scores = cross_validation.cross_val_score(self.classifier,
                                                      feats, scores)
    self.cv_score = self.cv_scores.mean()
    self.cv_dev = self.cv_scores.std()

    self.classifier.fit(feats, scores)
    self.fit_done = True
Example #29
Source File: sklearnbasemodel.py From Supply-demand-forecasting with MIT License | 5 votes |
def run_croos_validation(self):
    features, labels, cv = self.getFeaturesLabel()
    scores = cross_validation.cross_val_score(
        self.clf, features, labels, cv=cv,
        scoring=mean_absolute_percentage_error_scoring, n_jobs=-1)
    print "cross validation scores: means, {}, std, {}, details,{}".format(
        np.absolute(scores.mean()), scores.std(), np.absolute(scores))
    return -np.absolute(scores.mean())
Example #30
Source File: solution.py From Kaggle with MIT License | 5 votes |
def baseline_logisticRegression():
    train_data = pd.read_csv(r"data/train.csv")
    #print u"Data info:\n", train_data.info()
    #print u"Data description:\n", train_data.describe()
    #display_data(train_data)  # simple display of the data info
    #display_with_process(train_data)  # lightly process and display the data to verify assumptions
    process_data = pre_processData(train_data, 'process_train_data')  # preprocess the training data
    train_data = process_data.filter(regex='Survived|Age|SibSp|Parch|Fare|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')  # extract the desired columns with a regex
    train_np = train_data.as_matrix()  # convert to a matrix

    '''Train the model'''
    X = train_np[:, 1:]
    y = train_np[:, 0]
    #=X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)
    #=model = linear_model.LogisticRegression(C=1.0,tol=1e-6).fit(X_train,y_train)
    model = linear_model.LogisticRegression(C=1.0, tol=1e-6).fit(X, y)
    print pd.DataFrame({"columns": list(train_data.columns)[1:], "coef_": list(model.coef_.T)})
    #=prediction = model.predict(X_test)
    #=cv_error = pd.DataFrame(data=list(X_test[np.where(prediction!=y_test)]),columns=list(train_data.columns)[1:])
    #=cv_error.to_csv(r'error.csv',index=True)
    #=print np.float32(np.sum(prediction == y_test))/np.float32(prediction.shape[0])

    '''Predict on the test set'''
    test_data = pd.read_csv(r"data/test.csv")
    process_test_data = pre_processData(test_data, 'process_test_data')  # preprocess the data
    test_data = process_test_data.filter(regex='Age|SibSp|Parch|Fare|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
    test_np = test_data.as_matrix()
    predict = model.predict(test_np)
    result = pd.DataFrame(data={'PassengerId': process_test_data['PassengerId'].as_matrix(), 'Survived': predict.astype(np.int32)})
    result.to_csv(r'baseline_logisticRegression_result/prediction.csv', index=False)
    #clf = linear_model.LogisticRegression(C=1.0,tol=1e-6)
    #print cross_validation.cross_val_score(clf, X,y,cv=5)

# baseline SVM model: 0.78947