Python sklearn.cross_validation.cross_val_score() Examples

The following are 30 code examples of sklearn.cross_validation.cross_val_score(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.cross_validation , or try the search function .
Example #1
Source File: analysis.py    From smallrnaseq with GNU General Public License v3.0 7 votes vote down vote up
def classify(X, y, cl, name=''):
    """Classification using gene features"""

    from sklearn.metrics import classification_report, accuracy_score
    np.random.seed()
    ind = np.random.permutation(len(X))

    from sklearn.cross_validation import train_test_split
    Xtrain, Xtest, ytrain, ytest  = train_test_split(X, y, test_size=0.4)
    #print X
    cl.fit(Xtrain, ytrain)
    ypred = cl.predict(Xtest)

    print (classification_report(ytest, ypred))
    #print accuracy_score(ytest, ypred)
    from sklearn import cross_validation
    yl = pd.Categorical(y).labels
    sc = cross_validation.cross_val_score(cl, X, yl, scoring='roc_auc', cv=5)
    print("AUC: %0.2f (+/- %0.2f)" % (sc.mean(), sc.std() * 2))
    return cl 
Example #2
Source File: test_cross_validation.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_cross_val_score_multilabel():
    X = np.array([[-3, 4], [2, 4], [3, 3], [0, 2], [-3, 1],
                  [-2, 1], [0, 0], [-2, -1], [-1, -2], [1, -2]])
    y = np.array([[1, 1], [0, 1], [0, 1], [0, 1], [1, 1],
                  [0, 1], [1, 0], [1, 1], [1, 0], [0, 0]])
    clf = KNeighborsClassifier(n_neighbors=1)
    scoring_micro = make_scorer(precision_score, average='micro')
    scoring_macro = make_scorer(precision_score, average='macro')
    scoring_samples = make_scorer(precision_score, average='samples')
    score_micro = cval.cross_val_score(clf, X, y, scoring=scoring_micro, cv=5)
    score_macro = cval.cross_val_score(clf, X, y, scoring=scoring_macro, cv=5)
    score_samples = cval.cross_val_score(clf, X, y,
                                         scoring=scoring_samples, cv=5)
    assert_almost_equal(score_micro, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 3])
    assert_almost_equal(score_macro, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 4])
    assert_almost_equal(score_samples, [1, 1 / 2, 3 / 4, 1 / 2, 1 / 4]) 
Example #3
Source File: test_cross_validation.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_cross_val_score_with_score_func_regression():
    X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
                           random_state=0)
    reg = Ridge()

    # Default score of the Ridge regression estimator
    scores = cval.cross_val_score(reg, X, y, cv=5)
    assert_array_almost_equal(scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # R2 score (aka. determination coefficient) - should be the
    # same as the default estimator score
    r2_scores = cval.cross_val_score(reg, X, y, scoring="r2", cv=5)
    assert_array_almost_equal(r2_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # Mean squared error; this is a loss function, so "scores" are negative
    neg_mse_scores = cval.cross_val_score(reg, X, y, cv=5,
                                          scoring="neg_mean_squared_error")
    expected_neg_mse = np.array([-763.07, -553.16, -274.38, -273.26, -1681.99])
    assert_array_almost_equal(neg_mse_scores, expected_neg_mse, 2)

    # Explained variance
    scoring = make_scorer(explained_variance_score)
    ev_scores = cval.cross_val_score(reg, X, y, cv=5, scoring=scoring)
    assert_array_almost_equal(ev_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2) 
Example #4
Source File: test_cross_validation.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_cross_val_score_with_score_func_classification():
    iris = load_iris()
    clf = SVC(kernel='linear')

    # Default score (should be the accuracy score)
    scores = cval.cross_val_score(clf, iris.data, iris.target, cv=5)
    assert_array_almost_equal(scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # Correct classification score (aka. zero / one score) - should be the
    # same as the default estimator score
    zo_scores = cval.cross_val_score(clf, iris.data, iris.target,
                                     scoring="accuracy", cv=5)
    assert_array_almost_equal(zo_scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # F1 score (class are balanced so f1_score should be equal to zero/one
    # score
    f1_scores = cval.cross_val_score(clf, iris.data, iris.target,
                                     scoring="f1_weighted", cv=5)
    assert_array_almost_equal(f1_scores, [0.97, 1., 0.97, 0.97, 1.], 2) 
Example #5
Source File: code.py    From The_Ultimate_Student_Hunt with MIT License 6 votes vote down vote up
def run_model(model,dtrain,predictor_var,target,scoring_method='mean_squared_error'):
    cv_method = KFold(len(dtrain),5)
    cv_scores = cross_val_score(model,dtrain[predictor_var],dtrain[target],cv=cv_method,scoring=scoring_method)
    #print cv_scores, np.mean(cv_scores), np.sqrt((-1)*np.mean(cv_scores))
    
    dtrain_for_val = dtrain[dtrain['Year']<2000]
    dtest_for_val = dtrain[dtrain['Year']>1999]
    #cv_method = KFold(len(dtrain_for_val),5)
    #cv_scores_2 = cross_val_score(model,dtrain_for_val[predictor_var],dtrain_for_val[target],cv=cv_method,scoring=scoring_method)
    #print cv_scores_2, np.mean(cv_scores_2)
    
    dtrain_for_val_ini = dtrain_for_val[predictor_var]
    dtest_for_val_ini = dtest_for_val[predictor_var]
    model.fit(dtrain_for_val_ini,dtrain_for_val[target])
    pred_for_val = model.predict(dtest_for_val_ini)
        
    #print math.sqrt(mean_squared_error(dtest_for_val['Footfall'],pred_for_val)) 
Example #6
Source File: test_cross_validation.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_cross_val_score_precomputed():
    # test for svm with precomputed kernel
    svm = SVC(kernel="precomputed")
    iris = load_iris()
    X, y = iris.data, iris.target
    linear_kernel = np.dot(X, X.T)
    score_precomputed = cval.cross_val_score(svm, linear_kernel, y)
    svm = SVC(kernel="linear")
    score_linear = cval.cross_val_score(svm, X, y)
    assert_array_equal(score_precomputed, score_linear)

    # Error raised for non-square X
    svm = SVC(kernel="precomputed")
    assert_raises(ValueError, cval.cross_val_score, svm, X, y)

    # test error is raised when the precomputed kernel is not array-like
    # or sparse
    assert_raises(ValueError, cval.cross_val_score, svm,
                  linear_kernel.tolist(), y) 
Example #7
Source File: test_cross_validation.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_cross_val_score_mask():
    # test that cross_val_score works with boolean masks
    svm = SVC(kernel="linear")
    iris = load_iris()
    X, y = iris.data, iris.target
    cv_indices = cval.KFold(len(y), 5)
    scores_indices = cval.cross_val_score(svm, X, y, cv=cv_indices)
    cv_indices = cval.KFold(len(y), 5)
    cv_masks = []
    for train, test in cv_indices:
        mask_train = np.zeros(len(y), dtype=np.bool)
        mask_test = np.zeros(len(y), dtype=np.bool)
        mask_train[train] = 1
        mask_test[test] = 1
        cv_masks.append((train, test))
    scores_masks = cval.cross_val_score(svm, X, y, cv=cv_masks)
    assert_array_equal(scores_indices, scores_masks) 
Example #8
Source File: OutPutRes.py    From ProFET with GNU General Public License v3.0 5 votes vote down vote up
def CV_multi_stats(X, y, model,n=6) :
    '''
    http://scikit-learn.org/stable/modules/model_evaluation.html#classification-metrics
    This version uses multiclass (or multilabel) compatible metrics.

    May be expanded to use the cross_val_score helper function:
    http://scikit-learn.org/stable/modules/generated/sklearn.cross_validation.cross_val_score.html
    http://scikit-learn.org/stable/modules/cross_validation.html#computing-cross-validated-metrics
    '''

    scores = cross_val_score(estimator=model, X=X, y=y, cv=StratifiedShuffleSplit(y, n_iter=n, test_size=0.16), n_jobs=-1) #Accuracy
    scores_f1 = cross_val_score(estimator=model, X=X, y=y, cv=StratifiedShuffleSplit(y, n_iter=n, test_size=0.16), n_jobs=-1, scoring='f1')
    print("Model Accuracy: %0.3f (+- %0.2f)" % (scores.mean(), scores.std() * 2))
    print("Model f1: %0.3f (+- %0.2f)" % (scores_f1.mean(), scores_f1.std() * 2))
    return (scores.mean(), scores.std() ,scores_f1.mean(), scores_f1.std() ) #Removed * 2 from returned STD .. ? 
Example #9
Source File: OutPutRes.py    From ProFET with GNU General Public License v3.0 5 votes vote down vote up
def PlotPerfPercentFeatures(X,y,est=LinearSVC()):
    '''
    Performance of a classifier (default: SVM-Anova)
    varying the percentile of features selected (F-test) .

    http://scikit-learn.org/stable/auto_examples/svm/plot_svm_anova.html#example-svm-plot-svm-anova-py
    '''
    transform = SelectPercentile(f_classif)

    clf = Pipeline([('anova', transform), ('est', est)])
    ###############################################################################
    # Plot the cross-validation score as a function of percentile of features
    score_means = list()
    score_stds = list()
    percentiles = (1,2,3,5,7,10,13,15,20,25,33,50,65,75,90, 100)
    # percentiles = (1,5,10,25,50,75,90)

    for percentile in percentiles:
        # print(percentile)
        clf.set_params(anova__percentile=percentile)
        this_scores = cross_val_score(clf, X, y,cv=StratifiedShuffleSplit(y, n_iter=7, test_size=0.3), n_jobs=-1)
        score_means.append(this_scores.mean())
        score_stds.append(this_scores.std())
    print("Outputting Graph:")

    plt.errorbar(percentiles, score_means, np.array(score_stds))

    plt.title(
        'Predictor Performance, varying percent of features used')
    plt.xlabel('Percentile')
    plt.ylabel('Prediction Performance')
    plt.axis('tight')
    plt.show() 
Example #10
Source File: VisualizeBestFeatures.py    From ProFET with GNU General Public License v3.0 5 votes vote down vote up
def PlotPerfPercentFeatures(X,y,est=LinearSVC()):
    '''
    Display performance of a classifier (default: SVM),
    varying the percentile of features retained (F-test) .

    http://scikit-learn.org/stable/auto_examples/svm/plot_svm_anova.html#example-svm-plot-svm-anova-py
    '''
    transform = SelectPercentile(f_classif)

    clf = Pipeline([('anova', transform), ('est', est)])
    ###############################################################################
    # Plot the cross-validation score as a function of percentile of features
    score_means = list()
    score_stds = list()
    percentiles = (1,2,3,5,7,10,15,20,25,33,50,66,75,90, 100)
    # percentiles = (1,5,10,25,50,75,90)

    for percentile in percentiles:
        # print(percentile)
        clf.set_params(anova__percentile=percentile)
        this_scores = cross_val_score(clf, X, y,cv=StratifiedShuffleSplit(y, n_iter=5, test_size=0.4), n_jobs=-1)
        score_means.append(this_scores.mean())
        score_stds.append(this_scores.std())
    print("Outputting Graph:")

    plt.errorbar(percentiles, score_means, np.array(score_stds))

    plt.title(
        'Predictor Performance, varying percent of features used')
    plt.xlabel('Percentile')
    plt.ylabel('Prediction Performance')
    plt.axis('tight')
    plt.show() 
Example #11
Source File: Model_Parameters_CV.py    From ProFET with GNU General Public License v3.0 5 votes vote down vote up
def plot_BestKFeatures (X_train, y_train):
    '''
    http://nbviewer.ipython.org/github/gmonce/scikit-learn-book/blob/master/Chapter%204%20-%20Advanced%20Features%20-%20Feature%20Engineering%20and%20Selection.ipynb
    Find the best percentile of features to use,
    using cross-validation on the training set and get K best feats
    '''
    from sklearn import cross_validation
    from sklearn import feature_selection
    from sklearn import tree
    dt = tree.DecisionTreeClassifier(criterion='entropy')
    dt = RandomForestClassifier(n_jobs=2, bootstrap=True, n_estimators=250, criterion='gini')
    dt = dt.fit(X_train, y_train)

    percentiles = range(1, 95, 5)
    results = []
    for i in range(1, 95, 5):
        fs = feature_selection.SelectPercentile(feature_selection.chi2, percentile=i) #Original
        fs = feature_selection.SelectPercentile(feature_selection.f_classif, percentile=i) # alt
        X_train_fs = fs.fit_transform(X_train, y_train)
        scores = cross_validation.cross_val_score(dt, X_train_fs, y_train, cv=4)
        #print i,scores.mean()
        results = np.append(results, scores.mean())

    optimal_percentil = np.where(results == results.max())[0]
    print (("Optimal number of features:{0}".format(percentiles[optimal_percentil])), "\n")

    # Plot number of features VS. cross-validation scores
    import pylab as pl
    import matplotlib.pylab as pl
    pl.figure()
    pl.xlabel("Number of features selected")
    pl.ylabel("Cross validation accuracy)")
    pl.plot(percentiles,results)
    print ("Mean scores:",results)
    return 
Example #12
Source File: simulation.py    From jstsp2015 with MIT License 5 votes vote down vote up
def compute_svm_score(K, y, n_folds, scoring='accuracy', random_state=0):
    cv = StratifiedKFold(y, n_folds=n_folds, shuffle=True,
                         random_state=random_state)
    clf = SVC(C=1.0, kernel='precomputed')
    scores = cross_val_score(clf, K, y, scoring=scoring, cv=cv, n_jobs=1)
    score = scores.mean()
    return score 
Example #13
Source File: classif_and_ktst.py    From jstsp2015 with MIT License 5 votes vote down vote up
def compute_svm_cv(K, y, C=100.0, n_folds=5,
                   scoring=balanced_accuracy_scoring):
    """Compute cross-validated score of SVM with given precomputed kernel.
    """
    cv = StratifiedKFold(y, n_folds=n_folds)
    clf = SVC(C=C, kernel='precomputed', class_weight='auto')
    scores = cross_val_score(clf, K, y,
                             scoring=scoring, cv=cv)
    return scores.mean() 
Example #14
Source File: test_cross_validation.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_cross_val_score():
    clf = MockClassifier()
    for a in range(-10, 10):
        clf.a = a
        # Smoke test
        scores = cval.cross_val_score(clf, X, y)
        assert_array_equal(scores, clf.score(X, y))

        # test with multioutput y
        scores = cval.cross_val_score(clf, X_sparse, X)
        assert_array_equal(scores, clf.score(X_sparse, X))

        scores = cval.cross_val_score(clf, X_sparse, y)
        assert_array_equal(scores, clf.score(X_sparse, y))

        # test with multioutput y
        scores = cval.cross_val_score(clf, X_sparse, X)
        assert_array_equal(scores, clf.score(X_sparse, X))

    # test with X and y as list
    list_check = lambda x: isinstance(x, list)
    clf = CheckingClassifier(check_X=list_check)
    scores = cval.cross_val_score(clf, X.tolist(), y.tolist())

    clf = CheckingClassifier(check_y=list_check)
    scores = cval.cross_val_score(clf, X, y.tolist())

    assert_raises(ValueError, cval.cross_val_score, clf, X, y,
                  scoring="sklearn")

    # test with 3d X and
    X_3d = X[:, :, np.newaxis]
    clf = MockClassifier(allow_nd=True)
    scores = cval.cross_val_score(clf, X_3d, y)

    clf = MockClassifier(allow_nd=False)
    assert_raises(ValueError, cval.cross_val_score, clf, X_3d, y) 
Example #15
Source File: test_cross_validation.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_cross_val_score_pandas():
    # check cross_val_score doesn't destroy pandas dataframe
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((Series, DataFrame))
    except ImportError:
        pass
    for TargetType, InputFeatureType in types:
        # X dataframe, y series
        X_df, y_ser = InputFeatureType(X), TargetType(y)
        check_df = lambda x: isinstance(x, InputFeatureType)
        check_series = lambda x: isinstance(x, TargetType)
        clf = CheckingClassifier(check_X=check_df, check_y=check_series)
        cval.cross_val_score(clf, X_df, y_ser) 
Example #16
Source File: test_cross_validation.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_cross_val_score_score_func():
    clf = MockClassifier()
    _score_func_args = []

    def score_func(y_test, y_predict):
        _score_func_args.append((y_test, y_predict))
        return 1.0

    with warnings.catch_warnings(record=True):
        scoring = make_scorer(score_func)
        score = cval.cross_val_score(clf, X, y, scoring=scoring)
    assert_array_equal(score, [1.0, 1.0, 1.0])
    assert len(_score_func_args) == 3 
Example #17
Source File: test_cross_validation.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_cross_val_score_errors():
    class BrokenEstimator:
        pass

    assert_raises(TypeError, cval.cross_val_score, BrokenEstimator(), X) 
Example #18
Source File: test_cross_validation.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def train_test_split_pandas():
    # check cross_val_score doesn't destroy pandas dataframe
    types = [MockDataFrame]
    try:
        from pandas import DataFrame
        types.append(DataFrame)
    except ImportError:
        pass
    for InputFeatureType in types:
        # X dataframe
        X_df = InputFeatureType(X)
        X_train, X_test = cval.train_test_split(X_df)
        assert_true(isinstance(X_train, InputFeatureType))
        assert_true(isinstance(X_test, InputFeatureType)) 
Example #19
Source File: test_cross_validation.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_cross_val_predict_pandas():
    # check cross_val_score doesn't destroy pandas dataframe
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((Series, DataFrame))
    except ImportError:
        pass
    for TargetType, InputFeatureType in types:
        # X dataframe, y series
        X_df, y_ser = InputFeatureType(X), TargetType(y)
        check_df = lambda x: isinstance(x, InputFeatureType)
        check_series = lambda x: isinstance(x, TargetType)
        clf = CheckingClassifier(check_X=check_df, check_y=check_series)
        cval.cross_val_predict(clf, X_df, y_ser) 
Example #20
Source File: test_cross_validation.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_sparse_fit_params():
    iris = load_iris()
    X, y = iris.data, iris.target
    clf = MockClassifier()
    fit_params = {'sparse_sample_weight': coo_matrix(np.eye(X.shape[0]))}
    a = cval.cross_val_score(clf, X, y, fit_params=fit_params)
    assert_array_equal(a, np.ones(3)) 
Example #21
Source File: solution.py    From Kaggle with MIT License 5 votes vote down vote up
def optimize_logisticRegression():
    train_data = pd.read_csv(r"data/train.csv")
    print u"数据信息:\n",train_data.info()
    print u'数据描述:\n',train_data.describe()  
    #display_data(train_data)  # 简单显示数据信息
    #display_with_process(train_data) # 根据数据的理解,简单处理一下数据显示,验证猜想
    process_data = fe_preprocessData(train_data,'process_train_data')  # 数据预处理,要训练的数据
    train_data = process_data.filter(regex='Survived|Age|SibSp|Parch|Fare|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')  # 使用正则抽取想要的数据
    train_np = train_data.as_matrix()  # 转为矩阵
    '''训练model'''
    X = train_np[:,1:]
    y = train_np[:,0]
    #=X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)
    #=model = linear_model.LogisticRegression(C=1.0,tol=1e-6).fit(X_train,y_train)
    model = linear_model.LogisticRegression(C=1.0,tol=1e-6).fit(X,y)
    print pd.DataFrame({"columns":list(train_data.columns)[1:],"coef_":list(model.coef_.T)})

    '''测试集上预测'''
    test_data = pd.read_csv(r"data/test.csv")
    process_test_data = fe_preprocessData(test_data,'process_test_data')  # 预处理数据
    test_data = process_test_data.filter(regex='Age|SibSp|Parch|Fare|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
    test_np = test_data.as_matrix()
    predict = model.predict(test_np)
    result = pd.DataFrame(data={'PassengerId':process_test_data['PassengerId'].as_matrix(),'Survived':predict.astype(np.int32)})
    result.to_csv(r'optimize_logisticRegression_result/prediction.csv',index=False)
    #clf = linear_model.LogisticRegression(C=1.0,tol=1e-6)
    #print cross_validation.cross_val_score(clf, X,y,cv=5)    
## 两项映射为多项式 
Example #22
Source File: ml.py    From EDeN with MIT License 5 votes vote down vote up
def estimate_model(positive_data_matrix=None,
                   negative_data_matrix=None,
                   target=None,
                   estimator=None,
                   n_jobs=4):
    """estimate_model."""
    X, y = make_data_matrix(positive_data_matrix=positive_data_matrix,
                            negative_data_matrix=negative_data_matrix,
                            target=target)
    logger.info('Test set')
    logger.info(describe(X))
    logger.info('-' * 80)
    logger.info('Test Estimate')
    predictions = estimator.predict(X)
    margins = estimator.decision_function(X)
    logger.info(classification_report(y, predictions))
    apr = average_precision_score(y, margins)
    logger.info('APR: %.3f' % apr)
    roc = roc_auc_score(y, margins)
    logger.info('ROC: %.3f' % roc)

    logger.info('Cross-validated estimate')
    scoring_strings = ['accuracy', 'precision', 'recall', 'f1',
                       'average_precision', 'roc_auc']
    for scoring in scoring_strings:
        scores = cross_validation.cross_val_score(
            estimator, X, y, cv=5,
            scoring=scoring, n_jobs=n_jobs)
        logger.info('%20s: %.3f +- %.3f' % (scoring,
                                            np.mean(scores),
                                            np.std(scores)))

    return roc, apr 
Example #23
Source File: Train Classifier and Test Video Feed.py    From Emotion-Recognition-Using-SVMs with MIT License 5 votes vote down vote up
def evaluate_cross_validation(clf, X, y, K):
    # create a k-fold cross validation iterator
    cv = KFold(len(y), K, shuffle=True, random_state=0)
    # by default the score used is the one returned by score method of the estimator (accuracy)
    scores = cross_val_score(clf, X, y, cv=cv)
    print "Scores: ", (scores)
    print ("Mean score: {0:.3f} (+/-{1:.3f})".format(np.mean(scores), sem(scores)))


# Confusion Matrix and Results 
Example #24
Source File: titanic.py    From MachineLearning with Apache License 2.0 5 votes vote down vote up
def compute_score(clf, X, y,scoring='accuracy'):
    xval = cross_val_score(clf, X, y, cv = 5,scoring=scoring)
    return np.mean(xval) 
Example #25
Source File: image-classification.py    From Building-Machine-Learning-Systems-With-Python-Second-Edition with MIT License 5 votes vote down vote up
def accuracy(features, labels):
    from sklearn.linear_model import LogisticRegression
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn import cross_validation
    # We use logistic regression because it is very fast.
    # Feel free to experiment with other classifiers
    clf = Pipeline([('preproc', StandardScaler()),
                ('classifier', LogisticRegression())])
    cv = cross_validation.LeaveOneOut(len(features))
    scores = cross_validation.cross_val_score(
        clf, features, labels, cv=cv)
    return scores.mean() 
Example #26
Source File: rank_tags.py    From TGIF-Release with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def stump(X, y):
    score = cross_val_score(LinearSVC(), X, y, cv = 5, n_jobs=5, scoring = 'average_precision')
    clf = LinearSVC()
    clf.fit(X, y)
    coef = clf.coef_[0,0]
    inter = clf.intercept_[0]
    return np.mean(score), np.sign(coef), inter / np.abs(coef) 
Example #27
Source File: sandpit.py    From automl-phase-2 with MIT License 5 votes vote down vote up
def _f(x):
        # iris = load_iris()
        X, y = X, y = make_hastie_10_2(random_state=0)
        x = np.ravel(x)
        f = np.zeros(x.shape)
        for i in range(f.size):
            clf = RandomForestClassifier(n_estimators=1, min_samples_leaf=int(np.round(x[i])), random_state=0)
            # scores = cross_val_score(clf, iris.data, iris.target)
            scores = cross_val_score(clf, X, y, cv=5)
            f[i] = -scores.mean()
        return f.ravel() 
Example #28
Source File: scorer.py    From scan with GNU Affero General Public License v3.0 5 votes vote down vote up
def train(self):
        feats = self.get_features()
        scores = np.array(self.scores)

        # Compute error metrics for the estimator.
        self.cv_scores = cross_validation.cross_val_score(self.classifier, feats, scores)
        self.cv_score = self.cv_scores.mean()
        self.cv_dev = self.cv_scores.std()

        self.classifier.fit(feats, scores)
        self.fit_done = True 
Example #29
Source File: sklearnbasemodel.py    From Supply-demand-forecasting with MIT License 5 votes vote down vote up
def run_croos_validation(self):
        features,labels,cv = self.getFeaturesLabel()
        scores = cross_validation.cross_val_score(self.clf, features, labels, cv=cv, scoring=mean_absolute_percentage_error_scoring, n_jobs = -1)
        print "cross validation scores: means, {}, std, {}, details,{}".format(np.absolute(scores.mean()), scores.std(), np.absolute(scores))
        return -np.absolute(scores.mean()) 
Example #30
Source File: solution.py    From Kaggle with MIT License 5 votes vote down vote up
def baseline_logisticRegression():
    train_data = pd.read_csv(r"data/train.csv")
    #print u"数据信息:\n",train_data.info()
    #print u'数据描述:\n',train_data.describe()  
    #display_data(train_data)  # 简单显示数据信息
    #display_with_process(train_data) # 根据数据的理解,简单处理一下数据显示,验证猜想
    process_data = pre_processData(train_data,'process_train_data')  # 数据预处理,要训练的数据
    train_data = process_data.filter(regex='Survived|Age|SibSp|Parch|Fare|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')  # 使用正则抽取想要的数据
    train_np = train_data.as_matrix()  # 转为矩阵
    '''训练model'''
    X = train_np[:,1:]
    y = train_np[:,0]
    #=X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)
    #=model = linear_model.LogisticRegression(C=1.0,tol=1e-6).fit(X_train,y_train)
    model = linear_model.LogisticRegression(C=1.0,tol=1e-6).fit(X,y)
    print pd.DataFrame({"columns":list(train_data.columns)[1:],"coef_":list(model.coef_.T)})
    #=prediction = model.predict(X_test)
    #=cv_error = pd.DataFrame(data=list(X_test[np.where(prediction!=y_test)]),columns=list(train_data.columns)[1:])
    #=cv_error.to_csv(r'error.csv',index=True)
    #=print np.float32(np.sum(prediction == y_test))/np.float32(prediction.shape[0])
    
    '''测试集上预测'''
    test_data = pd.read_csv(r"data/test.csv")
    process_test_data = pre_processData(test_data,'process_test_data')  # 预处理数据
    test_data = process_test_data.filter(regex='Age|SibSp|Parch|Fare|Cabin_.*|Embarked_.*|Sex_.*|Pclass_.*')
    test_np = test_data.as_matrix()
    predict = model.predict(test_np)
    result = pd.DataFrame(data={'PassengerId':process_test_data['PassengerId'].as_matrix(),'Survived':predict.astype(np.int32)})
    result.to_csv(r'baseline_logisticRegression_result/prediction.csv',index=False)
    #clf = linear_model.LogisticRegression(C=1.0,tol=1e-6)
    #print cross_validation.cross_val_score(clf, X,y,cv=5)


# baseline:SVM模型——0.78947