Python sklearn.metrics.log_loss() Examples
The following are 30 code examples of sklearn.metrics.log_loss(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.metrics, or try the search function.
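Before the project examples, here is a minimal, self-contained sketch of a typical log_loss call; the array values below are invented purely for illustration:

from sklearn.metrics import log_loss

# True binary labels and predicted probabilities of the positive class
# (values invented for illustration)
y_true = [0, 1, 1, 0]
y_prob = [0.1, 0.9, 0.8, 0.3]

print(log_loss(y_true, y_prob))  # lower is better; perfect predictions give 0.0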
Example #1
Source File: multi_class_classification.py From edge2vec with BSD 3-Clause "New" or "Revised" License | 11 votes |
def multi_class_classification(data_X, data_Y):
    '''
    calculate multi-class classification and return related evaluation metrics
    '''
    svc = svm.SVC(C=1, kernel='linear')
    # X_train, X_test, y_train, y_test = train_test_split(data_X, data_Y, test_size=0.4, random_state=0)
    clf = svc.fit(data_X, data_Y)  # svm
    # array = svc.coef_
    # print array
    predicted = cross_val_predict(clf, data_X, data_Y, cv=2)
    print "accuracy", metrics.accuracy_score(data_Y, predicted)
    print "f1 score macro", metrics.f1_score(data_Y, predicted, average='macro')
    print "f1 score micro", metrics.f1_score(data_Y, predicted, average='micro')
    print "precision score", metrics.precision_score(data_Y, predicted, average='macro')
    print "recall score", metrics.recall_score(data_Y, predicted, average='macro')
    print "hamming_loss", metrics.hamming_loss(data_Y, predicted)
    print "classification_report", metrics.classification_report(data_Y, predicted)
    print "jaccard_similarity_score", metrics.jaccard_similarity_score(data_Y, predicted)
    # print "log_loss", metrics.log_loss(data_Y, predicted)
    print "zero_one_loss", metrics.zero_one_loss(data_Y, predicted)
    # print "AUC&ROC", metrics.roc_auc_score(data_Y, predicted)
    # print "matthews_corrcoef", metrics.matthews_corrcoef(data_Y, predicted)
Example #2
Source File: link_prediction.py From edge2vec with BSD 3-Clause "New" or "Revised" License | 7 votes |
def evaluation_analysis(true_label, predicted):
    '''
    return all metrics results
    '''
    print "accuracy", metrics.accuracy_score(true_label, predicted)
    print "f1 score macro", metrics.f1_score(true_label, predicted, average='macro')
    print "f1 score micro", metrics.f1_score(true_label, predicted, average='micro')
    print "precision score", metrics.precision_score(true_label, predicted, average='macro')
    print "recall score", metrics.recall_score(true_label, predicted, average='macro')
    print "hamming_loss", metrics.hamming_loss(true_label, predicted)
    print "classification_report", metrics.classification_report(true_label, predicted)
    print "jaccard_similarity_score", metrics.jaccard_similarity_score(true_label, predicted)
    print "log_loss", metrics.log_loss(true_label, predicted)
    print "zero_one_loss", metrics.zero_one_loss(true_label, predicted)
    print "AUC&ROC", metrics.roc_auc_score(true_label, predicted)
    print "matthews_corrcoef", metrics.matthews_corrcoef(true_label, predicted)
Example #3
Source File: test_logistic.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_warm_start_converge_LR():
    # Test to see that the logistic regression converges on warm start,
    # with multi_class='multinomial'. Non-regressive test for #10836
    rng = np.random.RandomState(0)
    X = np.concatenate((rng.randn(100, 2) + [1, 1], rng.randn(100, 2)))
    y = np.array([1] * 100 + [-1] * 100)

    lr_no_ws = LogisticRegression(multi_class='multinomial',
                                  solver='sag', warm_start=False,
                                  random_state=0)
    lr_ws = LogisticRegression(multi_class='multinomial',
                               solver='sag', warm_start=True,
                               random_state=0)

    lr_no_ws_loss = log_loss(y, lr_no_ws.fit(X, y).predict_proba(X))
    for i in range(5):
        lr_ws.fit(X, y)
    lr_ws_loss = log_loss(y, lr_ws.predict_proba(X))
    assert_allclose(lr_no_ws_loss, lr_ws_loss, rtol=1e-5)
Example #4
Source File: test_logistic.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_logreg_predict_proba_multinomial():
    X, y = make_classification(n_samples=10, n_features=20, random_state=0,
                               n_classes=3, n_informative=10)

    # Predicted probabilities using the true-entropy loss should give a
    # smaller loss than those using the ovr method.
    clf_multi = LogisticRegression(multi_class="multinomial", solver="lbfgs")
    clf_multi.fit(X, y)
    clf_multi_loss = log_loss(y, clf_multi.predict_proba(X))
    clf_ovr = LogisticRegression(multi_class="ovr", solver="lbfgs")
    clf_ovr.fit(X, y)
    clf_ovr_loss = log_loss(y, clf_ovr.predict_proba(X))
    assert_greater(clf_ovr_loss, clf_multi_loss)

    # Predicted probabilities using the soft-max function should give a
    # smaller loss than those using the logistic function.
    clf_multi_loss = log_loss(y, clf_multi.predict_proba(X))
    clf_wrong_loss = log_loss(y, clf_multi._predict_proba_lr(X))
    assert_greater(clf_wrong_loss, clf_multi_loss)
Example #5
Source File: base.py From stacking with MIT License | 6 votes |
def eval_pred(y_true, y_pred, eval_type):
    if eval_type == 'logloss':  # add new eval_type branches here
        loss = ll(y_true, y_pred)
        print "logloss: ", loss
        return loss

    elif eval_type == 'auc':
        loss = AUC(y_true, y_pred)
        print "AUC: ", loss
        return loss

    elif eval_type == 'rmse':
        loss = np.sqrt(mean_squared_error(y_true, y_pred))
        print "rmse: ", loss
        return loss

######### BaseModel Class #########
Example #6
Source File: run_lgb.py From rosetta_recsys2019 with Apache License 2.0 | 6 votes |
def evaluate(val_df, clf):
    incorrect_session = {}
    val_df['scores'] = clf.predict(val_df.drop(data_drop_columns, axis=1))
    loss = log_loss(val_df.label.values, val_df.scores.values)
    grouped_val = val_df.groupby('session_id')
    rss_group = {i: [] for i in range(1, 26)}
    rss = []
    for session_id, group in grouped_val:
        scores = group.scores
        sorted_arg = np.flip(np.argsort(scores))
        rss.append(group['label'].values[sorted_arg])
        rss_group[len(group)].append(group['label'].values[sorted_arg])
        if group['label'].values[sorted_arg][0] != 1:
            incorrect_session[session_id] = (sorted_arg.values,
                                             group['label'].values[sorted_arg])
    mrr = compute_mean_reciprocal_rank(rss)
    mrr_group = {i: (len(rss_group[i]), compute_mean_reciprocal_rank(rss_group[i]))
                 for i in range(1, 26)}
    print(mrr_group)
    if not configuration.debug:
        pickle.dump(incorrect_session,
                    open(f'../output/{model_name}_val_incorrect_order.p', 'wb'))
    return mrr, mrr_group, loss
Example #7
Source File: utils.py From kaggle_otto with BSD 3-Clause "New" or "Revised" License | 6 votes |
def make_blender_cv(classifier, x, y, calibrate=False):
    skf = StratifiedKFold(y, n_folds=5, random_state=23)
    scores, predictions = [], None
    for train_index, test_index in skf:
        if calibrate:
            # Make training and calibration
            calibrated_classifier = CalibratedClassifierCV(classifier, method='isotonic',
                                                           cv=get_cv(y[train_index]))
            fitted_classifier = calibrated_classifier.fit(x[train_index, :], y[train_index])
        else:
            fitted_classifier = classifier.fit(x[train_index, :], y[train_index])
        preds = fitted_classifier.predict_proba(x[test_index, :])

        # Free memory
        calibrated_classifier, fitted_classifier = None, None
        gc.collect()

        scores.append(log_loss(y[test_index], preds))
        predictions = np.append(predictions, preds, axis=0) if predictions is not None else preds
    return scores, predictions
Example #8
Source File: hetero_stepwise.py From FATE with Apache License 2.0 | 6 votes |
def get_intercept_loss(self, model, data):
    y = np.array([x[1] for x in data.mapValues(lambda v: v.label).collect()])
    X = np.ones((len(y), 1))
    if model.model_name == 'HeteroLinearRegression' or model.model_name == 'HeteroPoissonRegression':
        intercept_model = LinearRegression(fit_intercept=False)
        trained_model = intercept_model.fit(X, y)
        pred = trained_model.predict(X)
        loss = metrics.mean_squared_error(y, pred) / 2
    elif model.model_name == 'HeteroLogisticRegression':
        intercept_model = LogisticRegression(penalty='l1', C=1e8,
                                             fit_intercept=False, solver='liblinear')
        trained_model = intercept_model.fit(X, y)
        pred = trained_model.predict(X)
        loss = metrics.log_loss(y, pred)
    else:
        raise ValueError("Unknown model received. Stepwise stopped.")
    self.intercept = intercept_model.intercept_
    return loss
Example #9
Source File: metrics.py From toxic_comments with MIT License | 6 votes |
def calc_metrics(y_true, y_hat, max_steps=1000):
    y_true = np.array(y_true)
    y_hat = np.array(y_hat)
    metrics = {}
    metrics['Logloss'] = float(log_loss(y_true, y_hat))
    metrics['AUC'] = roc_auc_score(y_true, y_hat)
    metrics['F1'] = []
    metrics['Precision'] = []
    metrics['Recall'] = []
    for i in range(1, max_steps):
        threshold = float(i) / max_steps
        y_tmp = y_hat > threshold
        metrics['F1'].append(f1_score(y_true, y_tmp))
        metrics['Precision'].append(precision_score(y_true, y_tmp))
        metrics['Recall'].append(recall_score(y_true, y_tmp))
    max_idx = np.argmax(metrics['F1'])
    metrics['F1'] = metrics['F1'][max_idx]
    metrics['Precision'] = metrics['Precision'][max_idx]
    metrics['Recall'] = metrics['Recall'][max_idx]
    metrics['Threshold'] = float(max_idx + 1) / max_steps
    return metrics
Example #10
Source File: ch06-01-hopt.py From kagglebook with BSD 3-Clause "New" or "Revised" License | 6 votes |
def score(params):
    # Specify the metric to be minimized for a given parameter set.
    # Concretely, train and predict with the model under the given
    # parameters and return the resulting score.

    # Cast max_depth to an integer
    params['max_depth'] = int(params['max_depth'])

    # Assumes a Model class is defined elsewhere:
    # it trains with fit and outputs predicted probabilities with predict.
    model = Model(params)
    model.fit(tr_x, tr_y, va_x, va_y)
    va_pred = model.predict(va_x)
    score = log_loss(va_y, va_pred)
    print(f'params: {params}, logloss: {score:.4f}')

    # Record the trial
    history.append((params, score))
    return {'loss': score, 'status': STATUS_OK}

# Specify the parameter space to search
Example #11
Source File: devol.py From devol with MIT License | 6 votes |
def _handle_broken_model(self, model, error):
    del model

    n = self.genome_handler.n_classes
    # assign the loss of a trivial uniform prediction
    loss = log_loss(np.concatenate(([1], np.zeros(n - 1))), np.ones(n) / n)
    accuracy = 1 / n
    gc.collect()

    if K.backend() == 'tensorflow':
        K.clear_session()
        tf.reset_default_graph()

    print('An error occurred and the model could not train:')
    print(error)
    print(('Model assigned poor score. Please ensure that your model '
           'constraints live within your computational resources.'))
    return loss, accuracy
Example #12
Source File: ch06-03-hopt_nn.py From kagglebook with BSD 3-Clause "New" or "Revised" License | 6 votes |
def score(params):
    # Define the function to minimize for a given parameter set.
    # For model parameter search, this is the score obtained by training
    # and predicting with the model under the given parameters.
    model = MLP(params)
    model.fit(tr_x, tr_y, va_x, va_y)
    va_pred = model.predict(va_x)
    score = log_loss(va_y, va_pred)
    print(f'params: {params}, logloss: {score:.4f}')

    # Record the trial
    history.append((params, score))
    return {'loss': score, 'status': STATUS_OK}

# Run the parameter search with hyperopt
Example #13
Source File: ch06-06-wrapper.py From kagglebook with BSD 3-Clause "New" or "Revised" License | 6 votes |
def evaluate(features):
    dtrain = xgb.DMatrix(tr_x[features], label=tr_y)
    dvalid = xgb.DMatrix(va_x[features], label=va_y)
    params = {'objective': 'binary:logistic', 'silent': 1, 'random_state': 71}
    num_round = 10  # in practice many more rounds are needed
    early_stopping_rounds = 3

    watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
    model = xgb.train(params, dtrain, num_round,
                      evals=watchlist,
                      early_stopping_rounds=early_stopping_rounds,
                      verbose_eval=0)
    va_pred = model.predict(dvalid)
    score = log_loss(va_y, va_pred)
    return score

# ---------------------------------
# Greedy Forward Selection
# ---------------------------------
Example #14
Source File: stack.py From kaggle-avito with MIT License | 5 votes |
def cv_method():
    tr_X, tr_y_true, te_X, te_y_true = get_train_data()

    if "nn" in args.model:
        tr_X = np.array(tr_X).astype(np.float32)
        tr_y_true = np.array(tr_y_true).astype(np.int32)
        model = get_nn_model(tr_X.shape)
        model.fit(tr_X, tr_y_true)
        write_dump("%s_model.dump" % args.model, model)
        if te_X:
            te_X = np.array(te_X).astype(np.float32)
            preds = model.predict_proba(te_X)[:, 1]
            np.savetxt("nn_preds.txt", preds)
            print log_loss(te_y_true, preds)
    elif "xgb" in args.model:
        dtrain = xgb.DMatrix(tr_X, label=tr_y_true)
        if args.predict == "cv":
            if te_X:
                dtest = xgb.DMatrix(te_X, label=te_y_true)
            param = {
                'max_depth': 3,
                'eta': 0.1,
                'silent': 1,
                'objective': 'binary:logistic',
                "eval_metric": "logloss",
                "nthread": 9,
            }
            if te_X:
                watchlist = [(dtrain, 'train'), (dtest, "eval")]
            else:
                watchlist = [(dtrain, 'train'), ]
            num_round = 132
            bst = xgb.train(param, dtrain, num_round, watchlist)
            bst.save_model("%s_model.dump" % args.model)
            if te_X:
                preds = bst.predict(dtest)
                np.savetxt("xgb_preds.txt", preds)
Example #15
Source File: negative_log_likelihood.py From ramp-workflow with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __call__(self, y_true_proba, y_proba):
    score = log_loss(y_true_proba, y_proba)
    return score
Example #16
Source File: model.py From RecommenderSystems with MIT License | 5 votes |
def evaluate(self, Xi, Xv, Xi_genre, Xv_genre, y):
    """
    :param Xi: list of list of feature indices of each sample in the dataset
    :param Xv: list of list of feature values of each sample in the dataset
    :param y: label of each sample in the dataset
    :return: metric of the evaluation
    """
    y_pred = self.predict(Xi, Xv, Xi_genre, Xv_genre)
    y_pred = np.clip(y_pred, 1e-6, 1 - 1e-6)
    return self.eval_metric(y, y_pred), log_loss(y, y_pred)
Example #17
Source File: perturb_importance.py From jh-kaggle-util with Apache License 2.0 | 5 votes |
def calculate_importance_perturb(model):
    fit_type = jhkaggle.jhkaggle_config['FIT_TYPE']
    x = jhkaggle.util.load_pandas("train-joined-{}.pkl".format(model.data_source))
    mask_test = np.array(x['fold'] == 1)
    x = x[mask_test]
    x.drop("id", axis=1, inplace=True)
    x.drop("fold", axis=1, inplace=True)
    y = x['target']
    x.drop("target", axis=1, inplace=True)
    columns = x.columns
    x = x.values

    errors = []
    for i in tqdm(range(x.shape[1])):
        hold = np.array(x[:, i])
        np.random.shuffle(x[:, i])
        pred = model.predict_model(model.model, x)
        if fit_type == jhkaggle.const.FIT_TYPE_REGRESSION:
            error = metrics.mean_squared_error(y, pred)
        else:
            error = metrics.log_loss(y, pred)
        errors.append(error)
        x[:, i] = hold

    max_error = np.max(errors)
    importance = [e / max_error for e in errors]
    data = {'name': columns, 'error': errors, 'importance': importance}
    result = pd.DataFrame(data, columns=['name', 'error', 'importance'])
    result.sort_values(by=['importance'], ascending=[0], inplace=True)
    result.reset_index(inplace=True, drop=True)
    return result
Example #18
Source File: utils.py From open-solution-toxic-comments with MIT License | 5 votes |
def multi_log_loss(y_true, y_pred):
    assert y_true.shape == y_pred.shape
    columns = y_true.shape[1]
    column_losses = []
    for i in range(0, columns):
        column_losses.append(log_loss(y_true[:, i], y_pred[:, i]))
    return np.array(column_losses).mean()
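The helper above computes one binary log loss per label column and averages them, which suits multi-label targets like the toxic-comments task. A minimal usage sketch with invented arrays (each column must contain both classes, or log_loss needs an explicit labels argument):

import numpy as np
from sklearn.metrics import log_loss

# four samples, three independent binary labels (values invented for illustration)
y_true = np.array([[1, 0, 1],
                   [0, 1, 0],
                   [1, 1, 0],
                   [0, 0, 1]])
y_pred = np.array([[0.9, 0.2, 0.7],
                   [0.2, 0.8, 0.3],
                   [0.7, 0.6, 0.2],
                   [0.1, 0.3, 0.8]])

column_losses = [log_loss(y_true[:, i], y_pred[:, i])
                 for i in range(y_true.shape[1])]
print(np.mean(column_losses))  # mean column-wise binary log loss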
Example #19
Source File: util.py From jh-kaggle-util with Apache License 2.0 | 5 votes |
def _run_single(self):
    print("Training data: X_train: {}, Y_train: {}, X_test: {}".format(
        self.x_train.shape, len(self.y_train), self.x_submit.shape))
    self.model = self.train_model(self.x_train, self.y_train, None, None)
    # if not self.run_single_fold:
    #     self.preds_oos = self.predict_model(self.model, self.x_train)
    #     score = 0  # log_loss(fold_y_valid, self.preds_oos)
    #     self.final_preds_train = self.preds_oos
    self.final_preds_submit = self.predict_model(self.model, self.x_submit)
    self.pred_denom = 1
Example #20
Source File: util.py From jh-kaggle-util with Apache License 2.0 | 5 votes |
def model_score(y_pred, y_valid):
    final_eval = jhkaggle.jhkaggle_config['FINAL_EVAL']
    if final_eval == jhkaggle.const.EVAL_R2:
        return r2_score(y_valid, y_pred)
    elif final_eval == jhkaggle.const.EVAL_LOGLOSS:
        return log_loss(y_valid, y_pred)
    elif final_eval == jhkaggle.const.EVAL_AUC:
        fpr, tpr, thresholds = roc_curve(y_valid, y_pred, pos_label=1)
        return auc(fpr, tpr)
    else:
        raise Exception(f"Unknown FINAL_EVAL: {final_eval}")
Example #21
Source File: runner.py From kagglebook with BSD 3-Clause "New" or "Revised" License | 5 votes |
def train_fold(self, i_fold: Union[int, str]) -> Tuple[
        Model, Optional[np.array], Optional[np.array], Optional[float]]:
    """Train and evaluate on the specified cross-validation fold.

    Called from other methods, and also usable on its own for checks
    and parameter tuning.

    :param i_fold: fold number ('all' to use all training data)
    :return: tuple of (model instance, record indices, predictions, evaluation score)
    """
    # Load the training data
    validation = i_fold != 'all'
    train_x = self.load_x_train()
    train_y = self.load_y_train()

    if validation:
        # Set up the training and validation data
        tr_idx, va_idx = self.load_index_fold(i_fold)
        tr_x, tr_y = train_x.iloc[tr_idx], train_y.iloc[tr_idx]
        va_x, va_y = train_x.iloc[va_idx], train_y.iloc[va_idx]

        # Train
        model = self.build_model(i_fold)
        model.train(tr_x, tr_y, va_x, va_y)

        # Predict on and evaluate against the validation data
        va_pred = model.predict(va_x)
        score = log_loss(va_y, va_pred, eps=1e-15, normalize=True)

        # Return the model, indices, predictions, and score
        return model, va_idx, va_pred, score
    else:
        # Train on all the training data
        model = self.build_model(i_fold)
        model.train(train_x, train_y)

        # Return the model
        return model, None, None, None
Example #22
Source File: validate_sorted.py From adversarial-validation with MIT License | 5 votes |
def train_and_evaluate(y_train, x_train, y_val, x_val):
    lr = LR()
    lr.fit(x_train, y_train)

    p = lr.predict_proba(x_val)
    p_bin = lr.predict(x_val)

    acc = accuracy(y_val, p_bin)
    auc = AUC(y_val, p[:, 1])
    ll = log_loss(y_val, p[:, 1])
    return (auc, acc, ll)
Example #23
Source File: average.py From fnc-1 with Apache License 2.0 | 5 votes |
def stack_cv(param):
    # x_meta, y_meta = load_data()
    sumw = param['w0'] + param['w1']
    pred_agree = (x_meta[:, 0] * param['w0'] + x_meta[:, 4] * param['w1']) / sumw
    pred_disagree = (x_meta[:, 1] * param['w0'] + x_meta[:, 5] * param['w1']) / sumw
    pred_discuss = (x_meta[:, 2] * param['w0'] + x_meta[:, 6] * param['w1']) / sumw
    pred_unrelated = (x_meta[:, 3] * param['w0'] + x_meta[:, 7] * param['w1']) / sumw
    pred_y = np.hstack([pred_agree.reshape((-1, 1)),
                        pred_disagree.reshape((-1, 1)),
                        pred_discuss.reshape((-1, 1)),
                        pred_unrelated.reshape((-1, 1))])
    print 'pred_agree.shape:'
    print pred_agree.shape
    print 'pred_disagree.shape:'
    print pred_disagree.shape
    print 'pred_discuss.shape:'
    print pred_discuss.shape
    print 'pred_unrelated.shape:'
    print pred_unrelated.shape
    print 'pred_y.shape:'
    print pred_y.shape
    print 'y_meta.shape:'
    print y_meta.shape
    pred_y_label = np.argmax(pred_y, axis=1)
    predicted = [LABELS[int(a)] for a in pred_y_label]
    actual = [LABELS[int(a)] for a in y_meta]
    score, _ = score_submission(actual, predicted)
    s_perf, _ = score_submission(actual, actual)
    cost = float(score) / s_perf
    # cost = log_loss(y_meta, pred_y, labels=[0, 1, 2, 3])
    return -1.0 * cost
Example #24
Source File: test_basic.py From ngboost with Apache License 2.0 | 5 votes |
def test_classification():
    from sklearn.datasets import load_breast_cancer
    from sklearn.metrics import roc_auc_score, log_loss

    data, target = load_breast_cancer(True)
    x_train, x_test, y_train, y_test = train_test_split(
        data, target, test_size=0.2, random_state=42
    )

    ngb = NGBClassifier(Dist=Bernoulli, verbose=False)
    ngb.fit(x_train, y_train)

    preds = ngb.predict(x_test)
    score = roc_auc_score(y_test, preds)
    assert score >= 0.95

    preds = ngb.predict_proba(x_test)
    score = log_loss(y_test, preds)
    assert score <= 0.20

    score = ngb.score(x_test, y_test)
    assert score <= 0.20

    dist = ngb.pred_dist(x_test)
    assert isinstance(dist, Bernoulli)

    score = roc_auc_score(y_test, preds[:, 1])
    assert score >= 0.95
Example #25
Source File: classifier_utils.py From human-rl with MIT License | 5 votes |
def predict_proba_with_loss(self, X, y):
    y_pred = self.predict_proba(X)
    loss = log_loss(y, y_pred)
    return y_pred, loss

# smallest prob given to an actual catastrophe
Example #26
Source File: classification.py From Kaggler with MIT License | 5 votes |
def logloss(y, p):
    """Bounded log loss error.

    Args:
        y (numpy.array): target
        p (numpy.array): prediction

    Returns:
        bounded log loss error
    """
    p[p < EPS] = EPS
    p[p > 1 - EPS] = 1 - EPS
    return log_loss(y, p)
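Bounding p away from exactly 0 and 1 prevents log(0) from producing an infinite loss. A minimal sketch of the same idea using np.clip; the EPS value here is illustrative, the actual constant is defined elsewhere in the Kaggler module:

import numpy as np
from sklearn.metrics import log_loss

EPS = 1e-15  # illustrative value; the real constant lives in the module

y = np.array([1, 0, 1, 0])
p = np.array([1.0, 0.0, 0.8, 0.3])  # includes fully confident 0/1 predictions

p = np.clip(p, EPS, 1 - EPS)  # bound predictions before scoring
print(log_loss(y, p))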
Example #27
Source File: metrics.py From knowledge_graph_attention_network with MIT License | 5 votes |
def logloss(ground_truth, prediction):
    # preds = [max(min(p, 1. - 10e-12), 10e-12) for p in prediction]
    logloss = log_loss(np.asarray(ground_truth), np.asarray(prediction))
    return logloss
Example #28
Source File: utils.py From QuickDraw with MIT License | 5 votes |
def get_evaluation(y_true, y_prob, list_metrics):
    y_pred = np.argmax(y_prob, -1)
    output = {}
    if 'accuracy' in list_metrics:
        output['accuracy'] = metrics.accuracy_score(y_true, y_pred)
    if 'loss' in list_metrics:
        try:
            output['loss'] = metrics.log_loss(y_true, y_prob)
        except ValueError:
            output['loss'] = -1
    if 'confusion_matrix' in list_metrics:
        output['confusion_matrix'] = str(metrics.confusion_matrix(y_true, y_pred))
    return output
Example #29
Source File: check_automl_with_regression.py From mljar-supervised with MIT License | 5 votes |
def test_fit_and_predict(self):
    seed = 1709
    df = pd.read_csv(
        "./tests/data/housing_regression_missing_values_missing_target.csv"
    )
    print(df.columns)
    x_cols = [c for c in df.columns if c != "MEDV"]
    X = df[x_cols]
    y = df["MEDV"]

    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        X, y, test_size=0.3, random_state=seed
    )
    automl = AutoML(
        total_time_limit=10,
        algorithms=["Xgboost"],  # ["LightGBM", "RF", "NN", "CatBoost", "Xgboost"]
        start_random_models=1,
        hill_climbing_steps=0,
        top_models_to_improve=0,
        train_ensemble=True,
        verbose=True,
    )
    automl.fit(X_train, y_train)

    response = automl.predict(X_test)  # ["p_1"]
    print("Response", response)
    # Compute the logloss on the test dataset
    # ll = log_loss(y_test, response)
    # print("(*) Dataset id {} logloss {}".format(dataset_id, ll))
Example #30
Source File: rgf.py From kaggle_otto with BSD 3-Clause "New" or "Revised" License | 5 votes |
def score(self, X, y, sample_weight=None):
    return log_loss(y, self.predict_proba(X))