Python scipy.maximum() Examples
The following are 30 code examples of scipy.maximum(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy, or try the search function.
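A note before the examples: scipy.maximum() was simply the NumPy ufunc numpy.maximum re-exported from the top-level scipy namespace. Recent SciPy releases have removed these NumPy aliases, so new code should call numpy.maximum directly. Below is a minimal sketch, written against NumPy, of the elementwise-clipping idiom that nearly all of the examples rely on:

import numpy as np

eps = 1e-15
p = np.array([0.0, 0.3, 1.0])

# Elementwise maximum against a scalar broadcasts, bounding values away from 0:
p_safe = np.maximum(eps, p)            # [1e-15, 0.3, 1.0]
p_safe = np.minimum(1 - eps, p_safe)   # [1e-15, 0.3, 1.0 - 1e-15]

# np.clip expresses the same two-sided bounding in one call:
assert np.allclose(p_safe, np.clip(p, eps, 1 - eps))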
Example #1
Source File: libscores.py From AutoML with MIT License | 6 votes |
def bac_metric(solution, prediction, task='binary.classification'):
    ''' Compute the normalized balanced accuracy.
    The binarization and the normalization differ
    for the multi-label and multi-class case. '''
    label_num = solution.shape[1]
    score = np.zeros(label_num)
    bin_prediction = binarize_predictions(prediction, task)
    [tn, fp, tp, fn] = acc_stat(solution, bin_prediction)
    # Bounding to avoid division by 0
    eps = 1e-15
    tp = sp.maximum(eps, tp)
    pos_num = sp.maximum(eps, tp + fn)
    tpr = tp / pos_num  # true positive rate (sensitivity)
    if (task != 'multiclass.classification') or (label_num == 1):
        tn = sp.maximum(eps, tn)
        neg_num = sp.maximum(eps, tn + fp)
        tnr = tn / neg_num  # true negative rate (specificity)
        bac = 0.5 * (tpr + tnr)
        base_bac = 0.5  # random predictions for binary case
    else:
        bac = tpr
        base_bac = 1. / label_num  # random predictions for multiclass case
    bac = mvmean(bac)  # average over all classes
    # Normalize: 0 for random, 1 for perfect
    score = (bac - base_bac) / sp.maximum(eps, (1 - base_bac))
    return score
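The helpers binarize_predictions, acc_stat and mvmean come from the surrounding libscores.py and are not shown here. The role of sp.maximum is to bound the confusion-matrix counts away from zero so that tpr and tnr stay finite even for a class with no positive (or negative) examples. A self-contained sketch of that guard, with numpy.maximum standing in for the deprecated scipy alias:

import numpy as np

eps = 1e-15
# Hypothetical per-class counts; the second class has no positive examples,
# so tp + fn == 0 there and an unguarded division would blow up.
tp = np.array([5.0, 0.0])
fn = np.array([2.0, 0.0])

tp_safe = np.maximum(eps, tp)
pos_num = np.maximum(eps, tp + fn)
tpr = tp_safe / pos_num
print(tpr)  # [0.714..., 1.0] -- the empty class degrades to eps/eps == 1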
Example #2
Source File: libscores.py From automl_gpu with MIT License | 6 votes |
def bac_metric(solution, prediction, task='binary.classification'):
    ''' Compute the normalized balanced accuracy.
    The binarization and the normalization differ
    for the multi-label and multi-class case. '''
    label_num = solution.shape[1]
    score = np.zeros(label_num)
    bin_prediction = binarize_predictions(prediction, task)
    [tn, fp, tp, fn] = acc_stat(solution, bin_prediction)
    # Bounding to avoid division by 0
    eps = 1e-15
    tp = sp.maximum(eps, tp)
    pos_num = sp.maximum(eps, tp + fn)
    tpr = tp / pos_num  # true positive rate (sensitivity)
    if (task != 'multiclass.classification') or (label_num == 1):
        tn = sp.maximum(eps, tn)
        neg_num = sp.maximum(eps, tn + fp)
        tnr = tn / neg_num  # true negative rate (specificity)
        bac = 0.5 * (tpr + tnr)
        base_bac = 0.5  # random predictions for binary case
    else:
        bac = tpr
        base_bac = 1. / label_num  # random predictions for multiclass case
    bac = mvmean(bac)  # average over all classes
    # Normalize: 0 for random, 1 for perfect
    score = (bac - base_bac) / sp.maximum(eps, (1 - base_bac))
    return score
Example #3
Source File: math_util.py From smappPy with GNU General Public License v2.0 | 6 votes |
def log_loss(actual, predicted, epsilon=1e-15):
    """
    Calculates and returns the log loss (error) of a set of predicted
    probabilities (hint: see sklearn classifier's predict_proba methods).

    Source: https://www.kaggle.com/wiki/LogarithmicLoss

    In plain English, this error metric is typically used where you have to
    predict that something is true or false with a probability (likelihood)
    ranging from definitely true (1) to equally likely (0.5) to definitely
    false (0).

    Note: also see (and use) scikit-learn:
    http://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html#sklearn.metrics.log_loss
    """
    predicted = sp.maximum(epsilon, predicted)
    predicted = sp.minimum(1 - epsilon, predicted)
    ll = sum(actual * sp.log(predicted)
             + sp.subtract(1, actual) * sp.log(sp.subtract(1, predicted)))
    ll = ll * -1.0 / len(actual)
    return ll
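A quick usage sketch of the same computation, assuming binary labels, with NumPy in place of the deprecated scipy aliases; sklearn.metrics.log_loss agrees on non-degenerate inputs:

import numpy as np
from sklearn.metrics import log_loss as sk_log_loss

actual = np.array([1, 0, 1, 1])
predicted = np.array([0.9, 0.1, 0.8, 0.35])

eps = 1e-15
p = np.clip(predicted, eps, 1 - eps)  # same bounding as the maximum/minimum pair
ll = -np.mean(actual * np.log(p) + (1 - actual) * np.log(1 - p))
print(ll, sk_log_loss(actual, predicted))  # both ~0.371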
Example #4
Source File: optics.py From REDPy with GNU General Public License v3.0 | 6 votes |
def expandClusterOrder(SetOfObjects, point, epsilon):
    """
    Expands OPTICS ordered list of clustering structure

    SetofObjects: Instantiated and prepped instance of 'setOfObjects' class
    epsilon: Determines maximum object size that can be extracted.
        Smaller epsilons reduce run time.
    """
    if SetOfObjects._core_dist[point] <= epsilon:
        while not SetOfObjects._processed[point]:
            SetOfObjects._processed[point] = True
            SetOfObjects._ordered_list.append(point)
            point = set_reach_dist(SetOfObjects, point, epsilon)
    else:
        SetOfObjects._processed[point] = True
Example #5
Source File: libscores.py From automl_gpu with MIT License | 6 votes |
def prior_log_loss(frac_pos, task='binary.classification'):
    ''' Baseline log loss. For multiple classes or labels,
    return the values for each column. '''
    eps = 1e-15
    frac_pos_ = sp.maximum(eps, frac_pos)
    if task != 'multiclass.classification':  # binary case
        frac_neg = 1 - frac_pos
        frac_neg_ = sp.maximum(eps, frac_neg)
        pos_class_log_loss_ = -frac_pos * np.log(frac_pos_)
        neg_class_log_loss_ = -frac_neg * np.log(frac_neg_)
        base_log_loss = pos_class_log_loss_ + neg_class_log_loss_
        # base_log_loss = mvmean(base_log_loss)
        # print('binary {}'.format(base_log_loss))
        # In the multi-label case, the right thing is to AVERAGE, not sum.
        # We return all the scores so we can normalize correctly later on.
    else:  # multiclass case
        fp = frac_pos_ / sum(frac_pos_)  # renormalize the lines in the multiclass case
        # Only ONE label is active per line in the multiclass case
        pos_class_log_loss_ = -frac_pos * np.log(fp)
        base_log_loss = np.sum(pos_class_log_loss_)
    return base_log_loss

# sklearn implementations for comparison
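For the binary case, the baseline is the log loss of always predicting the class prior, i.e. the entropy of a Bernoulli(frac_pos) distribution. A worked sketch, assuming frac_pos = 0.3:

import numpy as np

eps = 1e-15
frac_pos = 0.3  # prior probability of the positive class
frac_neg = 1 - frac_pos

# Log loss of a constant prediction equal to the prior:
base = -(frac_pos * np.log(np.maximum(eps, frac_pos))
         + frac_neg * np.log(np.maximum(eps, frac_neg)))
print(base)  # ~0.611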
Example #6
Source File: optics.py From REDPy with GNU General Public License v3.0 | 6 votes |
def prep_optics(SetofObjects, epsilon):
    """
    Prep data set for main OPTICS loop

    SetofObjects: Instantiated instance of 'setOfObjects' class
    epsilon: Determines maximum object size that can be extracted.
        Smaller epsilons reduce run time.

    Returns modified setOfObjects tree structure
    """
    for j in SetofObjects._index:
        # Find smallest nonzero distance
        SetofObjects._core_dist[j] = np.sort(SetofObjects.data[j, :])[1]
Example #7
Source File: libscores.py From automl-phase-2 with MIT License | 6 votes |
def prior_log_loss(frac_pos, task='binary.classification'):
    ''' Baseline log loss. For multiple classes or labels,
    return the values for each column. '''
    eps = 1e-15
    frac_pos_ = sp.maximum(eps, frac_pos)
    if task != 'multiclass.classification':  # binary case
        frac_neg = 1 - frac_pos
        frac_neg_ = sp.maximum(eps, frac_neg)
        pos_class_log_loss_ = -frac_pos * np.log(frac_pos_)
        neg_class_log_loss_ = -frac_neg * np.log(frac_neg_)
        base_log_loss = pos_class_log_loss_ + neg_class_log_loss_
        # base_log_loss = mvmean(base_log_loss)
        # print('binary {}'.format(base_log_loss))
        # In the multi-label case, the right thing is to AVERAGE, not sum.
        # We return all the scores so we can normalize correctly later on.
    else:  # multiclass case
        fp = frac_pos_ / sum(frac_pos_)  # renormalize the lines in the multiclass case
        # Only ONE label is active per line in the multiclass case
        pos_class_log_loss_ = -frac_pos * np.log(fp)
        base_log_loss = np.sum(pos_class_log_loss_)
    return base_log_loss

# sklearn implementations for comparison
Example #8
Source File: libscores.py From AutoML with MIT License | 6 votes |
def prior_log_loss(frac_pos, task='binary.classification'):
    ''' Baseline log loss. For multiple classes or labels,
    return the values for each column. '''
    eps = 1e-15
    frac_pos_ = sp.maximum(eps, frac_pos)
    if task != 'multiclass.classification':  # binary case
        frac_neg = 1 - frac_pos
        frac_neg_ = sp.maximum(eps, frac_neg)
        pos_class_log_loss_ = -frac_pos * np.log(frac_pos_)
        neg_class_log_loss_ = -frac_neg * np.log(frac_neg_)
        base_log_loss = pos_class_log_loss_ + neg_class_log_loss_
        # base_log_loss = mvmean(base_log_loss)
        # print('binary {}'.format(base_log_loss))
        # In the multi-label case, the right thing is to AVERAGE, not sum.
        # We return all the scores so we can normalize correctly later on.
    else:  # multiclass case
        fp = frac_pos_ / sum(frac_pos_)  # renormalize the lines in the multiclass case
        # Only ONE label is active per line in the multiclass case
        pos_class_log_loss_ = -frac_pos * np.log(fp)
        base_log_loss = np.sum(pos_class_log_loss_)
    return base_log_loss

# sklearn implementations for comparison
Example #9
Source File: libscores.py From AutoML with MIT License | 5 votes |
def pac_metric(solution, prediction, task='binary.classification'):
    ''' Probabilistic Accuracy based on log_loss metric.
    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array. '''
    debug_flag = False
    [sample_num, label_num] = solution.shape
    if label_num == 1:
        task = 'binary.classification'
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)
    pos_num = 1. * sum(solution)  # float conversion!
    frac_pos = pos_num / sample_num  # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower).
    # Should always return the same thing, except in the multi-label case,
    # for which the analytic solution makes more sense.
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num):
            base_prediction[k, :] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)
        diff = np.array(abs(the_base_log_loss - base_log_loss))
        if len(diff.shape) > 0:
            diff = max(diff)
        if diff > 1e-10:
            print('Arrggh {} != {}'.format(the_base_log_loss, base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp
    # because it is a nonlinear operation.
    pac = mvmean(np.exp(-the_log_loss))
    base_pac = mvmean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score
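The normalization step maps exp(-log_loss) onto a 0-to-1 scale where 0 is the prior baseline and 1 is a perfect model. A worked sketch with hypothetical loss values:

import numpy as np

# PAC turns a log loss into an accuracy-like score in (0, 1]:
# a perfect model (log loss 0) gets exp(0) = 1; higher loss decays toward 0.
the_log_loss = 0.37    # hypothetical model log loss
base_log_loss = 0.61   # hypothetical prior baseline (see prior_log_loss)

eps = 1e-15
pac = np.exp(-the_log_loss)        # ~0.691
base_pac = np.exp(-base_log_loss)  # ~0.543
score = (pac - base_pac) / np.maximum(eps, 1 - base_pac)
print(score)  # ~0.32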
Example #10
Source File: np_utils.py From seq2seq-keyphrase with MIT License | 5 votes |
def binary_logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1 - epsilon, p)
    res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
    res *= -1.0 / len(y)
    return res
Example #11
Source File: np_utils.py From KerasNeuralFingerprint with MIT License | 5 votes |
def binary_logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1 - epsilon, p)
    res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
    res *= -1.0 / len(y)
    return res
Example #12
Source File: libscores.py From AutoML with MIT License | 5 votes |
def log_loss(solution, prediction, task='binary.classification'):
    ''' Log loss for binary and multiclass. '''
    [sample_num, label_num] = solution.shape
    eps = 1e-15

    pred = np.copy(prediction)  # beware: changes in prediction occur through this
    sol = np.copy(solution)
    if (task == 'multiclass.classification') and (label_num > 1):
        # Make sure the lines add up to one for multi-class classification
        norma = np.sum(prediction, axis=1)
        for k in range(sample_num):
            pred[k, :] /= sp.maximum(norma[k], eps)
        # Make sure there is a single label active per line for
        # multi-class classification
        sol = binarize_predictions(solution, task='multiclass.classification')
        # For the base prediction, this solution is ridiculous
        # in the multi-label case

    # Bounding of predictions to avoid log(0), 1/0, ...
    pred = sp.minimum(1 - eps, sp.maximum(eps, pred))
    # Compute the log loss
    pos_class_log_loss = -mvmean(sol * np.log(pred), axis=0)
    if (task != 'multiclass.classification') or (label_num == 1):
        # The multi-label case is a bunch of binary problems.
        # The second class is the negative class for each column.
        neg_class_log_loss = -mvmean((1 - sol) * np.log(1 - pred), axis=0)
        log_loss = pos_class_log_loss + neg_class_log_loss
        # Each column is an independent problem, so we average.
        # The probabilities in one line do not add up to one.
        # log_loss = mvmean(log_loss)
        # print('binary {}'.format(log_loss))
        # In the multi-label case, the right thing is to AVERAGE, not sum.
        # We return all the scores so we can normalize correctly later on.
    else:
        # For the multiclass case, the probabilities in one line add up to one.
        log_loss = pos_class_log_loss
        # We sum the contributions of the columns.
        log_loss = np.sum(log_loss)
        # print('multiclass {}'.format(log_loss))
    return log_loss
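The per-sample normalization loop above can also be written with broadcasting; a sketch of the equivalent vectorized bounding, using numpy.maximum:

import numpy as np

eps = 1e-15
pred = np.array([[0.2, 0.5, 0.1],
                 [0.0, 0.0, 0.0]])  # a degenerate row that must not divide by 0

# Equivalent to the per-sample loop: each row is scaled to sum to one,
# with the row sum bounded away from zero.
norma = pred.sum(axis=1, keepdims=True)
pred_normed = pred / np.maximum(norma, eps)
print(pred_normed)  # first row sums to 1; the degenerate row stays all zeros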
Example #13
Source File: ml.py From kaggle_avazu_benchmark with Apache License 2.0 | 5 votes |
def llfun(act, pred):
    p_true = pred[:, 1]
    epsilon = 1e-15
    p_true = sp.maximum(epsilon, p_true)
    p_true = sp.minimum(1 - epsilon, p_true)
    ll = sum(act * sp.log(p_true)
             + sp.subtract(1, act) * sp.log(sp.subtract(1, p_true)))
    ll = ll * -1.0 / len(act)
    return ll
Example #14
Source File: libscores.py From automl_gpu with MIT License | 5 votes |
def log_loss(solution, prediction, task='binary.classification'):
    ''' Log loss for binary and multiclass. '''
    [sample_num, label_num] = solution.shape
    eps = 1e-15

    pred = np.copy(prediction)  # beware: changes in prediction occur through this
    sol = np.copy(solution)
    if (task == 'multiclass.classification') and (label_num > 1):
        # Make sure the lines add up to one for multi-class classification
        norma = np.sum(prediction, axis=1)
        for k in range(sample_num):
            pred[k, :] /= sp.maximum(norma[k], eps)
        # Make sure there is a single label active per line for
        # multi-class classification
        sol = binarize_predictions(solution, task='multiclass.classification')
        # For the base prediction, this solution is ridiculous
        # in the multi-label case

    # Bounding of predictions to avoid log(0), 1/0, ...
    pred = sp.minimum(1 - eps, sp.maximum(eps, pred))
    # Compute the log loss
    pos_class_log_loss = -mvmean(sol * np.log(pred), axis=0)
    if (task != 'multiclass.classification') or (label_num == 1):
        # The multi-label case is a bunch of binary problems.
        # The second class is the negative class for each column.
        neg_class_log_loss = -mvmean((1 - sol) * np.log(1 - pred), axis=0)
        log_loss = pos_class_log_loss + neg_class_log_loss
        # Each column is an independent problem, so we average.
        # The probabilities in one line do not add up to one.
        # log_loss = mvmean(log_loss)
        # print('binary {}'.format(log_loss))
        # In the multi-label case, the right thing is to AVERAGE, not sum.
        # We return all the scores so we can normalize correctly later on.
    else:
        # For the multiclass case, the probabilities in one line add up to one.
        log_loss = pos_class_log_loss
        # We sum the contributions of the columns.
        log_loss = np.sum(log_loss)
        # print('multiclass {}'.format(log_loss))
    return log_loss
Example #15
Source File: classify_nodes.py From PyTorch-Luna16 with Apache License 2.0 | 5 votes |
def logloss(act, pred):
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    ll = sum(act * sp.log(pred) + sp.subtract(1, act) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(act)
    return ll
Example #16
Source File: libscores.py From automl_gpu with MIT License | 5 votes |
def pac_metric(solution, prediction, task='binary.classification'):
    ''' Probabilistic Accuracy based on log_loss metric.
    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array. '''
    debug_flag = False
    [sample_num, label_num] = solution.shape
    if label_num == 1:
        task = 'binary.classification'
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)
    pos_num = 1. * sum(solution)  # float conversion!
    frac_pos = pos_num / sample_num  # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower).
    # Should always return the same thing, except in the multi-label case,
    # for which the analytic solution makes more sense.
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num):
            base_prediction[k, :] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)
        diff = np.array(abs(the_base_log_loss - base_log_loss))
        if len(diff.shape) > 0:
            diff = max(diff)
        if diff > 1e-10:
            print('Arrggh {} != {}'.format(the_base_log_loss, base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp
    # because it is a nonlinear operation.
    pac = mvmean(np.exp(-the_log_loss))
    base_pac = mvmean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score
Example #17
Source File: np_utils.py From CopyNet with MIT License | 5 votes |
def binary_logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1 - epsilon, p)
    res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
    res *= -1.0 / len(y)
    return res
Example #18
Source File: log_loss.py From classifier-calibration with MIT License | 5 votes |
def log_loss(act, pred):
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    ll = sum(act * sp.log(pred) + sp.subtract(1, act) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(act)
    return ll
Example #19
Source File: utils.py From pCVR with Apache License 2.0 | 5 votes |
def logloss(act, pred):
    '''
    The loss function provided by the contest organizers.
    :param act: actual labels
    :param pred: predicted probabilities
    :return: log loss
    '''
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    ll = sum(act * sp.log(pred) + sp.subtract(1, act) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(act)
    return ll
Example #20
Source File: utils.py From pCVR with Apache License 2.0 | 5 votes |
def my_logloss(act, pred):
    epsilon = 1e-15
    pred = K.maximum(epsilon, pred)
    pred = K.minimum(1 - epsilon, pred)
    ll = K.sum(act * K.log(pred) + (1 - act) * K.log(1 - pred))
    ll = ll * -1.0 / K.shape(act)[0]
    return ll
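Here K is presumably the Keras backend (e.g. tensorflow.keras.backend). Its K.clip gives the same two-sided bounding as the K.maximum/K.minimum pair; a hypothetical variant under that assumption:

from tensorflow.keras import backend as K

def my_logloss_clip(act, pred):
    epsilon = 1e-15
    # K.clip bounds pred into [epsilon, 1 - epsilon] in one call,
    # matching the K.maximum / K.minimum pair above.
    pred = K.clip(pred, epsilon, 1 - epsilon)
    ll = K.sum(act * K.log(pred) + (1 - act) * K.log(1 - pred))
    # Cast the integer batch size so the division stays in floating point.
    return ll * -1.0 / K.cast(K.shape(act)[0], 'float32')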
Example #21
Source File: optics.py From REDPy with GNU General Public License v3.0 | 5 votes |
def set_reach_dist(SetOfObjects, point_index, epsilon):
    """
    Sets reachability distance and ordering. This function is the primary
    workhorse of the OPTICS algorithm.

    SetofObjects: Instantiated and prepped instance of 'setOfObjects' class
    epsilon: Determines maximum object size that can be extracted.
        Smaller epsilons reduce run time. (float)
    """
    row = [SetOfObjects.data[point_index, :]]
    indices = np.argsort(row)
    distances = np.sort(row)
    if scipy.iterable(distances):
        unprocessed = indices[(SetOfObjects._processed[indices] < 1)[0].T]
        rdistances = scipy.maximum(
            distances[(SetOfObjects._processed[indices] < 1)[0].T],
            SetOfObjects._core_dist[point_index])
        SetOfObjects._reachability[unprocessed] = scipy.minimum(
            SetOfObjects._reachability[unprocessed], rdistances)
        if unprocessed.size > 0:
            return unprocessed[np.argsort(np.array(
                SetOfObjects._reachability[unprocessed]))[0]]
        else:
            return point_index
    else:
        return point_index
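The scipy.maximum/scipy.minimum pair implements the OPTICS reachability update: a point's candidate reachability from p is its distance to p floored at p's core distance, and we keep the smallest value seen so far. A standalone sketch with NumPy:

import numpy as np

# Candidate reachability distance: distance to p, floored at p's core distance.
core_dist = 0.5
distances = np.array([0.2, 0.7, 1.3])  # distances from p to unprocessed points

rdistances = np.maximum(distances, core_dist)  # [0.5, 0.7, 1.3]

# Keep the smallest reachability seen so far for each point:
reachability = np.array([np.inf, 0.6, 1.0])
reachability = np.minimum(reachability, rdistances)
print(reachability)  # [0.5, 0.6, 1.0]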
Example #22
Source File: optics.py From REDPy with GNU General Public License v3.0 | 5 votes |
def build_optics(SetOfObjects, epsilon):
    """
    Builds OPTICS ordered list of clustering structure

    SetofObjects: Instantiated and prepped instance of 'setOfObjects' class
    epsilon: Determines maximum object size that can be extracted.
        Smaller epsilons reduce run time.
    """
    for point in SetOfObjects._index:
        if not SetOfObjects._processed[point]:
            expandClusterOrder(SetOfObjects, point, epsilon)
Example #23
Source File: metric.py From mljar-supervised with MIT License | 5 votes |
def logloss(y_true, y_predicted):
    epsilon = 1e-6
    y_predicted = sp.maximum(epsilon, y_predicted)
    y_predicted = sp.minimum(1 - epsilon, y_predicted)
    ll = log_loss(y_true, y_predicted)
    return ll
Example #24
Source File: models.py From automl-phase-2 with MIT License | 5 votes |
def predict(self, X):
    prediction = self.predict_method(X)
    # Calibrate proba
    if self.task != 'regression' and self.postprocessor is not None:
        prediction = self.postprocessor.predict_proba(prediction)
    # Keep only the 2nd column: for a single binary target the two columns
    # are complementary (they sum to one), so the 1st is redundant.
    if self.target_num == 1 and len(prediction.shape) > 1 and prediction.shape[1] > 1:
        prediction = prediction[:, 1]
    # Make sure the normalization is correct
    if self.task == 'multiclass.classification':
        eps = 1e-15
        norma = np.sum(prediction, axis=1)
        for k in range(prediction.shape[0]):
            prediction[k, :] /= sp.maximum(norma[k], eps)
    return prediction
Example #25
Source File: libscores.py From automl-phase-2 with MIT License | 5 votes |
def log_loss(solution, prediction, task='binary.classification'):
    ''' Log loss for binary and multiclass. '''
    [sample_num, label_num] = solution.shape
    eps = 1e-15

    pred = np.copy(prediction)  # beware: changes in prediction occur through this
    sol = np.copy(solution)
    if (task == 'multiclass.classification') and (label_num > 1):
        # Make sure the lines add up to one for multi-class classification
        norma = np.sum(prediction, axis=1)
        for k in range(sample_num):
            pred[k, :] /= sp.maximum(norma[k], eps)
        # Make sure there is a single label active per line for
        # multi-class classification
        sol = binarize_predictions(solution, task='multiclass.classification')
        # For the base prediction, this solution is ridiculous
        # in the multi-label case

    # Bounding of predictions to avoid log(0), 1/0, ...
    pred = sp.minimum(1 - eps, sp.maximum(eps, pred))
    # Compute the log loss
    pos_class_log_loss = -mvmean(sol * np.log(pred), axis=0)
    if (task != 'multiclass.classification') or (label_num == 1):
        # The multi-label case is a bunch of binary problems.
        # The second class is the negative class for each column.
        neg_class_log_loss = -mvmean((1 - sol) * np.log(1 - pred), axis=0)
        log_loss = pos_class_log_loss + neg_class_log_loss
        # Each column is an independent problem, so we average.
        # The probabilities in one line do not add up to one.
        # log_loss = mvmean(log_loss)
        # print('binary {}'.format(log_loss))
        # In the multi-label case, the right thing is to AVERAGE, not sum.
        # We return all the scores so we can normalize correctly later on.
    else:
        # For the multiclass case, the probabilities in one line add up to one.
        log_loss = pos_class_log_loss
        # We sum the contributions of the columns.
        log_loss = np.sum(log_loss)
        # print('multiclass {}'.format(log_loss))
    return log_loss
Example #26
Source File: libscores.py From automl-phase-2 with MIT License | 5 votes |
def pac_metric(solution, prediction, task='binary.classification'):
    ''' Probabilistic Accuracy based on log_loss metric.
    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array. '''
    debug_flag = False
    [sample_num, label_num] = solution.shape
    if label_num == 1:
        task = 'binary.classification'
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)
    pos_num = 1. * sum(solution)  # float conversion!
    frac_pos = pos_num / sample_num  # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower).
    # Should always return the same thing, except in the multi-label case,
    # for which the analytic solution makes more sense.
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num):
            base_prediction[k, :] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)
        diff = np.array(abs(the_base_log_loss - base_log_loss))
        if len(diff.shape) > 0:
            diff = max(diff)
        if diff > 1e-10:
            print('Arrggh {} != {}'.format(the_base_log_loss, base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp
    # because it is a nonlinear operation.
    pac = mvmean(np.exp(-the_log_loss))
    base_pac = mvmean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score
Example #27
Source File: libscores.py From automl-phase-2 with MIT License | 5 votes |
def bac_metric(solution, prediction, task='binary.classification'):
    ''' Compute the normalized balanced accuracy.
    The binarization and the normalization differ
    for the multi-label and multi-class case. '''
    # Convert to 2d if necessary
    solution = np.array(solution, ndmin=2)
    if solution.shape[1] == 1:
        solution = solution.T
    prediction = np.array(prediction, ndmin=2)
    if prediction.shape[1] == 1:
        prediction = prediction.T
    # Carry on
    label_num = solution.shape[1]
    score = np.zeros(label_num)
    bin_prediction = binarize_predictions(prediction, task)
    [tn, fp, tp, fn] = acc_stat(solution, bin_prediction)
    # Bounding to avoid division by 0
    eps = 1e-15
    tp = sp.maximum(eps, tp)
    pos_num = sp.maximum(eps, tp + fn)
    tpr = tp / pos_num  # true positive rate (sensitivity)
    if (task != 'multiclass.classification') or (label_num == 1):
        tn = sp.maximum(eps, tn)
        neg_num = sp.maximum(eps, tn + fp)
        tnr = tn / neg_num  # true negative rate (specificity)
        bac = 0.5 * (tpr + tnr)
        base_bac = 0.5  # random predictions for binary case
    else:
        bac = tpr
        base_bac = 1. / label_num  # random predictions for multiclass case
    bac = mvmean(bac)  # average over all classes
    # Normalize: 0 for random, 1 for perfect
    score = (bac - base_bac) / sp.maximum(eps, (1 - base_bac))
    return score
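Unlike Example #1, this variant first coerces 1-D inputs to 2-D. A small demonstration of the ndmin=2 idiom (np.atleast_2d is equivalent):

import numpy as np

y = np.array([1, 0, 1])    # a 1-D label vector
y2 = np.array(y, ndmin=2)  # shape (1, 3): ndmin=2 prepends an axis
if y2.shape[1] == 1:
    y2 = y2.T
print(y2.shape)            # (1, 3); only a length-1 vector would be transposed

# np.atleast_2d does the same prepending:
assert np.array_equal(np.atleast_2d(y), np.array(y, ndmin=2))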
Example #28
Source File: py_lh_20Sep2014.py From Predict-click-through-rates-on-display-ads with MIT License | 5 votes |
def logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1 - epsilon, p)
    ll = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
    ll = ll * -1.0 / len(y)
    return ll

# B. Apply hash trick to the original csv row
# For simplicity, we treat both integer and categorical features as categorical.
# INPUT:
#     csv_row: a csv dictionary, ex: {'Label': '1', 'I1': '357', 'I2': '', ...}
#     D: the max index that we can hash to
# OUTPUT:
#     x: a list of indices whose value is 1
Example #29
Source File: np_utils.py From CAPTCHA-breaking with MIT License | 5 votes |
def binary_logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1 - epsilon, p)
    res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
    res *= -1.0 / len(y)
    return res
Example #30
Source File: libscores.py From automl_gpu with MIT License | 4 votes |
def f1_metric(solution, prediction, task='binary.classification'):
    ''' Compute the normalized f1 measure.
    The binarization differs for the multi-label and multi-class case.
    A non-weighted average over classes is taken.
    The score is normalized. '''
    label_num = solution.shape[1]
    score = np.zeros(label_num)
    bin_prediction = binarize_predictions(prediction, task)
    [tn, fp, tp, fn] = acc_stat(solution, bin_prediction)
    # Bounding to avoid division by 0
    eps = 1e-15
    true_pos_num = sp.maximum(eps, tp + fn)
    found_pos_num = sp.maximum(eps, tp + fp)
    tp = sp.maximum(eps, tp)
    tpr = tp / true_pos_num   # true positive rate (recall)
    ppv = tp / found_pos_num  # positive predictive value (precision)
    arithmetic_mean = 0.5 * sp.maximum(eps, tpr + ppv)
    # Harmonic mean:
    f1 = tpr * ppv / arithmetic_mean
    # Average over all classes
    f1 = mvmean(f1)
    # Normalize: 0 for random, 1 for perfect
    if (task != 'multiclass.classification') or (label_num == 1):
        # How to choose the "base_f1"?
        # For the binary/multilabel classification case, one may want to
        # predict all 1. In that case tpr = 1 and ppv = frac_pos, so
        #     frac_pos = mvmean(solution.ravel())
        #     base_f1 = 2 * frac_pos / (1 + frac_pos)
        # Or predict random values with probability 0.5, in which case
        #     base_f1 = 0.5
        # The first solution is better only if frac_pos > 1/3.
        # The solution in which we predict according to the class prior
        # frac_pos gives f1 = tpr = ppv = frac_pos, which is worse than 0.5
        # if frac_pos < 0.5. So, because the f1 score is used when frac_pos
        # is small (typically < 0.1), the best is to assume base_f1 = 0.5.
        base_f1 = 0.5
    else:
        # For the multiclass case, this is not possible (though it does not
        # make much sense to use f1 for multiclass problems), so the best
        # would be to assign values at random to get tpr = ppv = frac_pos,
        # where frac_pos = 1 / label_num.
        base_f1 = 1. / label_num
    score = (f1 - base_f1) / sp.maximum(eps, (1 - base_f1))
    return score
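The f1 line is the harmonic mean of precision and recall, written as tpr * ppv / arithmetic_mean so the eps bounding can be reused. A quick check of the identity:

import numpy as np

eps = 1e-15
tpr = np.array([0.8, 1e-15])  # recall per class (second class degenerate)
ppv = np.array([0.6, 1e-15])  # precision per class

# f1 written the same way as above, with the denominator bounded by eps:
arithmetic_mean = 0.5 * np.maximum(eps, tpr + ppv)
f1 = tpr * ppv / arithmetic_mean
print(f1[0])  # 0.6857... == 2 * 0.8 * 0.6 / (0.8 + 0.6)
assert np.isclose(f1[0], 2 * tpr[0] * ppv[0] / (tpr[0] + ppv[0]))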