Python scipy.minimum() Examples

The following are 19 code examples of scipy.minimum(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy, or try the search function.
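
scipy.minimum is simply NumPy's element-wise minimum ufunc re-exported from the top-level scipy namespace; recent SciPy releases have deprecated and removed these aliases, so numpy.minimum is the drop-in replacement. A minimal sketch of the element-wise behaviour that the examples below rely on:

import numpy as np

a = np.array([3, 7, 2, 9])
b = np.array([5, 1, 8, 4])

# Element-wise minimum of two arrays (what sp.minimum / scipy.minimum compute below).
print(np.minimum(a, b))   # [3 1 2 4]

# Broadcasting against a scalar is the "clipping" idiom used in the log-loss examples.
print(np.minimum(a, 4))   # [3 4 2 4]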
Example #1
Source File: data.py From BERT with Apache License 2.0 | 6 votes |
def load_question(params):
    df = pd.read_csv(config.QUESTION_FILE)
    df["words"] = df.words.str.split(" ").apply(lambda x: [_to_ind(z) for z in x])
    df["chars"] = df.chars.str.split(" ").apply(lambda x: [_to_ind(z) for z in x])
    Q = {}
    Q["seq_len_word"] = sp.minimum(df["words"].apply(len).values, params["max_seq_len_word"])
    Q["seq_len_char"] = sp.minimum(df["chars"].apply(len).values, params["max_seq_len_char"])
    Q["words"] = pad_sequences(df["words"], maxlen=params["max_seq_len_word"],
                               padding=params["pad_sequences_padding"],
                               truncating=params["pad_sequences_truncating"],
                               value=config.PADDING_INDEX_WORD)
    Q["chars"] = pad_sequences(df["chars"], maxlen=params["max_seq_len_char"],
                               padding=params["pad_sequences_padding"],
                               truncating=params["pad_sequences_truncating"],
                               value=config.PADDING_INDEX_CHAR)
    return Q
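
The two sp.minimum calls above clip every question's word and character count at the configured maximum before padding. A self-contained sketch of just that step, with a toy DataFrame standing in for the real question file (the data here is illustrative, not from the project):

import numpy as np
import pandas as pd

# Toy stand-in for the question DataFrame; the real one is read from config.QUESTION_FILE.
df = pd.DataFrame({"words": [[1, 2, 3], [4, 5], [6, 7, 8, 9, 10]]})
max_seq_len_word = 4

# Element-wise clip of the per-row lengths, the same thing sp.minimum does above.
seq_len_word = np.minimum(df["words"].apply(len).values, max_seq_len_word)
print(seq_len_word)   # [3 2 4]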
Example #2
Source File: math_util.py From smappPy with GNU General Public License v2.0 | 6 votes |
def log_loss(actual, predicted, epsilon=1e-15):
    """
    Calculates and returns the log loss (error) of a set of predicted probabilities
    (hint: see sklearn classifier's predict_proba methods).

    Source: https://www.kaggle.com/wiki/LogarithmicLoss

    In plain English, this error metric is typically used where you have to predict
    that something is true or false with a probability (likelihood) ranging from
    definitely true (1) to equally true (0.5) to definitely false (0).

    Note: also see (and use) scikit-learn:
    http://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html#sklearn.metrics.log_loss
    """
    predicted = sp.maximum(epsilon, predicted)
    predicted = sp.minimum(1 - epsilon, predicted)
    ll = sum(actual * sp.log(predicted) + sp.subtract(1, actual) * sp.log(sp.subtract(1, predicted)))
    ll = ll * -1.0 / len(actual)
    return ll
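
A quick worked call of the same arithmetic, assuming an older SciPy where sp.maximum, sp.minimum and sp.log still exist as NumPy aliases (on current SciPy substitute np.maximum, np.minimum, np.log, as done here):

import numpy as np

actual = np.array([1, 0, 1, 1])
predicted = np.array([0.9, 0.2, 0.8, 0.65])

# Same steps as log_loss above: clip the probabilities, take logs, average the negative log-likelihood.
eps = 1e-15
p = np.minimum(1 - eps, np.maximum(eps, predicted))
ll = -np.mean(actual * np.log(p) + (1 - actual) * np.log(1 - p))
print(round(ll, 4))   # 0.2456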
Example #3
Source File: data.py From tensorflow-DSMM with MIT License | 6 votes |
def load_question(params):
    df = pd.read_csv(config.QUESTION_FILE)
    df["words"] = df.words.str.split(" ").apply(lambda x: [_to_ind(z) for z in x])
    df["chars"] = df.chars.str.split(" ").apply(lambda x: [_to_ind(z) for z in x])
    Q = {}
    Q["seq_len_word"] = sp.minimum(df["words"].apply(len).values, params["max_seq_len_word"])
    Q["seq_len_char"] = sp.minimum(df["chars"].apply(len).values, params["max_seq_len_char"])
    Q["words"] = pad_sequences(df["words"], maxlen=params["max_seq_len_word"],
                               padding=params["pad_sequences_padding"],
                               truncating=params["pad_sequences_truncating"],
                               value=config.PADDING_INDEX_WORD)
    Q["chars"] = pad_sequences(df["chars"], maxlen=params["max_seq_len_char"],
                               padding=params["pad_sequences_padding"],
                               truncating=params["pad_sequences_truncating"],
                               value=config.PADDING_INDEX_CHAR)
    return Q
Example #4
Source File: np_utils.py From CopyNet with MIT License | 5 votes |
def binary_logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1 - epsilon, p)
    res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
    res *= -1.0 / len(y)
    return res
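
For comparison, scikit-learn's sklearn.metrics.log_loss computes the same binary log loss (it clips probabilities internally much like the epsilon trick above), which is a reasonable replacement if you do not want to rely on the removed scipy aliases:

import numpy as np
from sklearn.metrics import log_loss

y_true = np.array([1, 0, 1, 1])
y_prob = np.array([0.9, 0.2, 0.8, 0.65])

# Binary log loss on the positive-class probabilities; matches the hand-rolled version above.
print(log_loss(y_true, y_prob))   # ~0.2456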
Example #5
Source File: np_utils.py From CAPTCHA-breaking with MIT License | 5 votes |
def binary_logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1 - epsilon, p)
    res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
    res *= -1.0 / len(y)
    return res
Example #6
Source File: classify_nodes.py From PyTorch-Luna16 with Apache License 2.0 | 5 votes |
def logloss(act, pred):
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    ll = sum(act * sp.log(pred) + sp.subtract(1, act) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(act)
    return ll
Example #7
Source File: ml.py From kaggle_avazu_benchmark with Apache License 2.0 | 5 votes |
def llfun(act, pred):
    p_true = pred[:, 1]
    epsilon = 1e-15
    p_true = sp.maximum(epsilon, p_true)
    p_true = sp.minimum(1 - epsilon, p_true)
    ll = sum(act * sp.log(p_true) + sp.subtract(1, act) * sp.log(sp.subtract(1, p_true)))
    ll = ll * -1.0 / len(act)
    return ll
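
Here pred is expected to be a two-column predict_proba-style array, so pred[:, 1] selects the positive-class probability before the usual clipping. A toy illustration of that selection step (the numbers are made up):

import numpy as np

# Two-column probabilities as returned by a predict_proba-style classifier: [P(class 0), P(class 1)].
pred = np.array([[0.10, 0.90],
                 [0.80, 0.20],
                 [0.35, 0.65]])

p_true = pred[:, 1]   # positive-class column, as in llfun above
print(p_true)         # [0.9  0.2  0.65]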
Example #8
Source File: libscores.py From AutoML with MIT License | 5 votes |
def log_loss(solution, prediction, task='binary.classification'):
    ''' Log loss for binary and multiclass. '''
    [sample_num, label_num] = solution.shape
    eps = 1e-15

    pred = np.copy(prediction)  # beware: changes in prediction occur through this
    sol = np.copy(solution)
    if (task == 'multiclass.classification') and (label_num > 1):
        # Make sure the lines add up to one for multi-class classification
        norma = np.sum(prediction, axis=1)
        for k in range(sample_num):
            pred[k, :] /= sp.maximum(norma[k], eps)
        # Make sure there is a single label active per line for multi-class classification
        sol = binarize_predictions(solution, task='multiclass.classification')
        # For the base prediction, this solution is ridiculous in the multi-label case

    # Bounding of predictions to avoid log(0), 1/0, ...
    pred = sp.minimum(1 - eps, sp.maximum(eps, pred))
    # Compute the log loss
    pos_class_log_loss = -mvmean(sol * np.log(pred), axis=0)
    if (task != 'multiclass.classification') or (label_num == 1):
        # The multi-label case is a bunch of binary problems.
        # The second class is the negative class for each column.
        neg_class_log_loss = -mvmean((1 - sol) * np.log(1 - pred), axis=0)
        log_loss = pos_class_log_loss + neg_class_log_loss
        # Each column is an independent problem, so we average.
        # The probabilities in one line do not add up to one.
        # log_loss = mvmean(log_loss)
        # print('binary {}'.format(log_loss))
        # In the multilabel case, the right thing is to AVERAGE, not sum.
        # We return all the scores so we can normalize correctly later on.
    else:
        # For the multiclass case the probabilities in one line add up to one.
        log_loss = pos_class_log_loss
        # We sum the contributions of the columns.
        log_loss = np.sum(log_loss)
        # print('multiclass {}'.format(log_loss))
    return log_loss
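
The two sp.maximum / sp.minimum calls do all of the numerical safeguarding in this function: the first keeps the per-row normaliser away from zero, the second clamps every probability into [eps, 1 - eps] so the logarithm never sees 0 or 1. A tiny self-contained illustration of that clamping step, using numpy directly since the scipy aliases are gone in recent releases:

import numpy as np

eps = 1e-15
pred = np.array([[1.0, 0.0, 0.0],
                 [0.3, 0.3, 0.4]])

# Clamp into [eps, 1 - eps] so np.log never produces -inf for hard 0/1 predictions.
bounded = np.minimum(1 - eps, np.maximum(eps, pred))
print(np.log(bounded)[0])   # finite values, roughly [0, -34.5, -34.5]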
Example #9
Source File: np_utils.py From KerasNeuralFingerprint with MIT License | 5 votes |
def binary_logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1 - epsilon, p)
    res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
    res *= -1.0 / len(y)
    return res
Example #10
Source File: libscores.py From automl_gpu with MIT License | 5 votes |
def log_loss(solution, prediction, task='binary.classification'):
    ''' Log loss for binary and multiclass. '''
    [sample_num, label_num] = solution.shape
    eps = 1e-15

    pred = np.copy(prediction)  # beware: changes in prediction occur through this
    sol = np.copy(solution)
    if (task == 'multiclass.classification') and (label_num > 1):
        # Make sure the lines add up to one for multi-class classification
        norma = np.sum(prediction, axis=1)
        for k in range(sample_num):
            pred[k, :] /= sp.maximum(norma[k], eps)
        # Make sure there is a single label active per line for multi-class classification
        sol = binarize_predictions(solution, task='multiclass.classification')
        # For the base prediction, this solution is ridiculous in the multi-label case

    # Bounding of predictions to avoid log(0), 1/0, ...
    pred = sp.minimum(1 - eps, sp.maximum(eps, pred))
    # Compute the log loss
    pos_class_log_loss = -mvmean(sol * np.log(pred), axis=0)
    if (task != 'multiclass.classification') or (label_num == 1):
        # The multi-label case is a bunch of binary problems.
        # The second class is the negative class for each column.
        neg_class_log_loss = -mvmean((1 - sol) * np.log(1 - pred), axis=0)
        log_loss = pos_class_log_loss + neg_class_log_loss
        # Each column is an independent problem, so we average.
        # The probabilities in one line do not add up to one.
        # log_loss = mvmean(log_loss)
        # print('binary {}'.format(log_loss))
        # In the multilabel case, the right thing is to AVERAGE, not sum.
        # We return all the scores so we can normalize correctly later on.
    else:
        # For the multiclass case the probabilities in one line add up to one.
        log_loss = pos_class_log_loss
        # We sum the contributions of the columns.
        log_loss = np.sum(log_loss)
        # print('multiclass {}'.format(log_loss))
    return log_loss
Example #11
Source File: connectivity.py From dynamo-release with BSD 3-Clause "New" or "Revised" License | 5 votes |
def mnn_from_list(knn_graph_list):
    """Apply reduce function to calculate the mutual kNN."""
    import functools

    mnn = (
        functools.reduce(scipy.sparse.csr.csr_matrix.minimum, knn_graph_list)
        if issparse(knn_graph_list[0])
        else functools.reduce(scipy.minimum, knn_graph_list)
    )

    return mnn
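
The element-wise minimum of two binary kNN adjacency matrices keeps only the edges present in both, which is exactly the mutual-kNN idea this helper implements. A small sketch with hand-built scipy.sparse matrices (the toy graphs below are not from dynamo-release):

import functools
import scipy.sparse as sp_sparse

# Two toy directed kNN graphs over 3 points (1 = edge present).
g1 = sp_sparse.csr_matrix([[0, 1, 1],
                           [1, 0, 0],
                           [0, 1, 0]])
g2 = sp_sparse.csr_matrix([[0, 1, 0],
                           [1, 0, 1],
                           [1, 1, 0]])

# Element-wise minimum keeps an edge only if it appears in every graph in the list.
mnn = functools.reduce(sp_sparse.csr_matrix.minimum, [g1, g2])
print(mnn.toarray())
# [[0 1 0]
#  [1 0 0]
#  [0 1 0]]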
Example #12
Source File: log_loss.py From classifier-calibration with MIT License | 5 votes |
def log_loss(act, pred):
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    ll = sum(act * sp.log(pred) + sp.subtract(1, act) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(act)
    return ll
Example #13
Source File: utils.py From pCVR with Apache License 2.0 | 5 votes |
def logloss(act, pred):
    '''
    The officially provided loss function.
    :param act:
    :param pred:
    :return:
    '''
    epsilon = 1e-15
    pred = sp.maximum(epsilon, pred)
    pred = sp.minimum(1 - epsilon, pred)
    ll = sum(act * sp.log(pred) + sp.subtract(1, act) * sp.log(sp.subtract(1, pred)))
    ll = ll * -1.0 / len(act)
    return ll
Example #14
Source File: utils.py From pCVR with Apache License 2.0 | 5 votes |
def my_logloss(act, pred):
    epsilon = 1e-15
    pred = K.maximum(epsilon, pred)
    pred = K.minimum(1 - epsilon, pred)
    ll = K.sum(act * K.log(pred) + (1 - act) * K.log(1 - pred))
    ll = ll * -1.0 / K.shape(act)[0]
    return ll
Example #15
Source File: optics.py From REDPy with GNU General Public License v3.0 | 5 votes |
def set_reach_dist(SetOfObjects, point_index, epsilon):
    """
    Sets reachability distance and ordering. This function is the primary workhorse of
    the OPTICS algorithm.

    SetofObjects: Instantiated and prepped instance of 'setOfObjects' class
    epsilon: Determines maximum object size that can be extracted. Smaller epsilons
        reduce run time. (float)
    """

    row = [SetOfObjects.data[point_index, :]]
    indices = np.argsort(row)
    distances = np.sort(row)

    if scipy.iterable(distances):
        unprocessed = indices[(SetOfObjects._processed[indices] < 1)[0].T]
        rdistances = scipy.maximum(distances[(SetOfObjects._processed[indices] < 1)[0].T],
                                   SetOfObjects._core_dist[point_index])
        SetOfObjects._reachability[unprocessed] = scipy.minimum(
            SetOfObjects._reachability[unprocessed], rdistances)

        if unprocessed.size > 0:
            return unprocessed[np.argsort(np.array(SetOfObjects._reachability[unprocessed]))[0]]
        else:
            return point_index
    else:
        return point_index
Example #16
Source File: metric.py From mljar-supervised with MIT License | 5 votes |
def logloss(y_true, y_predicted):
    epsilon = 1e-6
    y_predicted = sp.maximum(epsilon, y_predicted)
    y_predicted = sp.minimum(1 - epsilon, y_predicted)
    ll = log_loss(y_true, y_predicted)
    return ll
Example #17
Source File: libscores.py From automl-phase-2 with MIT License | 5 votes |
def log_loss(solution, prediction, task='binary.classification'):
    ''' Log loss for binary and multiclass. '''
    [sample_num, label_num] = solution.shape
    eps = 1e-15

    pred = np.copy(prediction)  # beware: changes in prediction occur through this
    sol = np.copy(solution)
    if (task == 'multiclass.classification') and (label_num > 1):
        # Make sure the lines add up to one for multi-class classification
        norma = np.sum(prediction, axis=1)
        for k in range(sample_num):
            pred[k, :] /= sp.maximum(norma[k], eps)
        # Make sure there is a single label active per line for multi-class classification
        sol = binarize_predictions(solution, task='multiclass.classification')
        # For the base prediction, this solution is ridiculous in the multi-label case

    # Bounding of predictions to avoid log(0), 1/0, ...
    pred = sp.minimum(1 - eps, sp.maximum(eps, pred))
    # Compute the log loss
    pos_class_log_loss = -mvmean(sol * np.log(pred), axis=0)
    if (task != 'multiclass.classification') or (label_num == 1):
        # The multi-label case is a bunch of binary problems.
        # The second class is the negative class for each column.
        neg_class_log_loss = -mvmean((1 - sol) * np.log(1 - pred), axis=0)
        log_loss = pos_class_log_loss + neg_class_log_loss
        # Each column is an independent problem, so we average.
        # The probabilities in one line do not add up to one.
        # log_loss = mvmean(log_loss)
        # print('binary {}'.format(log_loss))
        # In the multilabel case, the right thing is to AVERAGE, not sum.
        # We return all the scores so we can normalize correctly later on.
    else:
        # For the multiclass case the probabilities in one line add up to one.
        log_loss = pos_class_log_loss
        # We sum the contributions of the columns.
        log_loss = np.sum(log_loss)
        # print('multiclass {}'.format(log_loss))
    return log_loss
Example #18
Source File: py_lh_20Sep2014.py From Predict-click-through-rates-on-display-ads with MIT License | 5 votes |
def logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1 - epsilon, p)
    ll = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
    ll = ll * -1.0 / len(y)
    return ll


# B. Apply hash trick of the original csv row
# for simplicity, we treat both integer and categorical features as categorical
# INPUT:
#     csv_row: a csv dictionary, ex: {'Lable': '1', 'I1': '357', 'I2': '', ...}
#     D: the max index that we can hash to
# OUTPUT:
#     x: a list of indices that its value is 1
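
The trailing comment describes the hash-trick helper that follows this snippet in the original file; its body is not shown here. Purely as a hedged sketch of what such a helper might look like, reconstructed from the comment's INPUT/OUTPUT description (the function name and hashing details are guesses, not the project's actual code):

def hash_trick(csv_row, D):
    # Hypothetical reconstruction based on the comment above, not the original implementation.
    # Map every (feature name, value) pair to a bucket in [0, D); the feature vector is
    # implicitly 1 at each returned index and 0 elsewhere.
    x = []
    for key, value in csv_row.items():
        index = hash(key + '_' + value) % D
        x.append(index)
    return x

# Example usage with a toy row:
print(hash_trick({'Lable': '1', 'I1': '357', 'I2': ''}, D=2 ** 20))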
Example #19
Source File: np_utils.py From seq2seq-keyphrase with MIT License | 5 votes |
def binary_logloss(p, y):
    epsilon = 1e-15
    p = sp.maximum(epsilon, p)
    p = sp.minimum(1 - epsilon, p)
    res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p)))
    res *= -1.0 / len(y)
    return res