Python sklearn.metrics.f1_score() Examples
The following are 30 code examples of sklearn.metrics.f1_score(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.metrics, or try the search function.
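Before the project examples, here is a minimal, self-contained sketch of the basic call; the toy labels below are invented purely for illustration and are not taken from any of the projects listed.

from sklearn.metrics import f1_score

# toy binary labels, purely for illustration
y_true = [0, 1, 1, 0, 1, 1]
y_pred = [0, 1, 0, 0, 1, 1]

print(f1_score(y_true, y_pred))                   # binary F1 (default average='binary')
print(f1_score(y_true, y_pred, average='macro'))  # unweighted mean of per-class F1
print(f1_score(y_true, y_pred, average='micro'))  # F1 from pooled TP/FP/FN counts
print(f1_score(y_true, y_pred, average=None))     # per-class F1 scores as an array

The average parameter is the main thing the examples below vary: 'macro' and 'micro' for multi-class problems, 'weighted' to account for class imbalance, and None to get per-class scores.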
Example #1
Source File: multi_class_classification.py From edge2vec with BSD 3-Clause "New" or "Revised" License | 11 votes |
def multi_class_classification(data_X, data_Y):
    '''
    calculate multi-class classification and return related evaluation metrics
    '''
    svc = svm.SVC(C=1, kernel='linear')
    # X_train, X_test, y_train, y_test = train_test_split(data_X, data_Y, test_size=0.4, random_state=0)
    clf = svc.fit(data_X, data_Y)  # svm
    # array = svc.coef_
    # print array
    predicted = cross_val_predict(clf, data_X, data_Y, cv=2)
    print "accuracy", metrics.accuracy_score(data_Y, predicted)
    print "f1 score macro", metrics.f1_score(data_Y, predicted, average='macro')
    print "f1 score micro", metrics.f1_score(data_Y, predicted, average='micro')
    print "precision score", metrics.precision_score(data_Y, predicted, average='macro')
    print "recall score", metrics.recall_score(data_Y, predicted, average='macro')
    print "hamming_loss", metrics.hamming_loss(data_Y, predicted)
    print "classification_report", metrics.classification_report(data_Y, predicted)
    print "jaccard_similarity_score", metrics.jaccard_similarity_score(data_Y, predicted)
    # print "log_loss", metrics.log_loss(data_Y, predicted)
    print "zero_one_loss", metrics.zero_one_loss(data_Y, predicted)
    # print "AUC&ROC", metrics.roc_auc_score(data_Y, predicted)
    # print "matthews_corrcoef", metrics.matthews_corrcoef(data_Y, predicted)
Example #2
Source File: metrics_util.py From DeepLearningSmells with Apache License 2.0 | 8 votes |
def get_all_metrics_(eval_labels, pred_labels):
    fpr, tpr, thresholds_keras = roc_curve(eval_labels, pred_labels)
    auc_ = auc(fpr, tpr)
    print("auc_keras:" + str(auc_))

    precision = precision_score(eval_labels, pred_labels)
    print('Precision score: {0:0.2f}'.format(precision))

    recall = recall_score(eval_labels, pred_labels)
    print('Recall score: {0:0.2f}'.format(recall))

    f1 = f1_score(eval_labels, pred_labels)
    print('F1 score: {0:0.2f}'.format(f1))

    average_precision = average_precision_score(eval_labels, pred_labels)
    print('Average precision-recall score: {0:0.2f}'.format(average_precision))

    return auc_, precision, recall, f1, average_precision, fpr, tpr
Example #3
Source File: link_prediction.py From edge2vec with BSD 3-Clause "New" or "Revised" License | 7 votes |
def evaluation_analysis(true_label, predicted):
    '''
    return all metrics results
    '''
    print "accuracy", metrics.accuracy_score(true_label, predicted)
    print "f1 score macro", metrics.f1_score(true_label, predicted, average='macro')
    print "f1 score micro", metrics.f1_score(true_label, predicted, average='micro')
    print "precision score", metrics.precision_score(true_label, predicted, average='macro')
    print "recall score", metrics.recall_score(true_label, predicted, average='macro')
    print "hamming_loss", metrics.hamming_loss(true_label, predicted)
    print "classification_report", metrics.classification_report(true_label, predicted)
    print "jaccard_similarity_score", metrics.jaccard_similarity_score(true_label, predicted)
    print "log_loss", metrics.log_loss(true_label, predicted)
    print "zero_one_loss", metrics.zero_one_loss(true_label, predicted)
    print "AUC&ROC", metrics.roc_auc_score(true_label, predicted)
    print "matthews_corrcoef", metrics.matthews_corrcoef(true_label, predicted)
Example #4
Source File: test.py From malss with MIT License | 6 votes |
def test_classification_2classes_big():
    X, y = make_classification(n_samples=200000,
                               n_features=20,
                               n_classes=2,
                               n_clusters_per_class=1,
                               random_state=0)
    X = pd.DataFrame(X)
    y = pd.Series(y)

    cls = MALSS('classification').fit(X, y, 'test_classification_2classes_big')
    cls.generate_module_sample()

    from sklearn.metrics import f1_score
    pred = cls.predict(X)
    print(f1_score(y, pred, average=None))

    assert len(cls.algorithms) == 1
    assert cls.algorithms[0].best_score is not None
Example #5
Source File: conv_featuremaps_visualization.py From MCF-3D-CNN with MIT License | 6 votes |
def accuracy(y_true, y_pred):
    # compute the confusion matrix
    y = np.zeros(len(y_true))
    y_ = np.zeros(len(y_true))
    for i in range(len(y_true)):
        y[i] = np.argmax(y_true[i, :])
        y_[i] = np.argmax(y_pred[i, :])
    cnf_mat = confusion_matrix(y, y_)

    # Acc = 1.0*(cnf_mat[1][1]+cnf_mat[0][0])/len(y_true)
    # Sens = 1.0*cnf_mat[1][1]/(cnf_mat[1][1]+cnf_mat[1][0])
    # Spec = 1.0*cnf_mat[0][0]/(cnf_mat[0][0]+cnf_mat[0][1])

    # # plot the ROC curve
    # fpr, tpr, thresholds = roc_curve(y_true[:,0], y_pred[:,0])
    # Auc = auc(fpr, tpr)

    # compute multi-class evaluation metrics
    Sens = recall_score(y, y_, average='macro')
    Prec = precision_score(y, y_, average='macro')
    F1 = f1_score(y, y_, average='weighted')
    Support = precision_recall_fscore_support(y, y_, beta=0.5, average=None)
    return Sens, Prec, F1, cnf_mat
Example #6
Source File: test.py From malss with MIT License | 6 votes |
def test_classification_multiclass_small():
    X, y = make_classification(n_samples=1000,
                               n_features=10,
                               n_classes=3,
                               n_clusters_per_class=1,
                               random_state=0)
    X = pd.DataFrame(X)
    y = pd.Series(y)

    cls = MALSS('classification').fit(X, y, 'test_classification_multiclass_small')
    cls.generate_module_sample()

    from sklearn.metrics import f1_score
    pred = cls.predict(X)
    print(f1_score(y, pred, average=None))

    assert len(cls.algorithms) == 5
    assert cls.algorithms[0].best_score is not None
Example #7
Source File: test.py From malss with MIT License | 6 votes |
def test_classification_2classes_small_jp():
    X, y = make_classification(n_samples=1000,
                               n_features=10,
                               n_classes=2,
                               n_clusters_per_class=1,
                               random_state=0)
    X = pd.DataFrame(X)
    y = pd.Series(y)

    cls = MALSS('classification', lang='jp').fit(X, y, 'test_classification_2classes_small_jp')
    cls.generate_module_sample()

    from sklearn.metrics import f1_score
    pred = cls.predict(X)
    print(f1_score(y, pred, average=None))

    assert len(cls.algorithms) == 5
    assert cls.algorithms[0].best_score is not None
Example #8
Source File: test.py From malss with MIT License | 6 votes |
def test_classification_2classes_small():
    X, y = make_classification(n_samples=1000,
                               n_features=10,
                               n_classes=2,
                               n_clusters_per_class=1,
                               random_state=0)
    X = pd.DataFrame(X)
    y = pd.Series(y)

    cls = MALSS('classification').fit(X, y, 'test_classification_2classes_small')
    cls.generate_module_sample()

    from sklearn.metrics import f1_score
    pred = cls.predict(X)
    print(f1_score(y, pred, average=None))

    assert len(cls.algorithms) == 5
    assert cls.algorithms[0].best_score is not None
Example #9
Source File: utils.py From Attention-Gated-Networks with MIT License | 6 votes |
def classification_scores(gts, preds, labels):
    accuracy = metrics.accuracy_score(gts, preds)
    class_accuracies = []
    for lab in labels:  # TODO Fix
        class_accuracies.append(metrics.accuracy_score(gts[gts == lab], preds[gts == lab]))
    class_accuracies = np.array(class_accuracies)

    f1_micro = metrics.f1_score(gts, preds, average='micro')
    precision_micro = metrics.precision_score(gts, preds, average='micro')
    recall_micro = metrics.recall_score(gts, preds, average='micro')
    f1_macro = metrics.f1_score(gts, preds, average='macro')
    precision_macro = metrics.precision_score(gts, preds, average='macro')
    recall_macro = metrics.recall_score(gts, preds, average='macro')

    # class wise score
    f1s = metrics.f1_score(gts, preds, average=None)
    precisions = metrics.precision_score(gts, preds, average=None)
    recalls = metrics.recall_score(gts, preds, average=None)

    confusion = metrics.confusion_matrix(gts, preds, labels=labels)

    # TODO confusion matrix, recall, precision
    return accuracy, f1_micro, precision_micro, recall_micro, f1_macro, precision_macro, recall_macro, confusion, class_accuracies, f1s, precisions, recalls
Example #10
Source File: test_oqmrc.py From BERT with Apache License 2.0 | 6 votes |
def eval_fn(result):
    i = 0
    total_accuracy = 0
    label, label_id = [], []
    while True:
        try:
            eval_result = sess.run(result)
            total_accuracy += eval_result["accuracy"]
            label_id.extend(eval_result["label_ids"])
            label.extend(eval_result["pred_label"])
            i += 1
        except tf.errors.OutOfRangeError:
            print("End of dataset")
            break
    f1 = f1_score(label_id, label, average="macro")
    accuracy = accuracy_score(label_id, label)
    print("test accuracy accuracy {}".format(total_accuracy/i, accuracy))
    return total_accuracy / i
Example #11
Source File: test_wsdm.py From BERT with Apache License 2.0 | 6 votes |
def eval_fn(result):
    i = 0
    total_accuracy = 0
    label, label_id = [], []
    while True:
        try:
            eval_result = sess.run(result)
            total_accuracy += eval_result["accuracy"]
            label_id.extend(eval_result["label_ids"])
            label.extend(eval_result["pred_label"])
            i += 1
        except tf.errors.OutOfRangeError:
            print("End of dataset")
            break
    f1 = f1_score(label_id, label, average="macro")
    accuracy = accuracy_score(label_id, label)
    print("test accuracy accuracy {} {} f1 {}".format(total_accuracy/i, accuracy, f1))
    return total_accuracy / i, f1
Example #12
Source File: test_oqmrc_final.py From BERT with Apache License 2.0 | 6 votes |
def eval_fn(result):
    i = 0
    total_accuracy = 0
    label, label_id = [], []
    while True:
        try:
            eval_result = sess.run(result)
            total_accuracy += eval_result["accuracy"]
            label_id.extend(eval_result["label_ids"])
            label.extend(eval_result["pred_label"])
            i += 1
        except tf.errors.OutOfRangeError:
            print("End of dataset")
            break
    f1 = f1_score(label_id, label, average="macro")
    accuracy = accuracy_score(label_id, label)
    print("test accuracy accuracy {} {}, f1 {}".format(total_accuracy/i, accuracy, f1))
    return total_accuracy / i, f1
Example #13
Source File: eval_oqmrc.py From BERT with Apache License 2.0 | 6 votes |
def eval_fn(result):
    i = 0
    total_accuracy = 0
    total_loss = 0.0
    label, label_id = [], []
    while True:
        try:
            eval_result = sess.run(result)
            total_accuracy += eval_result["accuracy"]
            label_id.extend(eval_result["label_ids"])
            label.extend(eval_result["pred_label"])
            total_loss += eval_result["loss"]
            i += 1
        except tf.errors.OutOfRangeError:
            print("End of dataset")
            break
    f1 = f1_score(label_id, label, average="macro")
    accuracy = accuracy_score(label_id, label)
    print("test accuracy {} accuracy {} loss {}, f1 {}".format(total_accuracy/i, accuracy, total_loss/i, f1))
    return total_accuracy / i
Example #14
Source File: eval_oqmrc_test.py From BERT with Apache License 2.0 | 6 votes |
def eval_fn(result):
    i = 0
    total_accuracy = 0
    total_loss = 0.0
    pred_prob = []
    label, label_id = [], []
    while True:
        try:
            eval_result = sess.run(result)
            total_accuracy += eval_result["accuracy"]
            label_id.extend(eval_result["label_ids"])
            label.extend(eval_result["pred_label"])
            total_loss += eval_result["loss"]
            pred_prob.extend(eval_result["pred_prob"])
            i += 1
        except tf.errors.OutOfRangeError:
            print("End of dataset")
            break
    f1 = f1_score(label_id, label, average="macro")
    accuracy = accuracy_score(label_id, label)
    print("test accuracy {} accuracy {} loss {} f1 {}".format(total_accuracy/i, accuracy, total_loss/i, f1))
    return pred_prob
Example #15
Source File: eval_wsdm_interaction_test.py From BERT with Apache License 2.0 | 6 votes |
def eval_fn(result):
    i = 0
    total_accuracy = 0
    total_loss = 0.0
    pred_prob = []
    label, label_id = [], []
    while True:
        try:
            eval_result = sess.run(result)
            total_accuracy += eval_result["accuracy"]
            label_id.extend(eval_result["label_ids"])
            label.extend(eval_result["pred_label"])
            total_loss += eval_result["loss"]
            pred_prob.extend(eval_result["pred_prob"])
            i += 1
        except tf.errors.OutOfRangeError:
            print("End of dataset")
            break
    f1 = f1_score(label_id, label, average="macro")
    accuracy = accuracy_score(label_id, label)
    print("test accuracy {} accuracy {} loss {} f1 {}".format(total_accuracy/i, accuracy, total_loss/i, f1))
    return pred_prob
Example #16
Source File: test_wsdm_interaction.py From BERT with Apache License 2.0 | 6 votes |
def eval_fn(result):
    i = 0
    total_accuracy = 0
    label, label_id = [], []
    while True:
        try:
            eval_result = sess.run(result)
            total_accuracy += eval_result["accuracy"]
            label_id.extend(eval_result["label_ids"])
            label.extend(eval_result["pred_label"])
            i += 1
        except tf.errors.OutOfRangeError:
            print("End of dataset")
            break
    f1 = f1_score(label_id, label, average="macro")
    accuracy = accuracy_score(label_id, label)
    print("test accuracy accuracy {} {} f1 {}".format(total_accuracy/i, accuracy, f1))
    return total_accuracy / i, f1
Example #17
Source File: GCN.py From nettack with MIT License | 6 votes |
def eval_class(ids_to_eval, model, z_obs):
    """
    Evaluate the model's classification performance.

    Parameters
    ----------
    ids_to_eval: np.array
        The indices of the nodes whose predictions will be evaluated.
    model: GCN
        The model to evaluate.
    z_obs: np.array
        The labels of the nodes in ids_to_eval

    Returns
    -------
    [f1_micro, f1_macro] scores
    """
    test_pred = model.predictions.eval(session=model.session,
                                       feed_dict={model.node_ids: ids_to_eval}).argmax(1)
    test_real = z_obs[ids_to_eval]

    return f1_score(test_real, test_pred, average='micro'), f1_score(test_real, test_pred, average='macro')
Example #18
Source File: test.py From malss with MIT License | 6 votes |
def test_ndarray():
    data = pd.read_csv('http://faculty.marshall.usc.edu/gareth-james/ISL/Heart.csv',
                       index_col=0, na_values=[''])
    y = data['AHD']
    del data['AHD']

    cls = MALSS('classification').fit(np.array(data), np.array(y), 'test_ndarray')
    cls.generate_module_sample()

    from sklearn.metrics import f1_score
    pred = cls.predict(np.array(data))
    print(f1_score(y, pred, average=None))

    assert len(cls.algorithms) == 5
    assert cls.algorithms[0].best_score is not None
Example #19
Source File: test.py From malss with MIT License | 6 votes |
def test_classification_2classes_medium():
    X, y = make_classification(n_samples=100000,
                               n_features=10,
                               n_classes=2,
                               n_clusters_per_class=1,
                               random_state=0)
    X = pd.DataFrame(X)
    y = pd.Series(y)

    cls = MALSS('classification').fit(X, y, 'test_classification_2classes_medium')

    from sklearn.metrics import f1_score
    pred = cls.predict(X)
    print(f1_score(y, pred, average=None))

    assert len(cls.algorithms) == 4
    assert cls.algorithms[0].best_score is not None
Example #20
Source File: textpro.py From comparable-text-miner with Apache License 2.0 | 6 votes |
def evaluate(trueValues, predicted, decimals, note):
    print note
    label = 1
    avg = 'weighted'
    a = accuracy_score(trueValues, predicted)
    p = precision_score(trueValues, predicted, pos_label=label, average=avg)
    r = recall_score(trueValues, predicted, pos_label=label, average=avg)
    avg_f1 = f1_score(trueValues, predicted, pos_label=label, average=avg)
    fclasses = f1_score(trueValues, predicted, average=None)
    f1c1 = fclasses[0]; f1c2 = fclasses[1]
    fw = (f1c1 + f1c2)/2.0

    print 'accuracy:\t', str(round(a, decimals))
    print 'precision:\t', str(round(p, decimals))
    print 'recall:\t', str(round(r, decimals))
    print 'avg f1:\t', str(round(avg_f1, decimals))
    print 'c1 f1:\t', str(round(f1c1, decimals))
    print 'c2 f1:\t', str(round(f1c2, decimals))
    print 'avg(c1,c2):\t', str(round(fw, decimals))
    print '------------'

###################################################################################
# split a parallel or comparable corpus into two parts
Example #21
Source File: classifier.py From Document-Classifier-LSTM with MIT License | 6 votes |
def f1_score(y_true, y_pred):
    """
    Compute the micro f(b) score with b=1.
    """
    y_true = tf.cast(y_true, "float32")
    y_pred = tf.cast(tf.round(y_pred), "float32")  # implicit 0.5 threshold via tf.round
    y_correct = y_true * y_pred

    sum_true = tf.reduce_sum(y_true, axis=1)
    sum_pred = tf.reduce_sum(y_pred, axis=1)
    sum_correct = tf.reduce_sum(y_correct, axis=1)

    precision = sum_correct / sum_pred
    recall = sum_correct / sum_true
    f_score = 2 * precision * recall / (precision + recall)
    f_score = tf.where(tf.is_nan(f_score), tf.zeros_like(f_score), f_score)

    return tf.reduce_mean(f_score)
Example #22
Source File: classifier.py From Document-Classifier-LSTM with MIT License | 6 votes |
def load_model(stamp):
    """
    """
    json_file = open(stamp+'.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    model = model_from_json(loaded_model_json,
                            {'AttentionWithContext': AttentionWithContext})

    model.load_weights(stamp+'.h5')
    print("Loaded model from disk")

    model.summary()

    adam = Adam(lr=0.001)
    model.compile(loss='binary_crossentropy',
                  optimizer=adam,
                  metrics=[f1_score])

    return model
Example #23
Source File: hatt_classifier.py From Document-Classifier-LSTM with MIT License | 6 votes |
def f1_score(y_true, y_pred):
    """
    Compute the micro f(b) score with b=1.
    """
    y_true = tf.cast(y_true, "float32")
    y_pred = tf.cast(tf.round(y_pred), "float32")  # implicit 0.5 threshold via tf.round
    y_correct = y_true * y_pred

    sum_true = tf.reduce_sum(y_true, axis=1)
    sum_pred = tf.reduce_sum(y_pred, axis=1)
    sum_correct = tf.reduce_sum(y_correct, axis=1)

    precision = sum_correct / sum_pred
    recall = sum_correct / sum_true
    f_score = 2 * precision * recall / (precision + recall)
    f_score = tf.where(tf.is_nan(f_score), tf.zeros_like(f_score), f_score)

    return tf.reduce_mean(f_score)
Example #24
Source File: tf_hisan.py From Projects with MIT License | 6 votes |
def score(self, data, labels, batch_size=64):
    '''
    return the micro and macro f-score of predicted labels on given data

    parameters:
      - data: numpy array
        3d numpy array (doc x sentence x word ids) of input data
      - labels: numpy array
        1d numpy array of labels for given data
      - batch size: int (default: 64)
        batch size to use during inference

    outputs:
        tuple of floats (micro,macro) representing micro and macro f-score
        of predicted labels on given data
    '''
    y_pred = self.predict(data, batch_size)
    micro = f1_score(labels, y_pred, average='micro')
    macro = f1_score(labels, y_pred, average='macro')
    return micro, macro
Example #25
Source File: tf_han.py From Projects with MIT License | 6 votes |
def score(self, data, labels, batch_size=64):
    '''
    return the micro and macro f-score of predicted labels on given data

    parameters:
      - data: numpy array
        3d numpy array (doc x sentence x word ids) of input data
      - labels: numpy array
        1d numpy array of labels for given data
      - batch size: int (default: 64)
        batch size to use during inference

    outputs:
        tuple of floats (micro,macro) representing micro and macro f-score
        of predicted labels on given data
    '''
    y_pred = self.predict(data, batch_size)
    micro = f1_score(labels, y_pred, average='micro')
    macro = f1_score(labels, y_pred, average='macro')
    return micro, macro
Example #26
Source File: metrics_util.py From DeepLearningSmells with Apache License 2.0 | 6 votes |
def get_all_metrics(model, eval_data, eval_labels, pred_labels):
    fpr, tpr, thresholds_keras = roc_curve(eval_labels, pred_labels)
    auc_ = auc(fpr, tpr)
    print("auc_keras:" + str(auc_))

    score = model.evaluate(eval_data, eval_labels, verbose=0)
    print("Test accuracy: " + str(score[1]))

    precision = precision_score(eval_labels, pred_labels)
    print('Precision score: {0:0.2f}'.format(precision))

    recall = recall_score(eval_labels, pred_labels)
    print('Recall score: {0:0.2f}'.format(recall))

    f1 = f1_score(eval_labels, pred_labels)
    print('F1 score: {0:0.2f}'.format(f1))

    average_precision = average_precision_score(eval_labels, pred_labels)
    print('Average precision-recall score: {0:0.2f}'.format(average_precision))

    return auc_, score[1], precision, recall, f1, average_precision, fpr, tpr
Example #27
Source File: tf_cnn.py From Projects with MIT License | 6 votes |
def score(self, data, labels, batch_size=64):
    '''
    return the micro and macro f-score of predicted labels on given data

    parameters:
      - data: numpy array
        2d numpy array (doc x word ids) of input data
      - labels: numpy array
        1d numpy array of labels for given data
      - batch size: int (default: 64)
        batch size to use during inference

    outputs:
        tuple of floats (micro,macro) representing micro and macro f-score
        of predicted labels on given data
    '''
    y_pred = self.predict(data, batch_size)
    micro = f1_score(labels, y_pred, average='micro')
    macro = f1_score(labels, y_pred, average='macro')
    return micro, macro
Example #28
Source File: tf_san.py From Projects with MIT License | 6 votes |
def score(self, data, labels, batch_size=64):
    '''
    return the micro and macro f-score of predicted labels on given data

    parameters:
      - data: numpy array
        2d numpy array (doc x word ids) of input data
      - labels: numpy array
        1d numpy array of labels for given data
      - batch size: int (default: 64)
        batch size to use during inference

    outputs:
        tuple of floats (micro,macro) representing micro and macro f-score
        of predicted labels on given data
    '''
    y_pred = self.predict(data, batch_size)
    micro = f1_score(labels, y_pred, average='micro')
    macro = f1_score(labels, y_pred, average='macro')
    return micro, macro
Example #29
Source File: pcnn_model.py From PCNN with Apache License 2.0 | 6 votes |
def run_evaluate(self, test):
    """Evaluates performance on test set

    Args:
        test: dataset that yields tuple of (sentences, relation tags)

    Returns:
        metrics: (dict) metrics["acc"] = 98.4, ...

    """
    y_true, y_pred = [], []
    for data in minibatches(test, self.config.batch_size):
        word_batch, pos1_batch, pos2_batch, pos_batch, y_batch = data
        relations_pred = self.predict_batch(word_batch, pos1_batch, pos2_batch, pos_batch)
        assert len(relations_pred) == len(y_batch)
        y_true += y_batch
        y_pred += relations_pred.tolist()

    acc = accuracy_score(y_true, y_pred)
    p = precision_score(y_true, y_pred, average='macro')
    r = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')

    return {"acc": acc, "p": p, "r": r, "f1": f1}
Example #30
Source File: train_sampling_unsupervised.py From dgl with Apache License 2.0 | 6 votes |
def compute_acc(emb, labels, train_nids, val_nids, test_nids):
    """
    Compute the accuracy of prediction given the labels.
    """
    emb = emb.cpu().numpy()
    train_nids = train_nids.cpu().numpy()
    train_labels = labels[train_nids].cpu().numpy()
    val_nids = val_nids.cpu().numpy()
    val_labels = labels[val_nids].cpu().numpy()
    test_nids = test_nids.cpu().numpy()
    test_labels = labels[test_nids].cpu().numpy()

    emb = (emb - emb.mean(0, keepdims=True)) / emb.std(0, keepdims=True)

    lr = lm.LogisticRegression(multi_class='multinomial', max_iter=10000)
    lr.fit(emb[train_nids], labels[train_nids])

    pred = lr.predict(emb)
    f1_micro_eval = skm.f1_score(labels[val_nids], pred[val_nids], average='micro')
    f1_micro_test = skm.f1_score(labels[test_nids], pred[test_nids], average='micro')
    f1_macro_eval = skm.f1_score(labels[val_nids], pred[val_nids], average='macro')
    f1_macro_test = skm.f1_score(labels[test_nids], pred[test_nids], average='macro')
    return f1_micro_eval, f1_micro_test