Python Examples of sklearn.metrics.cluster.adjusted_rand_score

Source File: test_supervised.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_non_consecutive_labels():
    """Check that label ids with gaps do not change the clustering scores."""
    # Both pairs describe the same partitions; only the integer ids differ
    # (a gap in the true labels first, then a gap in the predicted labels).
    for labels_true, labels_pred in (
            ([0, 0, 0, 2, 2, 2], [0, 1, 0, 1, 2, 2]),
            ([0, 0, 0, 1, 1, 1], [0, 4, 0, 4, 2, 2])):
        homogeneity, completeness, v_measure = (
            homogeneity_completeness_v_measure(labels_true, labels_pred))
        assert_almost_equal(homogeneity, 0.67, 2)
        assert_almost_equal(completeness, 0.42, 2)
        assert_almost_equal(v_measure, 0.52, 2)

    # The adjusted Rand index must give the same value with and without gaps.
    ari_scores = [
        adjusted_rand_score([0, 0, 0, 1, 1, 1], labels_pred)
        for labels_pred in ([0, 1, 0, 1, 2, 2], [0, 4, 0, 4, 2, 2])
    ]
    for score in ari_scores:
        assert_almost_equal(score, 0.24, 2)

Source File: test_gaussian_mixture.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_gaussian_mixture_predict_predict_proba():
    """Check predict against predict_proba for every covariance type."""
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    not_fitted_msg = ("This GaussianMixture instance is not fitted "
                      "yet. Call 'fit' with appropriate arguments "
                      "before using this method.")
    for covar_type in COVARIANCE_TYPE:
        X, Y = rand_data.X[covar_type], rand_data.Y
        gmm = GaussianMixture(
            n_components=rand_data.n_components,
            random_state=rng,
            weights_init=rand_data.weights,
            means_init=rand_data.means,
            precisions_init=rand_data.precisions[covar_type],
            covariance_type=covar_type)

        # Calling predict() before fit() has to raise NotFittedError.
        assert_raise_message(NotFittedError, not_fitted_msg, gmm.predict, X)

        gmm.fit(X)
        hard_labels = gmm.predict(X)
        # The most probable component must agree with the hard assignment.
        argmax_labels = gmm.predict_proba(X).argmax(axis=1)
        assert_array_equal(hard_labels, argmax_labels)
        assert_greater(adjusted_rand_score(Y, hard_labels), .95)

Source File: test_supervised.py From twitter-stock-recommendation with MIT License

6 votes

def test_non_consicutive_labels():
    """Check that label ids with gaps do not change the clustering scores."""
    # Both pairs describe the same partitions; only the integer ids differ
    # (a gap in the true labels first, then a gap in the predicted labels).
    label_pairs = (
        ([0, 0, 0, 2, 2, 2], [0, 1, 0, 1, 2, 2]),
        ([0, 0, 0, 1, 1, 1], [0, 4, 0, 4, 2, 2]),
    )
    for labels_true, labels_pred in label_pairs:
        homogeneity, completeness, v_measure = (
            homogeneity_completeness_v_measure(labels_true, labels_pred))
        assert_almost_equal(homogeneity, 0.67, 2)
        assert_almost_equal(completeness, 0.42, 2)
        assert_almost_equal(v_measure, 0.52, 2)

    # The adjusted Rand index must give the same value with and without gaps.
    ari_scores = [
        adjusted_rand_score([0, 0, 0, 1, 1, 1], labels_pred)
        for labels_pred in ([0, 1, 0, 1, 2, 2], [0, 4, 0, 4, 2, 2])
    ]
    for score in ari_scores:
        assert_almost_equal(score, 0.24, 2)

Source File: test_gaussian_mixture.py From twitter-stock-recommendation with MIT License

6 votes

def test_gaussian_mixture_predict_predict_proba():
    """Check predict against predict_proba for every covariance type."""
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    not_fitted_msg = ("This GaussianMixture instance is not fitted "
                      "yet. Call 'fit' with appropriate arguments "
                      "before using this method.")
    for covar_type in COVARIANCE_TYPE:
        X, Y = rand_data.X[covar_type], rand_data.Y
        mixture = GaussianMixture(
            n_components=rand_data.n_components,
            random_state=rng,
            weights_init=rand_data.weights,
            means_init=rand_data.means,
            precisions_init=rand_data.precisions[covar_type],
            covariance_type=covar_type)

        # Calling predict() before fit() has to raise NotFittedError.
        assert_raise_message(NotFittedError, not_fitted_msg,
                             mixture.predict, X)

        mixture.fit(X)
        hard_labels = mixture.predict(X)
        # The most probable component must agree with the hard assignment.
        argmax_labels = mixture.predict_proba(X).argmax(axis=1)
        assert_array_equal(hard_labels, argmax_labels)
        assert_greater(adjusted_rand_score(Y, hard_labels), .95)

Source File: test_score_objects.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_scoring_is_not_metric():
    """Check that check_scoring rejects raw metric functions as `scoring`."""
    # Each case pairs an estimator class with a bare metric function; the
    # expected ValueError message has to match 'make_scorer'.
    cases = (
        (LogisticRegression, f1_score),
        (LogisticRegression, roc_auc_score),
        (Ridge, r2_score),
        (KMeans, cluster_module.adjusted_rand_score),
    )
    for estimator_cls, metric in cases:
        assert_raises_regexp(ValueError, 'make_scorer', check_scoring,
                             estimator_cls(), metric)

Source File: test_supervised.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_adjustment_for_chance():
    """Check that adjusted Rand scores stay close to zero on random labels."""
    n_samples, n_runs = 100, 10
    n_clusters_range = [2, 10, 50, 90]

    scores = uniform_labelings_scores(
        adjusted_rand_score, n_samples, n_clusters_range, n_runs)

    # Largest absolute score along axis 1, compared to two decimals.
    worst_case = np.abs(scores).max(axis=1)
    expected = [0.02, 0.03, 0.03, 0.02]
    assert_array_almost_equal(worst_case, expected, 2)

Source File: test_bayesian_mixture.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_bayesian_mixture_predict_predict_proba():
    """Check predict against predict_proba for every prior/covariance pair.

    Mirrors test_gaussian_mixture_predict_predict_proba().
    """
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    not_fitted_msg = ("This BayesianGaussianMixture instance"
                      " is not fitted yet. Call 'fit' with "
                      "appropriate arguments before using "
                      "this method.")
    for prior_type in PRIOR_TYPE:
        for covar_type in COVARIANCE_TYPE:
            X, Y = rand_data.X[covar_type], rand_data.Y
            model = BayesianGaussianMixture(
                n_components=rand_data.n_components,
                random_state=rng,
                weight_concentration_prior_type=prior_type,
                covariance_type=covar_type)

            # Calling predict() before fit() has to raise NotFittedError.
            assert_raise_message(NotFittedError, not_fitted_msg,
                                 model.predict, X)

            model.fit(X)
            hard_labels = model.predict(X)
            # The most probable component must agree with the hard labels.
            argmax_labels = model.predict_proba(X).argmax(axis=1)
            assert_array_equal(hard_labels, argmax_labels)
            assert_greater_equal(adjusted_rand_score(Y, hard_labels), .95)

Source File: eval-partition-accuracy.py From topic-ensemble with Apache License 2.0

5 votes

def validate( measure, classes, clustering ):
	"""Score `clustering` against `classes` with the requested measure.

	`measure` selects the score: "nmi" (normalized mutual information),
	"ami" (adjusted mutual information) or "ari" (adjusted Rand index).
	Any other value is logged as an error and yields None.
	"""
	if measure == "nmi":
		return normalized_mutual_info_score( classes, clustering )
	if measure == "ami":
		return adjusted_mutual_info_score( classes, clustering )
	if measure == "ari":
		return adjusted_rand_score( classes, clustering )
	# None of the known measure names matched.
	log.error("Unknown validation measure: %s" % measure )
	return None

# --------------------------------------------------------------

Source File: metric.py From L2C with MIT License

5 votes

def clusterscores(self):
        """Return the NMI, ARI and AMI scores as a dict.

        The target and predicted labels come from ``self.conf2label()``.
        """
        labels_true, labels_pred = self.conf2label()
        # Values are evaluated in the order written: NMI, then ARI, then AMI.
        return {
            'NMI': normalized_mutual_info_score(labels_true, labels_pred),
            'ARI': adjusted_rand_score(labels_true, labels_pred),
            'AMI': adjusted_mutual_info_score(labels_true, labels_pred),
        }

Source File: test_score_objects.py From twitter-stock-recommendation with MIT License

5 votes

def test_scoring_is_not_metric():
    """Check that check_scoring rejects raw metric functions as `scoring`."""
    # Each case pairs an estimator class with a bare metric function; the
    # expected ValueError message has to match 'make_scorer'.
    rejected = [
        (LogisticRegression, f1_score),
        (LogisticRegression, roc_auc_score),
        (Ridge, r2_score),
        (KMeans, cluster_module.adjusted_rand_score),
    ]
    for make_estimator, metric in rejected:
        assert_raises_regexp(ValueError, 'make_scorer', check_scoring,
                             make_estimator(), metric)

Source File: test_supervised.py From twitter-stock-recommendation with MIT License

5 votes

def test_adjustment_for_chance():
    """Check that adjusted Rand scores stay close to zero on random labels."""
    n_samples, n_runs = 100, 10
    n_clusters_range = [2, 10, 50, 90]

    scores = uniform_labelings_scores(
        adjusted_rand_score, n_samples, n_clusters_range, n_runs)

    # Largest absolute score along axis 1, compared to two decimals.
    largest_abs = np.abs(scores).max(axis=1)
    assert_array_almost_equal(largest_abs, [0.02, 0.03, 0.03, 0.02], 2)

Source File: test_gmm.py From twitter-stock-recommendation with MIT License

5 votes

def assert_fit_predict_correct(model, X):
    """Assert that fit(X).predict(X) and fit_predict(X) label X identically.

    `model` itself is fitted by this call; fit_predict runs on a deep copy
    that is taken before any fitting happens.
    """
    twin = copy.deepcopy(model)

    labels_two_step = model.fit(X).predict(X)
    labels_one_step = twin.fit_predict(X)

    # An adjusted Rand score of exactly 1.0 is required.
    assert adjusted_rand_score(labels_two_step, labels_one_step) == 1.0


# This function tests the deprecated old GMM class

Source File: validation.py From topic-stability with Apache License 2.0

5 votes

def evaluate( self, partition, clustered_ids ):
		"""Score `partition` against the known classes of `clustered_ids`.

		Returns an empty dict when ``self.has_class_info()`` is false.
		Otherwise returns a dict with the keys "external-nmi",
		"external-ami" and "external-ari".
		"""
		# Without class information there is nothing to compare against.
		if not self.has_class_info():
			return {}
		# Look up the class of every clustered id, keeping the given order.
		classes_subset = np.zeros( len(clustered_ids) )
		for row, item_id in enumerate(clustered_ids):
			classes_subset[row] = self.class_map[item_id]
		return {
			"external-nmi": normalized_mutual_info_score( classes_subset, partition ),
			"external-ami": adjusted_mutual_info_score( classes_subset, partition ),
			"external-ari": adjusted_rand_score( classes_subset, partition ),
		}

Source File: main.py From DCCM with GNU General Public License v3.0

4 votes

def test(loader, model, epoch, tb_logger):
	"""Evaluate `model` on `loader`, then log and return NMI, ACC and ARI.

	The predicted label of a sample is the argmax along dimension 1 of the
	first value returned by `forward`. Settings are read from the `args`
	object defined outside this function. The three scores are written to
	the 'global_logger' logger and to `tb_logger` at step
	``epoch * len(loader)``.

	Returns the tuple ``(nmi, acc, ari)``.
	"""
	logger = logging.getLogger('global_logger')

	model.eval()

	# Forward every batch and collect true / predicted labels
	all_targets = []
	all_predictions = []
	for input_tensor, target in loader:
		input_var = torch.autograd.Variable(input_tensor.cuda())
		with torch.no_grad():
			if not args.split:
				vec, _, _ = forward(model, input_var, args.layers, args.c_layer)
			else:
				# Run the batch through the model in args.split slices
				bs = args.large_bs // args.split
				pieces = []
				for kk in range(args.split):
					start = kk * bs
					piece, _, _ = forward(model, input_var[start:start + bs],
						  args.layers, args.c_layer)
					pieces.append(piece)
				vec = torch.cat(pieces, dim=0)

		_, indices = torch.max(vec, 1)
		all_targets.extend(target.data.numpy())
		all_predictions.extend(indices.data.cpu().numpy())

	# Computing Evaluations
	gnd_labels = np.array(all_targets)
	pred_labels = np.array(all_predictions)

	nmi = normalized_mutual_info_score(gnd_labels, pred_labels)
	acc = clustering_acc(gnd_labels, pred_labels)
	ari = adjusted_rand_score(gnd_labels, pred_labels)

	# Logging
	total_epochs = args.epochs
	logger.info('Epoch: [{0}/{1}]\t ARI against ground truth label: {2:.3f}'.format(epoch, total_epochs, ari))
	logger.info('Epoch: [{0}/{1}]\t NMI against ground truth label: {2:.3f}'.format(epoch, total_epochs, nmi))
	logger.info('Epoch: [{0}/{1}]\t ACC against ground truth label: {2:.3f}'.format(epoch, total_epochs, acc))
	step = epoch * len(loader)
	tb_logger.add_scalar('ARI', ari, step)
	tb_logger.add_scalar('NMI', nmi, step)
	tb_logger.add_scalar('ACC', acc, step)

	return nmi, acc, ari

Source File: ARI.py From altanalyze with Apache License 2.0

4 votes

def ari(truelabel,predlabel):
    """Compute the adjusted Rand index between two label files.

    Both arguments are paths to tab-separated text files whose first column
    is a sample name and whose second column is an integer label.

    Samples found only in `predlabel` are reported with
    "Sample missing true label" and skipped. Samples found only in
    `truelabel` are reported with "Sample missing predicted label" and
    skipped. The number of samples that were compared is printed.

    Returns the value of ``adjusted_rand_score`` for the matched samples.
    """
    # Local import keeps the block self-contained. io.open behaves the same
    # on Python 2 and 3 and translates newlines like the old 'rU' mode did.
    import io

    lab = {}
    # The with-blocks close both files even when a line fails to parse.
    with io.open(truelabel, 'r') as handle:
        for line in handle:
            t = line.rstrip().split('\t')
            lab[t[0]] = [int(t[1]),]
    with io.open(predlabel, 'r') as handle:
        for line in handle:
            t = line.rstrip().split('\t')
            try:
                lab[t[0]].append(int(t[1]))
            except (KeyError, IndexError, ValueError):
                # KeyError: the sample has no true label. Lines with a
                # missing or non-integer label end up here as well.
                print("Sample missing true label")

    truelab = []
    predlab = []
    for key in lab:
        labels = lab[key]
        # Index 0 is the true label, index 1 the first predicted label.
        if len(labels) < 2:
            print("Sample missing predicted label")
            continue
        predlab.append(labels[1])
        truelab.append(labels[0])

    print(len(truelab))
    return adjusted_rand_score(np.array(truelab), np.array(predlab))

#truelabel="/Volumes/Pass/Archive_Zeisel/SVMOutputs/groups.round1SVC_Results_max.txt"
#predlabel="/Volumes/Pass/Singlecellbest/Zeisel_upd/SVMOutputs/round1SVC_Results.txt"
#predlabel="/Volumes/Pass/Singlecellbest/Zeisel_upd/SVMOutputs/round1SVC_Results.txt"
#truelabel="/Volumes/Pass/Singlecellbest/Pollen_upd/SVMOutputs/groups.round1SVC_Results_max.txt"
#predlabel="/Volumes/Pass/Singlecellbest/Pollen_upd/SVMOutputs/round1SVC_Results.txt"
#predlabel="/Volumes/Pass/Data/Pollen_cluster.txt"
#predlabel="/Users/meenakshi/Usoskin_Sc3_test.txt"
#truelabel="/Volumes/Pass/Singlecellbest/Usoskin_upd/SVMOutputs/groups.round1SVC_Results_max.txt"
#predlabel="/Users/meenakshi/Downloads/k-11-Usoskin.txt"
#predlabel="/Users/meenakshi/Documents/ZeiselCluster.txt"
#truelabel="/Users/meenakshi/Desktop/groups.Pollen.txt"
#predlabel="/Users/meenakshi/Downloads/SC3_pollen.txt"
#predlabel="/Users/meenakshi/groups-filtered.txt"

Python sklearn.metrics.cluster.adjusted_rand_score() Examples