Python sklearn.metrics.cluster.normalized_mutual_info_score() Examples

The following are 30 code examples of sklearn.metrics.cluster.normalized_mutual_info_score(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.metrics.cluster, or try the search function.
Example #1
Source File: imagenet2cifar_DTC.py    From DTC with MIT License 6 votes vote down vote up
def init_prob_kmeans(model, eval_loader, args):
    """Initialise cluster centres by running PCA + k-means on model outputs.

    Every batch of ``eval_loader`` is passed through ``model`` (in eval mode)
    and the outputs are stored in a ``(len(dataset), 512)`` array at the rows
    given by the loader's ``idx``. The stored features are reduced to
    ``args.n_clusters`` PCA components, clustered with ``KMeans`` and scored
    against the collected labels.

    Args:
        model: network called as ``model(x)``; it is moved to the
            module-level ``device``.
        eval_loader: loader yielding ``(x, label, idx)`` batches; ``idx`` is
            used as the row index into the feature/label buffers.
        args: namespace providing ``n_clusters``.

    Returns:
        Tuple ``(acc, nmi, ari, centers, probs)`` where ``centers`` is
        ``kmeans.cluster_centers_`` and ``probs`` is the result of
        ``feat2prob`` on the PCA-reduced features and those centres.
    """
    torch.manual_seed(1)
    # NOTE(review): KMeans below is built without random_state, so the torch
    # seed presumably does not make the clustering deterministic - confirm.
    model = model.to(device)
    # cluster parameter initiate
    model.eval()
    num_samples = len(eval_loader.dataset)
    targets = np.zeros(num_samples)
    feats = np.zeros((num_samples, 512))
    # Pure inference: no_grad avoids building an autograd graph per batch.
    with torch.no_grad():
        for x, label, idx in eval_loader:
            feat = model(x.to(device))
            idx = idx.cpu().numpy()
            feats[idx, :] = feat.cpu().numpy()
            targets[idx] = label.cpu().numpy()
    pca = PCA(n_components=args.n_clusters)
    feats = pca.fit_transform(feats)
    kmeans = KMeans(n_clusters=args.n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(feats)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Init acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = feat2prob(torch.from_numpy(feats), torch.from_numpy(kmeans.cluster_centers_))
    return acc, nmi, ari, kmeans.cluster_centers_, probs
Example #2
Source File: omniglot_DTC.py    From DTC with MIT License 6 votes vote down vote up
def test(model, eval_loader, args):
    """Evaluate the model's cluster assignments over ``eval_loader``.

    For each batch the second output of ``model(x)`` is converted into a soft
    assignment with ``feat2prob(feat, model.center)``; the arg-max column is
    taken as the predicted cluster.

    Args:
        model: network returning a 2-tuple whose second element is the
            feature, and exposing a ``center`` attribute.
        eval_loader: loader yielding ``(x, _, label, idx)`` batches; ``idx``
            is used as the row index into the result buffers.
        args: namespace providing ``n_clusters``.

    Returns:
        Tuple ``(acc, nmi, ari, probs)`` where ``probs`` is a tensor built
        from the ``(len(dataset), n_clusters)`` assignment array.
    """
    model.eval()
    num_samples = len(eval_loader.dataset)
    targets = np.zeros(num_samples)
    y_pred = np.zeros(num_samples)
    probs = np.zeros((num_samples, args.n_clusters))
    # Pure inference: no_grad avoids building an autograd graph per batch.
    with torch.no_grad():
        for x, _, label, idx in eval_loader:
            _, feat = model(x.to(device))
            # Convert once and reuse for both the arg-max and the stored row.
            prob = feat2prob(feat, model.center).cpu().numpy()
            idx = idx.cpu().numpy()
            y_pred[idx] = prob.argmax(1)
            targets[idx] = label.cpu().numpy()
            probs[idx, :] = prob
    # evaluate clustering performance
    y_pred = y_pred.astype(np.int64)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = torch.from_numpy(probs)
    return acc, nmi, ari, probs
Example #3
Source File: omniglot_DTC.py    From DTC with MIT License 6 votes vote down vote up
def init_prob_kmeans(model, eval_loader, args):
    """Initialise cluster centres by running PCA + k-means on model features.

    The second output of ``model(x)`` is flattened per sample and stored in a
    ``(len(dataset), 1024)`` array at the rows given by the loader's ``idx``.
    The features are reduced to ``args.n_clusters`` PCA components and
    clustered with ``KMeans``; accuracy, NMI and ARI against the collected
    labels are printed but not returned.

    Args:
        model: network returning a 2-tuple whose second element is the
            feature; it is moved to the module-level ``device``.
        eval_loader: loader yielding ``(x, _, label, idx)`` batches.
        args: namespace providing ``n_clusters``.

    Returns:
        Tuple ``(centers, probs)`` where ``centers`` is
        ``kmeans.cluster_centers_`` and ``probs`` is the result of
        ``feat2prob`` on the PCA-reduced features and those centres.
    """
    torch.manual_seed(1)
    model = model.to(device)
    # cluster parameter initiate
    model.eval()
    num_samples = len(eval_loader.dataset)
    targets = np.zeros(num_samples)
    feats = np.zeros((num_samples, 1024))
    # Pure inference: no_grad avoids building an autograd graph per batch.
    with torch.no_grad():
        for x, _, label, idx in eval_loader:
            x = x.to(device)
            _, feat = model(x)
            feat = feat.view(x.size(0), -1)
            idx = idx.cpu().numpy()
            feats[idx, :] = feat.cpu().numpy()
            targets[idx] = label.cpu().numpy()
    # evaluate clustering performance
    pca = PCA(n_components=args.n_clusters)
    feats = pca.fit_transform(feats)
    kmeans = KMeans(n_clusters=args.n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(feats)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Init acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = feat2prob(torch.from_numpy(feats), torch.from_numpy(kmeans.cluster_centers_))
    return kmeans.cluster_centers_, probs
Example #4
Source File: cifar100_DTC.py    From DTC with MIT License 6 votes vote down vote up
def test(model, test_loader, args, epoch='test'):
    """Evaluate the model's cluster assignments over ``test_loader``.

    For each batch the second output of ``model(x)`` is converted into a soft
    assignment with ``feat2prob(feat, model.center)``; the arg-max column is
    the predicted cluster. Labels and predictions are collected in loader
    order, while ``probs`` rows are placed at the loader-provided ``idx``.

    Args:
        model: network returning a 2-tuple whose second element is the
            feature, and exposing a ``center`` attribute.
        test_loader: loader yielding ``(x, label, idx)`` batches.
        args: namespace providing ``n_clusters``.
        epoch: accepted for interface compatibility; not used in this body.

    Returns:
        Tuple ``(acc, nmi, ari, probs)`` where ``probs`` is a tensor built
        from the ``(len(dataset), n_clusters)`` assignment array.
    """
    model.eval()
    pred_chunks, target_chunks = [], []
    probs = np.zeros((len(test_loader.dataset), args.n_clusters))
    # Pure inference: no_grad avoids building an autograd graph per batch.
    with torch.no_grad():
        for x, label, idx in tqdm(test_loader):
            _, feat = model(x.to(device))
            prob = feat2prob(feat, model.center)
            _, pred = prob.max(1)
            target_chunks.append(label.cpu().numpy().ravel())
            pred_chunks.append(pred.cpu().numpy().ravel())
            probs[idx.cpu().numpy(), :] = prob.cpu().numpy()
    # Single concatenation instead of a quadratic np.append per batch; the
    # leading empty float array keeps the float64 dtype and the empty-loader
    # result that incremental np.append onto np.array([]) produced.
    targets = np.concatenate([np.zeros(0)] + target_chunks)
    preds = np.concatenate([np.zeros(0)] + pred_chunks)
    acc, nmi, ari = cluster_acc(targets.astype(int), preds.astype(int)), nmi_score(targets, preds), ari_score(targets, preds)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = torch.from_numpy(probs)
    return acc, nmi, ari, probs
Example #5
Source File: cifar10_DTC.py    From DTC with MIT License 6 votes vote down vote up
def test(model, test_loader, args):
    """Evaluate the model's cluster assignments over ``test_loader``.

    For each batch ``model(x)`` is converted into a soft assignment with
    ``feat2prob(feat, model.center)``; the arg-max column is the predicted
    cluster. Labels and predictions are collected in loader order, while
    ``probs`` rows are placed at the loader-provided ``idx``.

    Args:
        model: network called as ``model(x)`` and exposing a ``center``
            attribute.
        test_loader: loader yielding ``(x, label, idx)`` batches.
        args: namespace providing ``n_clusters``.

    Returns:
        Tuple ``(acc, nmi, ari, probs)`` where ``probs`` is a tensor built
        from the ``(len(dataset), n_clusters)`` assignment array.
    """
    model.eval()
    pred_chunks, target_chunks = [], []
    probs = np.zeros((len(test_loader.dataset), args.n_clusters))
    # Pure inference: no_grad avoids building an autograd graph per batch.
    with torch.no_grad():
        for x, label, idx in tqdm(test_loader):
            feat = model(x.to(device))
            prob = feat2prob(feat, model.center)
            _, pred = prob.max(1)
            target_chunks.append(label.cpu().numpy().ravel())
            pred_chunks.append(pred.cpu().numpy().ravel())
            probs[idx.cpu().numpy(), :] = prob.cpu().numpy()
    # Single concatenation instead of a quadratic np.append per batch; the
    # leading empty float array keeps the float64 dtype and the empty-loader
    # result that incremental np.append onto np.array([]) produced.
    targets = np.concatenate([np.zeros(0)] + target_chunks)
    preds = np.concatenate([np.zeros(0)] + pred_chunks)
    acc, nmi, ari = cluster_acc(targets.astype(int), preds.astype(int)), nmi_score(targets, preds), ari_score(targets, preds)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = torch.from_numpy(probs)
    return acc, nmi, ari, probs
Example #6
Source File: cifar10_DTC.py    From DTC with MIT License 6 votes vote down vote up
def init_prob_kmeans(model, eval_loader, args):
    """Initialise cluster centres by running PCA + k-means on model outputs.

    Every batch of ``eval_loader`` is passed through ``model`` (in eval mode)
    and the outputs are stored in a ``(len(dataset), 512)`` array at the rows
    given by the loader's ``idx``. The stored features are reduced to
    ``args.n_clusters`` PCA components, clustered with ``KMeans`` and scored
    against the collected labels.

    Args:
        model: network called as ``model(x)``; it is moved to the
            module-level ``device``.
        eval_loader: loader yielding ``(x, label, idx)`` batches.
        args: namespace providing ``n_clusters``.

    Returns:
        Tuple ``(acc, nmi, ari, centers, probs)`` where ``centers`` is
        ``kmeans.cluster_centers_`` and ``probs`` is the result of
        ``feat2prob`` on the PCA-reduced features and those centres.
    """
    torch.manual_seed(1)
    model = model.to(device)
    # cluster parameter initiate
    model.eval()
    num_samples = len(eval_loader.dataset)
    targets = np.zeros(num_samples)
    feats = np.zeros((num_samples, 512))
    # Pure inference: no_grad avoids building an autograd graph per batch.
    with torch.no_grad():
        for x, label, idx in eval_loader:
            feat = model(x.to(device))
            idx = idx.cpu().numpy()
            feats[idx, :] = feat.cpu().numpy()
            targets[idx] = label.cpu().numpy()
    # evaluate clustering performance
    pca = PCA(n_components=args.n_clusters)
    feats = pca.fit_transform(feats)
    kmeans = KMeans(n_clusters=args.n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(feats)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Init acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = feat2prob(torch.from_numpy(feats), torch.from_numpy(kmeans.cluster_centers_))
    return acc, nmi, ari, kmeans.cluster_centers_, probs
Example #7
Source File: omniglot_DTC_unknown.py    From DTC with MIT License 6 votes vote down vote up
def init_prob_kmeans(model, eval_loader, args):
    """Initialise cluster centres by running PCA + k-means on model features.

    The second output of ``model(x)`` is flattened per sample and stored in a
    ``(len(dataset), 1024)`` array at the rows given by the loader's ``idx``.
    The features are reduced to ``args.n_clusters`` PCA components and
    clustered with ``KMeans``; accuracy, NMI and ARI against the collected
    labels are printed but not returned.

    Args:
        model: network returning a 2-tuple whose second element is the
            feature; it is moved to the module-level ``device``.
        eval_loader: loader yielding ``(x, _, label, idx)`` batches.
        args: namespace providing ``n_clusters``.

    Returns:
        Tuple ``(centers, probs)`` where ``centers`` is
        ``kmeans.cluster_centers_`` and ``probs`` is the result of
        ``feat2prob`` on the PCA-reduced features and those centres.
    """
    torch.manual_seed(1)
    model = model.to(device)
    # cluster parameter initiate
    model.eval()
    num_samples = len(eval_loader.dataset)
    targets = np.zeros(num_samples)
    feats = np.zeros((num_samples, 1024))
    # Pure inference: no_grad avoids building an autograd graph per batch.
    with torch.no_grad():
        for x, _, label, idx in eval_loader:
            x = x.to(device)
            _, feat = model(x)
            feat = feat.view(x.size(0), -1)
            idx = idx.cpu().numpy()
            feats[idx, :] = feat.cpu().numpy()
            targets[idx] = label.cpu().numpy()
    # evaluate clustering performance
    pca = PCA(n_components=args.n_clusters)
    feats = pca.fit_transform(feats)
    kmeans = KMeans(n_clusters=args.n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(feats)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Init acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = feat2prob(torch.from_numpy(feats), torch.from_numpy(kmeans.cluster_centers_))
    return kmeans.cluster_centers_, probs
Example #8
Source File: omniglot_DTC_unknown.py    From DTC with MIT License 6 votes vote down vote up
def test(model, eval_loader, args):
    """Evaluate the model's cluster assignments over ``eval_loader``.

    For each batch the second output of ``model(x)`` is converted into a soft
    assignment with ``feat2prob(feat, model.center)``; the arg-max column is
    taken as the predicted cluster.

    Args:
        model: network returning a 2-tuple whose second element is the
            feature, and exposing a ``center`` attribute.
        eval_loader: loader yielding ``(x, _, label, idx)`` batches; ``idx``
            is used as the row index into the result buffers.
        args: namespace providing ``n_clusters``.

    Returns:
        Tuple ``(acc, nmi, ari, probs)`` where ``probs`` is a tensor built
        from the ``(len(dataset), n_clusters)`` assignment array.
    """
    model.eval()
    num_samples = len(eval_loader.dataset)
    targets = np.zeros(num_samples)
    y_pred = np.zeros(num_samples)
    probs = np.zeros((num_samples, args.n_clusters))
    # Pure inference: no_grad avoids building an autograd graph per batch.
    with torch.no_grad():
        for x, _, label, idx in eval_loader:
            _, feat = model(x.to(device))
            # Convert once and reuse for both the arg-max and the stored row.
            prob = feat2prob(feat, model.center).cpu().numpy()
            idx = idx.cpu().numpy()
            y_pred[idx] = prob.argmax(1)
            targets[idx] = label.cpu().numpy()
            probs[idx, :] = prob
    # evaluate clustering performance
    y_pred = y_pred.astype(np.int64)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = torch.from_numpy(probs)
    return acc, nmi, ari, probs
Example #9
Source File: svhn_DTC.py    From DTC with MIT License 6 votes vote down vote up
def test(model, test_loader, args):
    """Evaluate the model's cluster assignments over ``test_loader``.

    For each batch ``model(x)`` is converted into a soft assignment with
    ``feat2prob(feat, model.center)``; the arg-max column is the predicted
    cluster. Labels and predictions are collected in loader order, while
    ``probs`` rows are placed at the loader-provided ``idx``.

    Args:
        model: network called as ``model(x)`` and exposing a ``center``
            attribute.
        test_loader: loader yielding ``(x, label, idx)`` batches.
        args: namespace providing ``n_clusters``.

    Returns:
        Tuple ``(acc, nmi, ari, probs)`` where ``probs`` is a tensor built
        from the ``(len(dataset), n_clusters)`` assignment array.
    """
    model.eval()
    pred_chunks, target_chunks = [], []
    probs = np.zeros((len(test_loader.dataset), args.n_clusters))
    # Pure inference: no_grad avoids building an autograd graph per batch.
    with torch.no_grad():
        for x, label, idx in tqdm(test_loader):
            feat = model(x.to(device))
            prob = feat2prob(feat, model.center)
            _, pred = prob.max(1)
            target_chunks.append(label.cpu().numpy().ravel())
            pred_chunks.append(pred.cpu().numpy().ravel())
            probs[idx.cpu().numpy(), :] = prob.cpu().numpy()
    # Single concatenation instead of a quadratic np.append per batch; the
    # leading empty float array keeps the float64 dtype and the empty-loader
    # result that incremental np.append onto np.array([]) produced.
    targets = np.concatenate([np.zeros(0)] + target_chunks)
    preds = np.concatenate([np.zeros(0)] + pred_chunks)
    acc, nmi, ari = cluster_acc(targets.astype(int), preds.astype(int)), nmi_score(targets, preds), ari_score(targets, preds)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = torch.from_numpy(probs)
    return acc, nmi, ari, probs
Example #10
Source File: imagenet_DTC.py    From DTC with MIT License 6 votes vote down vote up
def test(model, test_loader, args, epoch=0):
    """Evaluate the model's cluster assignments over ``test_loader``.

    For each batch ``model(x)`` is converted into a soft assignment with
    ``feat2prob(output, model.center)``; the arg-max column is the predicted
    cluster. Labels and predictions are collected in loader order, while
    ``probs`` rows are placed at the loader-provided ``idx``.

    Args:
        model: network called as ``model(x)`` and exposing a ``center``
            attribute.
        test_loader: loader yielding ``(x, label, idx)`` batches.
        args: namespace providing ``n_clusters``.
        epoch: accepted for interface compatibility; not used in this body.

    Returns:
        Tuple ``(acc, nmi, ari, probs)`` where ``probs`` is a tensor built
        from the ``(len(dataset), n_clusters)`` assignment array.
    """
    model.eval()
    pred_chunks, target_chunks = [], []
    probs = np.zeros((len(test_loader.dataset), args.n_clusters))
    # Pure inference: no_grad avoids building an autograd graph per batch.
    with torch.no_grad():
        for x, label, idx in tqdm(test_loader):
            output = model(x.to(device))
            prob = feat2prob(output, model.center)
            _, pred = prob.max(1)
            target_chunks.append(label.cpu().numpy().ravel())
            pred_chunks.append(pred.cpu().numpy().ravel())
            probs[idx.cpu().numpy(), :] = prob.cpu().numpy()
    # Single concatenation instead of a quadratic np.append per batch; the
    # leading empty float array keeps the float64 dtype and the empty-loader
    # result that incremental np.append onto np.array([]) produced.
    targets = np.concatenate([np.zeros(0)] + target_chunks)
    preds = np.concatenate([np.zeros(0)] + pred_chunks)
    acc, nmi, ari = cluster_acc(targets.astype(int), preds.astype(int)), nmi_score(targets, preds), ari_score(targets, preds)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    return acc, nmi, ari, torch.from_numpy(probs)
Example #11
Source File: imagenet_DTC.py    From DTC with MIT License 6 votes vote down vote up
def init_prob_kmeans(model, eval_loader, args):
    """Initialise cluster centres by running PCA + k-means on model outputs.

    The output of ``model(x)`` is flattened per sample and stored in a
    ``(len(dataset), 512)`` array at the rows given by the loader's ``idx``.
    The stored features are reduced to ``args.n_clusters`` PCA components,
    clustered with ``KMeans`` and scored against the collected labels.

    Args:
        model: network called as ``model(x)``; it is moved to the
            module-level ``device``.
        eval_loader: loader yielding ``(x, label, idx)`` batches.
        args: namespace providing ``n_clusters``.

    Returns:
        Tuple ``(acc, nmi, ari, centers, probs)`` where ``centers`` is
        ``kmeans.cluster_centers_`` and ``probs`` is the result of
        ``feat2prob`` on the PCA-reduced features and those centres.
    """
    torch.manual_seed(1)
    model = model.to(device)
    # cluster parameter initiate
    model.eval()
    num_samples = len(eval_loader.dataset)
    targets = np.zeros(num_samples)
    feats = np.zeros((num_samples, 512))
    # Pure inference: no_grad avoids building an autograd graph per batch.
    with torch.no_grad():
        for x, label, idx in eval_loader:
            x = x.to(device)
            feat = model(x).view(x.size(0), -1)
            idx = idx.cpu().numpy()
            feats[idx, :] = feat.cpu().numpy()
            targets[idx] = label.cpu().numpy()
    # evaluate clustering performance
    pca = PCA(n_components=args.n_clusters)
    feats = pca.fit_transform(feats)
    kmeans = KMeans(n_clusters=args.n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(feats)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Init acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = feat2prob(torch.from_numpy(feats), torch.from_numpy(kmeans.cluster_centers_))
    return acc, nmi, ari, kmeans.cluster_centers_, probs
Example #12
Source File: imagenet2cifar_DTC.py    From DTC with MIT License 6 votes vote down vote up
def test(model, test_loader, args):
    """Evaluate the model's cluster assignments over ``test_loader``.

    For each batch ``model(x)`` is converted into a soft assignment with
    ``feat2prob(feat, model.center)``; the arg-max column is the predicted
    cluster. Labels and predictions are collected in loader order, while
    ``probs`` rows are placed at the loader-provided ``idx``.

    Args:
        model: network called as ``model(x)`` and exposing a ``center``
            attribute.
        test_loader: loader yielding ``(x, label, idx)`` batches.
        args: namespace providing ``n_clusters``.

    Returns:
        Tuple ``(acc, nmi, ari, probs)`` where ``probs`` is a tensor built
        from the ``(len(dataset), n_clusters)`` assignment array.
    """
    model.eval()
    pred_chunks, target_chunks = [], []
    probs = np.zeros((len(test_loader.dataset), args.n_clusters))
    # Pure inference: no_grad avoids building an autograd graph per batch.
    with torch.no_grad():
        for x, label, idx in tqdm(test_loader):
            feat = model(x.to(device))
            prob = feat2prob(feat, model.center)
            _, pred = prob.max(1)
            target_chunks.append(label.cpu().numpy().ravel())
            pred_chunks.append(pred.cpu().numpy().ravel())
            probs[idx.cpu().numpy(), :] = prob.cpu().numpy()
    # Single concatenation instead of a quadratic np.append per batch; the
    # leading empty float array keeps the float64 dtype and the empty-loader
    # result that incremental np.append onto np.array([]) produced.
    targets = np.concatenate([np.zeros(0)] + target_chunks)
    preds = np.concatenate([np.zeros(0)] + pred_chunks)
    acc, nmi, ari = cluster_acc(targets.astype(int), preds.astype(int)), nmi_score(targets, preds), ari_score(targets, preds)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = torch.from_numpy(probs)
    return acc, nmi, ari, probs
Example #13
Source File: nmi.py    From classification_metric_learning with Apache License 2.0 6 votes vote down vote up
def test_nmi_faiss(embeddings, labels):
    """Cluster ``embeddings`` with faiss k-means on GPU 0 and return the NMI.

    The number of clusters equals the number of distinct values in
    ``labels``. After training, each embedding is assigned to its nearest
    centroid by searching the index that was used for training.

    Args:
        embeddings: 2-D array of sample vectors (rows are samples).
        labels: ground-truth label per row of ``embeddings``.

    Returns:
        The normalized mutual information between ``labels`` and the
        nearest-centroid assignments.
    """
    # faiss works on C-contiguous float32 matrices; this is a no-op when the
    # caller already passes that layout.
    embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.device = 0

    unique_labels = np.unique(labels)
    d = embeddings.shape[1]
    kmeans = faiss.Clustering(d, unique_labels.size)
    kmeans.verbose = True
    kmeans.niter = 300
    kmeans.nredo = 10
    kmeans.seed = 0

    index = faiss.GpuIndexFlatL2(res, d, flat_config)

    kmeans.train(embeddings, index)

    _, pred_labels = index.search(embeddings, 1)

    # reshape(-1) keeps a 1-D result even for a single embedding, where
    # squeeze() would collapse it to a 0-d array.
    pred_labels = pred_labels.reshape(-1)

    nmi = normalized_mutual_info_score(labels, pred_labels)

    print("NMI: {}".format(nmi))
    return nmi
Example #14
Source File: evaluation.py    From SoftTriple with Apache License 2.0 6 votes vote down vote up
def evaluation(X, Y, Kset):
    """Compute clustering NMI and Recall@K for a set of embeddings.

    NMI is measured between ``Y`` and a k-means clustering of ``X`` into
    ``max(Y) + 1`` clusters. Recall@K uses the dot product ``X @ X.T`` as the
    similarity: a sample counts as a hit at K if any of its K most similar
    other samples carries the same label.

    Args:
        X: 2-D array of embeddings, one row per sample.
        Y: array of labels indexable by an integer index array
            (assumed 1-D, one label per row of ``X`` - TODO confirm).
        Kset: sequence of K values to evaluate.

    Returns:
        Tuple ``(nmi, recallK)`` where ``recallK[i]`` is the recall at
        ``Kset[i]``.
    """
    num = X.shape[0]
    classN = np.max(Y) + 1
    kmax = np.max(Kset)
    recallK = np.zeros(len(Kset))
    # compute NMI
    kmeans = KMeans(n_clusters=classN).fit(X)
    nmi = normalized_mutual_info_score(Y, kmeans.labels_, average_method='arithmetic')
    # compute Recall@K
    sim = X.dot(X.T)
    # Push self-similarity below every other entry so a sample never
    # retrieves itself; done in place instead of via two dense diag matrices.
    np.fill_diagonal(sim, np.min(sim) - 1.)
    indices = np.argsort(-sim, axis=1)[:, :kmax]
    # hits[j, r] is True when the r-th nearest neighbour of j shares its label.
    hits = Y[indices] == Y[:, None]
    for i, k in enumerate(Kset):
        recallK[i] = hits[:, :k].any(axis=1).sum() / num
    return nmi, recallK
Example #15
Source File: test_hierarchical.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_single_linkage_clustering():
    # Single linkage must recover the true partition exactly (NMI of 1) on
    # two emblematic datasets: interleaved moons and concentric circles.
    datasets = (
        make_moons(noise=0.05, random_state=42),
        make_circles(factor=0.5, noise=0.025, random_state=42),
    )
    for points, true_labels in datasets:
        model = AgglomerativeClustering(n_clusters=2, linkage='single')
        model.fit(points)
        score = normalized_mutual_info_score(model.labels_, true_labels)
        assert_almost_equal(score, 1)
Example #16
Source File: test_supervised.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_perfect_matches():
    # Label pairs that describe the same partition up to renaming; every
    # score must be exactly 1.0 on them.
    perfect_pairs = [
        ([], []),
        ([0], [1]),
        ([0, 0, 0], [0, 0, 0]),
        ([0, 1, 0], [42, 7, 42]),
        ([0., 1., 0.], [42., 7., 42.]),
        ([0., 1., 2.], [42., 7., 2.]),
        ([0, 1, 2], [42, 7, 2]),
    ]
    for score_func in score_funcs:
        for labels_a, labels_b in perfect_pairs:
            assert_equal(score_func(labels_a, labels_b), 1.0)

    # The mutual-information scores additionally take an averaging method as
    # third positional argument; the result must not depend on it here.
    score_funcs_with_changing_means = [
        normalized_mutual_info_score,
        adjusted_mutual_info_score,
    ]
    means = {"min", "geometric", "arithmetic", "max"}
    for score_func in score_funcs_with_changing_means:
        for mean in means:
            for labels_a, labels_b in perfect_pairs:
                assert score_func(labels_a, labels_b, mean) == 1.0
Example #17
Source File: validation.py    From topic-stability with Apache License 2.0 5 votes vote down vote up
def evaluate( self, partition, clustered_ids ):
		"""Score `partition` against the known classes of `clustered_ids`.

		Returns an empty dict when `has_class_info()` is false; otherwise a
		dict with the keys "external-nmi", "external-ami" and "external-ari".
		"""
		if not self.has_class_info():
			return {}
		# look up the class of every clustered item, stored as floats
		classes_subset = np.array( [self.class_map[doc_id] for doc_id in clustered_ids], dtype=float )
		return {
			"external-nmi": normalized_mutual_info_score( classes_subset, partition ),
			"external-ami": adjusted_mutual_info_score( classes_subset, partition ),
			"external-ari": adjusted_rand_score( classes_subset, partition ),
		}
Example #18
Source File: NMI.py    From Deep_Metric with Apache License 2.0 5 votes vote down vote up
def NMI(X, ground_truth, n_cluster=3):
    """Cluster ``X`` with k-means and return the NMI against ``ground_truth``.

    Args:
        X: iterable of items; each is converted with ``to_numpy`` and the
            results are stacked into one array.
        ground_truth: reference label per item of ``X``.
        n_cluster: number of k-means clusters.

    Returns:
        The normalized mutual information between ``ground_truth`` and the
        k-means labels.
    """
    # list to numpy
    X = np.array([to_numpy(x) for x in X])
    ground_truth = np.array(ground_truth)
    # ``n_jobs`` is no longer passed: KMeans deprecated it in scikit-learn
    # 0.23 and removed it in 1.0, where it raises TypeError. With a fixed
    # random_state the clustering does not depend on it.
    kmeans = KMeans(n_clusters=n_cluster, random_state=0).fit(X)

    print('K-means done')
    nmi = normalized_mutual_info_score(ground_truth, kmeans.labels_)
    return nmi
Example #19
Source File: metric.py    From L2C with MIT License 5 votes vote down vote up
def clusterscores(self):
        """Return NMI, ARI and AMI between the labels given by ``conf2label()``."""
        target, pred = self.conf2label()
        scorers = (
            ('NMI', normalized_mutual_info_score),
            ('ARI', adjusted_rand_score),
            ('AMI', adjusted_mutual_info_score),
        )
        return {name: scorer(target, pred) for name, scorer in scorers}
Example #20
Source File: nmi.py    From classification_metric_learning with Apache License 2.0 5 votes vote down vote up
def test_nmi(embeddings, labels, output_file):
    """Cluster ``embeddings`` with k-means and return the NMI against ``labels``.

    Args:
        embeddings: array of sample vectors passed to ``KMeans.fit``.
        labels: ground-truth label per sample; the number of distinct values
            is used as the number of clusters.
        output_file: accepted for interface compatibility; not used in this
            body.

    Returns:
        The normalized mutual information between the k-means labels and
        ``labels``.
    """
    unique_labels = np.unique(labels)
    # ``n_jobs`` is no longer passed: KMeans deprecated it in scikit-learn
    # 0.23 and removed it in 1.0, where it raises TypeError. With a fixed
    # random_state the clustering does not depend on it.
    kmeans = KMeans(n_clusters=unique_labels.size, random_state=0).fit(embeddings)

    nmi = normalized_mutual_info_score(kmeans.labels_, labels)

    print("NMI: {}".format(nmi))
    return nmi
Example #21
Source File: cluster.py    From interpret_bert with GNU General Public License v3.0 5 votes vote down vote up
def cluster(args):
  """Print, for every layer, the NMI of k-means clusters against span labels.

  Indices 0..3499 are shuffled once; the first 3000 form the train split and
  the remaining 500 the test split. For each layer the JSON-lines file
  ``args.feat_file`` is re-read, one feature vector is built per line from the
  layer's ``start_layer``/``end_layer`` entries, k-means is fitted on the
  train split and scored on the test split.
  """
  shuffled = np.random.permutation(3500)
  train_idx, test_idx = shuffled[0:3000], shuffled[3000:]

  num_layers = None
  layer_id = 0
  while True:
    # build the feature matrix and integer labels for the current layer
    X, y = [], []
    id2lab, lab2id = {}, {}
    with open(args.feat_file, 'r') as f:
      for line in f:
        info = json.loads(line.strip())
        span_start = np.array(info['start_layer'][layer_id], dtype=np.float32)
        span_end = np.array(info['end_layer'][layer_id], dtype=np.float32)
        label = info['label']
        if label not in lab2id:
          # first time this label is seen: give it the next free id
          new_id = len(lab2id)
          lab2id[label] = new_id
          id2lab[new_id] = label
        y.append(lab2id[label])
        features = (span_start, span_end, np.multiply(span_start, span_end), span_start-span_end)
        X.append(np.concatenate(features))
        if not num_layers:
          num_layers = len(info['end_layer'])

    train_X = [X[idi] for idi in train_idx]
    train_y = [y[idi] for idi in train_idx]
    test_X = [X[idi] for idi in test_idx]
    test_y = [y[idi] for idi in test_idx]

    kmeans = KMeans(n_clusters=len(lab2id), random_state=123).fit(train_X)
    pred_y = kmeans.predict(test_X)
    # the layer counter is advanced before printing, so layers print 1-based
    layer_id += 1
    print('layer %d => NMI = %.2f'%(layer_id, normalized_mutual_info_score(test_y, pred_y)))

    if layer_id == num_layers:
      break
Example #22
Source File: test_supervised.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_exactly_zero_info_score():
    # Check numerical stability when information is exactly zero: a constant
    # labelling carries no information about any other labelling.
    # The builtin ``int`` replaces the ``np.int`` alias, which was deprecated
    # in NumPy 1.20 and removed in 1.24; both denote the same type.
    for i in np.logspace(1, 4, 4).astype(int):
        labels_a, labels_b = (np.ones(i, dtype=int),
                              np.arange(i, dtype=int))
        assert_equal(normalized_mutual_info_score(labels_a, labels_b), 0.0)
        assert_equal(v_measure_score(labels_a, labels_b), 0.0)
        assert_equal(adjusted_mutual_info_score(labels_a, labels_b), 0.0)
Example #23
Source File: test_supervised.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_future_warning():
    # Calling either score with only the two label vectors must emit a
    # FutureWarning whose message contains "The behavior of ".
    expected_prefix = "The behavior of "
    for score_func in (normalized_mutual_info_score, adjusted_mutual_info_score):
        assert_warns_message(FutureWarning, expected_prefix, score_func,
                             [0, 0, 0], [0, 0, 0])
Example #24
Source File: eval-partition-accuracy.py    From topic-ensemble with Apache License 2.0 5 votes vote down vote up
def validate( measure, classes, clustering ):
	"""Score `clustering` against `classes` using the named measure.

	`measure` is one of "nmi", "ami" or "ari". Any other value is logged as
	an error and None is returned.
	"""
	scorers = (
		( "nmi", normalized_mutual_info_score ),
		( "ami", adjusted_mutual_info_score ),
		( "ari", adjusted_rand_score ),
	)
	for name, scorer in scorers:
		if measure == name:
			return scorer( classes, clustering )
	log.error("Unknown validation measure: %s" % measure )
	return None

# -------------------------------------------------------------- 
Example #25
Source File: Metrics.py    From GMVAE with MIT License 5 votes vote down vote up
def nmi(self, Y_pred, Y):
    """Return the arithmetic-mean NMI between ``Y_pred`` and ``Y``.

    Both inputs are converted to arrays and must hold the same number of
    elements.
    """
    predicted = np.array(Y_pred)
    reference = np.array(Y)
    assert predicted.size == reference.size
    return normalized_mutual_info_score(predicted, reference, average_method='arithmetic')
Example #26
Source File: Metrics.py    From GMVAE with MIT License 5 votes vote down vote up
def nmi(self, Y_pred, Y):
    """Return the arithmetic-mean NMI between ``Y_pred`` and ``Y``.

    Both inputs are converted to arrays and must hold the same number of
    elements.
    """
    predicted = np.array(Y_pred)
    reference = np.array(Y)
    assert predicted.size == reference.size
    return normalized_mutual_info_score(predicted, reference, average_method='arithmetic')
Example #27
Source File: unsupervised_evaluation.py    From scikit-feature with GNU General Public License v2.0 5 votes vote down vote up
def evaluation(X_selected, n_clusters, y):
    """
    This function calculates NMI and ACC of clustering results

    Input
    -----
    X_selected: {numpy array}, shape (n_samples, n_selected_features)
            input data on the selected features
    n_clusters: {int}
            number of clusters
    y: {numpy array}, shape (n_samples,)
            true labels

    Output
    ------
    nmi: {float}
        Normalized Mutual Information
    acc: {float}
        Accuracy
    """
    # ``precompute_distances`` and ``n_jobs`` are no longer passed: KMeans
    # deprecated them in scikit-learn 0.23 and removed them in 1.0, where
    # they raise TypeError. The remaining arguments are unchanged.
    k_means = KMeans(n_clusters=n_clusters, init='k-means++', n_init=10, max_iter=300,
                     tol=0.0001, verbose=0, random_state=None, copy_x=True)

    k_means.fit(X_selected)
    y_predict = k_means.labels_

    # calculate NMI
    nmi = normalized_mutual_info_score(y, y_predict)

    # calculate ACC after mapping predicted cluster ids onto the true labels
    y_permuted_predict = best_map(y, y_predict)
    acc = accuracy_score(y, y_permuted_predict)

    return nmi, acc
Example #28
Source File: test_supervised.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_v_measure_and_mutual_information(seed=36):
    # Check relation between v_measure, entropy and mutual information
    # The builtin ``int`` replaces the ``np.int`` alias, which was deprecated
    # in NumPy 1.20 and removed in 1.24; both denote the same type.
    for i in np.logspace(1, 4, 4).astype(int):
        random_state = np.random.RandomState(seed)
        labels_a, labels_b = (random_state.randint(0, 10, i),
                              random_state.randint(0, 10, i))
        assert_almost_equal(v_measure_score(labels_a, labels_b),
                            2.0 * mutual_info_score(labels_a, labels_b) /
                            (entropy(labels_a) + entropy(labels_b)), 0)
        # V-measure must equal NMI with arithmetic-mean normalisation.
        avg = 'arithmetic'
        assert_almost_equal(v_measure_score(labels_a, labels_b),
                            normalized_mutual_info_score(labels_a, labels_b,
                                                         average_method=avg)
                            )
Example #29
Source File: test_supervised.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_exactly_zero_info_score():
    # Check numerical stability when information is exactly zero: a constant
    # labelling carries no information about any other labelling.
    # The builtin ``int`` replaces the ``np.int`` alias, which was deprecated
    # in NumPy 1.20 and removed in 1.24; both denote the same type.
    for i in np.logspace(1, 4, 4).astype(int):
        labels_a, labels_b = (np.ones(i, dtype=int),
                              np.arange(i, dtype=int))
        assert_equal(normalized_mutual_info_score(labels_a, labels_b), 0.0)
        assert_equal(v_measure_score(labels_a, labels_b), 0.0)
        assert_equal(adjusted_mutual_info_score(labels_a, labels_b), 0.0)
        # The result must also be zero for every averaging method.
        for method in ["min", "geometric", "arithmetic", "max"]:
            assert adjusted_mutual_info_score(labels_a, labels_b,
                                              method) == 0.0
            assert normalized_mutual_info_score(labels_a, labels_b,
                                                method) == 0.0
Example #30
Source File: faster_mix_k_means_pytorch.py    From DTC with MIT License 4 votes vote down vote up
def main():
    """Demo of ``K_Means.fit_mix`` on synthetic blobs, with a scatter plot.

    Four 2-D blobs are generated; samples with label > 1 are treated as
    labelled and the rest as unlabelled. The mixed fit is scored with NMI
    against the true labels (printed) and the assignments and the four
    cluster centres are plotted.
    """
    import matplotlib.pyplot as plt
    from matplotlib import style
    style.use('ggplot')
    from sklearn.datasets import make_blobs
    from sklearn.metrics.cluster import normalized_mutual_info_score as nmi_score
    X, y = make_blobs(n_samples=500,
                      n_features=2,
                      centers=4,
                      cluster_std=1,
                      center_box=(-10.0, 10.0),
                      shuffle=True,
                      random_state=1)  # For reproducibility

    cuda = torch.cuda.is_available()
    device = torch.device("cuda" if cuda else "cpu")

    y = np.array(y)
    labelled, unlabelled = y > 1, y < 2
    l_targets = y[labelled]
    l_feats = X[labelled]
    u_feats = X[unlabelled]
    # Labelled samples first, then unlabelled; y is reordered the same way so
    # it lines up with the concatenated features.
    cat_feats = np.concatenate((l_feats, u_feats))
    y = np.concatenate((y[labelled], y[unlabelled]))
    cat_feats = torch.from_numpy(cat_feats).to(device)
    u_feats = torch.from_numpy(u_feats).to(device)
    l_feats = torch.from_numpy(l_feats).to(device)
    l_targets = torch.from_numpy(l_targets).to(device)

    km = K_Means(k=4, init='k-means++', random_state=1, n_jobs=None, pairwise_batch_size=10)

    km.fit_mix(u_feats, l_feats, l_targets)
    # Move results to host NumPy arrays for scoring and plotting.
    X = cat_feats.cpu().numpy()
    centers = km.cluster_centers_.cpu().numpy()
    pred = km.labels_.cpu().numpy()
    print('nmi', nmi_score(pred, y))

    # Plotting starts here
    colors = 10*["g", "c", "b", "k", "r", "m"]

    # One scatter call for all points instead of one call per point.
    point_colors = [colors[int(label)] for label in pred]
    plt.scatter(X[:, 0], X[:, 1], c=point_colors, s=10)
    plt.scatter(centers[:4, 0], centers[:4, 1], s=130, marker="*", color='r')
    plt.show()