Python sklearn.metrics.adjusted_rand_score() Examples
The following are 30 code examples of sklearn.metrics.adjusted_rand_score(), collected from open-source projects. Each example notes the original project and source file it comes from. You may also want to check out all available functions and classes of the sklearn.metrics module, or try the search function.
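Before the examples, a minimal sketch of the function itself: adjusted_rand_score(labels_true, labels_pred) compares two flat cluster assignments and returns a chance-corrected similarity, 1.0 for identical partitions and values near 0.0 (possibly slightly negative) for random ones. The score is symmetric, so argument order does not matter. The labels below are made up for illustration:

from sklearn.metrics import adjusted_rand_score

labels_true = [0, 0, 1, 1, 2, 2]
labels_pred = [1, 1, 0, 0, 2, 2]  # same grouping, different cluster IDs

# Identical partitions score 1.0 regardless of how clusters are numbered
print(adjusted_rand_score(labels_true, labels_pred))  # 1.0

# A partial match scores between 0 and 1
print(adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 2]))  # ~0.57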
Example #1
Source File: test_spectral.py From twitter-stock-recommendation with MIT License

def test_discretize(seed=8):
    # Test the discretize using a noise assignment matrix
    random_state = np.random.RandomState(seed)
    for n_samples in [50, 100, 150, 500]:
        for n_class in range(2, 10):
            # random class labels
            y_true = random_state.randint(0, n_class + 1, n_samples)
            y_true = np.array(y_true, float)
            # noise class assignment matrix
            y_indicator = sparse.coo_matrix((np.ones(n_samples),
                                             (np.arange(n_samples), y_true)),
                                            shape=(n_samples, n_class + 1))
            y_true_noisy = (y_indicator.toarray()
                            + 0.1 * random_state.randn(n_samples, n_class + 1))
            y_pred = discretize(y_true_noisy, random_state)
            assert_greater(adjusted_rand_score(y_true, y_pred), 0.8)
Example #2
Source File: test_spectral.py From Mastering-Elasticsearch-7.0 with MIT License

def test_discretize(n_samples):
    # Test the discretize using a noise assignment matrix
    random_state = np.random.RandomState(seed=8)
    for n_class in range(2, 10):
        # random class labels
        y_true = random_state.randint(0, n_class + 1, n_samples)
        y_true = np.array(y_true, float)
        # noise class assignment matrix
        y_indicator = sparse.coo_matrix((np.ones(n_samples),
                                         (np.arange(n_samples), y_true)),
                                        shape=(n_samples, n_class + 1))
        y_true_noisy = (y_indicator.toarray()
                        + 0.1 * random_state.randn(n_samples, n_class + 1))
        y_pred = discretize(y_true_noisy, random_state)
        assert adjusted_rand_score(y_true, y_pred) > 0.8
Example #3
Source File: test_spectral.py From Mastering-Elasticsearch-7.0 with MIT License

def test_spectral_clustering(eigen_solver, assign_labels):
    S = np.array([[1.0, 1.0, 1.0, 0.2, 0.0, 0.0, 0.0],
                  [1.0, 1.0, 1.0, 0.2, 0.0, 0.0, 0.0],
                  [1.0, 1.0, 1.0, 0.2, 0.0, 0.0, 0.0],
                  [0.2, 0.2, 0.2, 1.0, 1.0, 1.0, 1.0],
                  [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0],
                  [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0],
                  [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]])

    for mat in (S, sparse.csr_matrix(S)):
        model = SpectralClustering(random_state=0, n_clusters=2,
                                   affinity='precomputed',
                                   eigen_solver=eigen_solver,
                                   assign_labels=assign_labels).fit(mat)
        labels = model.labels_
        if labels[0] == 0:
            labels = 1 - labels

        assert adjusted_rand_score(labels, [1, 1, 1, 0, 0, 0, 0]) == 1

        model_copy = pickle.loads(pickle.dumps(model))
        assert model_copy.n_clusters == model.n_clusters
        assert model_copy.eigen_solver == model.eigen_solver
        assert_array_equal(model_copy.labels_, model.labels_)
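A side note on the test above: the label flip guarded by `if labels[0] == 0` is not needed for the ARI assertion itself, since adjusted_rand_score is invariant to permutations of the cluster IDs. A quick check:

from sklearn.metrics import adjusted_rand_score

labels = [0, 0, 0, 1, 1, 1, 1]
flipped = [1 - l for l in labels]
# Both orientations agree perfectly with the reference partition
assert adjusted_rand_score(labels, [1, 1, 1, 0, 0, 0, 0]) == 1.0
assert adjusted_rand_score(flipped, [1, 1, 1, 0, 0, 0, 0]) == 1.0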
Example #4
Source File: DCCComputation.py From DCC with MIT License

def benchmarking(gtlabels, labels):
    # TODO: Please note that the AMI definition used in the paper differs from
    # that in the sklearn python package. Please modify it accordingly.
    numeval = len(gtlabels)
    ari = metrics.adjusted_rand_score(gtlabels[:numeval], labels[:numeval])
    ami = metrics.adjusted_mutual_info_score(gtlabels[:numeval], labels[:numeval])
    nmi = metrics.normalized_mutual_info_score(gtlabels[:numeval], labels[:numeval])
    acc = clustering_accuracy(gtlabels[:numeval], labels[:numeval])
    return ari, ami, nmi, acc
Example #5
Source File: utils.py From aaltd18 with GNU General Public License v3.0

def calculate_metrics(y_true, y_pred, duration, clustering=False):
    """
    Return a data frame that contains the precision, accuracy, recall and the duration.
    For clustering it applies the adjusted rand index.
    """
    if clustering == False:
        res = pd.DataFrame(data=np.zeros((1, 5), dtype=float), index=[0],
                           columns=['precision', 'accuracy', 'error', 'recall', 'duration'])
        res['precision'] = precision_score(y_true, y_pred, average='macro')
        res['accuracy'] = accuracy_score(y_true, y_pred)
        res['recall'] = recall_score(y_true, y_pred, average='macro')
        res['duration'] = duration
        res['error'] = 1 - res['accuracy']
        return res
    else:
        res = pd.DataFrame(data=np.zeros((1, 2), dtype=float), index=[0],
                           columns=['ari', 'duration'])
        res['duration'] = duration
        # ARI is symmetric, so the argument order does not affect the score
        res['ari'] = adjusted_rand_score(y_pred, y_true)
        return res
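As a quick illustration of the clustering branch, a hypothetical call (the labels and duration value are made up for the example):

y_true = [0, 0, 1, 1, 2, 2]
y_pred = [0, 0, 1, 1, 1, 2]
res = calculate_metrics(y_true, y_pred, duration=1.23, clustering=True)
print(res)  # one-row frame with 'ari' and 'duration' columns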
Example #6
Source File: run_segm_slic_model_graphcut.py From pyImSegm with BSD 3-Clause "New" or "Revised" License

def compare_segms_metric_ars(dict_segm_a, dict_segm_b, suffix=''):
    """ compute ARS for each pair of segmentation

    :param {str: ndarray} dict_segm_a:
    :param {str: ndarray} dict_segm_b:
    :param str suffix:
    :return DF:
    """
    rows = []
    for n in dict_segm_a:
        if n not in dict_segm_b:
            logging.warning('particular key "%s" is missing in dictionary', n)
            continue
        y_a = dict_segm_a[n].ravel()
        y_b = dict_segm_b[n].ravel()
        rows.append({'image': n,
                     'ARS' + suffix: metrics.adjusted_rand_score(y_a, y_b)})
    # build the frame in one pass (DataFrame.append was removed in pandas 2.0)
    df_ars = pd.DataFrame(rows)
    df_ars.set_index(['image'], inplace=True)
    return df_ars
Example #7
Source File: imagenet2cifar_DTC.py From DTC with MIT License

def init_prob_kmeans(model, eval_loader, args):
    torch.manual_seed(1)
    model = model.to(device)
    # cluster parameter initiate
    model.eval()
    targets = np.zeros(len(eval_loader.dataset))
    feats = np.zeros((len(eval_loader.dataset), 512))
    for _, (x, label, idx) in enumerate(eval_loader):
        x = x.to(device)
        feat = model(x)
        idx = idx.data.cpu().numpy()
        feats[idx, :] = feat.data.cpu().numpy()
        targets[idx] = label.data.cpu().numpy()
    pca = PCA(n_components=args.n_clusters)
    feats = pca.fit_transform(feats)
    kmeans = KMeans(n_clusters=args.n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(feats)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Init acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = feat2prob(torch.from_numpy(feats), torch.from_numpy(kmeans.cluster_centers_))
    return acc, nmi, ari, kmeans.cluster_centers_, probs
Example #8
Source File: imagenet2cifar_DTC.py From DTC with MIT License

def test(model, test_loader, args):
    model.eval()
    acc_record = AverageMeter()
    preds = np.array([])
    targets = np.array([])
    feats = np.zeros((len(test_loader.dataset), args.n_clusters))
    probs = np.zeros((len(test_loader.dataset), args.n_clusters))
    for batch_idx, (x, label, idx) in enumerate(tqdm(test_loader)):
        x, label = x.to(device), label.to(device)
        feat = model(x)
        prob = feat2prob(feat, model.center)
        _, pred = prob.max(1)
        targets = np.append(targets, label.cpu().numpy())
        preds = np.append(preds, pred.cpu().numpy())
        idx = idx.data.cpu().numpy()
        feats[idx, :] = feat.cpu().detach().numpy()
        probs[idx, :] = prob.cpu().detach().numpy()
    acc, nmi, ari = cluster_acc(targets.astype(int), preds.astype(int)), nmi_score(targets, preds), ari_score(targets, preds)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = torch.from_numpy(probs)
    return acc, nmi, ari, probs
Example #9
Source File: imagenet_DTC.py From DTC with MIT License

def init_prob_kmeans(model, eval_loader, args):
    torch.manual_seed(1)
    model = model.to(device)
    # cluster parameter initiate
    model.eval()
    targets = np.zeros(len(eval_loader.dataset))
    feats = np.zeros((len(eval_loader.dataset), 512))
    for _, (x, label, idx) in enumerate(eval_loader):
        x = x.to(device)
        feat = model(x)
        feat = feat.view(x.size(0), -1)
        idx = idx.data.cpu().numpy()
        feats[idx, :] = feat.data.cpu().numpy()
        targets[idx] = label.data.cpu().numpy()
    # evaluate clustering performance
    pca = PCA(n_components=args.n_clusters)
    feats = pca.fit_transform(feats)
    kmeans = KMeans(n_clusters=args.n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(feats)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Init acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = feat2prob(torch.from_numpy(feats), torch.from_numpy(kmeans.cluster_centers_))
    return acc, nmi, ari, kmeans.cluster_centers_, probs
Example #10
Source File: imagenet_DTC.py From DTC with MIT License

def test(model, test_loader, args, epoch=0):
    model.eval()
    acc_record = AverageMeter()
    preds = np.array([])
    targets = np.array([])
    feats = np.zeros((len(test_loader.dataset), args.n_clusters))
    probs = np.zeros((len(test_loader.dataset), args.n_clusters))
    for batch_idx, (x, label, idx) in enumerate(tqdm(test_loader)):
        x, label = x.to(device), label.to(device)
        output = model(x)
        prob = feat2prob(output, model.center)
        _, pred = prob.max(1)
        targets = np.append(targets, label.cpu().numpy())
        preds = np.append(preds, pred.cpu().numpy())
        idx = idx.data.cpu().numpy()
        feats[idx, :] = output.cpu().detach().numpy()
        probs[idx, :] = prob.cpu().detach().numpy()
    acc, nmi, ari = cluster_acc(targets.astype(int), preds.astype(int)), nmi_score(targets, preds), ari_score(targets, preds)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    return acc, nmi, ari, torch.from_numpy(probs)
Example #11
Source File: svhn_DTC.py From DTC with MIT License

def test(model, test_loader, args):
    model.eval()
    acc_record = AverageMeter()
    preds = np.array([])
    targets = np.array([])
    feats = np.zeros((len(test_loader.dataset), args.n_clusters))
    probs = np.zeros((len(test_loader.dataset), args.n_clusters))
    for batch_idx, (x, label, idx) in enumerate(tqdm(test_loader)):
        x, label = x.to(device), label.to(device)
        feat = model(x)
        prob = feat2prob(feat, model.center)
        _, pred = prob.max(1)
        targets = np.append(targets, label.cpu().numpy())
        preds = np.append(preds, pred.cpu().numpy())
        idx = idx.data.cpu().numpy()
        feats[idx, :] = feat.cpu().detach().numpy()
        probs[idx, :] = prob.cpu().detach().numpy()
    acc, nmi, ari = cluster_acc(targets.astype(int), preds.astype(int)), nmi_score(targets, preds), ari_score(targets, preds)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = torch.from_numpy(probs)
    return acc, nmi, ari, probs
Example #12
Source File: cifar10_DTC.py From DTC with MIT License

def test(model, test_loader, args):
    model.eval()
    preds = np.array([])
    targets = np.array([])
    feats = np.zeros((len(test_loader.dataset), args.n_clusters))
    probs = np.zeros((len(test_loader.dataset), args.n_clusters))
    for batch_idx, (x, label, idx) in enumerate(tqdm(test_loader)):
        x, label = x.to(device), label.to(device)
        feat = model(x)
        prob = feat2prob(feat, model.center)
        _, pred = prob.max(1)
        targets = np.append(targets, label.cpu().numpy())
        preds = np.append(preds, pred.cpu().numpy())
        idx = idx.data.cpu().numpy()
        feats[idx, :] = feat.cpu().detach().numpy()
        probs[idx, :] = prob.cpu().detach().numpy()
    acc, nmi, ari = cluster_acc(targets.astype(int), preds.astype(int)), nmi_score(targets, preds), ari_score(targets, preds)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = torch.from_numpy(probs)
    return acc, nmi, ari, probs
Example #13
Source File: posterior.py From scVI with MIT License

def clustering_scores(self, prediction_algorithm: str = "knn") -> Tuple:
    if self.gene_dataset.n_labels > 1:
        latent, _, labels = self.get_latent()
        if prediction_algorithm == "knn":
            # note: despite the "knn" name, this branch runs KMeans
            labels_pred = KMeans(
                self.gene_dataset.n_labels, n_init=200
            ).fit_predict(latent)  # n_jobs>1 ?
        elif prediction_algorithm == "gmm":
            gmm = GMM(self.gene_dataset.n_labels)
            gmm.fit(latent)
            labels_pred = gmm.predict(latent)

        asw_score = silhouette_score(latent, labels)
        nmi_score = NMI(labels, labels_pred)
        ari_score = ARI(labels, labels_pred)
        uca_score = unsupervised_clustering_accuracy(labels, labels_pred)[0]
        logger.debug(
            "Clustering Scores:\nSilhouette: %.4f\nNMI: %.4f\nARI: %.4f\nUCA: %.4f"
            % (asw_score, nmi_score, ari_score, uca_score)
        )
        return asw_score, nmi_score, ari_score, uca_score
Example #14
Source File: k_means_plot.py From machine-learning with GNU General Public License v3.0

def bench_k_means(estimator, name, data):
    # `labels` and `sample_size` are module-level globals in the original script
    t0 = time()
    estimator.fit(data)
    print('% 9s %.2fs %i %.3f %.3f %.3f %.3f %.3f %.3f'
          % (name, (time() - t0), estimator.inertia_,
             metrics.homogeneity_score(labels, estimator.labels_),
             metrics.completeness_score(labels, estimator.labels_),
             metrics.v_measure_score(labels, estimator.labels_),
             metrics.adjusted_rand_score(labels, estimator.labels_),
             metrics.adjusted_mutual_info_score(labels, estimator.labels_),
             metrics.silhouette_score(data, estimator.labels_,
                                      metric='euclidean',
                                      sample_size=sample_size)))
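Since bench_k_means reads labels and sample_size from module scope, here is a minimal setup sketch modeled on the classic scikit-learn digits benchmark this snippet descends from; the dataset choice and values are assumptions for illustration, not taken from the project:

from time import time

from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.datasets import load_digits
from sklearn.preprocessing import scale

# Module-level globals that bench_k_means expects
digits = load_digits()
data = scale(digits.data)
labels = digits.target
sample_size = 300

bench_k_means(KMeans(init='k-means++', n_clusters=10, n_init=10),
              name='k-means++', data=data)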
Example #15
Source File: test_models.py From graspy with Apache License 2.0

def test_DCSBM_fit_unsupervised(self):
    np.random.seed(12345)
    n_verts = 1500

    distances = np.random.beta(4, 1, n_verts)
    B = np.array([[0.7, 0.1, 0.1],
                  [0.1, 0.9, 0.1],
                  [0.05, 0.1, 0.75]])
    n = np.array([500, 500, 500])
    labels = _n_to_labels(n)
    p_mat = _block_to_full(B, labels, (n_verts, n_verts))
    p_mat = p_mat * np.outer(distances, distances)
    p_mat -= np.diag(np.diag(p_mat))
    graph = sample_edges(p_mat, directed=True, loops=False)
    dcsbe = DCSBMEstimator(directed=True, loops=False)
    dcsbe.fit(graph)
    assert adjusted_rand_score(labels, dcsbe.vertex_assignments_) > 0.95
    assert_allclose(p_mat, dcsbe.p_mat_, atol=0.12)
Example #16
Source File: omniglot_DTC_unknown.py From DTC with MIT License

def test(model, eval_loader, args):
    model.eval()
    targets = np.zeros(len(eval_loader.dataset))
    y_pred = np.zeros(len(eval_loader.dataset))
    probs = np.zeros((len(eval_loader.dataset), args.n_clusters))
    for _, (x, _, label, idx) in enumerate(eval_loader):
        x = x.to(device)
        _, feat = model(x)
        prob = feat2prob(feat, model.center)
        idx = idx.data.cpu().numpy()
        y_pred[idx] = prob.data.cpu().detach().numpy().argmax(1)
        targets[idx] = label.data.cpu().numpy()
        probs[idx, :] = prob.cpu().detach().numpy()
    # evaluate clustering performance
    y_pred = y_pred.astype(np.int64)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = torch.from_numpy(probs)
    return acc, nmi, ari, probs
Example #17
Source File: omniglot_DTC_unknown.py From DTC with MIT License

def init_prob_kmeans(model, eval_loader, args):
    torch.manual_seed(1)
    model = model.to(device)
    # cluster parameter initiate
    model.eval()
    targets = np.zeros(len(eval_loader.dataset))
    feats = np.zeros((len(eval_loader.dataset), 1024))
    for _, (x, _, label, idx) in enumerate(eval_loader):
        x = x.to(device)
        _, feat = model(x)
        feat = feat.view(x.size(0), -1)
        idx = idx.data.cpu().numpy()
        feats[idx, :] = feat.data.cpu().numpy()
        targets[idx] = label.data.cpu().numpy()
    # evaluate clustering performance
    pca = PCA(n_components=args.n_clusters)
    feats = pca.fit_transform(feats)
    kmeans = KMeans(n_clusters=args.n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(feats)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Init acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = feat2prob(torch.from_numpy(feats), torch.from_numpy(kmeans.cluster_centers_))
    return kmeans.cluster_centers_, probs
Example #18
Source File: omniglot_DTC.py From DTC with MIT License

def test(model, eval_loader, args):
    model.eval()
    targets = np.zeros(len(eval_loader.dataset))
    y_pred = np.zeros(len(eval_loader.dataset))
    probs = np.zeros((len(eval_loader.dataset), args.n_clusters))
    for _, (x, _, label, idx) in enumerate(eval_loader):
        x = x.to(device)
        _, feat = model(x)
        prob = feat2prob(feat, model.center)
        # prob = F.softmax(logit, dim=1)
        idx = idx.data.cpu().numpy()
        y_pred[idx] = prob.data.cpu().detach().numpy().argmax(1)
        targets[idx] = label.data.cpu().numpy()
        probs[idx, :] = prob.cpu().detach().numpy()
    # evaluate clustering performance
    y_pred = y_pred.astype(np.int64)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = torch.from_numpy(probs)
    return acc, nmi, ari, probs
Example #19
Source File: omniglot_DTC.py From DTC with MIT License

def init_prob_kmeans(model, eval_loader, args):
    torch.manual_seed(1)
    model = model.to(device)
    # cluster parameter initiate
    model.eval()
    targets = np.zeros(len(eval_loader.dataset))
    feats = np.zeros((len(eval_loader.dataset), 1024))
    for _, (x, _, label, idx) in enumerate(eval_loader):
        x = x.to(device)
        _, feat = model(x)
        feat = feat.view(x.size(0), -1)
        idx = idx.data.cpu().numpy()
        feats[idx, :] = feat.data.cpu().numpy()
        targets[idx] = label.data.cpu().numpy()
    # evaluate clustering performance
    pca = PCA(n_components=args.n_clusters)
    feats = pca.fit_transform(feats)
    kmeans = KMeans(n_clusters=args.n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(feats)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Init acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = feat2prob(torch.from_numpy(feats), torch.from_numpy(kmeans.cluster_centers_))
    return kmeans.cluster_centers_, probs
Example #20
Source File: cifar100_DTC.py From DTC with MIT License

def init_prob_kmeans(model, eval_loader, args):
    torch.manual_seed(1)
    model = model.to(device)
    # cluster parameter initiate
    model.eval()
    targets = np.zeros(len(eval_loader.dataset))
    feats = np.zeros((len(eval_loader.dataset), 512))
    for _, (x, label, idx) in enumerate(eval_loader):
        x = x.to(device)
        _, feat = model(x)
        idx = idx.data.cpu().numpy()
        feats[idx, :] = feat.data.cpu().numpy()
        targets[idx] = label.data.cpu().numpy()
    # evaluate clustering performance
    pca = PCA(n_components=args.n_clusters)
    feats = pca.fit_transform(feats)
    kmeans = KMeans(n_clusters=args.n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(feats)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Init acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = feat2prob(torch.from_numpy(feats), torch.from_numpy(kmeans.cluster_centers_))
    return acc, nmi, ari, kmeans.cluster_centers_, probs
Example #21
Source File: cifar10_DTC.py From DTC with MIT License

def init_prob_kmeans(model, eval_loader, args):
    torch.manual_seed(1)
    model = model.to(device)
    # cluster parameter initiate
    model.eval()
    targets = np.zeros(len(eval_loader.dataset))
    feats = np.zeros((len(eval_loader.dataset), 512))
    for _, (x, label, idx) in enumerate(eval_loader):
        x = x.to(device)
        feat = model(x)
        idx = idx.data.cpu().numpy()
        feats[idx, :] = feat.data.cpu().numpy()
        targets[idx] = label.data.cpu().numpy()
    # evaluate clustering performance
    pca = PCA(n_components=args.n_clusters)
    feats = pca.fit_transform(feats)
    kmeans = KMeans(n_clusters=args.n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(feats)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Init acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = feat2prob(torch.from_numpy(feats), torch.from_numpy(kmeans.cluster_centers_))
    return acc, nmi, ari, kmeans.cluster_centers_, probs
Example #22
Source File: my_surgery.py From GraphRicciCurvature with Apache License 2.0

def ARI(G, cc, clustering_label="club"):
    """
    Compute the Adjusted Rand Index (clustering accuracy) of clustering "cc"
    with clustering_label as ground truth.

    :param G: A networkx graph
    :param cc: A clustering result as list of connected components list
    :param clustering_label: Node label for clustering groundtruth
    """
    if importlib.util.find_spec("sklearn") is not None:
        from sklearn import preprocessing, metrics
    else:
        print("scikit-learn not installed...")
        return -1

    complexlist = nx.get_node_attributes(G, clustering_label)

    le = preprocessing.LabelEncoder()
    y_true = le.fit_transform(list(complexlist.values()))

    predict_dict = {}
    for idx, comp in enumerate(cc):
        for c in list(comp):
            predict_dict[c] = idx
    y_pred = []
    for v in complexlist.keys():
        y_pred.append(predict_dict[v])
    y_pred = np.array(y_pred)

    return metrics.adjusted_rand_score(y_true, y_pred)
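A hedged usage sketch: networkx's karate-club graph ships with a "club" node attribute, so feeding the ground-truth split back in as the clustering should score 1.0. The two-set partition below is built for illustration and is not part of the original module:

import networkx as nx

G = nx.karate_club_graph()  # nodes carry a "club" attribute
# Build a "clustering result" directly from the ground-truth attribute
cc = [{n for n, d in G.nodes(data=True) if d["club"] == "Mr. Hi"},
      {n for n, d in G.nodes(data=True) if d["club"] != "Mr. Hi"}]
print(ARI(G, cc))  # 1.0, since the partition matches the labels exactly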
Example #23
Source File: test_metrics.py From pandas-ml with BSD 3-Clause "New" or "Revised" License

def test_adjusted_rand_score(self):
    result = self.df.metrics.adjusted_rand_score()
    expected = metrics.adjusted_rand_score(self.target, self.pred)
    self.assertEqual(result, expected)
Example #24
Source File: plot_kmeans_digits.py From Computer-Vision-with-Python-3 with MIT License

def bench_k_means(estimator, name, data):
    # `labels` and `sample_size` are module-level globals, as in Example #14
    t0 = time()
    estimator.fit(data)
    print('% 9s %.2fs %i %.3f %.3f %.3f %.3f %.3f %.3f'
          % (name, (time() - t0), estimator.inertia_,
             metrics.homogeneity_score(labels, estimator.labels_),
             metrics.completeness_score(labels, estimator.labels_),
             metrics.v_measure_score(labels, estimator.labels_),
             metrics.adjusted_rand_score(labels, estimator.labels_),
             metrics.adjusted_mutual_info_score(labels, estimator.labels_),
             metrics.silhouette_score(data, estimator.labels_,
                                      metric='euclidean',
                                      sample_size=sample_size)))
Example #25
Source File: graph_eval.py From nodevectors with MIT License

def evalClusteringOnLabels(clusters, groupLabels, verbose=True):
    """
    Evaluates clustering against labels
    Alternative methodology to label prediction for testing
    """
    results = []
    results.append(metrics.adjusted_mutual_info_score(clusters, groupLabels))
    results.append(metrics.adjusted_rand_score(clusters, groupLabels))
    results.append(metrics.fowlkes_mallows_score(clusters, groupLabels))
    if verbose:
        print(f"MI: {results[0]:.2f}, RAND: {results[1]:.2f}, FM: {results[2]:.2f}")
    return dict(zip(['MI', 'RAND', 'FM'], np.array(results)))
Example #26
Source File: test_models.py From graspy with Apache License 2.0

def test_SBM_fit_unsupervised(self):
    np.random.seed(12345)
    n_verts = 1500
    B = np.array([[0.7, 0.1, 0.1],
                  [0.1, 0.9, 0.1],
                  [0.05, 0.1, 0.75]])
    n = np.array([500, 500, 500])
    labels = _n_to_labels(n)
    p_mat = _block_to_full(B, labels, (n_verts, n_verts))
    p_mat -= np.diag(np.diag(p_mat))
    graph = sample_edges(p_mat, directed=True, loops=False)
    sbe = SBMEstimator(directed=True, loops=False)
    sbe.fit(graph)
    assert adjusted_rand_score(labels, sbe.vertex_assignments_) > 0.95
    assert_allclose(p_mat, sbe.p_mat_, atol=0.12)
Example #27
Source File: HAN.py From OpenHINE with MIT License

def my_Kmeans(x, y, k=4, time=10, return_NMI=False):
    x = np.array(x)
    x = np.squeeze(x)
    y = np.array(y)

    if len(y.shape) > 1:
        y = np.argmax(y, axis=1)

    estimator = KMeans(n_clusters=k)
    ARI_list = []  # adjusted_rand_score
    NMI_list = []
    if time:
        # run KMeans `time` times and average the scores
        for i in range(time):
            estimator.fit(x, y)
            y_pred = estimator.predict(x)
            score = normalized_mutual_info_score(y, y_pred)
            NMI_list.append(score)
            s2 = adjusted_rand_score(y, y_pred)
            ARI_list.append(s2)
        # print('NMI_list: {}'.format(NMI_list))
        score = sum(NMI_list) / len(NMI_list)
        s2 = sum(ARI_list) / len(ARI_list)
        print('NMI (10 avg): {:.4f} , ARI (10avg): {:.4f}'.format(score, s2))
    else:
        s2 = None  # ARI is only computed in the averaged branch
        estimator.fit(x, y)
        y_pred = estimator.predict(x)
        score = normalized_mutual_info_score(y, y_pred)
        print("NMI on all label data: {:.5f}".format(score))
    if return_NMI:
        return score, s2
Example #28
Source File: test.py From OpenHINE with MIT License

def evaluate_cluster(self, embedding_list):
    X = []
    Y = []
    for p in self.label:
        X.append(embedding_list[p])
        Y.append(self.label[p])

    Y_pred = KMeans(self.n_label, random_state=self.seed).fit(np.array(X)).predict(X)
    nmi = normalized_mutual_info_score(np.array(Y), Y_pred)
    ari = adjusted_rand_score(np.array(Y), Y_pred)
    return nmi, ari
Example #29
Source File: test_spectral_embed.py From graspy with Apache License 2.0

def _kmeans_comparison(data, labels, n_clusters):
    """
    Function for comparing the ARIs of kmeans clustering
    for an arbitrary number of data/labels

    Parameters
    ----------
    data: list-like
        each element in the list is a dataset to perform k-means on
    labels: list-like
        each element in the list is a set of labels with the same number of
        points as the corresponding data
    n_clusters: int
        the number of clusters to use for k-means

    Returns
    -------
    aris: list, length the same as data/labels
        the i-th element in the list is an ARI (Adjusted Rand Index)
        corresponding to the result of k-means clustering on the i-th data/labels
    """
    if len(data) != len(labels):
        raise ValueError("Must have same number of labels and data")

    aris = []
    for i in range(0, len(data)):
        kmeans_prediction = KMeans(n_clusters=n_clusters).fit_predict(data[i])
        aris.append(adjusted_rand_score(labels[i], kmeans_prediction))

    return aris
Example #30
Source File: base.py From graspy with Apache License 2.0

def predict(self, X, y=None):  # pragma: no cover
    """
    Predict clusters based on best model.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        List of n_features-dimensional data points. Each row
        corresponds to a single data point.
    y : array-like, shape (n_samples, ), optional (default=None)
        List of labels for X if available. Used to compute ARI scores.

    Returns
    -------
    labels : array, shape (n_samples,)
        Component labels.
    ari : float
        Adjusted Rand index. Only returned if y is given.
    """
    # Check if fit is already called
    check_is_fitted(self, ["model_"], all_or_any=all)
    labels = self.model_.predict(X)
    if y is None:
        return labels
    else:
        ari = adjusted_rand_score(y, labels)
        return labels, ari
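To close, a hedged sketch of how this base-class predict is typically exercised through a concrete subclass. GaussianCluster, its import path, and its parameter names are recalled from the graspy API and should be treated as assumptions:

from graspy.cluster import GaussianCluster  # assumed subclass of the base above
from sklearn.datasets import make_blobs

X, y_true = make_blobs(n_samples=300, centers=3, random_state=0)
gc = GaussianCluster(min_components=2, max_components=5).fit(X)
labels, ari = gc.predict(X, y=y_true)  # passing y returns (labels, ari)
print('ARI: {:.3f}'.format(ari))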