Python sklearn.metrics.adjusted_mutual_info_score() Examples

The following are 20 code examples of sklearn.metrics.adjusted_mutual_info_score(), drawn from open-source projects. Each example is attributed to its original project and source file. You may also want to check out the other functions and classes available in the sklearn.metrics module.
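As a quick orientation, adjusted_mutual_info_score compares two flat label assignments and returns a chance-corrected agreement score: 1.0 for identical clusterings (up to a permutation of cluster ids) and values near 0.0 for random labelings. A minimal sketch:

from sklearn import metrics

labels_true = [0, 0, 1, 1, 2, 2]
labels_pred = [1, 1, 0, 0, 2, 2]   # same grouping, permuted cluster ids

# AMI is invariant to label permutation, so this scores a perfect 1.0.
print(metrics.adjusted_mutual_info_score(labels_true, labels_pred))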
Example #1
Source File: DCCComputation.py    From DCC with MIT License
from sklearn import metrics

def benchmarking(gtlabels, labels):
    # NOTE: the AMI definition used in the paper differs from that in the
    # sklearn package; adjust it accordingly (see the sketch below).
    numeval = len(gtlabels)
    ari = metrics.adjusted_rand_score(gtlabels[:numeval], labels[:numeval])
    ami = metrics.adjusted_mutual_info_score(gtlabels[:numeval], labels[:numeval])
    nmi = metrics.normalized_mutual_info_score(gtlabels[:numeval], labels[:numeval])
    acc = clustering_accuracy(gtlabels[:numeval], labels[:numeval])  # DCC project helper

    return ari, ami, nmi, acc
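The note above most likely concerns the normalization in AMI's denominator. scikit-learn exposes it through the average_method parameter ('min', 'geometric', 'arithmetic', or 'max'), and the default changed from 'max' to 'arithmetic' in scikit-learn 0.22, so pinning it explicitly is the safest way to match a paper's definition. A sketch (the 'max' choice here is illustrative, not DCC's confirmed setting):

from sklearn import metrics

labels_true = [0, 0, 1, 1]
labels_pred = [0, 1, 1, 1]
# Pin the normalization so results don't shift across sklearn versions.
ami = metrics.adjusted_mutual_info_score(labels_true, labels_pred,
                                         average_method='max')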
Example #2
Source File: test_builder.py    From gordo with GNU Affero General Public License v3.0
def test_metrics_from_list():
    """
    Check getting functions from a list of metric names
    """
    default = ModelBuilder.metrics_from_list()
    assert default == [
        metrics.explained_variance_score,
        metrics.r2_score,
        metrics.mean_squared_error,
        metrics.mean_absolute_error,
    ]

    specifics = ModelBuilder.metrics_from_list(
        ["sklearn.metrics.adjusted_mutual_info_score", "sklearn.metrics.r2_score"]
    )
    assert specifics == [metrics.adjusted_mutual_info_score, metrics.r2_score] 
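metrics_from_list is gordo's own helper: with no arguments it returns the default regression metrics, and given dotted names it resolves them to the functions themselves. That resolution step can be sketched with importlib (resolve_metric below is a hypothetical stand-in, not gordo's implementation):

import importlib

from sklearn import metrics

def resolve_metric(dotted_path):
    """Resolve e.g. 'sklearn.metrics.r2_score' to the function object."""
    module_path, _, attr = dotted_path.rpartition('.')
    return getattr(importlib.import_module(module_path), attr)

assert resolve_metric('sklearn.metrics.adjusted_mutual_info_score') \
    is metrics.adjusted_mutual_info_score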
Example #3
Source File: k_means_plot.py    From machine-learning with GNU General Public License v3.0
def bench_k_means(estimator, name, data):
    # `labels` and `sample_size` are module-level globals (see the driver below).
    t0 = time()
    estimator.fit(data)
    print('% 9s   %.2fs    %i   %.3f   %.3f   %.3f   %.3f   %.3f    %.3f'
          % (name, (time() - t0), estimator.inertia_,
             metrics.homogeneity_score(labels, estimator.labels_),
             metrics.completeness_score(labels, estimator.labels_),
             metrics.v_measure_score(labels, estimator.labels_),
             metrics.adjusted_rand_score(labels, estimator.labels_),
             metrics.adjusted_mutual_info_score(labels,  estimator.labels_),
             metrics.silhouette_score(data, estimator.labels_,
                                      metric='euclidean',
                                      sample_size=sample_size))) 
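bench_k_means here follows scikit-learn's classic digits clustering demo and reads time, labels, and sample_size from module scope. A plausible driver, assuming those globals, might look like:

from time import time

from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.datasets import load_digits
from sklearn.preprocessing import scale

digits = load_digits()
data = scale(digits.data)
labels = digits.target   # global read by bench_k_means
sample_size = 300        # global read by bench_k_means

bench_k_means(KMeans(init='k-means++', n_clusters=10, n_init=10),
              name='k-means++', data=data)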
Example #4
Source File: utils.py    From Neural-EM with MIT License
import numpy as np
from sklearn.metrics import adjusted_mutual_info_score

def evaluate_groups(true_groups, predicted):
    """ Compute the AMI score and corresponding mean confidence for given gammas.
    :param true_groups: (B, 1, W, H, 1)
    :param predicted: (B, K, W, H, 1)
    :return: scores, confidences (B,)
    """
    scores, confidences = [], []
    assert true_groups.ndim == predicted.ndim == 5, true_groups.shape
    batch_size, K = predicted.shape[:2]
    true_groups = true_groups.reshape(batch_size, -1)
    predicted = predicted.reshape(batch_size, K, -1)
    predicted_groups = predicted.argmax(1)
    predicted_conf = predicted.max(1)
    for i in range(batch_size):
        true_group = true_groups[i]
        idxs = np.where(true_group != 0.0)[0]
        scores.append(adjusted_mutual_info_score(true_group[idxs], predicted_groups[i, idxs]))
        confidences.append(np.mean(predicted_conf[i, idxs]))

    return scores, confidences 
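A toy invocation, assuming numpy arrays in the (B, K, W, H, 1) layout from the docstring; pixels labeled 0 in true_groups are treated as background and excluded from scoring:

B, K, W, H = 2, 3, 8, 8
rng = np.random.RandomState(0)
true_groups = rng.randint(0, 3, size=(B, 1, W, H, 1))   # 0 marks background pixels
predicted = rng.rand(B, K, W, H, 1)                     # soft group posteriors (gammas)

scores, confidences = evaluate_groups(true_groups, predicted)
print(scores, confidences)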
Example #5
Source File: utils.py    From Relational-NEM with MIT License
import numpy as np
from sklearn.metrics import adjusted_mutual_info_score

def evaluate_groups(true_groups, predicted):
    """ Compute the AMI score and corresponding mean confidence for given gammas.
    :param true_groups: (B, 1, W, H, 1)
    :param predicted: (B, K, W, H, 1)
    :return: scores, confidences (B,)
    """
    scores, confidences = [], []
    assert true_groups.ndim == predicted.ndim == 5, true_groups.shape
    batch_size, K = predicted.shape[:2]
    true_groups = true_groups.reshape(batch_size, -1)
    predicted = predicted.reshape(batch_size, K, -1)
    predicted_groups = predicted.argmax(1)
    predicted_conf = predicted.max(1)
    for i in range(batch_size):
        true_group = true_groups[i]
        idxs = np.where(true_group != 0.0)[0]
        scores.append(adjusted_mutual_info_score(true_group[idxs], predicted_groups[i, idxs]))
        confidences.append(np.mean(predicted_conf[i, idxs]))

    return scores, confidences 
Example #6
Source File: all_types.py    From CausalDiscoveryToolbox with MIT License
def predict(self, a, b, **kwargs):
        """Perform the independence test.

        :param a: input data
        :param b: input data
        :type a: array-like, numerical data
        :type b: array-like, numerical data
        :return: dependency statistic (1=Highly dependent, 0=Not dependent)
        :rtype: float
        """
        binning_alg = kwargs.get('bins', 'fd')
        return metrics.adjusted_mutual_info_score(bin_variable(a, bins=binning_alg),
                                                  bin_variable(b, bins=binning_alg)) 
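bin_variable is a CausalDiscoveryToolbox helper. The underlying idea, discretizing each variable (here with Freedman-Diaconis binning, bins='fd') before scoring their agreement with AMI, can be approximated with numpy alone; bin_fd below is an assumption about what bin_variable does, not its confirmed implementation:

import numpy as np
from sklearn import metrics

def bin_fd(x):
    """Discretize a 1-D array using Freedman-Diaconis bin edges."""
    edges = np.histogram_bin_edges(x, bins='fd')
    return np.digitize(x, edges)

rng = np.random.RandomState(0)
a = rng.randn(500)
b = a + 0.1 * rng.randn(500)            # strongly dependent on a

print(metrics.adjusted_mutual_info_score(bin_fd(a), bin_fd(b)))                # high
print(metrics.adjusted_mutual_info_score(bin_fd(a), bin_fd(rng.randn(500))))  # near 0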
Example #7
Source File: perf_indicators.py    From SecuML with GNU General Public License v2.0
def to_json(self):
        return {'homogeneity': self.homogeneity,
                'completeness': self.completeness,
                'v_measure': self.v_measure,
                'adjusted_rand_score': self.adjusted_rand_score,
                'adjusted_mutual_info_score': self.adjusted_mutual_info_score} 
Example #8
Source File: perf_indicators.py    From SecuML with GNU General Public License v2.0
def compute_adjusted_evaluations(self, labels_families,
                                     predicted_clusters):
        if labels_families is None:
            self.adjusted_rand_score = 0
            self.adjusted_mutual_info_score = 0
            return
        self.adjusted_rand_score = metrics.adjusted_rand_score(
            labels_families, predicted_clusters)
        self.adjusted_mutual_info_score = metrics.adjusted_mutual_info_score(
            labels_families, predicted_clusters, average_method='arithmetic') 
Example #9
Source File: Clique.py    From Clique with MIT License
def evaluate_clustering_performance(clusters, labels):
    set_of_dimensionality = set()
    for cluster in clusters:
        set_of_dimensionality.add(frozenset(cluster.dimensions))

    # Evaluate performance within each distinct set of dimensions
    for dim in set_of_dimensionality:
        print("\nEvaluating clusters in dimension: ", list(dim))
        # Finding clusters with same dimensions
        clusters_in_dim = []
        for c in clusters:
            if c.dimensions == dim:
                clusters_in_dim.append(c)
        clustering_labels = np.zeros(np.shape(labels))
        for i, c in enumerate(clusters_in_dim):
            clustering_labels[list(c.data_point_ids)] = i + 1

        print("Number of clusters: ", len(clusters_in_dim))
        print("Adjusted Rand index: ", metrics.adjusted_rand_score(
            labels, clustering_labels))
        print("Mutual Information: ", metrics.adjusted_mutual_info_score(
            labels, clustering_labels))

        print("Homogeneity, completeness, V-measure: ",
              metrics.homogeneity_completeness_v_measure(labels, clustering_labels))

        print("Fowlkes-Mallows: ",
              metrics.fowlkes_mallows_score(labels, clustering_labels)) 
Example #10
Source File: accuracy_calculator.py    From pytorch-metric-learning with MIT License
def calculate_AMI(self, query_labels, cluster_labels, **kwargs):
        return adjusted_mutual_info_score(query_labels, cluster_labels) 
Example #11
Source File: test_metrics.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License
def test_adjusted_mutual_info_score(self):
        result = self.df.metrics.adjusted_mutual_info_score()
        expected = metrics.adjusted_mutual_info_score(self.target, self.pred)
        self.assertEqual(result, expected) 
Example #12
Source File: plot_kmeans_digits.py    From Computer-Vision-with-Python-3 with MIT License
def bench_k_means(estimator, name, data):
    # As in Example #3, `labels` and `sample_size` are module-level globals.
    t0 = time()
    estimator.fit(data)
    print('% 9s   %.2fs    %i   %.3f   %.3f   %.3f   %.3f   %.3f    %.3f'
          % (name, (time() - t0), estimator.inertia_,
             metrics.homogeneity_score(labels, estimator.labels_),
             metrics.completeness_score(labels, estimator.labels_),
             metrics.v_measure_score(labels, estimator.labels_),
             metrics.adjusted_rand_score(labels, estimator.labels_),
             metrics.adjusted_mutual_info_score(labels,  estimator.labels_),
             metrics.silhouette_score(data, estimator.labels_,
                                      metric='euclidean',
                                      sample_size=sample_size))) 
Example #13
Source File: all_types.py    From CausalDiscoveryToolbox with MIT License
def predict(self, a, b, **kwargs):
        """Perform the independence test.

        :param a: input data
        :param b: input data
        :type a: array-like, numerical data
        :type b: array-like, numerical data
        :return: dependency statistic (1=Highly dependent, 0=Not dependent)
        :rtype: float
        """
        binning_alg = kwargs.get('bins', 'fd')
        return metrics.adjusted_mutual_info_score(bin_variable(a, bins=binning_alg),
                                                  bin_variable(b, bins=binning_alg)) 
Example #14
Source File: k_means_clustering.py    From FunUtils with MIT License
def bench_k_means(estimator, name, data):
    estimator.fit(data)
    # A short explanation for every score:
    # homogeneity:          each cluster contains only members of a single class (range 0 - 1)
    # completeness:         all members of a given class are assigned to the same cluster (range 0 - 1)
    # v_measure:            harmonic mean of homogeneity and completeness
    # adjusted_rand:        similarity of the actual values and their predictions,
    #                       ignoring permutations and with chance normalization
    #                       (range -1 to 1, -1 being bad, 1 being perfect and 0 being random)
    # adjusted_mutual_info: agreement of the actual values and predictions, ignoring permutations
    #                       (1 is perfect agreement; random labelings score near 0 and can
    #                       dip slightly below it)
    # silhouette:           uses the mean distance between a sample and all other points in the same class,
    #                       as well as the mean distance between a sample and all other points in the nearest cluster,
    #                       to calculate a score (range: -1 to 1, where -1 means samples sit in the
    #                       wrong clusters, 1 indicates highly dense clustering, and
    #                       0 indicates overlapping clusters).
    print('%-9s \t%i \thomogeneity: %.3f \tcompleteness: %.3f \tv-measure: %.3f \tadjusted-rand: %.3f \t'
          'adjusted-mutual-info: %.3f \tsilhouette: %.3f'
          % (name, estimator.inertia_,
             metrics.homogeneity_score(y, estimator.labels_),
             metrics.completeness_score(y, estimator.labels_),
             metrics.v_measure_score(y, estimator.labels_),
             metrics.adjusted_rand_score(y, estimator.labels_),
             metrics.adjusted_mutual_info_score(y,  estimator.labels_),
             metrics.silhouette_score(data, estimator.labels_,
                                      metric='euclidean'))) 
Example #15
Source File: features.py    From CausalDiscoveryToolbox with MIT License
from sklearn.metrics import adjusted_mutual_info_score

def adjusted_mutual_information(x, tx, y, ty, ffactor=3, maxdev=3):
    x, y = discretized_sequences(x, tx, y, ty, ffactor, maxdev)  # CDT-local helper
    try:
        return adjusted_mutual_info_score(x, y)
    except ValueError:
        return adjusted_mutual_info_score(x.squeeze(1), y.squeeze(1)) 
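The except branch covers the fact that sklearn's clustering metrics require 1-D label arrays: a column vector of shape (n, 1) raises a ValueError, which squeeze(1) (or ravel) resolves:

import numpy as np

x = np.array([[0], [0], [1], [1]])   # shape (4, 1): rejected by sklearn
try:
    adjusted_mutual_info_score(x, x)
except ValueError:
    print(adjusted_mutual_info_score(x.squeeze(1), x.squeeze(1)))   # 1.0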
Example #16
Source File: metric_loss_ops.py    From cluster-loss-tensorflow with BSD 2-Clause "Simplified" License
from sklearn import metrics
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import math_ops, script_ops

def _compute_ami_score(labels, predictions):
  ami_score = math_ops.to_float(
      script_ops.py_func(
          metrics.adjusted_mutual_info_score, [labels, predictions],
          [dtypes.float64],
          name='ami'))
  # AMI can dip slightly below zero for worse-than-chance agreement; clip to 0.
  return math_ops.maximum(0.0, ami_score)
Example #17
Source File: structural_tests.py    From drifter_ml with MIT License
def mutual_info_dbscan_scorer(self, min_similarity):
        return self.dbscan_scorer(
            metrics.adjusted_mutual_info_score,
            min_similarity
        ) 
Example #18
Source File: structural_tests.py    From drifter_ml with MIT License
def mutual_info_kmeans_scorer(self, min_similarity):
        return self.kmeans_scorer(
            metrics.adjusted_mutual_info_score,
            min_similarity
        ) 
Example #19
Source File: graph_eval.py    From nodevectors with MIT License
import numpy as np
from sklearn import metrics

def evalClusteringOnLabels(clusters, groupLabels, verbose=True):
    """
    Evaluates clustering against labels
    Alternative methodology to label prediction for testing
    """
    results = []
    results.append(metrics.adjusted_mutual_info_score(clusters, groupLabels))
    results.append(metrics.adjusted_rand_score(clusters, groupLabels))
    results.append(metrics.fowlkes_mallows_score(clusters, groupLabels))
    if verbose:
        print(f"MI: {results[0]:.2f}, RAND {results[2]:.2f}, FM: {results[2]:.2f}")
    return dict(zip(['MI', 'RAND', 'FM'], np.array(results))) 
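A quick sanity check with toy labelings (cluster ids permuted relative to the ground-truth groups, which all three chance-adjusted scores ignore):

clusters = [0, 0, 1, 1, 2, 2]
groupLabels = [2, 2, 0, 0, 1, 1]
print(evalClusteringOnLabels(clusters, groupLabels))   # MI, RAND and FM all 1.0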
Example #20
Source File: comparison.py    From cdlib with BSD 2-Clause "Simplified" License
def adjusted_mutual_information(first_partition, second_partition):
    """Adjusted Mutual Information between two clusterings.

    Adjusted Mutual Information (AMI) is an adjustment of the Mutual
    Information (MI) score to account for chance. It accounts for the fact that
    the MI is generally higher for two clusterings with a larger number of
    clusters, regardless of whether there is actually more information shared.
    For two clusterings :math:`U` and :math:`V`, the AMI is given as::

        AMI(U, V) = [MI(U, V) - E(MI(U, V))] / [max(H(U), H(V)) - E(MI(U, V))]

    This metric is independent of the absolute values of the labels:
    a permutation of the class or cluster label values won't change the
    score value in any way.

    This metric is furthermore symmetric: switching ``label_true`` with
    ``label_pred`` will return the same score value. This can be useful to
    measure the agreement of two independent label assignment strategies
    on the same dataset when the real ground truth is not known.

    Be mindful that this function is an order of magnitude slower than other
    metrics, such as the Adjusted Rand Index.

    :param first_partition: NodeClustering object
    :param second_partition: NodeClustering object
    :return: MatchingResult object

    :Example:

    >>> from cdlib import evaluation, algorithms
    >>> g = nx.karate_club_graph()
    >>> louvain_communities = algorithms.louvain(g)
    >>> leiden_communities = algorithms.leiden(g)
    >>> evaluation.adjusted_mutual_information(louvain_communities,leiden_communities)

    :Reference:

    1. Vinh, N. X., Epps, J., & Bailey, J. (2010). `Information theoretic measures for clusterings comparison: Variants, properties, normalization and correction for chance. <http://jmlr.csail.mit.edu/papers/volume11/vinh10a/vinh10a.pdf/>`_ Journal of Machine Learning Research, 11(Oct), 2837-2854.
    """

    __check_partition_coverage(first_partition, second_partition)
    __check_partition_overlap(first_partition, second_partition)

    first_partition_c = [x[1]
                         for x in sorted([(node, nid)
                                          for nid, cluster in enumerate(first_partition.communities)
                                          for node in cluster], key=lambda x: x[0])]

    second_partition_c = [x[1]
                          for x in sorted([(node, nid)
                                           for nid, cluster in enumerate(second_partition.communities)
                                           for node in cluster], key=lambda x: x[0])]

    from sklearn.metrics import adjusted_mutual_info_score
    return MatchingResult(score=adjusted_mutual_info_score(first_partition_c, second_partition_c))