Python scipy.optimize.linear_sum_assignment() Examples
The following are 30 code examples of scipy.optimize.linear_sum_assignment(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.optimize, or try the search function.
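Before the project examples, here is a minimal sketch of the function itself (adapted from the SciPy documentation, not from any project below): linear_sum_assignment takes a cost matrix and returns row and column indices whose pairing minimizes the total cost; negate the matrix to maximize instead.

import numpy as np
from scipy.optimize import linear_sum_assignment

# cost[i, j] is the cost of assigning row i (e.g. a worker) to column j (e.g. a job).
cost = np.array([[4, 1, 3],
                 [2, 0, 5],
                 [3, 2, 2]])

row_ind, col_ind = linear_sum_assignment(cost)
print(row_ind, col_ind)              # [0 1 2] [1 0 2]
print(cost[row_ind, col_ind].sum())  # 5, the minimum total assignment cost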
Example #1
Source File: sort.py From sort with GNU General Public License v3.0 | 7 votes |
import numpy as np

def linear_assignment(cost_matrix):
    try:
        # Prefer the lap package when it is available: lapjv is typically faster.
        import lap
        _, x, y = lap.lapjv(cost_matrix, extend_cost=True)
        return np.array([[y[i], i] for i in x if i >= 0])
    except ImportError:
        # Fall back to SciPy's Hungarian-algorithm implementation.
        from scipy.optimize import linear_sum_assignment
        x, y = linear_sum_assignment(cost_matrix)
        return np.array(list(zip(x, y)))
Example #2
Source File: mot.py From PoseWarper with Apache License 2.0 | 6 votes |
def _sanitize_dists(self, dists):
    """Replace invalid distances."""
    dists = np.copy(dists)
    # Note there is an issue in scipy.optimize.linear_sum_assignment where
    # it runs forever if an entire row/column is infinite or nan. We therefore
    # make a copy of the distance matrix and compute a safe value that indicates
    # 'cannot assign'. Also note the + 1 is necessary in the inv-dist computation
    # below to make invdist bigger than max dist in case max dist is zero.
    valid_dists = dists[np.isfinite(dists)]
    INVDIST = 2 * valid_dists.max() + 1 if valid_dists.shape[0] > 0 else 1.
    dists[~np.isfinite(dists)] = INVDIST
    return dists, INVDIST
Example #3
Source File: eval.py From stacked_capsule_autoencoders with Apache License 2.0 | 6 votes |
import numpy as np
from scipy.optimize import linear_sum_assignment
# AttrDict comes from the surrounding project (a dict with attribute access).

def bipartite_match(pred, gt, n_classes=None, presence=None):
    """Does maximum bipartite matching between `pred` and `gt`."""
    if n_classes is not None:
        n_gt_labels, n_pred_labels = n_classes, n_classes
    else:
        n_gt_labels = np.unique(gt).shape[0]
        n_pred_labels = np.unique(pred).shape[0]

    cost_matrix = np.zeros([n_gt_labels, n_pred_labels], dtype=np.int32)
    for label in range(n_gt_labels):
        label_idx = (gt == label)
        for new_label in range(n_pred_labels):
            errors = np.equal(pred[label_idx], new_label).astype(np.float32)
            if presence is not None:
                errors *= presence[label_idx]
            num_errors = errors.sum()
            cost_matrix[label, new_label] = -num_errors

    row_idx, col_idx = linear_sum_assignment(cost_matrix)
    num_correct = -cost_matrix[row_idx, col_idx].sum()
    acc = float(num_correct) / gt.shape[0]
    return AttrDict(assignment=(row_idx, col_idx), acc=acc, num_correct=num_correct)
Example #4
Source File: utils.py From pt-sdae with MIT License | 6 votes |
from typing import Optional

import numpy as np
from scipy.optimize import linear_sum_assignment

def cluster_accuracy(y_true, y_predicted, cluster_number: Optional[int] = None):
    """
    Calculate clustering accuracy after using the linear_sum_assignment function
    in SciPy to determine reassignments.

    :param y_true: list of true cluster numbers, an integer array 0-indexed
    :param y_predicted: list of predicted cluster numbers, an integer array 0-indexed
    :param cluster_number: number of clusters, if None then calculated from input
    :return: reassignment dictionary, clustering accuracy
    """
    if cluster_number is None:
        cluster_number = (
            max(y_predicted.max(), y_true.max()) + 1
        )  # assume labels are 0-indexed
    count_matrix = np.zeros((cluster_number, cluster_number), dtype=np.int64)
    for i in range(y_predicted.size):
        count_matrix[y_predicted[i], y_true[i]] += 1
    # Subtracting from the max turns the "reward" count matrix into a cost matrix.
    row_ind, col_ind = linear_sum_assignment(count_matrix.max() - count_matrix)
    reassignment = dict(zip(row_ind, col_ind))
    accuracy = count_matrix[row_ind, col_ind].sum() / y_predicted.size
    return reassignment, accuracy
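A quick usage sketch for the function above (the labels here are made up, not from pt-sdae): since linear_sum_assignment minimizes cost, passing count_matrix.max() - count_matrix makes the most frequent (predicted, true) pairings the cheapest assignments.

import numpy as np

# Hypothetical labels: the prediction equals the true clustering up to a permutation.
y_true = np.array([0, 0, 1, 1, 2, 2])
y_predicted = np.array([1, 1, 2, 2, 0, 0])

reassignment, accuracy = cluster_accuracy(y_true, y_predicted)
print(reassignment)  # {0: 2, 1: 0, 2: 1} -- maps each predicted id to a true id
print(accuracy)      # 1.0 -- the clusterings agree up to relabeling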
Example #5
Source File: posterior_utils.py From scVI with MIT License | 6 votes |
from typing import Union

import numpy as np
import torch
from scipy.optimize import linear_sum_assignment

def unsupervised_clustering_accuracy(
    y: Union[np.ndarray, torch.Tensor], y_pred: Union[np.ndarray, torch.Tensor]
) -> tuple:
    """Unsupervised Clustering Accuracy"""
    assert len(y_pred) == len(y)
    u = np.unique(np.concatenate((y, y_pred)))
    n_clusters = len(u)
    mapping = dict(zip(u, range(n_clusters)))
    reward_matrix = np.zeros((n_clusters, n_clusters), dtype=np.int64)
    for y_pred_, y_ in zip(y_pred, y):
        if y_ in mapping:
            reward_matrix[mapping[y_pred_], mapping[y_]] += 1
    cost_matrix = reward_matrix.max() - reward_matrix
    row_assign, col_assign = linear_sum_assignment(cost_matrix)

    # Construct optimal assignments matrix
    row_assign = row_assign.reshape((-1, 1))  # (n,) to (n, 1) reshape
    col_assign = col_assign.reshape((-1, 1))  # (n,) to (n, 1) reshape
    assignments = np.concatenate((row_assign, col_assign), axis=1)

    optimal_reward = reward_matrix[row_assign, col_assign].sum() * 1.0
    return optimal_reward / y_pred.size, assignments
Example #6
Source File: mot.py From DetectAndTrack with Apache License 2.0 | 6 votes |
def _sanitize_dists(self, dists):
    """Replace invalid distances."""
    dists = np.copy(dists)
    # Note there is an issue in scipy.optimize.linear_sum_assignment where
    # it runs forever if an entire row/column is infinite or nan. We therefore
    # make a copy of the distance matrix and compute a safe value that indicates
    # 'cannot assign'. Also note the + 1 is necessary in the inv-dist computation
    # below to make invdist bigger than max dist in case max dist is zero.
    valid_dists = dists[np.isfinite(dists)]
    INVDIST = 2 * valid_dists.max() + 1 if valid_dists.shape[0] > 0 else 1.
    dists[~np.isfinite(dists)] = INVDIST
    return dists, INVDIST
Example #7
Source File: tfa.py From brainiak with Apache License 2.0 | 6 votes |
def _assign_posterior(self):
    """assign posterior to prior based on Hungarian algorithm

    Returns
    -------
    TFA
        Returns the instance itself.
    """
    # `distance` here is scipy.spatial.distance, imported at the top of tfa.py.
    prior_centers = self.get_centers(self.local_prior)
    posterior_centers = self.get_centers(self.local_posterior_)
    posterior_widths = self.get_widths(self.local_posterior_)
    # linear assignment on centers
    cost = distance.cdist(prior_centers, posterior_centers, 'euclidean')
    _, col_ind = linear_sum_assignment(cost)
    # reorder centers/widths based on cost assignment
    self.set_centers(self.local_posterior_, posterior_centers[col_ind])
    self.set_widths(self.local_posterior_, posterior_widths[col_ind])
    return self
Example #8
Source File: utils.py From pt-dec with MIT License | 6 votes |
from typing import Optional

import numpy as np
from scipy.optimize import linear_sum_assignment

def cluster_accuracy(y_true, y_predicted, cluster_number: Optional[int] = None):
    """
    Calculate clustering accuracy after using the linear_sum_assignment function
    in SciPy to determine reassignments.

    :param y_true: list of true cluster numbers, an integer array 0-indexed
    :param y_predicted: list of predicted cluster numbers, an integer array 0-indexed
    :param cluster_number: number of clusters, if None then calculated from input
    :return: reassignment dictionary, clustering accuracy
    """
    if cluster_number is None:
        cluster_number = (
            max(y_predicted.max(), y_true.max()) + 1
        )  # assume labels are 0-indexed
    count_matrix = np.zeros((cluster_number, cluster_number), dtype=np.int64)
    for i in range(y_predicted.size):
        count_matrix[y_predicted[i], y_true[i]] += 1
    # Subtracting from the max turns the "reward" count matrix into a cost matrix.
    row_ind, col_ind = linear_sum_assignment(count_matrix.max() - count_matrix)
    reassignment = dict(zip(row_ind, col_ind))
    accuracy = count_matrix[row_ind, col_ind].sum() / y_predicted.size
    return reassignment, accuracy
Example #9
Source File: test_hungarian.py From GraphicDesignPatternByPython with MIT License | 6 votes |
import numpy as np
from numpy.testing import assert_array_equal, assert_raises
from scipy.optimize import linear_sum_assignment

def test_linear_sum_assignment_input_validation():
    # The cost matrix must be two-dimensional.
    assert_raises(ValueError, linear_sum_assignment, [1, 2, 3])

    C = [[1, 2, 3], [4, 5, 6]]
    assert_array_equal(linear_sum_assignment(C),
                       linear_sum_assignment(np.asarray(C)))
    assert_array_equal(linear_sum_assignment(C),
                       linear_sum_assignment(np.matrix(C)))

    # Note: np.bool and np.matrix are deprecated (np.bool removed) in newer
    # NumPy; use the builtin bool and plain arrays there.
    I = np.identity(3)
    assert_array_equal(linear_sum_assignment(I.astype(np.bool)),
                       linear_sum_assignment(I))
    assert_raises(ValueError, linear_sum_assignment, I.astype(str))

    I[0][0] = np.nan
    assert_raises(ValueError, linear_sum_assignment, I)

    I = np.identity(3)
    I[1][1] = np.inf
    assert_raises(ValueError, linear_sum_assignment, I)
Example #10
Source File: davis_evaluation.py From video_analyst with MIT License | 5 votes |
import sys

import numpy as np
from scipy.optimize import linear_sum_assignment
# db_eval_iou and db_eval_boundary are the DAVIS J (region) and F (boundary)
# metrics, defined elsewhere in this project.

def _evaluate_unsupervised(all_gt_masks, all_res_masks, all_void_masks, metric,
                           max_n_proposals=20):
    if all_res_masks.shape[0] > max_n_proposals:
        sys.stdout.write(
            "\nIn your PNG files there is an index higher than the maximum number ({}) of proposals allowed!"
            .format(max_n_proposals))
        sys.exit()
    elif all_res_masks.shape[0] < all_gt_masks.shape[0]:
        zero_padding = np.zeros(
            (all_gt_masks.shape[0] - all_res_masks.shape[0], *all_res_masks.shape[1:]))
        all_res_masks = np.concatenate([all_res_masks, zero_padding], axis=0)
    j_metrics_res = np.zeros(
        (all_res_masks.shape[0], all_gt_masks.shape[0], all_gt_masks.shape[1]))
    f_metrics_res = np.zeros(
        (all_res_masks.shape[0], all_gt_masks.shape[0], all_gt_masks.shape[1]))
    for ii in range(all_gt_masks.shape[0]):
        for jj in range(all_res_masks.shape[0]):
            if 'J' in metric:
                j_metrics_res[jj, ii, :] = db_eval_iou(
                    all_gt_masks[ii, ...], all_res_masks[jj, ...], all_void_masks)
            if 'F' in metric:
                f_metrics_res[jj, ii, :] = db_eval_boundary(
                    all_gt_masks[ii, ...], all_res_masks[jj, ...], all_void_masks)
    if 'J' in metric and 'F' in metric:
        all_metrics = (np.mean(j_metrics_res, axis=2) +
                       np.mean(f_metrics_res, axis=2)) / 2
    else:
        all_metrics = np.mean(j_metrics_res, axis=2) if 'J' in metric \
            else np.mean(f_metrics_res, axis=2)
    # Negate the scores so that the minimum-cost assignment maximizes J/F.
    row_ind, col_ind = linear_sum_assignment(-all_metrics)
    return j_metrics_res[row_ind, col_ind, :], f_metrics_res[row_ind, col_ind, :]
Example #11
Source File: simple_line.py From marl_transfer with MIT License | 5 votes |
def _bipartite_min_dists(self, dists):
    ri, ci = linear_sum_assignment(dists)
    min_dists = dists[ri, ci]
    return min_dists
Example #12
Source File: faces_database.py From open_model_zoo with Apache License 2.0 | 5 votes |
def match_faces(self, descriptors, match_algo='HUNGARIAN'):
    database = self.database
    distances = np.empty((len(descriptors), len(database)))
    for i, desc in enumerate(descriptors):
        for j, identity in enumerate(database):
            dist = []
            for k, id_desc in enumerate(identity.descriptors):
                dist.append(FacesDatabase.Identity.cosine_dist(desc, id_desc))
            distances[i][j] = dist[np.argmin(dist)]
    matches = []

    # If the user specifies MIN_DIST for face matching, the face with the
    # minimum cosine distance will be selected.
    if match_algo == 'MIN_DIST':
        for i in range(len(descriptors)):
            id = np.argmin(distances[i])
            min_dist = distances[i][id]
            matches.append((id, min_dist))
    else:
        # Find the best assignments and prevent repeats, assuming faces cannot repeat.
        _, assignments = linear_sum_assignment(distances)
        for i in range(len(descriptors)):
            if len(assignments) <= i:  # assignment failure, too many faces
                matches.append((0, 1.0))
                continue

            id = assignments[i]
            distance = distances[i, id]
            matches.append((id, distance))

    return matches
Example #13
Source File: sct.py From open_model_zoo with Apache License 2.0 | 5 votes |
def _continue_tracks(self, detections, features):
    active_tracks_idx = []
    for i, track in enumerate(self.tracks):
        if track.get_end_time() >= self.time - self.continue_time_thresh:
            active_tracks_idx.append(i)

    occluded_det_idx = []
    for i, det1 in enumerate(detections):
        for j, det2 in enumerate(detections):
            if i != j and self._ios(det1, det2) > self.detection_occlusion_thresh:
                occluded_det_idx.append(i)
                features[i] = None
                break

    cost_matrix = self._compute_detections_assignment_cost(
        active_tracks_idx, detections, features)

    assignment = [None for _ in range(cost_matrix.shape[0])]
    if cost_matrix.size > 0:
        row_ind, col_ind = linear_sum_assignment(cost_matrix)
        for i, j in zip(row_ind, col_ind):
            idx = active_tracks_idx[j]
            if cost_matrix[i, j] < self.match_threshold and \
                    self._check_velocity_constraint(self.tracks[idx].get_last_box(),
                                                    self.tracks[idx].get_end_time(),
                                                    detections[i], self.time) and \
                    self._iou(self.tracks[idx].boxes[-1], detections[i]) > self.track_detection_iou_thresh:
                assignment[i] = j

    for i, j in enumerate(assignment):
        if j is not None:
            idx = active_tracks_idx[j]
            crop = self.current_detections[i] if self.current_detections is not None else None
            self.tracks[idx].add_detection(detections[i], features[i], self.time,
                                           self.continue_time_thresh,
                                           self.detection_filter_speed, crop)
    return assignment
Example #14
Source File: model.py From articulated-part-induction with MIT License | 5 votes |
import numpy as np
from scipy.optimize import linear_sum_assignment

def hungarian_matching(pred_x, gt_x, curnmasks):
    """ pred_x, gt_x: B x nmask x nsmp
        curnmasks: B
        return matching_idx: B x nmask x 2 """
    batch_size = gt_x.shape[0]
    nmask = gt_x.shape[1]
    matching_score = np.matmul(gt_x, np.transpose(pred_x, axes=[0, 2, 1]))  # B x nmask x nmask
    # 1 - IOU of each (gt, pred) mask pair, computed from the intersection counts.
    matching_score = 1 - np.divide(
        matching_score,
        np.expand_dims(np.sum(pred_x, 2), 1) + np.sum(gt_x, 2, keepdims=True)
        - matching_score + 1e-8)
    matching_idx = np.zeros((batch_size, nmask, 2)).astype('int32')
    curnmasks = curnmasks.astype('int32')
    for i, curnmask in enumerate(curnmasks):
        row_ind, col_ind = linear_sum_assignment(
            matching_score[i, :curnmask, :curnmask])
        matching_idx[i, :curnmask, 0] = row_ind
        matching_idx[i, :curnmask, 1] = col_ind
    return matching_idx
Example #15
Source File: topic_mapping.py From TopicNet with MIT License | 5 votes |
from scipy import optimize
from scipy.spatial import distance

def compute_topic_mapping(matrix_left, matrix_right, metric='euclidean'):
    """
    This function provides a mapping of topics from one model to the topics
    of the other model, based on their similarity as defined by the metric.

    Parameters
    ----------
    matrix_left : np.array
        a matrix of N1 topics x M tokens from the first model;
        each row is a cluster in M-dimensional feature space
    matrix_right : np.array
        a matrix of N2 topics x M tokens from the second model;
        each row is a cluster in M-dimensional feature space
    metric : str or class
        a string defining the metric to use, or a function that computes
        pairwise distance between 2 matrices
        (Default value = 'euclidean')

    Returns
    -------
    tuple of ndarrays
        two ndarrays of indices, where each index corresponds to a topic
        from the respective model
    """
    if isinstance(metric, str):
        costs = distance.cdist(matrix_left, matrix_right, metric=metric)
    else:
        costs = metric(matrix_left, matrix_right)

    results = optimize.linear_sum_assignment(costs)
    return results
Example #16
Source File: matching.py From video-to-pose3D with MIT License | 5 votes |
import numpy as np
from scipy.optimize import linear_sum_assignment

def matching(pose_preds, matrix, kp_groups):
    index = []
    # Solve one assignment problem per keypoint channel.
    for k in range(17):
        human_ind, joint_ind = linear_sum_assignment(matrix[k])
        # human_ind, joint_ind = greedy_matching(matrix[k])
        index.append(list(zip(human_ind, joint_ind)))

    for n, person in pose_preds.items():
        for k in range(17):
            g_id = person['group_id'][k]
            if g_id is not None:
                g_id = int(g_id) - 1
                h_id = n
                x, y, s = pose_preds[n][k][0]
                if ((h_id, g_id) not in index[k]) and len(pose_preds[n][k]) > 1:
                    pose_preds[n][k] = np.delete(pose_preds[n][k], 0, 0)
                elif ((h_id, g_id) not in index[k]) and len(person[k]) == 1:
                    x, y, _ = pose_preds[n][k][0]
                    pose_preds[n][k][0] = (x, y, 1e-5)
                elif (h_id, g_id) in index[k]:
                    x, y = kp_groups[k][g_id + 1]['group_center']
                    s = pose_preds[n][k][0][2]
                    pose_preds[n][k][0] = (x, y, s)
    return pose_preds
Example #17
Source File: accuracy.py From subspace-clustering with MIT License | 5 votes |
from scipy.optimize import linear_sum_assignment
# `supervised` is scikit-learn's sklearn.metrics.cluster supervised-metrics module.

def clustering_accuracy(labels_true, labels_pred):
    """Clustering Accuracy between two clusterings.

    Clustering Accuracy is a measure of the similarity between two labels of
    the same data. Assume that both labels_true and labels_pred contain n
    distinct labels. Clustering Accuracy is the maximum accuracy over all
    possible permutations of the labels, i.e.

        \max_{\sigma} \sum_i labels_true[i] == \sigma(labels_pred[i])

    where \sigma is a mapping from the set of unique labels of labels_pred
    to the set of unique labels of labels_true. Clustering accuracy is one
    if and only if there is a permutation of the labels such that there is
    an exact match.

    This metric is independent of the absolute values of the labels:
    a permutation of the class or cluster label values won't change the
    score value in any way.

    This metric is furthermore symmetric: switching ``labels_true`` with
    ``labels_pred`` will return the same score value. This can be useful to
    measure the agreement of two independent label assignment strategies on
    the same dataset when the real ground truth is not known.

    Parameters
    ----------
    labels_true : int array, shape = [n_samples]
        A clustering of the data into disjoint subsets.
    labels_pred : array, shape = [n_samples]
        A clustering of the data into disjoint subsets.

    Returns
    -------
    accuracy : float
        clustering accuracy in the range of [0, 1]
    """
    labels_true, labels_pred = supervised.check_clusterings(labels_true, labels_pred)
    # value = supervised.contingency_matrix(labels_true, labels_pred, sparse=False)
    value = supervised.contingency_matrix(labels_true, labels_pred)
    [r, c] = linear_sum_assignment(-value)
    return value[r, c].sum() / len(labels_true)
Example #18
Source File: simple_spread.py From marl_transfer with MIT License | 5 votes |
def _bipartite_min_dists(self, dists):
    ri, ci = linear_sum_assignment(dists)
    min_dists = dists[ri, ci]
    return min_dists
Example #19
Source File: simple_formation.py From marl_transfer with MIT License | 5 votes |
def _bipartite_min_dists(self, dists):
    ri, ci = linear_sum_assignment(dists)
    min_dists = dists[ri, ci]
    return min_dists
Example #20
Source File: Hungarian.py From sparse-subspace-clustering-python with MIT License | 5 votes |
from scipy.optimize import linear_sum_assignment

def Hungarian(A):
    _, col_ind = linear_sum_assignment(A)
    # Cost can be found as A[row_ind, col_ind].sum()
    return col_ind
Example #21
Source File: unmixing.py From proxmin with MIT License | 5 votes |
import numpy as np
from scipy.optimize import linear_sum_assignment

def match(A, S, trueS):
    """Rearranges columns of S to best fit the components they likely
    represent (maximizes the sum of correlations).
    """
    cov = np.cov(trueS, S)
    k = S.shape[0]
    corr = np.zeros([k, k])
    for i in range(k):
        for j in range(k):
            corr[i][j] = cov[i + k][j] / np.sqrt(cov[i + k][i + k] * cov[j][j])
    # Negate the correlations so the minimum-cost assignment maximizes correlation.
    arrangement = linear_sum_assignment(-corr)
    resS = np.zeros_like(S)
    resAT = np.zeros_like(A.T)
    for t in range(k):
        resS[arrangement[1][t]] = S[arrangement[0][t]]
        resAT[arrangement[1][t]] = A.T[arrangement[0][t]]
    return resAT.T, resS
Example #22
Source File: metric.py From L2C with MIT License | 5 votes |
def optimal_assignment(self, gt_n_cluster=None, assign=None):
    if assign is None:
        # The Hungarian solver finds the minimum cost, so negate the confusion
        # matrix to maximize the matched counts. (`hungarian` is presumably
        # scipy.optimize.linear_sum_assignment imported under that alias.)
        mat = -self.conf.cpu().numpy()
        r, assign = hungarian(mat)
    self.conf = self.conf[:, assign]
    self.gt_n_cluster = gt_n_cluster
    return assign
Example #23
Source File: spatial.py From oddt with BSD 3-Clause "New" or "Revised" License | 5 votes |
def linear_sum_assignment(M):
    # Compatibility shim: emulate scipy.optimize.linear_sum_assignment on top
    # of the older `linear_assignment` helper (the (n, 2)-array API that
    # scikit-learn used to provide), splitting it into two index arrays.
    out = linear_assignment(M)
    return out[:, 0], out[:, 1]
Example #24
Source File: trackingutils.py From DeepLabCut with GNU Lesser General Public License v3.0 | 5 votes |
import numpy as np
from scipy.optimize import linear_sum_assignment
# `iou` is the project's bounding-box intersection-over-union helper.

def associate_detections_to_trackers(detections, trackers, iou_threshold):
    """
    Assigns detections to tracked objects (both represented as bounding boxes).

    Returns 3 lists of matches, unmatched_detections and unmatched_trackers.
    """
    if not len(trackers):
        return (
            np.empty((0, 2), dtype=int),
            np.arange(len(detections)),
            np.empty((0, 5), dtype=int),
        )
    iou_matrix = np.zeros((len(detections), len(trackers)), dtype=np.float32)
    for d, det in enumerate(detections):
        for t, trk in enumerate(trackers):
            iou_matrix[d, t] = iou(det, trk)
    # Maximize total IOU by minimizing its negation.
    row_indices, col_indices = linear_sum_assignment(-iou_matrix)

    unmatched_detections = []
    for d, det in enumerate(detections):
        if d not in row_indices:
            unmatched_detections.append(d)
    unmatched_trackers = []
    for t, trk in enumerate(trackers):
        if t not in col_indices:
            unmatched_trackers.append(t)

    # filter out matches with low IOU
    matches = []
    for row, col in zip(row_indices, col_indices):
        if iou_matrix[row, col] < iou_threshold:
            unmatched_detections.append(row)
            unmatched_trackers.append(col)
        else:
            matches.append([row, col])
    if not len(matches):
        matches = np.empty((0, 2), dtype=int)
    else:
        matches = np.stack(matches)

    return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
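A hedged usage sketch for the function above, assuming everything lives in one module; the iou helper is a stand-in written for [x1, y1, x2, y2] boxes, not DeepLabCut's own:

import numpy as np

def iou(a, b):
    # Hypothetical helper: intersection-over-union of two [x1, y1, x2, y2] boxes.
    x1, y1 = max(a[0], b[0]), max(a[1], b[1])
    x2, y2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    union = ((a[2] - a[0]) * (a[3] - a[1]) +
             (b[2] - b[0]) * (b[3] - b[1]) - inter)
    return inter / union

detections = np.array([[0, 0, 10, 10], [20, 20, 30, 30]])
trackers = np.array([[1, 1, 11, 11], [40, 40, 50, 50]])
matches, unmatched_det, unmatched_trk = associate_detections_to_trackers(
    detections, trackers, iou_threshold=0.3)
print(matches)        # [[0 0]] -- detection 0 paired with tracker 0
print(unmatched_det)  # [1]
print(unmatched_trk)  # [1]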
Example #25
Source File: util.py From ramp-workflow with BSD 3-Clause "New" or "Revised" License | 5 votes |
import numpy as np
from scipy.optimize import linear_sum_assignment
# `cc_iou` is the project's IOU helper for (x, y, r) circle tuples.

def _match_tuples(y_true, y_pred):
    """
    Given sets of true and predicted (x, y, r) tuples, determine the best
    possible match.

    Parameters
    ----------
    y_true, y_pred : list of tuples

    Returns
    -------
    (idxs_true, idxs_pred, ious)
        idxs_true, idxs_pred : indices into y_true and y_pred of matches
        ious : corresponding IOU value of each match

        The three arrays have identical length, equal to the minimum of the
        lengths of y_true and y_pred.
    """
    n_true = len(y_true)
    n_pred = len(y_pred)

    iou_matrix = np.empty((n_true, n_pred))
    for i in range(n_true):
        for j in range(n_pred):
            iou_matrix[i, j] = cc_iou(y_true[i], y_pred[j])

    # Matching maximizes IOU, so minimize 1 - IOU.
    idxs_true, idxs_pred = linear_sum_assignment(1 - iou_matrix)

    if (not idxs_true.size) or (not idxs_pred.size):
        ious = np.array([])
    else:
        ious = iou_matrix[idxs_true, idxs_pred]

    return idxs_true, idxs_pred, ious
Example #26
Source File: utils.py From IIC with MIT License | 5 votes |
import numpy as np
from scipy.optimize import linear_sum_assignment

def unsupervised_labels(y, y_hat, num_classes, num_clusters):
    """
    :param y: true label
    :param y_hat: concentration parameter
    :param num_classes: number of classes (determined by data)
    :param num_clusters: number of clusters (determined by model)
    :return: classification error rate
    """
    assert num_classes == num_clusters

    # initialize count matrix
    cnt_mtx = np.zeros([num_classes, num_classes])

    # fill in matrix
    for i in range(len(y)):
        cnt_mtx[int(y_hat[i]), int(y[i])] += 1

    # find optimal permutation
    row_ind, col_ind = linear_sum_assignment(-cnt_mtx)

    # compute error
    error = 1 - cnt_mtx[row_ind, col_ind].sum() / cnt_mtx.sum()

    # print results
    print('Classification error = {:.4f}'.format(error))

    return error
Example #27
Source File: bayesian.py From nni with MIT License | 5 votes |
import numpy as np
from scipy.optimize import linear_sum_assignment
# `skip_connection_distance` (singular) compares two individual skip-connections.

def skip_connections_distance(list_a, list_b):
    """The distance between the skip-connections of two neural networks."""
    distance_matrix = np.zeros((len(list_a), len(list_b)))
    for i, a in enumerate(list_a):
        for j, b in enumerate(list_b):
            distance_matrix[i][j] = skip_connection_distance(a, b)
    return distance_matrix[linear_sum_assignment(distance_matrix)].sum() + abs(
        len(list_a) - len(list_b)
    )
Example #28
Source File: evals.py From uis-rnn with Apache License 2.0 | 5 votes |
import numpy as np
from scipy import optimize
# `get_list_inverse_index` (defined elsewhere in this module) maps each unique
# id to its position in the sorted unique-id list.

def compute_sequence_match_accuracy(sequence1, sequence2):
    """Compute the accuracy between two sequences by finding optimal matching.

    Args:
        sequence1: A list of integers or strings.
        sequence2: A list of integers or strings.

    Returns:
        accuracy: sequence matching accuracy as a number in [0.0, 1.0]

    Raises:
        TypeError: If sequence1 or sequence2 is not a list.
        ValueError: If sequence1 and sequence2 do not have the same non-zero
            length.
    """
    if not isinstance(sequence1, list) or not isinstance(sequence2, list):
        raise TypeError('sequence1 and sequence2 must be lists')
    if not sequence1 or len(sequence1) != len(sequence2):
        raise ValueError(
            'sequence1 and sequence2 must have the same non-zero length')
    # get unique ids from sequences
    unique_ids1 = sorted(set(sequence1))
    unique_ids2 = sorted(set(sequence2))
    inverse_index1 = get_list_inverse_index(unique_ids1)
    inverse_index2 = get_list_inverse_index(unique_ids2)
    # get the count matrix
    count_matrix = np.zeros((len(unique_ids1), len(unique_ids2)))
    for item1, item2 in zip(sequence1, sequence2):
        index1 = inverse_index1[item1]
        index2 = inverse_index2[item2]
        count_matrix[index1, index2] += 1.0
    row_index, col_index = optimize.linear_sum_assignment(-count_matrix)
    optimal_match_count = count_matrix[row_index, col_index].sum()
    accuracy = optimal_match_count / len(sequence1)
    return accuracy
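A quick usage sketch (hypothetical sequences; assumes the function and its get_list_inverse_index helper are imported from evals.py): two sequences that agree up to a renaming of ids score 1.0.

# Speaker-id sequences that differ only by a renaming of the ids.
sequence1 = ['spk_a', 'spk_a', 'spk_b', 'spk_b', 'spk_a']
sequence2 = [0, 0, 1, 1, 0]
print(compute_sequence_match_accuracy(sequence1, sequence2))  # 1.0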
Example #29
Source File: Metrics.py From GMVAE with MIT License | 5 votes |
def cluster_acc(self, Y_pred, Y):
    Y_pred, Y = np.array(Y_pred), np.array(Y)
    assert Y_pred.size == Y.size
    D = max(Y_pred.max(), Y.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    for i in range(Y_pred.size):
        w[Y_pred[i], Y[i]] += 1
    row, col = linear_sum_assignment(w.max() - w)
    return sum([w[row[i], col[i]] for i in range(row.shape[0])]) * 1.0 / Y_pred.size
Example #30
Source File: Metrics.py From GMVAE with MIT License | 5 votes |
def cluster_acc(self, Y_pred, Y):
    Y_pred, Y = np.array(Y_pred), np.array(Y)
    assert Y_pred.size == Y.size
    D = max(Y_pred.max(), Y.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    for i in range(Y_pred.size):
        w[Y_pred[i], Y[i]] += 1
    row, col = linear_sum_assignment(w.max() - w)
    return sum([w[row[i], col[i]] for i in range(row.shape[0])]) * 1.0 / Y_pred.size