Python Examples of sklearn.cluster.MeanShift

Source File: clustering_meanShift.py From practicalDataAnalysisCookbook with GNU General Public License v2.0

6 votes

def findClusters_meanShift(data):
    '''
        Fit a Mean Shift model to ``data`` and return the result of ``fit``.

        The kernel bandwidth is not supplied by the caller; it is
        estimated from ``data`` (quantile 0.25, 500 samples).
    '''
    # derive the kernel bandwidth from the data itself
    estimated_bw = cl.estimate_bandwidth(data, quantile=0.25, n_samples=500)

    # build the estimator with binned seeding, then fit and hand it back
    model = cl.MeanShift(bandwidth=estimated_bw, bin_seeding=True)
    return model.fit(data)

# the file name of the dataset

Source File: utils.py From sparseprop with MIT License

6 votes

def get_typical_durations(raw_durations, bandwidth_percentile=0.05,
                          min_intersection=0.5, miss_covered=0.1):
    """Return typical durations in a dataset.

    The durations are clustered with Mean Shift; the cluster centres,
    truncated to integers and sorted ascending, are the typical durations.

    :param raw_durations: array of durations; it is reshaped to
        ``(raw_durations.shape[0], 1)`` before clustering.
    :param bandwidth_percentile: quantile passed to ``estimate_bandwidth``.
    :param min_intersection: a duration counts as covered by a typical
        duration when their ratio lies in
        ``[min_intersection, 1 / min_intersection]``.
    :param miss_covered: largest tolerated fraction of durations that are
        not covered by any typical duration.
    :return: sorted integer array of typical durations.
    :raises AssertionError: if fewer than ``1 - miss_covered`` of the
        durations are covered.
    """
    n_samples = raw_durations.shape[0]
    dur = raw_durations.reshape(n_samples, 1)
    bandwidth = estimate_bandwidth(dur, quantile=bandwidth_percentile)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=False)
    ms.fit(dur)
    tw = np.sort(np.array(ms.cluster_centers_.reshape(-1), dtype=int))

    # Guarantee a minimum intersection in the output durations:
    # p[i, j] is the ratio of duration i to typical duration j.
    p = np.zeros((n_samples, tw.shape[0]))
    for idx, typical in enumerate(tw):
        p[:, idx] = dur[:, 0] / typical
    ll = (p >= min_intersection) & (p <= 1.0 / min_intersection)
    covered_fraction = ll.any(axis=1).sum() / float(n_samples)
    if covered_fraction < (1.0 - miss_covered):
        # Raised explicitly (not via ``assert``) so the check is not
        # stripped when Python runs with -O.
        raise AssertionError("Condition of minimum intersection not satisfied")
    return tw

Source File: lanenet_cluster.py From lanenet-enet-hnet with Apache License 2.0

6 votes

def _cluster(prediction, bandwidth):
    """
    Implement the clustering part of Section II of the paper.

    :param prediction: samples handed to ``MeanShift.fit``
    :param bandwidth: bandwidth used to build the ``MeanShift`` estimator
    :return: ``(num_clusters, labels, cluster_centers)``; ``(0, [], [])``
        when fitting raises ``ValueError``
    """
    # bandwidth is passed by keyword: recent scikit-learn releases make
    # the MeanShift constructor parameters keyword-only.
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    log.info('开始Mean shift聚类 ...')
    tic = time.time()
    try:
        ms.fit(prediction)
    except ValueError as err:
        # fitting failed: log the error and report "no clusters"
        log.error(err)
        return 0, [], []
    log.info('Mean Shift耗时: {:.5f}s'.format(time.time() - tic))
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_

    num_clusters = cluster_centers.shape[0]

    log.info('聚类簇个数为: {:d}'.format(num_clusters))

    return num_clusters, labels, cluster_centers

Source File: meanshift_pytorch.py From PVN3D with MIT License

6 votes

def test():
    """Visually compare ``MeanShiftTorch`` with scikit-learn ``MeanShift``.

    Runs forever: each iteration draws 1000 random 2-D points, clusters
    them with both implementations (bandwidth 0.05), paints the points and
    one centre from each implementation onto a 480x480 image, shows it and
    waits for a key press. Requires CUDA and a display.
    """
    while True:
        a = np.random.rand(1000, 2)
        ta = torch.from_numpy(a.astype(np.float32)).cuda()
        ms = MeanShiftTorch(0.05)
        ctr, _ = ms.fit(ta)
        # Scaled coordinates go up to 479; uint8 would wrap past 255 and
        # paint the points in the wrong place, so use a wide integer type.
        a_idx = (a * 480).astype(np.int64)
        show_a = np.zeros((480, 480, 3), dtype="uint8")
        show_a[a_idx[:, 0], a_idx[:, 1], :] = np.array([255, 255, 255])
        ctr = (ctr.cpu().numpy() * 480).astype(np.int64)
        # centre found by the torch implementation
        show_a = cv2.circle(
            show_a, (int(ctr[1]), int(ctr[0])), 3, (0, 0, 255), -1
        )

        ms_cpu = MeanShift(
            bandwidth=0.05, n_jobs=8
        )
        ms_cpu.fit(a)
        clus_ctrs = np.array(ms_cpu.cluster_centers_)
        clus_labels = ms_cpu.labels_
        ctr = (clus_ctrs[0] * 480).astype(np.int64)
        # first centre reported by scikit-learn
        show_a = cv2.circle(
            show_a, (int(ctr[1]), int(ctr[0])), 3, (255, 0, 0), -1
        )
        imshow('show_a', show_a)
        waitKey(0)
        print(clus_ctrs[0])

Source File: test_cluster.py From pandas-ml with BSD 3-Clause "New" or "Revised" License

6 votes

def test_Classifications(self):
    """Check that ModelFrame clustering predictions match plain estimators.

    For each listed estimator name, the one reached through
    ``df.cluster`` and the one reached through ``cluster`` are fitted on
    the iris data and their predictions compared.
    """
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)

    for name in ('AffinityPropagation', 'MeanShift'):
        wrapped = getattr(df.cluster, name)()
        reference = getattr(cluster, name)()

        df.fit(wrapped)
        reference.fit(iris.data)

        result = df.predict(wrapped)
        expected = reference.predict(iris.data)

        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected)

Source File: utils.py From Lane_Detection-An_Instance_Segmentation_Approach with MIT License

6 votes

def cluster_embed(embeddings, preds_bin, band_width):
    """Turn per-pixel embeddings into instance labels with Mean Shift.

    For every sample, the embedding values selected by the binary mask are
    clustered; the cluster labels, shifted by +1, are written back at the
    masked positions so that 0 stays reserved for unselected pixels.

    :param embeddings: tensor whose dimension 1 is the embedding channel
        count; iterated sample by sample along dimension 0.
    :param preds_bin: 4-D tensor unpacked as ``(n, _, h, w)`` and viewed
        as ``(n, h, w)``.
    :param band_width: bandwidth passed to ``MeanShift``.
    :return: tensor shaped like the viewed ``preds_bin`` holding the
        instance labels.
    """
    n_channels = embeddings.shape[1]
    batch, _, height, width = preds_bin.shape
    preds_bin = preds_bin.view(batch, height, width)
    preds_inst = torch.zeros_like(preds_bin)

    for idx, (embedding, bin_pred) in enumerate(zip(embeddings, preds_bin)):
        fg_mask = bin_pred.byte()

        # gather the masked values as (n_channels, n_pixels), then flip
        # to (n_pixels, n_channels) so each row is one sample to cluster
        fg_values = torch.masked_select(embedding, fg_mask).view(n_channels, -1)
        embedding_fg = torch.transpose(fg_values, 0, 1)

        features = embedding_fg.cpu().detach().numpy()
        estimator = MeanShift(bandwidth=band_width, bin_seeding=True,
                              min_bin_freq=100)
        clustering = estimator.fit(features)

        shifted_labels = torch.from_numpy(clustering.labels_).cuda() + 1
        preds_inst[idx][fg_mask] = shifted_labels

    return preds_inst

Source File: clustering.py From ASIS with MIT License

5 votes

def cluster(prediction, bandwidth):
    """Cluster ``prediction`` with Mean Shift.

    :param prediction: samples handed to ``MeanShift.fit``.
    :param bandwidth: bandwidth used to build the ``MeanShift`` estimator.
    :return: ``(num_clusters, labels, cluster_centers)`` where
        ``num_clusters`` is the number of rows of ``cluster_centers``.
    """
    # bandwidth is passed by keyword: recent scikit-learn releases make
    # the MeanShift constructor parameters keyword-only.
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(prediction)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_

    num_clusters = cluster_centers.shape[0]

    return num_clusters, labels, cluster_centers

Source File: mechanical.py From CO2MPAS-TA with European Union Public License 1.1

5 votes

def identify_velocity_speed_ratios_v3(
        engine_speeds_out, velocities, idle_engine_speed, stop_velocity):
    """
    Identifies velocity speed ratios from gear box speed vector [km/(h*RPM)].

    Samples above both the idle threshold and the stop velocity are turned
    into velocity/engine-speed ratios, which are clustered with Mean Shift;
    the sorted cluster centres are numbered from 1 and key 0 maps to 0.0.

    :param engine_speeds_out:
        Engine speed [RPM].
    :type engine_speeds_out: numpy.array

    :param velocities:
        Velocity vector [km/h].
    :type velocities: numpy.array

    :param idle_engine_speed:
        Engine speed idle median and std [RPM].
    :type idle_engine_speed: (float, float)

    :param stop_velocity:
        Maximum velocity to consider the vehicle stopped [km/h].
    :type stop_velocity: float

    :return:
        Constant velocity speed ratios of the gear box [km/(h*RPM)].
    :rtype: dict
    """
    import sklearn.cluster as sk_clu

    # threshold is the sum of the two entries of idle_engine_speed
    idle_threshold = idle_engine_speed[0] + idle_engine_speed[1]

    above_idle = engine_speeds_out > idle_threshold
    moving = velocities > stop_velocity
    selected = above_idle & moving

    # one ratio per selected sample, as a column vector for the estimator
    ratios = (velocities[selected] / engine_speeds_out[selected])[:, None]

    model = sk_clu.MeanShift(
        bandwidth=sk_clu.estimate_bandwidth(ratios, quantile=0.2),
        bin_seeding=True,
    )
    model.fit(ratios)

    ordered_centers = sorted(model.cluster_centers_[:, 0])
    vsr = dict(enumerate(ordered_centers, start=1))
    vsr[0] = 0.0
    return vsr

Source File: sklearn_cluster.py From learn-to-cluster with MIT License

5 votes

def meanshift(feat, bw, num_process, min_bin_freq, **kwargs):
    """Cluster ``feat`` with Mean Shift and return the per-sample labels.

    ``bw`` is the bandwidth, ``num_process`` is forwarded as ``n_jobs`` and
    ``min_bin_freq`` is forwarded unchanged. Extra keyword arguments are
    accepted but not used.
    """
    print('#num_process:', num_process)
    print('min_bin_freq:', min_bin_freq)

    settings = {
        'bandwidth': bw,
        'n_jobs': num_process,
        'min_bin_freq': min_bin_freq,
    }
    fitted = cluster.MeanShift(**settings).fit(feat)
    return fitted.labels_

Source File: clustering.py From instance-segmentation-with-discriminative-loss-tensorflow with MIT License

5 votes

def cluster(prediction, bandwidth):
    """Cluster ``prediction`` with Mean Shift, printing the elapsed time.

    :param prediction: samples handed to ``MeanShift.fit``.
    :param bandwidth: bandwidth used to build the ``MeanShift`` estimator.
    :return: ``(num_clusters, labels, cluster_centers)`` where
        ``num_clusters`` is the number of rows of ``cluster_centers``.
    """
    # bandwidth is passed by keyword: recent scikit-learn releases make
    # the MeanShift constructor parameters keyword-only.
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    print ('Mean shift clustering, might take some time ...')
    tic = time.time()
    ms.fit(prediction)
    print ('time for clustering', time.time() - tic)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_

    num_clusters = cluster_centers.shape[0]

    return num_clusters, labels, cluster_centers

Source File: compare_clustering_algs.py From mmvt with GNU General Public License v3.0

5 votes

def compare(data, n_groups, output_fol):
    """Run ``plot_clusters`` on ``data`` once per clustering algorithm.

    Each run is described by ``(algorithm, title, args, kwds)`` and is
    forwarded to ``plot_clusters`` together with ``data`` and
    ``output_fol``; runs execute in the order listed.
    """
    runs = [(cluster.KMeans, 'KMeans', (), {'n_clusters': n_groups})]

    # one Gaussian mixture run per covariance type
    runs.extend(
        (mixture.GaussianMixture, 'GMM_{}'.format(ct), (),
         {'n_components': n_groups, 'covariance_type': ct})
        for ct in ['spherical', 'tied', 'diag', 'full']
    )

    runs.extend([
        (cluster.AffinityPropagation, 'AffinityPropagation', (),
         {'preference': -5.0, 'damping': 0.95}),
        # NOTE(review): 0.175 is handed over as a positional argument;
        # recent scikit-learn makes MeanShift parameters keyword-only --
        # confirm how plot_clusters forwards this tuple.
        (cluster.MeanShift, 'MeanShift', (0.175,), {'cluster_all': False}),
        (cluster.SpectralClustering, 'SpectralClustering', (),
         {'n_clusters': n_groups}),
        (cluster.AgglomerativeClustering, 'AgglomerativeClustering', (),
         {'n_clusters': n_groups, 'linkage': 'ward'}),
        (cluster.DBSCAN, 'DBSCAN', (), {'eps': 0.025}),
    ])

    for algorithm, title, args, kwds in runs:
        plot_clusters(data, algorithm, title, output_fol, args, kwds)

Source File: clustering.py From JSNet with MIT License

5 votes

def cluster(prediction, bandwidth):
    """Cluster ``prediction`` with Mean Shift.

    :param prediction: samples handed to ``MeanShift.fit``.
    :param bandwidth: bandwidth used to build the ``MeanShift`` estimator.
    :return: ``(num_clusters, labels, cluster_centers)`` where
        ``num_clusters`` is the number of rows of ``cluster_centers``.
    """
    # bandwidth is passed by keyword: recent scikit-learn releases make
    # the MeanShift constructor parameters keyword-only.
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(prediction)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_

    num_clusters = cluster_centers.shape[0]

    return num_clusters, labels, cluster_centers

Source File: ml.py From forex_algotrading with MIT License

5 votes

def main(filename):
    """Derive support/resistance levels from tick data and export them.

    Reads a header-less CSV with columns ``Date_Time``, ``Buy`` and
    ``Sell``, clusters the ``Sell`` prices with Mean Shift and records the
    minimum and maximum price of every cluster. Writes two files in the
    current directory: ``ticks.json`` (24-hour OHLC of ``Sell``) and
    ``ml_results.json`` (the flat list of cluster edges).

    :param filename: path of the CSV file to read.
    """
    # read csv files with daily data per tick
    df = pandas.read_csv(filename, parse_dates=[0], index_col=0, names=['Date_Time', 'Buy', 'Sell'],
                         date_parser=lambda x: pandas.to_datetime(x, format="%d/%m/%y %H:%M:%S"))

    # group by day and drop NA values (usually weekends)
    grouped_data = df.dropna()
    ticks_data = grouped_data['Sell'].resample('24H').ohlc()

    # use 'ask'; DataFrame.as_matrix() was removed in pandas 1.0, while a
    # one-column selection plus .values yields the same (n, 1) array
    sell_data = grouped_data[['Sell']].values

    # calculate bandwidth (experiment with quantile and samples)
    bandwidth = estimate_bandwidth(sell_data, quantile=0.1, n_samples=100)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)

    # fit the data
    ms.fit(sell_data)

    ml_results = []
    n_labels = len(np.unique(ms.labels_))
    for k in range(n_labels):
        my_members = ms.labels_ == k
        values = sell_data[my_members, 0]

        # find the edges
        ml_results.append(min(values))
        ml_results.append(max(values))

    # export the data for the visualizations
    ticks_data.to_json('ticks.json', date_format='iso', orient='index')

    # export ml support resistance
    with open('ml_results.json', 'w') as f:
        f.write(json.dumps(ml_results))

    print("Done. Goto 0.0.0.0:8000/chart.html")

Source File: eval_icp.py From PVN3D with MIT License

5 votes

def get_cld_bigest_clus(p3ds):
    """Return the rows of ``p3ds`` that Mean Shift assigns to label 0.

    The bandwidth comes from the module-level name ``radius``.
    NOTE(review): the function name suggests label 0 is the biggest
    cluster -- confirm against the scikit-learn label ordering.
    """
    mean_shift = MeanShift(bandwidth=radius, bin_seeding=True, n_jobs=8)
    mean_shift.fit(p3ds)

    rows_in_cluster_zero = np.where(mean_shift.labels_ == 0)[0]
    return p3ds[rows_in_cluster_zero, :]

Source File: clusterer.py From yelp with GNU Lesser General Public License v2.1

5 votes

def mean_shift(matrix):
    """Cluster ``matrix`` with a default ``MeanShift`` and return the labels.

    Also prints the number of distinct labels, not counting ``-1``.
    """
    model = skcluster.MeanShift()
    model.fit(matrix)
    labels = model.labels_

    # Count the distinct labels; -1 (noise, if present) is left out.
    distinct = set(labels)
    distinct.discard(-1)
    print('Estimated number of clusters:', len(distinct))

    return labels

Source File: region_growing.py From pyImSegm with BSD 3-Clause "New" or "Revised" License

5 votes

def transform_rays_model_cdf_mixture(list_rays, coef_components=1):
    """ compute the mixture model and transform it into cumulative distribution

    The number of mixture components is the number of clusters found by
    MeanShift on the rays, multiplied by `coef_components`.

    :param list(list(int)) list_rays: list ray features (distances)
    :param int coef_components: multiplication for number of components
    :return any, list(list(int)): mixture model, cumulative distribution

    >>> np.random.seed(0)
    >>> list_rays = [[9, 4, 9], [4, 9, 7], [9, 7, 11], [10, 8, 10],
    ...              [9, 11, 8], [4, 8, 5], [8, 10, 6], [9, 7, 11]]
    >>> mm, cdist = transform_rays_model_cdf_mixture(list_rays)
    >>> # the rounding variate a bit according GMM estimated model
    >>> np.round(np.array(cdist) * 4) / 4.  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    array([[ 1. , 1. , 1. , 1. , 1. , 1. , 0.75, 0.75, 0.5 , 0.25, 0. ],
           [ 1. , 1. , 1. , 1. , 1. , 1. , 1.  , 0.75, 0.5 , 0.25, 0. ],
           [ 1. , 1. , 1. , 1. , 1. , 1. , ...,  0.75, 0.5 , 0.25, 0. ]])
    """
    rays = np.array(list_rays)
    ms = cluster.MeanShift()
    ms.fit(rays)
    logging.debug('MeanShift found: %r', np.bincount(ms.labels_))

    nb_components = int(len(np.unique(ms.labels_)) * coef_components)
    mm = mixture.BayesianGaussianMixture(n_components=nb_components)
    # gmm.fit(np.array(list_rays))
    mm.fit(rays, ms.labels_)
    # '%i' is required here: the former '% components' was parsed as a
    # '%c' conversion and logged the count as a single character.
    logging.debug('Mixture model found %i components with weights: %r',
                  len(mm.weights_), mm.weights_)

    # compute the fairest mean + sigma over all components and ray angles
    max_dist = np.max([[m[i] + np.sqrt(c[i, i]) for i in range(len(m))]
                       for m, c in zip(mm.means_, mm.covariances_)])
    # max_dist = np.max(rays)

    # fixing, AttributeError: 'BayesianGaussianMixture' object has no attribute 'covariances'
    covs = mm.covariances if hasattr(mm, 'covariances') else mm.covariances_
    # keep only the diagonal of each covariance matrix (per-feature std)
    stds = np.sqrt(abs(covs))[:, np.eye(mm.means_.shape[1], dtype=bool)]
    # stds = np.sum(mm.covariances_, axis=-1)
    cdist = compute_cumulative_distrib(mm.means_, stds, mm.weights_, max_dist)
    return mm, cdist.tolist()

Source File: meanshift_pytorch.py From PVN3D with MIT License

4 votes

def test2():
    """Compare ``MeanShiftTorch`` with scikit-learn ``MeanShift`` on saved data.

    Loads 2000 pickled result files from a hard-coded path. For every class
    id of the first ``gt_cls_ids`` entry (stopping at id 0) it clusters the
    3-D points of that class with both implementations (bandwidth 0.05),
    prints the timings, draws the projected points, centres and selected
    labels, and waits for a key press. Requires CUDA and a display.
    """
    sv_ptn = '/data/workspace/3D_Point_Det/config/ycb.onestage.rs14.nofarflatFocalls/train_log/eval_result/051_large_clamp/mask_res_pic/{}sv_info_1.pkl'
    for i in range(2000):
        # Use a context manager so the file handle is closed right after
        # loading instead of leaking one handle per iteration.
        with open(sv_ptn.format(i), 'rb') as pkl_file:
            data = pkl.load(pkl_file)
        all_p3ds = data['p3ds']

        for cls_id in data['gt_cls_ids'][0]:
            if cls_id == 0:
                break
            p3ds = all_p3ds[np.where(data['labels'] == cls_id)[0], :]
            show_img = np.zeros((480, 640, 3), dtype="uint8")
            p2ds = my_utils.project_p3d(p3ds, 1.0)
            show_img[p2ds[:, 1], p2ds[:, 0], :] = np.array([255, 255, 255])
            gpu_label = np.zeros((480, 640, 3), dtype="uint8")
            cpu_label = gpu_label.copy()
            p3ds_cu = torch.from_numpy(p3ds).cuda()
            ms_gpu = MeanShiftTorch(0.05)

            # torch implementation: time the fit and paint label-1 points
            start = time.time()
            ctr, labels = ms_gpu.fit(p3ds_cu)
            ctr = ctr.cpu().numpy().reshape(1, 3)
            labels = labels.cpu().numpy()
            p2ds_gt_lb = p2ds[np.where(labels==1)[0], :]
            gpu_label[p2ds_gt_lb[:, 1], p2ds_gt_lb[:, 0], :] = np.array(
                [255, 255, 255]
            )
            end = time.time()
            print("gpu time:\t", end - start)
            ctr_2d = my_utils.project_p3d(ctr, 1.0)
            show_img = cv2.circle(
                show_img, (ctr_2d[0][0], ctr_2d[0][1]), 3, (0, 0, 255), -1
            )

            # scikit-learn implementation: time the fit and paint label-0 points
            ms_cpu = MeanShift(
                bandwidth=0.05, n_jobs=40
            )
            start = time.time()
            ms_cpu.fit(p3ds)
            end = time.time()
            print("sklearn cpu time:\t", end - start)
            clus_ctrs = np.array(ms_cpu.cluster_centers_)
            clus_labels = ms_cpu.labels_
            ctr_2d = my_utils.project_p3d(clus_ctrs[0].reshape(1, 3), 1.0)
            show_img = cv2.circle(
                show_img, (ctr_2d[0][0], ctr_2d[0][1]), 3, (255, 0, 0), -1
            )
            p2ds_gt_lb = p2ds[np.where(clus_labels==0)[0], :]
            cpu_label[p2ds_gt_lb[:, 1], p2ds_gt_lb[:, 0], :] = np.array(
                [255, 255, 255]
            )
            imshow('show_img', show_img)
            imshow('gpu', gpu_label)
            imshow('cpu', cpu_label)
            waitKey(0)

Source File: region_growing.py From pyImSegm with BSD 3-Clause "New" or "Revised" License

4 votes

def transform_rays_model_cdf_kmeans(list_rays, nb_components=None):
    """ cluster the rays by KMeans and transform it into cumulative distribution

    When `nb_components` is not given, the number of clusters is taken
    from a MeanShift run on the same rays.

    :param list(list(int)) list_rays: list ray features (distances)
    :param int nb_components: number components in mixture model
    :return any, list(list(int)):  mixture model, list of stat/param of models

    >>> np.random.seed(0)
    >>> list_rays = [[9, 4, 9], [4, 9, 7], [9, 7, 11], [10, 8, 10],
    ...              [9, 11, 8], [4, 8, 5], [8, 10, 6], [9, 7, 11]]
    >>> mm, cdist = transform_rays_model_cdf_kmeans(list_rays)
    >>> np.round(cdist, 1).tolist()  # doctest: +NORMALIZE_WHITESPACE
    [[1.0, 1.0, 1.0, 1.0, 0.9, 0.8, 0.7, 0.7, 0.6, 0.4, 0.2, 0.0, 0.0],
     [1.0, 1.0, 1.0, 1.0, 0.9, 0.9, 0.8, 0.7, 0.5, 0.3, 0.2, 0.1, 0.0],
     [1.0, 1.0, 1.0, 1.0, 1.0, 0.9, 0.8, 0.7, 0.5, 0.4, 0.2, 0.1, 0.0]]
    >>> mm, cdist = transform_rays_model_cdf_kmeans(list_rays, nb_components=2)
    """
    rays = np.array(list_rays)

    if nb_components:
        kmeans = cluster.KMeans(nb_components)
        kmeans.fit(rays)
    else:
        # let MeanShift decide how many clusters KMeans should look for
        ms = cluster.MeanShift()
        ms.fit(rays)
        logging.debug('MeanShift found: %r', np.bincount(ms.labels_))
        nb_components = len(np.unique(ms.labels_))
        kmeans = cluster.KMeans(nb_components)
        kmeans.fit(rays, ms.labels_)

    labels = kmeans.labels_
    means = kmeans.cluster_centers_

    # per-cluster standard deviation of every ray feature, offset by one
    stds = np.zeros((len(means), rays.shape[1]))
    for row, label in enumerate(np.unique(labels)):
        members = rays[labels == label]
        stds[row, :] = np.std(members, axis=0)
    stds += 1

    # share of samples that fell into each cluster
    weights = np.bincount(labels) / float(len(labels))

    # compute the fairest mean + sigma over all components and ray angles
    max_dist = np.max(means + stds)

    cdist = compute_cumulative_distrib(means, stds, weights, max_dist)
    return kmeans, cdist.tolist()

Source File: test_cluster.py From pandas-ml with BSD 3-Clause "New" or "Revised" License

3 votes

def test_objectmapper(self):
    """Check that ``df.cluster`` exposes the very same classes as ``cluster``."""
    df = pdml.ModelFrame([])

    estimator_names = (
        'AffinityPropagation',
        'AgglomerativeClustering',
        'Birch',
        'DBSCAN',
        'FeatureAgglomeration',
        'KMeans',
        'MiniBatchKMeans',
        'MeanShift',
        'SpectralClustering',
    )
    for name in estimator_names:
        self.assertIs(getattr(df.cluster, name), getattr(cluster, name))

    # the biclustering estimators live one level deeper
    for name in ('SpectralBiclustering', 'SpectralCoclustering'):
        self.assertIs(getattr(df.cluster.bicluster, name),
                      getattr(cluster.bicluster, name))

Python sklearn.cluster.MeanShift() Examples