Python sklearn.cluster.MeanShift() Examples

The following are 19 code examples of sklearn.cluster.MeanShift(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.cluster, or try the search function.
Example #1
Source File: clustering_meanShift.py    From practicalDataAnalysisCookbook with GNU General Public License v2.0 6 votes vote down vote up
def findClusters_meanShift(data):
    '''
        Fit a Mean Shift clustering model to ``data``.

        The kernel bandwidth is estimated from the data itself
        (quantile 0.25, 500 samples) and bin seeding is enabled.
        ``cl`` is presumably ``sklearn.cluster`` -- confirm against
        the file's imports.

        :param data: samples to cluster
        :return: the fitted Mean Shift estimator
    '''
    estimated_bw = cl.estimate_bandwidth(data, quantile=0.25, n_samples=500)

    # build the estimator, then fit it; ``fit`` hands back the estimator
    model = cl.MeanShift(bandwidth=estimated_bw, bin_seeding=True)
    return model.fit(data)

# the file name of the dataset 
Example #2
Source File: utils.py    From sparseprop with MIT License 6 votes vote down vote up
def get_typical_durations(raw_durations, bandwidth_percentile=0.05, 
                       min_intersection=0.5, miss_covered=0.1):
    """Return typical durations in a dataset.

    The durations are clustered with Mean Shift; the cluster centers,
    truncated to integers and sorted, are the typical durations.

    :param raw_durations: array whose first axis indexes the samples
    :param bandwidth_percentile: quantile used to estimate the bandwidth
    :param min_intersection: a duration counts as covered by a typical
        duration when their ratio lies in
        ``[min_intersection, 1 / min_intersection]``
    :param miss_covered: tolerated fraction of uncovered durations
    :return: sorted integer array of typical durations
    :raises AssertionError: when too few durations are covered
    """
    n_samples = raw_durations.shape[0]
    dur = raw_durations.reshape(n_samples, 1)

    ms = MeanShift(
        bandwidth=estimate_bandwidth(dur, quantile=bandwidth_percentile),
        bin_seeding=False)
    ms.fit(dur.reshape(n_samples, 1))

    centers = ms.cluster_centers_
    flat_centers = centers.reshape(centers.shape[0])
    tw = np.sort(np.array(flat_centers, dtype=int))

    # Guarantee a minimum intersection in the output durations:
    # one column of duration / typical-duration ratios per center.
    ratios = np.zeros((n_samples, tw.shape[0]))
    for col, typical in enumerate(tw):
        ratios[:, col] = (dur / typical).reshape(n_samples)

    lower_ok = ratios >= min_intersection
    upper_ok = ratios <= 1.0 / min_intersection
    covered = (lower_ok & upper_ok).any(axis=1)

    coverage = covered.sum() / float(n_samples)
    if coverage < (1.0 - miss_covered):
        assert False, "Condition of minimum intersection not satisfied"
    return tw
Example #3
Source File: lanenet_cluster.py    From lanenet-enet-hnet with Apache License 2.0 6 votes vote down vote up
def _cluster(prediction, bandwidth):
    """
    Implement the clustering step described in Section II of the paper.

    :param prediction: samples to cluster, handed to ``MeanShift.fit``
    :param bandwidth: kernel bandwidth for Mean Shift
    :return: ``(num_clusters, labels, cluster_centers)``; when fitting
        raises ``ValueError`` the error is logged and ``(0, [], [])``
        is returned instead
    """
    # bandwidth is passed by keyword: recent scikit-learn releases make
    # the MeanShift constructor arguments keyword-only, so the positional
    # form raises TypeError there.
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    log.info('开始Mean shift聚类 ...')
    tic = time.time()
    try:
        ms.fit(prediction)
    except ValueError as err:
        log.error(err)
        return 0, [], []
    log.info('Mean Shift耗时: {:.5f}s'.format(time.time() - tic))
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_

    num_clusters = cluster_centers.shape[0]

    log.info('聚类簇个数为: {:d}'.format(num_clusters))

    return num_clusters, labels, cluster_centers
Example #4
Source File: meanshift_pytorch.py    From PVN3D with MIT License 6 votes vote down vote up
def test():
    """
    Visually compare the torch Mean Shift against scikit-learn's.

    Loops forever: each round draws 1000 random 2-D points, clusters them
    with ``MeanShiftTorch`` and with ``sklearn``'s ``MeanShift``, paints the
    points and both centers on a 480x480 canvas and waits for a key press.
    Requires a CUDA device and a GUI-capable OpenCV.
    """
    while True:
        a = np.random.rand(1000, 2)
        ta = torch.from_numpy(a.astype(np.float32)).cuda()
        ms = MeanShiftTorch(0.05)
        ctr, _ = ms.fit(ta)
        # Pixel coordinates range over 0..479, which does not fit in uint8
        # (it would wrap modulo 256), so a wide integer type is used.
        a_idx = (a * 480).astype(np.int64)
        show_a = np.zeros((480, 480, 3), dtype="uint8")
        show_a[a_idx[:, 0], a_idx[:, 1], :] = np.array([255, 255, 255])
        ctr = (ctr.cpu().numpy() * 480).astype(np.int64)
        # cv2 drawing functions expect plain Python ints for coordinates.
        show_a = cv2.circle(
            show_a, (int(ctr[1]), int(ctr[0])), 3, (0, 0, 255), -1
        )

        ms_cpu = MeanShift(
            bandwidth=0.05, n_jobs=8
        )
        ms_cpu.fit(a)
        clus_ctrs = np.array(ms_cpu.cluster_centers_)
        clus_labels = ms_cpu.labels_
        ctr = (clus_ctrs[0] * 480).astype(np.int64)
        show_a = cv2.circle(
            show_a, (int(ctr[1]), int(ctr[0])), 3, (255, 0, 0), -1
        )
        imshow('show_a', show_a)
        waitKey(0)
        print(clus_ctrs[0])
Example #5
Source File: test_cluster.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_Classifications(self):
    """
    Check that clustering through ``ModelFrame`` matches the plain
    estimators for AffinityPropagation and MeanShift on the iris data.
    """
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)

    for name in ('AffinityPropagation', 'MeanShift'):
        # same estimator class, reached via the frame accessor and directly
        wrapped = getattr(df.cluster, name)()
        reference = getattr(cluster, name)()

        df.fit(wrapped)
        reference.fit(iris.data)

        result = df.predict(wrapped)
        expected = reference.predict(iris.data)

        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected)
Example #6
Source File: utils.py    From Lane_Detection-An_Instance_Segmentation_Approach with MIT License 6 votes vote down vote up
def cluster_embed(embeddings, preds_bin, band_width):
    """
    Turn per-pixel embeddings into instance labels with Mean Shift.

    For every image the embeddings of the foreground pixels (non-zero
    entries of the binary prediction after a ``byte()`` cast) are clustered;
    each foreground pixel receives its cluster label plus one, so zero
    stays reserved for background.

    :param embeddings: tensor indexed as (image, channel, ...); the channel
        count is read from ``shape[1]``
    :param preds_bin: tensor of shape (n, _, h, w), viewed as (n, h, w)
        -- presumably a single-channel binary mask, confirm with caller
    :param band_width: Mean Shift bandwidth
    :return: tensor shaped and typed like the viewed ``preds_bin`` holding
        the instance labels
    """
    c = embeddings.shape[1]
    n, _, h, w = preds_bin.shape
    preds_bin = preds_bin.view(n, h, w)
    preds_inst = torch.zeros_like(preds_bin)
    for idx, (embedding, bin_pred) in enumerate(zip(embeddings, preds_bin)):
        # Keep the original byte() truncation semantics, but compare against
        # zero so that PyTorch versions with a bool dtype get a bool mask
        # (uint8 masks are deprecated there).
        fg_mask = bin_pred.byte() != 0
        if not fg_mask.any():
            # No foreground in this image: nothing to cluster, and both
            # view(c, -1) and MeanShift.fit would fail on empty input.
            continue

        embedding_fg = torch.transpose(
            torch.masked_select(embedding, fg_mask).view(c, -1), 0, 1)

        clustering = MeanShift(
            bandwidth=band_width, bin_seeding=True, min_bin_freq=100,
        ).fit(embedding_fg.cpu().detach().numpy())

        # Follow the destination's device and dtype instead of assuming the
        # current CUDA device, so CPU and multi-GPU inputs work as well.
        inst_labels = torch.from_numpy(clustering.labels_).to(
            device=preds_inst.device, dtype=preds_inst.dtype)
        preds_inst[idx][fg_mask] = inst_labels + 1

    return preds_inst
Example #7
Source File: clustering.py    From ASIS with MIT License 5 votes vote down vote up
def cluster(prediction, bandwidth):
    """
    Cluster ``prediction`` with Mean Shift (bin seeding enabled).

    :param prediction: samples to cluster, handed to ``MeanShift.fit``
    :param bandwidth: kernel bandwidth for Mean Shift
    :return: ``(num_clusters, labels, cluster_centers)``
    """
    # bandwidth is passed by keyword: recent scikit-learn releases make
    # the MeanShift constructor arguments keyword-only, so the positional
    # form raises TypeError there.
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(prediction)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_

    num_clusters = cluster_centers.shape[0]

    return num_clusters, labels, cluster_centers
Example #8
Source File: mechanical.py    From CO2MPAS-TA with European Union Public License 1.1 5 votes vote down vote up
def identify_velocity_speed_ratios_v3(
        engine_speeds_out, velocities, idle_engine_speed, stop_velocity):
    """
    Identifies velocity speed ratios from gear box speed vector [km/(h*RPM)].

    Samples where the engine runs above idle (median plus std) and the
    vehicle moves faster than the stop velocity are turned into
    velocity/engine-speed ratios, which are clustered with Mean Shift.
    The sorted cluster centers are numbered from 1; key 0 maps to 0.0.

    :param engine_speeds_out:
        Engine speed [RPM].
    :type engine_speeds_out: numpy.array

    :param velocities:
        Velocity vector [km/h].
    :type velocities: numpy.array

    :param idle_engine_speed:
        Engine speed idle median and std [RPM].
    :type idle_engine_speed: (float, float)

    :param stop_velocity:
        Maximum velocity to consider the vehicle stopped [km/h].
    :type stop_velocity: float

    :return:
        Constant velocity speed ratios of the gear box [km/(h*RPM)].
    :rtype: dict
    """
    import sklearn.cluster as sk_clu

    above_idle_limit = idle_engine_speed[0] + idle_engine_speed[1]

    selected = (engine_speeds_out > above_idle_limit) & (
        velocities > stop_velocity)
    # column vector of ratios, one sample per row
    ratios = (velocities[selected] / engine_speeds_out[selected])[:, None]

    mean_shift = sk_clu.MeanShift(
        bandwidth=sk_clu.estimate_bandwidth(ratios, quantile=0.2),
        bin_seeding=True)
    mean_shift.fit(ratios)

    ordered_centers = sorted(mean_shift.cluster_centers_[:, 0])
    vsr = dict(enumerate(ordered_centers, start=1))
    vsr[0] = 0.0

    return vsr
Example #9
Source File: sklearn_cluster.py    From learn-to-cluster with MIT License 5 votes vote down vote up
def meanshift(feat, bw, num_process, min_bin_freq, **kwargs):
    """
    Run Mean Shift on ``feat`` and return the cluster label of each sample.

    :param feat: samples to cluster
    :param bw: Mean Shift bandwidth
    :param num_process: value forwarded as ``n_jobs``
    :param min_bin_freq: value forwarded as ``min_bin_freq``
    :param kwargs: accepted and ignored
    :return: ``labels_`` of the fitted estimator
    """
    print('#num_process:', num_process)
    print('min_bin_freq:', min_bin_freq)
    estimator = cluster.MeanShift(
        bandwidth=bw, n_jobs=num_process, min_bin_freq=min_bin_freq)
    estimator.fit(feat)
    return estimator.labels_
Example #10
Source File: clustering.py    From instance-segmentation-with-discriminative-loss-tensorflow with MIT License 5 votes vote down vote up
def cluster(prediction, bandwidth):
    """
    Cluster ``prediction`` with Mean Shift and report the elapsed time.

    :param prediction: samples to cluster, handed to ``MeanShift.fit``
    :param bandwidth: kernel bandwidth for Mean Shift
    :return: ``(num_clusters, labels, cluster_centers)``
    """
    # bandwidth is passed by keyword: recent scikit-learn releases make
    # the MeanShift constructor arguments keyword-only, so the positional
    # form raises TypeError there.
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    print('Mean shift clustering, might take some time ...')
    tic = time.time()
    ms.fit(prediction)
    print('time for clustering', time.time() - tic)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_

    num_clusters = cluster_centers.shape[0]

    return num_clusters, labels, cluster_centers
Example #11
Source File: compare_clustering_algs.py    From mmvt with GNU General Public License v3.0 5 votes vote down vote up
def compare(data, n_groups, output_fol):
    """
    Plot the result of several clustering algorithms on the same data.

    Every algorithm is handed to ``plot_clusters`` together with a display
    name, the output folder, positional arguments and keyword arguments.

    :param data: samples to cluster
    :param n_groups: number of clusters/components for the algorithms that
        take one
    :param output_fol: output folder forwarded to ``plot_clusters``
    """
    # (algorithm, display name, positional args, keyword args), in run order.
    # scipy.cluster.vq.kmeans and hdbscan.HDBSCAN were tried here as well
    # and are currently disabled.
    runs = [(cluster.KMeans, 'KMeans', (), {'n_clusters': n_groups})]
    runs.extend(
        (mixture.GaussianMixture, 'GMM_{}'.format(ct), (),
         {'n_components': n_groups, 'covariance_type': ct})
        for ct in ['spherical', 'tied', 'diag', 'full'])
    runs.extend([
        (cluster.AffinityPropagation, 'AffinityPropagation', (),
         {'preference': -5.0, 'damping': 0.95}),
        (cluster.MeanShift, 'MeanShift', (0.175,), {'cluster_all': False}),
        (cluster.SpectralClustering, 'SpectralClustering', (),
         {'n_clusters': n_groups}),
        (cluster.AgglomerativeClustering, 'AgglomerativeClustering', (),
         {'n_clusters': n_groups, 'linkage': 'ward'}),
        (cluster.DBSCAN, 'DBSCAN', (), {'eps': 0.025}),
    ])

    for algorithm, title, args, kwds in runs:
        plot_clusters(data, algorithm, title, output_fol, args, kwds)
Example #12
Source File: clustering.py    From JSNet with MIT License 5 votes vote down vote up
def cluster(prediction, bandwidth):
    """
    Cluster ``prediction`` with Mean Shift (bin seeding enabled).

    :param prediction: samples to cluster, handed to ``MeanShift.fit``
    :param bandwidth: kernel bandwidth for Mean Shift
    :return: ``(num_clusters, labels, cluster_centers)``
    """
    # bandwidth is passed by keyword: recent scikit-learn releases make
    # the MeanShift constructor arguments keyword-only, so the positional
    # form raises TypeError there.
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(prediction)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_

    num_clusters = cluster_centers.shape[0]

    return num_clusters, labels, cluster_centers
Example #13
Source File: ml.py    From forex_algotrading with MIT License 5 votes vote down vote up
def main(filename):
    """
    Derive support/resistance levels from tick data with Mean Shift.

    Reads a header-less CSV of (Date_Time, Buy, Sell) ticks, clusters the
    'Sell' prices and records the minimum and maximum price of every
    cluster. Writes daily OHLC bars of the 'Sell' price to ``ticks.json``
    and the cluster edges to ``ml_results.json`` in the working directory.

    :param filename: path of the CSV file to read
    """
    # read csv files with daily data per tick
    df = pandas.read_csv(filename, parse_dates=[0], index_col=0, names=['Date_Time', 'Buy', 'Sell'],
                         date_parser=lambda x: pandas.to_datetime(x, format="%d/%m/%y %H:%M:%S"))

    # group by day and drop NA values (usually weekends)
    grouped_data = df.dropna()
    ticks_data = grouped_data['Sell'].resample('24H').ohlc()

    # use 'ask'; DataFrame.as_matrix() was removed in pandas 1.0, selecting
    # the column as a frame and taking .values yields the same (n, 1) array
    sell_data = grouped_data[['Sell']].values

    # calculate bandwidth (experiment with quantile and samples)
    bandwidth = estimate_bandwidth(sell_data, quantile=0.1, n_samples=100)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)

    # fit the data
    ms.fit(sell_data)

    ml_results = []
    for k in range(len(np.unique(ms.labels_))):
        my_members = ms.labels_ == k
        values = sell_data[my_members, 0]

        # find the edges
        ml_results.append(min(values))
        ml_results.append(max(values))

    # export the data for the visualizations
    ticks_data.to_json('ticks.json', date_format='iso', orient='index')

    # export ml support/resistance levels
    with open('ml_results.json', 'w') as f:
        f.write(json.dumps(ml_results))

    print("Done. Goto 0.0.0.0:8000/chart.html")
Example #14
Source File: eval_icp.py    From PVN3D with MIT License 5 votes vote down vote up
def get_cld_bigest_clus(p3ds):
    """
    Return the points of ``p3ds`` that Mean Shift assigns to cluster 0.

    The bandwidth comes from the module-level ``radius``; eight jobs are
    used. Judging by the function name, cluster 0 is taken to be the
    biggest cluster -- confirm against scikit-learn's label ordering.

    :param p3ds: 2-D array of points, one per row
    :return: the rows of ``p3ds`` labelled 0
    """
    mean_shift = MeanShift(bandwidth=radius, bin_seeding=True, n_jobs=8)
    mean_shift.fit(p3ds)
    first_cluster_rows = np.where(mean_shift.labels_ == 0)[0]
    return p3ds[first_cluster_rows, :]
Example #15
Source File: clusterer.py    From yelp with GNU Lesser General Public License v2.1 5 votes vote down vote up
def mean_shift(matrix):
    """
    Cluster ``matrix`` with a default-configured Mean Shift estimator.

    Prints the number of distinct labels (not counting a ``-1`` noise
    label, if one occurs) and returns the label of every sample.

    :param matrix: samples to cluster
    :return: ``labels_`` of the fitted estimator
    """
    estimator = skcluster.MeanShift()
    estimator.fit(matrix)
    labels = estimator.labels_

    # Number of clusters in labels, ignoring noise if present.
    has_noise = -1 in labels
    n_clusters_ = len(set(labels)) - (1 if has_noise else 0)
    print('Estimated number of clusters:', n_clusters_)

    return labels
Example #16
Source File: region_growing.py    From pyImSegm with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def transform_rays_model_cdf_mixture(list_rays, coef_components=1):
    """ compute the mixture model and transform it into cumulative distribution

    The number of mixture components is the number of clusters found by
    Mean Shift multiplied by ``coef_components``.

    :param list(list(int)) list_rays: list ray features (distances)
    :param int coef_components: multiplication for number of components
    :return any, list(list(int)): mixture model, cumulative distribution

    >>> np.random.seed(0)
    >>> list_rays = [[9, 4, 9], [4, 9, 7], [9, 7, 11], [10, 8, 10],
    ...              [9, 11, 8], [4, 8, 5], [8, 10, 6], [9, 7, 11]]
    >>> mm, cdist = transform_rays_model_cdf_mixture(list_rays)
    >>> # the rounding variate a bit according GMM estimated model
    >>> np.round(np.array(cdist) * 4) / 4.  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    array([[ 1. , 1. , 1. , 1. , 1. , 1. , 0.75, 0.75, 0.5 , 0.25, 0. ],
           [ 1. , 1. , 1. , 1. , 1. , 1. , 1.  , 0.75, 0.5 , 0.25, 0. ],
           [ 1. , 1. , 1. , 1. , 1. , 1. , ...,  0.75, 0.5 , 0.25, 0. ]])
    """
    rays = np.array(list_rays)
    ms = cluster.MeanShift()
    ms.fit(rays)
    logging.debug('MeanShift found: %r', np.bincount(ms.labels_))

    nb_components = int(len(np.unique(ms.labels_)) * coef_components)
    mm = mixture.BayesianGaussianMixture(n_components=nb_components)
    # gmm.fit(np.array(list_rays))
    mm.fit(rays, ms.labels_)
    # '%i' rather than '% c...': the latter is parsed as a space-flagged
    # '%c' conversion, which rendered the count as a character and swallowed
    # the first letter of "components".
    logging.debug('Mixture model found %i components with weights: %r',
                  len(mm.weights_), mm.weights_)

    # compute the fairest mean + sigma over all components and ray angles
    max_dist = np.max([[m[i] + np.sqrt(c[i, i]) for i in range(len(m))]
                       for m, c in zip(mm.means_, mm.covariances_)])
    # max_dist = np.max(rays)

    # fixing, AttributeError: 'BayesianGaussianMixture' object has no attribute 'covariances'
    covs = mm.covariances if hasattr(mm, 'covariances') else mm.covariances_
    # keep only the diagonal of every covariance matrix (per-feature std)
    stds = np.sqrt(abs(covs))[:, np.eye(mm.means_.shape[1], dtype=bool)]
    # stds = np.sum(mm.covariances_, axis=-1)
    cdist = compute_cumulative_distrib(mm.means_, stds, mm.weights_, max_dist)
    return mm, cdist.tolist()
Example #17
Source File: meanshift_pytorch.py    From PVN3D with MIT License 4 votes vote down vote up
def test2():
    """
    Compare the torch Mean Shift with scikit-learn's on saved results.

    For each of 2000 pickled evaluation files and each ground-truth class id
    (stopping at the first id equal to 0), the points of that class are
    clustered on the GPU with ``MeanShiftTorch`` and on the CPU with
    ``MeanShift``. Both timings are printed, and the projected points,
    centers and selected clusters are shown in OpenCV windows until a key
    is pressed. Requires a CUDA device, a GUI and the hard-coded data path.
    """
    sv_ptn = '/data/workspace/3D_Point_Det/config/ycb.onestage.rs14.nofarflatFocalls/train_log/eval_result/051_large_clamp/mask_res_pic/{}sv_info_1.pkl'
    for i in range(2000):
        # Close the file deterministically instead of leaking one handle
        # per iteration.
        # NOTE(review): pickle must only be used on trusted files.
        with open(sv_ptn.format(i), 'rb') as pkl_file:
            data = pkl.load(pkl_file)
        all_p3ds = data['p3ds']

        for cls_id in data['gt_cls_ids'][0]:
            if cls_id == 0:
                break
            p3ds = all_p3ds[np.where(data['labels'] == cls_id)[0], :]
            show_img = np.zeros((480, 640, 3), dtype="uint8")
            p2ds = my_utils.project_p3d(p3ds, 1.0)
            show_img[p2ds[:, 1], p2ds[:, 0], :] = np.array([255, 255, 255])
            gpu_label = np.zeros((480, 640, 3), dtype="uint8")
            cpu_label = gpu_label.copy()
            p3ds_cu = torch.from_numpy(p3ds).cuda()
            ms_gpu = MeanShiftTorch(0.05)

            # GPU clustering; the timing includes painting the label image
            start = time.time()
            ctr, labels = ms_gpu.fit(p3ds_cu)
            ctr = ctr.cpu().numpy().reshape(1, 3)
            labels = labels.cpu().numpy()
            p2ds_gt_lb = p2ds[np.where(labels==1)[0], :]
            gpu_label[p2ds_gt_lb[:, 1], p2ds_gt_lb[:, 0], :] = np.array(
                [255, 255, 255]
            )
            end = time.time()
            print("gpu time:\t", end - start)
            ctr_2d = my_utils.project_p3d(ctr, 1.0)
            show_img = cv2.circle(
                show_img, (ctr_2d[0][0], ctr_2d[0][1]), 3, (0, 0, 255), -1
            )

            # CPU clustering with scikit-learn; only fit() is timed
            ms_cpu = MeanShift(
                bandwidth=0.05, n_jobs=40
            )
            start = time.time()
            ms_cpu.fit(p3ds)
            end = time.time()
            print("sklearn cpu time:\t", end - start)
            clus_ctrs = np.array(ms_cpu.cluster_centers_)
            clus_labels = ms_cpu.labels_
            ctr_2d = my_utils.project_p3d(clus_ctrs[0].reshape(1, 3), 1.0)
            show_img = cv2.circle(
                show_img, (ctr_2d[0][0], ctr_2d[0][1]), 3, (255, 0, 0), -1
            )
            p2ds_gt_lb = p2ds[np.where(clus_labels==0)[0], :]
            cpu_label[p2ds_gt_lb[:, 1], p2ds_gt_lb[:, 0], :] = np.array(
                [255, 255, 255]
            )
            imshow('show_img', show_img)
            imshow('gpu', gpu_label)
            imshow('cpu', cpu_label)
            waitKey(0)
Example #18
Source File: region_growing.py    From pyImSegm with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def transform_rays_model_cdf_kmeans(list_rays, nb_components=None):
    """ fit k-means on the rays and transform it into cumulative distribution

    When ``nb_components`` is not given, the number of clusters is taken
    from a Mean Shift run on the same data.

    :param list(list(int)) list_rays: list ray features (distances)
    :param int nb_components: number components in mixture model
    :return any, list(list(int)):  mixture model, list of stat/param of models

    >>> np.random.seed(0)
    >>> list_rays = [[9, 4, 9], [4, 9, 7], [9, 7, 11], [10, 8, 10],
    ...              [9, 11, 8], [4, 8, 5], [8, 10, 6], [9, 7, 11]]
    >>> mm, cdist = transform_rays_model_cdf_kmeans(list_rays)
    >>> np.round(cdist, 1).tolist()  # doctest: +NORMALIZE_WHITESPACE
    [[1.0, 1.0, 1.0, 1.0, 0.9, 0.8, 0.7, 0.7, 0.6, 0.4, 0.2, 0.0, 0.0],
     [1.0, 1.0, 1.0, 1.0, 0.9, 0.9, 0.8, 0.7, 0.5, 0.3, 0.2, 0.1, 0.0],
     [1.0, 1.0, 1.0, 1.0, 1.0, 0.9, 0.8, 0.7, 0.5, 0.4, 0.2, 0.1, 0.0]]
    >>> mm, cdist = transform_rays_model_cdf_kmeans(list_rays, nb_components=2)
    """
    rays = np.array(list_rays)
    if nb_components:
        kmeans = cluster.KMeans(nb_components)
        kmeans.fit(rays)
    else:
        # let Mean Shift decide how many clusters there are
        ms = cluster.MeanShift()
        ms.fit(rays)
        logging.debug('MeanShift found: %r', np.bincount(ms.labels_))
        nb_components = len(np.unique(ms.labels_))
        kmeans = cluster.KMeans(nb_components)
        kmeans.fit(rays, ms.labels_)

    labels = kmeans.labels_
    means = kmeans.cluster_centers_

    # per-cluster standard deviation of every ray, offset by one
    stds = np.zeros((len(means), rays.shape[1]))
    for row, lb in enumerate(np.unique(labels)):
        stds[row, :] = np.std(rays[labels == lb], axis=0)
    stds += 1
    weights = np.bincount(labels) / float(len(labels))

    # compute the fairest mean + sigma over all components and ray angles
    max_dist = np.max(means + stds)

    cdist = compute_cumulative_distrib(means, stds, weights, max_dist)
    return kmeans, cdist.tolist()
Example #19
Source File: test_cluster.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 3 votes vote down vote up
def test_objectmapper(self):
    """
    Check that the ``ModelFrame.cluster`` accessor exposes the very same
    classes as ``cluster`` and ``cluster.bicluster``.
    """
    df = pdml.ModelFrame([])

    estimator_names = (
        'AffinityPropagation',
        'AgglomerativeClustering',
        'Birch',
        'DBSCAN',
        'FeatureAgglomeration',
        'KMeans',
        'MiniBatchKMeans',
        'MeanShift',
        'SpectralClustering',
    )
    for name in estimator_names:
        self.assertIs(getattr(df.cluster, name), getattr(cluster, name))

    for name in ('SpectralBiclustering', 'SpectralCoclustering'):
        self.assertIs(getattr(df.cluster.bicluster, name),
                      getattr(cluster.bicluster, name))