Python sklearn.cluster.MeanShift() Examples
The following are 19 code examples of sklearn.cluster.MeanShift(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.cluster, or try the search function.
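Before diving into the examples, here is a minimal, self-contained sketch of the typical MeanShift workflow. The synthetic blobs and the quantile value are illustrative assumptions, not taken from any example below:

import numpy as np
from sklearn.cluster import MeanShift, estimate_bandwidth
from sklearn.datasets import make_blobs

# toy data: three well-separated blobs (illustrative assumption)
X, _ = make_blobs(n_samples=500, centers=3, cluster_std=0.6, random_state=0)

# estimate_bandwidth derives a kernel width from nearest-neighbor distances;
# a larger quantile gives a wider kernel and hence fewer clusters
bandwidth = estimate_bandwidth(X, quantile=0.2, n_samples=200)

ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
ms.fit(X)

print('clusters found:', len(np.unique(ms.labels_)))
print('cluster centers:\n', ms.cluster_centers_)

Unlike KMeans, MeanShift infers the number of clusters from the data; the bandwidth is the main knob to tune, and most examples below either hard-code it or derive it with estimate_bandwidth.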
Example #1
Source File: clustering_meanShift.py From practicalDataAnalysisCookbook with GNU General Public License v2.0 | 6 votes |
def findClusters_meanShift(data):
    '''
        Cluster data using Mean Shift method
    '''
    bandwidth = cl.estimate_bandwidth(data, quantile=0.25, n_samples=500)

    # create the classifier object
    meanShift = cl.MeanShift(
        bandwidth=bandwidth,
        bin_seeding=True
    )

    # fit the data
    return meanShift.fit(data)
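For illustration, the helper above could be exercised on random data as follows. This is a sketch that assumes the snippet's cl alias stands for sklearn.cluster:

import numpy as np
import sklearn.cluster as cl  # the `cl` alias assumed by the snippet

data = np.random.rand(300, 2)         # toy input, illustrative only
model = findClusters_meanShift(data)  # returns the fitted MeanShift object
print(model.labels_[:10])             # cluster label per sample
print(model.cluster_centers_)         # one row per discovered cluster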
Example #2
Source File: utils.py From sparseprop with MIT License | 6 votes |
def get_typical_durations(raw_durations, bandwidth_percentile=0.05,
                          min_intersection=0.5, miss_covered=0.1):
    """Return typical durations in a dataset."""
    dur = raw_durations.reshape(raw_durations.shape[0], 1)
    bandwidth = estimate_bandwidth(dur, quantile=bandwidth_percentile)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=False)
    ms.fit(dur.reshape((dur.shape[0]), 1))
    tw = np.sort(np.array(
        ms.cluster_centers_.reshape(ms.cluster_centers_.shape[0]), dtype=int))

    # Guarantee a minimum intersection in the output durations.
    p = np.zeros((dur.shape[0], tw.shape[0]))
    for idx in range(tw.shape[0]):
        p[:, idx] = (dur / tw[idx]).reshape(p[:, idx].shape[0])
    ll = (p >= min_intersection) & (p <= 1.0 / min_intersection)
    if (ll.sum(axis=1) > 0).sum() / float(raw_durations.shape[0]) < (1.0 - miss_covered):
        assert False, "Condition of minimum intersection not satisfied"
    return tw
Example #3
Source File: lanenet_cluster.py From lanenet-enet-hnet with Apache License 2.0 | 6 votes |
def _cluster(prediction, bandwidth):
    """
    Implements the clustering part of Section II of the paper
    :param prediction:
    :param bandwidth:
    :return:
    """
    ms = MeanShift(bandwidth, bin_seeding=True)
    log.info('Starting Mean Shift clustering ...')
    tic = time.time()
    try:
        ms.fit(prediction)
    except ValueError as err:
        log.error(err)
        return 0, [], []
    log.info('Mean Shift took: {:.5f}s'.format(time.time() - tic))
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_

    num_clusters = cluster_centers.shape[0]
    log.info('Number of clusters: {:d}'.format(num_clusters))

    return num_clusters, labels, cluster_centers
Example #4
Source File: meanshift_pytorch.py From PVN3D with MIT License | 6 votes |
def test():
    while True:
        a = np.random.rand(1000, 2)
        ta = torch.from_numpy(a.astype(np.float32)).cuda()
        ms = MeanShiftTorch(0.05)
        ctr, _ = ms.fit(ta)

        a_idx = (a * 480).astype("uint8")
        show_a = np.zeros((480, 480, 3), dtype="uint8")
        show_a[a_idx[:, 0], a_idx[:, 1], :] = np.array([255, 255, 255])
        ctr = (ctr.cpu().numpy() * 480).astype("uint8")
        show_a = cv2.circle(show_a, (ctr[1], ctr[0]), 3, (0, 0, 255), -1)

        ms_cpu = MeanShift(
            bandwidth=0.05, n_jobs=8
        )
        ms_cpu.fit(a)
        clus_ctrs = np.array(ms_cpu.cluster_centers_)
        clus_labels = ms_cpu.labels_
        ctr = (clus_ctrs[0] * 480).astype("uint8")
        show_a = cv2.circle(show_a, (ctr[1], ctr[0]), 3, (255, 0, 0), -1)

        imshow('show_a', show_a)
        waitKey(0)
        print(clus_ctrs[0])
Example #5
Source File: test_cluster.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_Classifications(self):
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)

    models = ['AffinityPropagation', 'MeanShift']
    for model in models:
        mod1 = getattr(df.cluster, model)()
        mod2 = getattr(cluster, model)()

        df.fit(mod1)
        mod2.fit(iris.data)

        result = df.predict(mod1)
        expected = mod2.predict(iris.data)

        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected)
Example #6
Source File: utils.py From Lane_Detection-An_Instance_Segmentation_Approach with MIT License | 6 votes |
def cluster_embed(embeddings, preds_bin, band_width):
    c = embeddings.shape[1]
    n, _, h, w = preds_bin.shape
    preds_bin = preds_bin.view(n, h, w)
    preds_inst = torch.zeros_like(preds_bin)

    for idx, (embedding, bin_pred) in enumerate(zip(embeddings, preds_bin)):
        # print(embedding.size(), bin_pred.size())
        embedding_fg = torch.transpose(
            torch.masked_select(embedding, bin_pred.byte()).view(c, -1), 0, 1)
        # print(embedding_fg.size())

        # embedding_expand = embedding.view(embedding.shape[0],
        #                                   embedding.shape[1] * embedding.shape[2])
        # embedding_expand = torch.transpose(embedding_expand, 1, 0)
        # print(embedding_expand.shape)

        clustering = MeanShift(bandwidth=band_width, bin_seeding=True,
                               min_bin_freq=100).fit(embedding_fg.cpu().detach().numpy())
        preds_inst[idx][bin_pred.byte()] = torch.from_numpy(clustering.labels_).cuda() + 1

        # labels_color = get_color(clustering.labels_)
        # preds_inst[idx][bin_pred.byte()] = torch.from_numpy(labels_color).cuda() + 1
        # print(torch.unique(preds_inst[idx]))

    return preds_inst
Example #7
Source File: clustering.py From ASIS with MIT License | 5 votes |
def cluster(prediction, bandwidth):
    ms = MeanShift(bandwidth, bin_seeding=True)
    # print('Mean shift clustering, might take some time ...')
    # tic = time.time()
    ms.fit(prediction)
    # print('time for clustering', time.time() - tic)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_
    num_clusters = cluster_centers.shape[0]
    return num_clusters, labels, cluster_centers
Example #8
Source File: mechanical.py From CO2MPAS-TA with European Union Public License 1.1 | 5 votes |
def identify_velocity_speed_ratios_v3(
        engine_speeds_out, velocities, idle_engine_speed, stop_velocity):
    """
    Identifies velocity speed ratios from gear box speed vector [km/(h*RPM)].

    :param engine_speeds_out:
        Engine speed [RPM].
    :type engine_speeds_out: numpy.array

    :param velocities:
        Velocity vector [km/h].
    :type velocities: numpy.array

    :param idle_engine_speed:
        Engine speed idle median and std [RPM].
    :type idle_engine_speed: (float, float)

    :param stop_velocity:
        Maximum velocity to consider the vehicle stopped [km/h].
    :type stop_velocity: float

    :return:
        Constant velocity speed ratios of the gear box [km/(h*RPM)].
    :rtype: dict
    """
    import sklearn.cluster as sk_clu

    idle_speed = idle_engine_speed[0] + idle_engine_speed[1]
    b = (engine_speeds_out > idle_speed) & (velocities > stop_velocity)
    x = (velocities[b] / engine_speeds_out[b])[:, None]

    bandwidth = sk_clu.estimate_bandwidth(x, quantile=0.2)
    ms = sk_clu.MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(x)

    vsr = {k + 1: v for k, v in enumerate(sorted(ms.cluster_centers_[:, 0]))}
    vsr[0] = 0.0

    return vsr
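As a rough illustration of how this function could be driven, the sketch below fabricates gear-like signals in which velocity is (noisily) proportional to engine speed through one of three assumed ratios; every number is made up for the sketch:

import numpy as np

rng = np.random.default_rng(0)
ratios = np.array([0.01, 0.02, 0.03])             # assumed gear ratios [km/(h*RPM)]
engine_speeds_out = rng.uniform(1200, 4000, 900)  # synthetic engine speeds [RPM]
velocities = (engine_speeds_out * rng.choice(ratios, 900)
              + rng.normal(0.0, 0.5, 900))        # noisy velocities [km/h]
idle_engine_speed = (750.0, 50.0)                 # idle median and std [RPM]

vsr = identify_velocity_speed_ratios_v3(
    engine_speeds_out, velocities, idle_engine_speed, stop_velocity=1.0)
print(vsr)  # roughly {0: 0.0, 1: ~0.01, 2: ~0.02, 3: ~0.03}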
Example #9
Source File: sklearn_cluster.py From learn-to-cluster with MIT License | 5 votes |
def meanshift(feat, bw, num_process, min_bin_freq, **kwargs):
    print('#num_process:', num_process)
    print('min_bin_freq:', min_bin_freq)
    ms = cluster.MeanShift(bandwidth=bw, n_jobs=num_process,
                           min_bin_freq=min_bin_freq).fit(feat)
    return ms.labels_
Example #10
Source File: clustering.py From instance-segmentation-with-discriminative-loss-tensorflow with MIT License | 5 votes |
def cluster(prediction, bandwidth):
    ms = MeanShift(bandwidth, bin_seeding=True)
    print('Mean shift clustering, might take some time ...')
    tic = time.time()
    ms.fit(prediction)
    print('time for clustering', time.time() - tic)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_
    num_clusters = cluster_centers.shape[0]
    return num_clusters, labels, cluster_centers
Example #11
Source File: compare_clustering_algs.py From mmvt with GNU General Public License v3.0 | 5 votes |
def compare(data, n_groups, output_fol):
    # plot_clusters(data.astype(np.float), scipy.cluster.vq.kmeans,
    #               'scipy.cluster.vq.kmeans', output_fol, (n_groups,), {})
    plot_clusters(data, cluster.KMeans, 'KMeans', output_fol, (),
                  {'n_clusters': n_groups})
    for ct in ['spherical', 'tied', 'diag', 'full']:
        plot_clusters(data, mixture.GaussianMixture, 'GMM_{}'.format(ct),
                      output_fol, (),
                      {'n_components': n_groups, 'covariance_type': ct})
    plot_clusters(data, cluster.AffinityPropagation, 'AffinityPropagation',
                  output_fol, (), {'preference': -5.0, 'damping': 0.95})
    plot_clusters(data, cluster.MeanShift, 'MeanShift', output_fol, (0.175,),
                  {'cluster_all': False})
    plot_clusters(data, cluster.SpectralClustering, 'SpectralClustering',
                  output_fol, (), {'n_clusters': n_groups})
    plot_clusters(data, cluster.AgglomerativeClustering, 'AgglomerativeClustering',
                  output_fol, (), {'n_clusters': n_groups, 'linkage': 'ward'})
    plot_clusters(data, cluster.DBSCAN, 'DBSCAN', output_fol, (), {'eps': 0.025})
    # plot_clusters(data, hdbscan.HDBSCAN, 'HDBSCAN', output_fol, (),
    #               {'min_cluster_size': 15})
Example #12
Source File: clustering.py From JSNet with MIT License | 5 votes |
def cluster(prediction, bandwidth):
    ms = MeanShift(bandwidth, bin_seeding=True)
    # print('Mean shift clustering, might take some time ...')
    # tic = time.time()
    ms.fit(prediction)
    # print('time for clustering', time.time() - tic)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_
    num_clusters = cluster_centers.shape[0]
    return num_clusters, labels, cluster_centers
Example #13
Source File: ml.py From forex_algotrading with MIT License | 5 votes |
def main(filename):
    # read csv file with daily data per tick
    df = pandas.read_csv(filename, parse_dates=[0], index_col=0,
                         names=['Date_Time', 'Buy', 'Sell'],
                         date_parser=lambda x: pandas.to_datetime(x, format="%d/%m/%y %H:%M:%S"))

    # group by day and drop NA values (usually weekends)
    grouped_data = df.dropna()
    ticks_data = grouped_data['Sell'].resample('24H').ohlc()  # use 'ask'
    sell_data = grouped_data.as_matrix(columns=['Sell'])

    # calculate bandwidth (experiment with quantile and samples)
    bandwidth = estimate_bandwidth(sell_data, quantile=0.1, n_samples=100)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)

    # fit the data
    ms.fit(sell_data)

    ml_results = []
    for k in range(len(np.unique(ms.labels_))):
        my_members = ms.labels_ == k
        values = sell_data[my_members, 0]

        # find the edges
        ml_results.append(min(values))
        ml_results.append(max(values))

    # export the data for the visualizations
    ticks_data.to_json('ticks.json', date_format='iso', orient='index')

    # export ml support resistance
    with open('ml_results.json', 'w') as f:
        f.write(json.dumps(ml_results))
    print("Done. Goto 0.0.0.0:8000/chart.html")
Example #14
Source File: eval_icp.py From PVN3D with MIT License | 5 votes |
def get_cld_bigest_clus(p3ds):
    n_clus_jobs = 8
    ms = MeanShift(
        bandwidth=radius, bin_seeding=True, n_jobs=n_clus_jobs
    )
    ms.fit(p3ds)
    clus_labels = ms.labels_
    bg_clus = p3ds[np.where(clus_labels == 0)[0], :]
    return bg_clus
Example #15
Source File: clusterer.py From yelp with GNU Lesser General Public License v2.1 | 5 votes |
def mean_shift(matrix):
    mean_shift = skcluster.MeanShift()
    mean_shift.fit(matrix)
    labels = mean_shift.labels_

    # Number of clusters in labels, ignoring noise if present.
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    print('Estimated number of clusters:', n_clusters_)

    return labels
Example #16
Source File: region_growing.py From pyImSegm with BSD 3-Clause "New" or "Revised" License | 5 votes |
def transform_rays_model_cdf_mixture(list_rays, coef_components=1):
    """ compute the mixture model and transform it into cumulative distribution

    :param list(list(int)) list_rays: list ray features (distances)
    :param int coef_components: multiplication for number of components
    :return any, list(list(int)): mixture model, cumulative distribution

    >>> np.random.seed(0)
    >>> list_rays = [[9, 4, 9], [4, 9, 7], [9, 7, 11], [10, 8, 10],
    ...              [9, 11, 8], [4, 8, 5], [8, 10, 6], [9, 7, 11]]
    >>> mm, cdist = transform_rays_model_cdf_mixture(list_rays)
    >>> # the rounding may vary a bit according to the estimated GMM model
    >>> np.round(np.array(cdist) * 4) / 4.  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    array([[ 1. , 1. , 1. , 1. , 1. , 1. , 0.75, 0.75, 0.5 , 0.25, 0. ],
           [ 1. , 1. , 1. , 1. , 1. , 1. , 1.  , 0.75, 0.5 , 0.25, 0. ],
           [ 1. , 1. , 1. , 1. , 1. , 1. , ..., 0.75, 0.5 , 0.25, 0. ]])
    """
    rays = np.array(list_rays)
    ms = cluster.MeanShift()
    ms.fit(rays)
    logging.debug('MeanShift found: %r', np.bincount(ms.labels_))

    nb_components = int(len(np.unique(ms.labels_)) * coef_components)
    mm = mixture.BayesianGaussianMixture(n_components=nb_components)
    # gmm.fit(np.array(list_rays))
    mm.fit(rays, ms.labels_)
    logging.debug('Mixture model found %i components with weights: %r',
                  len(mm.weights_), mm.weights_)

    # compute the fairest mean + sigma over all components and ray angles
    max_dist = np.max([[m[i] + np.sqrt(c[i, i]) for i in range(len(m))]
                       for m, c in zip(mm.means_, mm.covariances_)])
    # max_dist = np.max(rays)

    # fixing, AttributeError: 'BayesianGaussianMixture' object has no attribute 'covariances'
    covs = mm.covariances if hasattr(mm, 'covariances') else mm.covariances_
    stds = np.sqrt(abs(covs))[:, np.eye(mm.means_.shape[1], dtype=bool)]
    # stds = np.sum(mm.covariances_, axis=-1)
    cdist = compute_cumulative_distrib(mm.means_, stds, mm.weights_, max_dist)
    return mm, cdist.tolist()
Example #17
Source File: meanshift_pytorch.py From PVN3D with MIT License | 4 votes |
def test2():
    sv_ptn = '/data/workspace/3D_Point_Det/config/ycb.onestage.rs14.nofarflatFocalls/train_log/eval_result/051_large_clamp/mask_res_pic/{}sv_info_1.pkl'
    for i in range(2000):
        data = pkl.load(open(sv_ptn.format(i), 'rb'))
        all_p3ds = data['p3ds']
        for cls_id in data['gt_cls_ids'][0]:
            if cls_id == 0:
                break
            p3ds = all_p3ds[np.where(data['labels'] == cls_id)[0], :]
            show_img = np.zeros((480, 640, 3), dtype="uint8")
            p2ds = my_utils.project_p3d(p3ds, 1.0)
            show_img[p2ds[:, 1], p2ds[:, 0], :] = np.array([255, 255, 255])
            gpu_label = np.zeros((480, 640, 3), dtype="uint8")
            cpu_label = gpu_label.copy()

            p3ds_cu = torch.from_numpy(p3ds).cuda()
            ms_gpu = MeanShiftTorch(0.05)
            start = time.time()
            ctr, labels = ms_gpu.fit(p3ds_cu)
            ctr = ctr.cpu().numpy().reshape(1, 3)
            labels = labels.cpu().numpy()
            p2ds_gt_lb = p2ds[np.where(labels == 1)[0], :]
            gpu_label[p2ds_gt_lb[:, 1], p2ds_gt_lb[:, 0], :] = np.array(
                [255, 255, 255]
            )
            end = time.time()
            print("gpu time:\t", end - start)
            ctr_2d = my_utils.project_p3d(ctr, 1.0)
            show_img = cv2.circle(
                show_img, (ctr_2d[0][0], ctr_2d[0][1]), 3, (0, 0, 255), -1
            )

            ms_cpu = MeanShift(
                bandwidth=0.05, n_jobs=40
            )
            start = time.time()
            ms_cpu.fit(p3ds)
            end = time.time()
            print("sklearn cpu time:\t", end - start)
            clus_ctrs = np.array(ms_cpu.cluster_centers_)
            clus_labels = ms_cpu.labels_
            ctr_2d = my_utils.project_p3d(clus_ctrs[0].reshape(1, 3), 1.0)
            show_img = cv2.circle(
                show_img, (ctr_2d[0][0], ctr_2d[0][1]), 3, (255, 0, 0), -1
            )
            p2ds_gt_lb = p2ds[np.where(clus_labels == 0)[0], :]
            cpu_label[p2ds_gt_lb[:, 1], p2ds_gt_lb[:, 0], :] = np.array(
                [255, 255, 255]
            )

            imshow('show_img', show_img)
            imshow('gpu', gpu_label)
            imshow('cpu', cpu_label)
            waitKey(0)
Example #18
Source File: region_growing.py From pyImSegm with BSD 3-Clause "New" or "Revised" License | 4 votes |
def transform_rays_model_cdf_kmeans(list_rays, nb_components=None):
    """ compute the mixture model and transform it into cumulative distribution

    :param list(list(int)) list_rays: list ray features (distances)
    :param int nb_components: number components in mixture model
    :return any, list(list(int)): mixture model, list of stat/param of models

    >>> np.random.seed(0)
    >>> list_rays = [[9, 4, 9], [4, 9, 7], [9, 7, 11], [10, 8, 10],
    ...              [9, 11, 8], [4, 8, 5], [8, 10, 6], [9, 7, 11]]
    >>> mm, cdist = transform_rays_model_cdf_kmeans(list_rays)
    >>> np.round(cdist, 1).tolist()  # doctest: +NORMALIZE_WHITESPACE
    [[1.0, 1.0, 1.0, 1.0, 0.9, 0.8, 0.7, 0.7, 0.6, 0.4, 0.2, 0.0, 0.0],
     [1.0, 1.0, 1.0, 1.0, 0.9, 0.9, 0.8, 0.7, 0.5, 0.3, 0.2, 0.1, 0.0],
     [1.0, 1.0, 1.0, 1.0, 1.0, 0.9, 0.8, 0.7, 0.5, 0.4, 0.2, 0.1, 0.0]]
    >>> mm, cdist = transform_rays_model_cdf_kmeans(list_rays, nb_components=2)
    """
    rays = np.array(list_rays)
    if not nb_components:
        ms = cluster.MeanShift()
        ms.fit(rays)
        logging.debug('MeanShift found: %r', np.bincount(ms.labels_))
        nb_components = len(np.unique(ms.labels_))
        kmeans = cluster.KMeans(nb_components)
        kmeans.fit(rays, ms.labels_)
    else:
        kmeans = cluster.KMeans(nb_components)
        kmeans.fit(rays)

    labels = kmeans.labels_
    means = kmeans.cluster_centers_
    stds = np.zeros((len(means), rays.shape[1]))
    for i, lb in enumerate(np.unique(labels)):
        stds[i, :] = np.std(np.asarray(list_rays)[labels == lb], axis=0)
    stds += 1
    weights = np.bincount(kmeans.labels_) / float(len(kmeans.labels_))

    # compute the fairest mean + sigma over all components and ray angles
    max_dist = np.max([[m[i] + c[i] for i in range(len(m))]
                       for m, c in zip(means, stds)])
    cdist = compute_cumulative_distrib(means, stds, weights, max_dist)
    return kmeans, cdist.tolist()
Example #19
Source File: test_cluster.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 3 votes |
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.cluster.AffinityPropagation, cluster.AffinityPropagation)
    self.assertIs(df.cluster.AgglomerativeClustering, cluster.AgglomerativeClustering)
    self.assertIs(df.cluster.Birch, cluster.Birch)
    self.assertIs(df.cluster.DBSCAN, cluster.DBSCAN)
    self.assertIs(df.cluster.FeatureAgglomeration, cluster.FeatureAgglomeration)
    self.assertIs(df.cluster.KMeans, cluster.KMeans)
    self.assertIs(df.cluster.MiniBatchKMeans, cluster.MiniBatchKMeans)
    self.assertIs(df.cluster.MeanShift, cluster.MeanShift)
    self.assertIs(df.cluster.SpectralClustering, cluster.SpectralClustering)
    self.assertIs(df.cluster.bicluster.SpectralBiclustering,
                  cluster.bicluster.SpectralBiclustering)
    self.assertIs(df.cluster.bicluster.SpectralCoclustering,
                  cluster.bicluster.SpectralCoclustering)