Python sklearn.cluster.estimate_bandwidth() Examples
The following are 11 code examples of sklearn.cluster.estimate_bandwidth().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.cluster, or try the search function.
Example #1
Source File: clustering_meanShift.py From practicalDataAnalysisCookbook with GNU General Public License v2.0 | 6 votes |
def findClusters_meanShift(data):
    '''
        Cluster data using the Mean Shift method.

        The kernel bandwidth is estimated from ``data`` with
        ``cl.estimate_bandwidth`` (quantile=0.25, n_samples=500) and a
        ``cl.MeanShift`` model with bin seeding enabled is fitted on it.

        Returns whatever ``MeanShift.fit(data)`` returns.
    '''
    estimated_bandwidth = cl.estimate_bandwidth(
        data,
        n_samples=500,
        quantile=0.25,
    )

    # build the estimator, then fit it in the return statement
    model = cl.MeanShift(bin_seeding=True, bandwidth=estimated_bandwidth)
    return model.fit(data)


# the file name of the dataset
Example #2
Source File: utils.py From sparseprop with MIT License | 6 votes |
def get_typical_durations(raw_durations, bandwidth_percentile=0.05,
                          min_intersection=0.5, miss_covered=0.1):
    """Return typical durations in a dataset.

    The durations are clustered with Mean Shift; the cluster centers,
    truncated to integers and sorted in ascending order, are the typical
    durations. The result is then validated: a duration counts as covered
    when its ratio to at least one typical duration lies in
    ``[min_intersection, 1 / min_intersection]``.

    Parameters
    ----------
    raw_durations : ndarray
        Durations; reshaped internally to ``(raw_durations.shape[0], 1)``.
    bandwidth_percentile : float
        Passed to ``estimate_bandwidth`` as ``quantile``.
    min_intersection : float
        Lower bound of the accepted duration / typical-duration ratio
        (the upper bound is its reciprocal).
    miss_covered : float
        Maximum fraction of durations that may remain uncovered.

    Returns
    -------
    ndarray of int
        Sorted typical durations.

    Raises
    ------
    AssertionError
        If fewer than ``1 - miss_covered`` of the durations are covered.
    """
    n_durations = raw_durations.shape[0]
    dur = raw_durations.reshape(n_durations, 1)

    bandwidth = estimate_bandwidth(dur, quantile=bandwidth_percentile)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=False)
    ms.fit(dur)

    centers = ms.cluster_centers_.reshape(ms.cluster_centers_.shape[0])
    tw = np.sort(np.array(centers, dtype=int))

    # Guarantee a minimum intersection in the output durations.
    # Broadcasting (n, 1) / (k,) gives ratio[i, j] = dur[i] / tw[j]; the cast
    # keeps the float64 precision of the original pre-allocated accumulator.
    ratio = (dur / tw).astype(float, copy=False)
    covered = (ratio >= min_intersection) & (ratio <= 1.0 / min_intersection)
    covered_fraction = covered.any(axis=1).sum() / float(n_durations)
    if covered_fraction < (1.0 - miss_covered):
        # Raised explicitly (instead of ``assert False``) so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        raise AssertionError("Condition of minimum intersection not satisfied")
    return tw
Example #3
Source File: test_mean_shift.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_estimate_bandwidth_with_sparse_matrix():
    """estimate_bandwidth must raise a TypeError when given a sparse matrix."""
    expected_message = (
        "A sparse matrix was passed, but dense data is required."
    )
    sparse_input = sparse.lil_matrix((1000, 1000))

    # 200 is forwarded positionally as the second argument of
    # estimate_bandwidth, exactly as in the call under test.
    assert_raise_message(
        TypeError, expected_message, estimate_bandwidth, sparse_input, 200
    )
Example #4
Source File: shifted_delta_cepstra.py From hunspeech with MIT License | 5 votes |
def loop_estimate_bandwidth():
    """Log the estimated bandwidth for growing prefixes of the data.

    Starting with the first 4 rows of ``self.sdc_all_speech`` and doubling
    the prefix length each time, logs the tuple
    ``(prefix_length, estimated_bandwidth)`` at INFO level for as long as
    the prefix length is smaller than the number of rows.

    NOTE(review): ``self`` is not a parameter here; presumably this is a
    closure defined inside a method -- confirm against the enclosing code.
    """
    prefix_length = 4
    while prefix_length < self.sdc_all_speech.shape[0]:
        prefix = self.sdc_all_speech[:prefix_length]
        bandwidth = estimate_bandwidth(prefix)
        logging.info((prefix_length, bandwidth))
        prefix_length *= 2
Example #5
Source File: test_mean_shift.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_estimate_bandwidth():
    """Bandwidth estimated from 200 samples of X must lie in [0.9, 1.5]."""
    lower, upper = 0.9, 1.5
    estimated = estimate_bandwidth(X, n_samples=200)
    assert lower <= estimated <= upper
Example #6
Source File: test_mean_shift.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_estimate_bandwidth_1sample():
    """Check estimate_bandwidth with a single sample.

    With n_samples=1 and quantile<1, n_neighbors is set to 1, and the
    estimated bandwidth is expected to be exactly zero.
    """
    single_sample_bandwidth = estimate_bandwidth(
        X, quantile=0.3, n_samples=1
    )
    assert single_sample_bandwidth == 0.
Example #7
Source File: test_mean_shift.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_estimate_bandwidth_with_sparse_matrix():
    """Sparse input to estimate_bandwidth is rejected with a TypeError."""
    sparse_data = sparse.lil_matrix((1000, 1000))
    error_text = "A sparse matrix was passed, but dense data is required."

    # Extra positional arguments (the matrix and 200) are handed to
    # estimate_bandwidth by assert_raise_message.
    assert_raise_message(
        TypeError,
        error_text,
        estimate_bandwidth,
        sparse_data,
        200,
    )
Example #8
Source File: ml.py From forex_algotrading with MIT License | 5 votes |
def main(filename):
    """Cluster tick sell prices with Mean Shift and export chart data.

    Reads ``filename`` as CSV with the columns ``Date_Time``, ``Buy`` and
    ``Sell`` (names are supplied here, the first column becomes the
    datetime index), clusters the ``Sell`` prices with Mean Shift and
    writes two files into the current directory:

    * ``ticks.json`` -- OHLC of ``Sell`` resampled to 24-hour bins.
    * ``ml_results.json`` -- a flat list holding the minimum and maximum
      ``Sell`` value of every cluster.

    :param filename: path of the CSV file to read.
    """
    # read csv files with daily data per tick
    df = pandas.read_csv(
        filename,
        parse_dates=[0],
        index_col=0,
        names=['Date_Time', 'Buy', 'Sell'],
        date_parser=lambda x: pandas.to_datetime(
            x, format="%d/%m/%y %H:%M:%S"),
    )

    # group by day and drop NA values (usually weekends)
    grouped_data = df.dropna()
    ticks_data = grouped_data['Sell'].resample('24H').ohlc()

    # use 'ask'
    # DataFrame.as_matrix() was removed in pandas 1.0; selecting with a list
    # keeps the 2-D (n, 1) shape that the clustering below expects.
    sell_data = grouped_data[['Sell']].to_numpy()

    # calculate bandwidth (experiment with quantile and samples)
    bandwidth = estimate_bandwidth(sell_data, quantile=0.1, n_samples=100)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)

    # fit the data
    ms.fit(sell_data)

    ml_results = []
    # Iterate over the labels that actually occur instead of assuming they
    # are exactly 0..n-1.
    for label in np.unique(ms.labels_):
        values = sell_data[ms.labels_ == label, 0]
        # find the edges; .item() yields native Python numbers so that
        # json can serialize them whatever the column dtype is
        ml_results.append(values.min().item())
        ml_results.append(values.max().item())

    # export the data for the visualizations
    ticks_data.to_json('ticks.json', date_format='iso', orient='index')

    # export ml support resistance
    with open('ml_results.json', 'w') as f:
        json.dump(ml_results, f)

    print("Done. Goto 0.0.0.0:8000/chart.html")
Example #9
Source File: test_cluster.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_estimate_bandwidth(self):
    """The ModelFrame accessor must agree with the plain function call.

    Builds a ``pdml.ModelFrame`` from the iris dataset and checks that
    ``frame.cluster.estimate_bandwidth`` returns the same value as
    ``cluster.estimate_bandwidth`` applied to ``iris.data`` with the same
    ``random_state``.
    """
    iris = datasets.load_iris()
    frame = pdml.ModelFrame(iris)
    seed = self.random_state

    result = frame.cluster.estimate_bandwidth(random_state=seed)
    expected = cluster.estimate_bandwidth(iris.data, random_state=seed)

    self.assertEqual(result, expected)
Example #10
Source File: mechanical.py From CO2MPAS-TA with European Union Public License 1.1 | 5 votes |
def identify_velocity_speed_ratios_v3(
        engine_speeds_out, velocities, idle_engine_speed, stop_velocity):
    """
    Identifies velocity speed ratios from gear box speed vector [km/(h*RPM)].

    Samples where the engine speed exceeds idle median + std and the velocity
    exceeds the stop velocity are turned into velocity/engine-speed ratios.
    These ratios are clustered with Mean Shift, and the sorted cluster
    centers are returned keyed 1..n, plus a key 0 mapped to 0.0.

    :param engine_speeds_out:
        Engine speed [RPM].
    :type engine_speeds_out: numpy.array

    :param velocities:
        Velocity vector [km/h].
    :type velocities: numpy.array

    :param idle_engine_speed:
        Engine speed idle median and std [RPM].
    :type idle_engine_speed: (float, float)

    :param stop_velocity:
        Maximum velocity to consider the vehicle stopped [km/h].
    :type stop_velocity: float

    :return:
        Constant velocity speed ratios of the gear box [km/(h*RPM)].
    :rtype: dict
    """
    import sklearn.cluster as sk_clu

    # Threshold above idle: median plus one standard deviation.
    min_engine_speed = idle_engine_speed[0] + idle_engine_speed[1]

    above_idle = engine_speeds_out > min_engine_speed
    moving = velocities > stop_velocity
    valid = above_idle & moving

    # Column vector of ratios, the 2-D layout the estimators are fed with.
    ratios = (velocities[valid] / engine_speeds_out[valid])[:, None]

    mean_shift = sk_clu.MeanShift(
        bandwidth=sk_clu.estimate_bandwidth(ratios, quantile=0.2),
        bin_seeding=True,
    )
    mean_shift.fit(ratios)

    ordered_centers = sorted(mean_shift.cluster_centers_[:, 0])
    vsr = dict(enumerate(ordered_centers, start=1))
    vsr[0] = 0.0  # added last, after the clustered ratios

    return vsr
Example #11
Source File: test_mean_shift.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_estimate_bandwidth():
    """Bandwidth estimated from 200 samples of X must lie in [0.9, 1.5]."""
    estimated = estimate_bandwidth(X, n_samples=200)
    within_expected_range = 0.9 <= estimated <= 1.5
    assert_true(within_expected_range)