Python sklearn.datasets.make_circles() Examples
The following are 26 code examples of sklearn.datasets.make_circles(). You can go to the original project or source file by following the links above each example, or check out all other available functions and classes of the module sklearn.datasets.
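Before the project examples, a minimal standalone sketch of the generator itself may help (this snippet is ours, not from any of the projects below, and uses only the standard make_circles parameters). make_circles returns an (n_samples, 2) coordinate array and a label vector, with label 0 on the outer unit circle and label 1 on the inner circle scaled by factor:

from sklearn.datasets import make_circles

# Two concentric noisy rings: y == 0 marks the outer unit circle,
# y == 1 marks the inner circle scaled down by `factor`.
X, y = make_circles(n_samples=100, noise=0.05, factor=0.5, random_state=0)

print(X.shape)           # (100, 2): 2D point coordinates
print(y.min(), y.max())  # 0 1: outer vs. inner circle labels

The nested circles are a classic dataset that no linear model can separate, which is why many of the examples below feed them to kernel methods, tree embeddings, or clustering algorithms.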
Example #1
Source File: mini.py From SymJAX with Apache License 2.0 | 7 votes |
def load_mini(N=1000):
    # Stack a moons dataset (classes 0 and 1) with a circles dataset
    # shifted upwards (classes 2 and 3) to build a 4-class toy problem.
    X, y = make_moons(N, noise=0.035, random_state=20)
    x_, y_ = make_circles(N, noise=0.02, random_state=20)
    x_[:, 1] += 2.0
    y_ += 2
    X = np.concatenate([X, x_], axis=0)
    y = np.concatenate([y, y_])

    # Center and rescale the features.
    X -= X.mean(0, keepdims=True)
    X /= X.max(0, keepdims=True)
    X = X.astype("float32")
    y = y.astype("int32")

    dict_init = [
        ("datum_shape", (2,)),
        ("n_classes", 4),
        ("name", "mini"),
        ("classes", [str(u) for u in range(4)]),
    ]
    dataset = Dataset(**dict(dict_init))
    dataset["inputs/train_set"] = X
    dataset["outputs/train_set"] = y
    return dataset
Example #2
Source File: test_forest.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_random_trees_dense_equal():
    # Test that the `sparse_output` parameter of RandomTreesEmbedding
    # works by returning the same array for both argument values.

    # Create the RTEs
    hasher_dense = RandomTreesEmbedding(n_estimators=10, sparse_output=False,
                                        random_state=0)
    hasher_sparse = RandomTreesEmbedding(n_estimators=10, sparse_output=True,
                                         random_state=0)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed_dense = hasher_dense.fit_transform(X)
    X_transformed_sparse = hasher_sparse.fit_transform(X)

    # Assert that dense and sparse hashers have same array.
    assert_array_equal(X_transformed_sparse.toarray(), X_transformed_dense)


# Ignore warnings from switching to more power iterations in randomized_svd
Example #3
Source File: test_forest.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_random_hasher():
    # test random forest hashing on circles dataset
    # make sure that it is linearly separable.
    # even after projected to two SVD dimensions
    # Note: Not all random_states produce perfect results.
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=1)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed = hasher.fit_transform(X)

    # test fit and transform:
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=1)
    assert_array_equal(hasher.fit(X).transform(X).toarray(),
                       X_transformed.toarray())

    # one leaf active per data point per forest
    assert_equal(X_transformed.shape[0], X.shape[0])
    assert_array_equal(X_transformed.sum(axis=1), hasher.n_estimators)

    svd = TruncatedSVD(n_components=2)
    X_reduced = svd.fit_transform(X_transformed)
    linear_clf = LinearSVC()
    linear_clf.fit(X_reduced, y)
    assert_equal(linear_clf.score(X_reduced, y), 1.)
Example #4
Source File: test_hierarchical.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_single_linkage_clustering():
    # Check that we get the correct result in two emblematic cases
    moons, moon_labels = make_moons(noise=0.05, random_state=42)
    clustering = AgglomerativeClustering(n_clusters=2, linkage='single')
    clustering.fit(moons)
    assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
                                                     moon_labels), 1)

    circles, circle_labels = make_circles(factor=0.5, noise=0.025,
                                          random_state=42)
    clustering = AgglomerativeClustering(n_clusters=2, linkage='single')
    clustering.fit(circles)
    assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
                                                     circle_labels), 1)
Example #5
Source File: test_samples_generator.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_make_circles():
    factor = 0.3

    for (n_samples, n_outer, n_inner) in [(7, 3, 4), (8, 4, 4)]:
        # Testing odd and even case, because in the past make_circles always
        # created an even number of samples.
        X, y = make_circles(n_samples, shuffle=False, noise=None,
                            factor=factor)
        assert_equal(X.shape, (n_samples, 2), "X shape mismatch")
        assert_equal(y.shape, (n_samples,), "y shape mismatch")
        center = [0.0, 0.0]
        for x, label in zip(X, y):
            dist_sqr = ((x - center) ** 2).sum()
            dist_exp = 1.0 if label == 0 else factor ** 2
            assert_almost_equal(dist_sqr, dist_exp,
                                err_msg="Point is not on expected circle")

        assert_equal(X[y == 0].shape, (n_outer, 2),
                     "Samples not correctly distributed across circles.")
        assert_equal(X[y == 1].shape, (n_inner, 2),
                     "Samples not correctly distributed across circles.")

    assert_raises(ValueError, make_circles, factor=-0.01)
    assert_raises(ValueError, make_circles, factor=1.)
Example #6
Source File: test_kernel_pca.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_gridsearch_pipeline_precomputed():
    # Test if we can do a grid-search to find parameters to separate
    # circles with a perceptron model using a precomputed kernel.
    X, y = make_circles(n_samples=400, factor=.3, noise=.05, random_state=0)
    kpca = KernelPCA(kernel="precomputed", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca),
                         ("Perceptron", Perceptron(max_iter=5))])
    param_grid = dict(Perceptron__max_iter=np.arange(1, 5))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    X_kernel = rbf_kernel(X, gamma=2.)
    grid_search.fit(X_kernel, y)
    assert_equal(grid_search.best_score_, 1)


# 0.23. warning about tol not having its correct default value.
Example #7
Source File: test_kernel_pca.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_nested_circles():
    # Test the linear separability of the first 2D KPCA transform
    X, y = make_circles(n_samples=400, factor=.3, noise=.05, random_state=0)

    # 2D nested circles are not linearly separable
    train_score = Perceptron(max_iter=5).fit(X, y).score(X, y)
    assert_less(train_score, 0.8)

    # Project the circles data into the first 2 components of a RBF Kernel
    # PCA model.
    # Note that the gamma value is data dependent. If this test breaks
    # and the gamma value has to be updated, the Kernel PCA example will
    # have to be updated too.
    kpca = KernelPCA(kernel="rbf", n_components=2,
                     fit_inverse_transform=True, gamma=2.)
    X_kpca = kpca.fit_transform(X)

    # The data is perfectly linearly separable in that space
    train_score = Perceptron(max_iter=5).fit(X_kpca, y).score(X_kpca, y)
    assert_equal(train_score, 1.0)
Example #8
Source File: sequential_minimum_optimization.py From Python with MIT License | 6 votes |
def test_rbf_kernel(ax, cost):
    train_x, train_y = make_circles(
        n_samples=500, noise=0.1, factor=0.1, random_state=1
    )
    # Remap label 0 to -1 (SVM-style {-1, +1} labels).
    train_y[train_y == 0] = -1
    scaler = StandardScaler()
    train_x_scaled = scaler.fit_transform(train_x, train_y)
    # First column holds the label, remaining columns the scaled features.
    train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))
    mykernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
    mysvm = SmoSVM(
        train=train_data,
        kernel_func=mykernel,
        cost=cost,
        tolerance=0.001,
        auto_norm=False,
    )
    mysvm.fit()
    plot_partition_boundary(mysvm, train_data, ax=ax)
Example #9
Source File: test_forest.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_random_hasher():
    # test random forest hashing on circles dataset
    # make sure that it is linearly separable.
    # even after projected to two SVD dimensions
    # Note: Not all random_states produce perfect results.
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=1)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed = hasher.fit_transform(X)

    # test fit and transform:
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=1)
    assert_array_equal(hasher.fit(X).transform(X).toarray(),
                       X_transformed.toarray())

    # one leaf active per data point per forest
    assert_equal(X_transformed.shape[0], X.shape[0])
    assert_array_equal(X_transformed.sum(axis=1), hasher.n_estimators)

    svd = TruncatedSVD(n_components=2)
    X_reduced = svd.fit_transform(X_transformed)
    linear_clf = LinearSVC()
    linear_clf.fit(X_reduced, y)
    assert_equal(linear_clf.score(X_reduced, y), 1.)
Example #10
Source File: test_forest.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_random_trees_dense_equal():
    # Test that the `sparse_output` parameter of RandomTreesEmbedding
    # works by returning the same array for both argument values.

    # Create the RTEs
    hasher_dense = RandomTreesEmbedding(n_estimators=10, sparse_output=False,
                                        random_state=0)
    hasher_sparse = RandomTreesEmbedding(n_estimators=10, sparse_output=True,
                                         random_state=0)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed_dense = hasher_dense.fit_transform(X)
    X_transformed_sparse = hasher_sparse.fit_transform(X)

    # Assert that dense and sparse hashers have same array.
    assert_array_equal(X_transformed_sparse.toarray(), X_transformed_dense)


# Ignore warnings from switching to more power iterations in randomized_svd
Example #11
Source File: test_dbscan.py From dislib with Apache License 2.0 | 5 votes |
def test_n_clusters_circles_grid(self):
    """ Tests that DBSCAN finds the correct number of clusters when
    setting n_regions > 1 with circle data.
    """
    n_samples = 1500
    x, y = make_circles(n_samples=n_samples, factor=.5, noise=.05)
    dbscan = DBSCAN(n_regions=4, eps=.15, max_samples=700)
    x = StandardScaler().fit_transform(x)
    ds_x = ds.array(x, block_size=(300, 2))

    dbscan.fit(ds_x)
    self.assertEqual(dbscan.n_clusters, 2)
Example #12
Source File: app.py From dash-svm with MIT License | 5 votes |
def generate_data(n_samples, dataset, noise):
    if dataset == 'moons':
        return datasets.make_moons(
            n_samples=n_samples,
            noise=noise,
            random_state=0
        )
    elif dataset == 'circles':
        return datasets.make_circles(
            n_samples=n_samples,
            noise=noise,
            factor=0.5,
            random_state=1
        )
    elif dataset == 'linear':
        X, y = datasets.make_classification(
            n_samples=n_samples,
            n_features=2,
            n_redundant=0,
            n_informative=2,
            random_state=2,
            n_clusters_per_class=1
        )
        rng = np.random.RandomState(2)
        X += noise * rng.uniform(size=X.shape)
        linearly_separable = (X, y)
        return linearly_separable
    else:
        raise ValueError(
            'Data type incorrectly specified. Please choose an existing '
            'dataset.')
Example #13
Source File: classification_example.py From intro_ds with Apache License 2.0 | 5 votes |
def generateData(n):
    """ """
    np.random.seed(12046)
    blobs = make_blobs(n_samples=n, centers=[[-2, -2], [2, 2]])
    circles = make_circles(n_samples=n, factor=.4, noise=.05)
    moons = make_moons(n_samples=n, noise=.05)
    blocks = np.random.rand(n, 2) - 0.5
    y = (blocks[:, 0] * blocks[:, 1] < 0) + 0
    blocks = (blocks, y)
    # Neural networks are unstable under linear transformations of the data,
    # so standardize each dataset.
    scaler = StandardScaler()
    blobs = (scaler.fit_transform(blobs[0]), blobs[1])
    circles = (scaler.fit_transform(circles[0]), circles[1])
    moons = (scaler.fit_transform(moons[0]), moons[1])
    blocks = (scaler.fit_transform(blocks[0]), blocks[1])
    return blobs, circles, moons, blocks
Example #14
Source File: gmm_vs_spectral.py From intro_ds with Apache License 2.0 | 5 votes |
def generateCircles(n):
    """
    Generate circles data.
    """
    data, _ = make_circles(n_samples=n, factor=0.5, noise=0.06)
    return data
Example #15
Source File: test_forest.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_random_trees_dense_type():
    # Test that the `sparse_output` parameter of RandomTreesEmbedding
    # works by returning a dense array.

    # Create the RTE with sparse=False
    hasher = RandomTreesEmbedding(n_estimators=10, sparse_output=False)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed = hasher.fit_transform(X)

    # Assert that type is ndarray, not scipy.sparse.csr.csr_matrix
    assert_equal(type(X_transformed), np.ndarray)
Example #16
Source File: test_kernel_pca.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_gridsearch_pipeline():
    # Test if we can do a grid-search to find parameters to separate
    # circles with a perceptron model.
    X, y = make_circles(n_samples=400, factor=.3, noise=.05, random_state=0)
    kpca = KernelPCA(kernel="rbf", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca),
                         ("Perceptron", Perceptron(max_iter=5))])
    param_grid = dict(kernel_pca__gamma=2. ** np.arange(-2, 2))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    grid_search.fit(X, y)
    assert_equal(grid_search.best_score_, 1)
Example #17
Source File: test_kernel_pca.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_gridsearch_pipeline_precomputed():
    # Test if we can do a grid-search to find parameters to separate
    # circles with a perceptron model using a precomputed kernel.
    X, y = make_circles(n_samples=400, factor=.3, noise=.05, random_state=0)
    kpca = KernelPCA(kernel="precomputed", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca),
                         ("Perceptron", Perceptron(max_iter=5))])
    param_grid = dict(Perceptron__max_iter=np.arange(1, 5))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    X_kernel = rbf_kernel(X, gamma=2.)
    grid_search.fit(X_kernel, y)
    assert_equal(grid_search.best_score_, 1)
Example #18
Source File: test_dbscan.py From dislib with Apache License 2.0 | 5 votes |
def test_n_clusters_circles_max_samples(self):
    """ Tests that DBSCAN finds the correct number of clusters when
    defining max_samples with circle data.
    """
    n_samples = 1500
    x, y = make_circles(n_samples=n_samples, factor=.5, noise=.05)
    dbscan = DBSCAN(n_regions=1, eps=.15, max_samples=500)
    x = StandardScaler().fit_transform(x)
    ds_x = ds.array(x, block_size=(300, 2))

    dbscan.fit(ds_x)
    self.assertEqual(dbscan.n_clusters, 2)
Example #19
Source File: test_dbscan.py From dislib with Apache License 2.0 | 5 votes |
def test_n_clusters_circles(self):
    """ Tests that DBSCAN finds the correct number of clusters with
    circle data.
    """
    n_samples = 1500
    x, y = make_circles(n_samples=n_samples, factor=.5, noise=.05)
    dbscan = DBSCAN(n_regions=1, eps=.15)
    x = StandardScaler().fit_transform(x)
    ds_x = ds.array(x, block_size=(300, 2))

    dbscan.fit(ds_x)
    self.assertEqual(dbscan.n_clusters, 2)
Example #20
Source File: distributions.py From swae-pytorch with MIT License | 5 votes |
def rand_ring2d(batch_size):
    """ This function generates 2D samples from a hollowed-circle
    distribution in a 2-dimensional space.

        Args:
            batch_size (int): number of batch samples

        Return:
            torch.Tensor: tensor of size (batch_size, 2)
    """
    circles = make_circles(2 * batch_size, noise=.01)
    z = np.squeeze(circles[0][np.argwhere(circles[1] == 0), :])
    return torch.from_numpy(z).type(torch.FloatTensor)
Example #21
Source File: bivariate.py From SDGym with MIT License | 5 votes |
def make_two_rings(num_samples):
    samples, labels = make_circles(num_samples, shuffle=True, noise=None,
                                   random_state=None, factor=0.6)
    return samples
Example #22
Source File: test_coverer.py From kepler-mapper with MIT License | 5 votes |
def test_complete_pipeline(self, CoverClass):
    # TODO: add a mock that asserts the cover was called appropriately..
    # or test number of cubes etc.
    data, _ = datasets.make_circles()
    data = data.astype(np.float64)

    mapper = KeplerMapper()
    graph = mapper.map(data, cover=CoverClass())
    mapper.visualize(graph)
Example #23
Source File: test_visuals.py From kepler-mapper with MIT License | 5 votes |
def test_format_mapper_data(self, jinja_env):
    mapper = KeplerMapper()
    data, labels = make_circles(1000, random_state=0)
    lens = mapper.fit_transform(data, projection=[0])
    graph = mapper.map(lens, data)
    color_function = lens[:, 0]
    inverse_X = data
    projected_X = lens
    projected_X_names = ["projected_%s" % (i)
                         for i in range(projected_X.shape[1])]
    inverse_X_names = ["inverse_%s" % (i)
                       for i in range(inverse_X.shape[1])]
    custom_tooltips = np.array(["customized_%s" % (l) for l in labels])

    graph_data = format_mapper_data(
        graph,
        color_function,
        inverse_X,
        inverse_X_names,
        projected_X,
        projected_X_names,
        custom_tooltips,
        jinja_env,
    )
    # print(graph_data)
    # Dump to json so we can easily tell what's in it.
    graph_data = json.dumps(graph_data)

    # TODO test more properties!
    assert "name" in graph_data
    assert """cube2_cluster0""" in graph_data
    assert """projected_0""" in graph_data
    assert """inverse_0""" in graph_data
    assert """customized_""" in graph_data
Example #24
Source File: test_forest.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_random_trees_dense_type():
    # Test that the `sparse_output` parameter of RandomTreesEmbedding
    # works by returning a dense array.

    # Create the RTE with sparse=False
    hasher = RandomTreesEmbedding(n_estimators=10, sparse_output=False)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed = hasher.fit_transform(X)

    # Assert that type is ndarray, not scipy.sparse.csr.csr_matrix
    assert_equal(type(X_transformed), np.ndarray)
Example #25
Source File: icnn.py From icnn with Apache License 2.0 | 4 votes |
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--save', type=str, default='work')
    parser.add_argument('--nEpoch', type=int, default=100)
    # parser.add_argument('--testBatchSz', type=int, default=2048)
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--model', type=str, default="picnn",
                        choices=['picnn', 'ficnn'])
    parser.add_argument('--dataset', type=str, default="moons",
                        choices=['moons', 'circles', 'linear'])
    parser.add_argument('--noncvx', action='store_true')
    args = parser.parse_args()

    npr.seed(args.seed)
    tf.set_random_seed(args.seed)

    setproctitle.setproctitle('bamos.icnn.synthetic.{}.{}'.format(
        args.model, args.dataset))

    save = os.path.join(os.path.expanduser(args.save),
                        "{}.{}".format(args.model, args.dataset))
    if os.path.isdir(save):
        shutil.rmtree(save)
    os.makedirs(save, exist_ok=True)

    if args.dataset == "moons":
        (dataX, dataY) = make_moons(noise=0.3, random_state=0)
    elif args.dataset == "circles":
        (dataX, dataY) = make_circles(noise=0.2, factor=0.5, random_state=0)
        dataY = 1. - dataY
    elif args.dataset == "linear":
        (dataX, dataY) = make_classification(n_features=2, n_redundant=0,
                                             n_informative=2, random_state=1,
                                             n_clusters_per_class=1)
        rng = np.random.RandomState(2)
        dataX += 2 * rng.uniform(size=dataX.shape)
    else:
        assert(False)

    dataY = dataY.reshape((-1, 1)).astype(np.float32)

    nData = dataX.shape[0]
    nFeatures = dataX.shape[1]
    nLabels = 1
    nXy = nFeatures + nLabels

    config = tf.ConfigProto()  # log_device_placement=False
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = Model(nFeatures, nLabels, sess, args.model, nGdIter=30)
        model.train(args, dataX, dataY)
Example #26
Source File: test_clustering.py From nussl with MIT License | 4 votes |
def cluster_data():
    np.random.seed(0)

    # ============
    # Generate datasets. We choose the size big enough to see the scalability
    # of the algorithms, but not too big to avoid too long running times
    # ============
    n_samples = 1500
    noisy_circles = datasets.make_circles(n_samples=n_samples, factor=.5,
                                          noise=.05)
    noisy_moons = datasets.make_moons(n_samples=n_samples, noise=.05)
    blobs = datasets.make_blobs(n_samples=n_samples, random_state=8)

    # Anisotropicly distributed data
    random_state = 170
    X, y = datasets.make_blobs(n_samples=n_samples, random_state=random_state)
    transformation = [[0.6, -0.6], [-0.4, 0.8]]
    X_aniso = np.dot(X, transformation)
    aniso = (X_aniso, y)

    # blobs with varied variances
    varied = datasets.make_blobs(n_samples=n_samples,
                                 cluster_std=[1.0, 2.5, 0.5],
                                 random_state=random_state)

    default_base = {'quantile': .3,
                    'eps': .3,
                    'damping': .9,
                    'preference': -200,
                    'n_neighbors': 10,
                    'n_clusters': 3,
                    'min_samples': 20,
                    'xi': 0.05,
                    'min_cluster_size': 0.1}

    data = [
        ('noisy_circles', noisy_circles, {'damping': .77, 'preference': -240,
                                          'quantile': .2, 'n_clusters': 2,
                                          'min_samples': 20, 'xi': 0.25}),
        ('noisy_moons', noisy_moons, {'damping': .75, 'preference': -220,
                                      'n_clusters': 2}),
        ('varied', varied, {'eps': .18, 'n_neighbors': 2,
                            'min_samples': 5, 'xi': 0.035,
                            'min_cluster_size': .2}),
        ('aniso', aniso, {'eps': .15, 'n_neighbors': 2,
                          'min_samples': 20, 'xi': 0.1,
                          'min_cluster_size': .2}),
        ('blobs', blobs, {}),
    ]

    yield data, default_base