Python sklearn.datasets.make_blobs() Examples
The following are 30 code examples of sklearn.datasets.make_blobs().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.datasets, or try the search function.
Example #1
Source File: test_confidence.py From nussl with MIT License | 7 votes |
def test_js_divergence():
    # The Jensen-Shannon confidence between a one-component and a
    # two-component mixture must exceed the confidence between two
    # one-component mixtures fit on the same blobs.
    n_samples = 1000
    points, _ = datasets.make_blobs(n_samples=n_samples, random_state=8)

    single_a = ml.cluster.GaussianMixture(1)
    single_b = ml.cluster.GaussianMixture(1)
    double = ml.cluster.GaussianMixture(2)
    for mixture in (single_a, single_b, double):
        mixture.fit(points)

    divergence = ml.confidence.jensen_shannon_divergence
    confidence_2v1 = divergence(single_a, double)
    confidence_1v1 = divergence(single_a, single_b)

    assert confidence_2v1 > confidence_1v1
Example #2
Source File: test_samples_generator.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_make_blobs_error():
    # Exercise three invalid make_blobs calls and check, through
    # assert_raise_message, the ValueError text expected for each.
    n_samples = [20, 20, 20]
    centers = np.array([[0.0, 0.0], [1.0, 1.0], [0.0, 1.0]])
    cluster_stds = np.array([0.05, 0.2, 0.4])
    short_centers = centers[:-1]
    short_stds = cluster_stds[:-1]

    # One center fewer than entries in n_samples.
    wrong_centers_msg = (
        "Length of `n_samples` not consistent "
        "with number of centers. Got n_samples = {} "
        "and centers = {}"
    ).format(n_samples, short_centers)
    assert_raise_message(
        ValueError, wrong_centers_msg,
        make_blobs, n_samples, centers=short_centers,
    )

    # One standard deviation fewer than centers.
    wrong_std_msg = (
        "Length of `clusters_std` not consistent with "
        "number of centers. Got centers = {} "
        "and cluster_std = {}"
    ).format(centers, short_stds)
    assert_raise_message(
        ValueError, wrong_std_msg,
        make_blobs, n_samples, centers=centers, cluster_std=short_stds,
    )

    # An integer `centers` combined with a list-valued n_samples.
    wrong_type_msg = (
        "Parameter `centers` must be array-like. "
        "Got {!r} instead"
    ).format(3)
    assert_raise_message(
        ValueError, wrong_type_msg,
        make_blobs, n_samples, centers=3,
    )
Example #3
Source File: helper.py From practicalDataAnalysisCookbook with GNU General Public License v2.0 | 6 votes |
def produce_XOR(sampleSize):
    # Generate four blobs on the corners of a square and relabel them so
    # that diagonally opposite blobs share a class (an XOR-like layout).
    # Returns the (x, y) pair produced by make_blobs, with y relabelled.
    import sklearn.datasets as dt

    # blob centers, in the order that determines the original labels 0..3
    corners = [(0, 0), (3, 0), (3, 3), (0, 3)]

    # draw the sample without shuffling
    points, labels = dt.make_blobs(
        n_samples=sampleSize,
        n_features=2,
        cluster_std=0.8,
        centers=corners,
        shuffle=False,
    )

    # fold labels 2 and 3 onto 0 and 1 respectively
    labels[labels == 2] = 0
    labels[labels == 3] = 1

    return points, labels
Example #4
Source File: test_sparse.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_svc():
    """Run check_svm_model_equal on paired SVCs for each dataset and kernel."""
    # dataset with many classes
    X_blobs, y_blobs = make_blobs(n_samples=100, centers=10, random_state=0)
    X_blobs = sparse.csr_matrix(X_blobs)

    cases = [
        [X_sp, Y, T],
        [X2_sp, Y2, T2],
        [X_blobs[:80], y_blobs[:80], X_blobs[80:]],
        [iris.data, iris.target, iris.data],
    ]
    # settings shared by both classifiers of a pair
    svc_params = dict(gamma=1, probability=True, random_state=0,
                      decision_function_shape='ovo')

    for case in cases:
        for kernel in ["linear", "poly", "rbf", "sigmoid"]:
            first_clf = svm.SVC(kernel=kernel, **svc_params)
            second_clf = svm.SVC(kernel=kernel, **svc_params)
            check_svm_model_equal(first_clf, second_clf, *case)
Example #5
Source File: test_search.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_grid_search_no_score():
    # Grid search over a classifier that has no score function of its own.
    X, y = make_blobs(random_state=0, centers=2)
    Cs = [.1, 1, 10]
    param_grid = {'C': Cs}

    scored_clf = LinearSVC(random_state=0)
    unscored_clf = LinearSVCNoScore(random_state=0)

    reference_search = GridSearchCV(scored_clf, param_grid,
                                    scoring='accuracy')
    reference_search.fit(X, y)

    # smoke test: fitting must work with an explicit scoring
    no_score_search = GridSearchCV(unscored_clf, param_grid,
                                   scoring='accuracy')
    no_score_search.fit(X, y)

    # both searches must pick the same parameters
    assert_equal(no_score_search.best_params_,
                 reference_search.best_params_)
    # score() must be callable and agree between the two searches
    assert_equal(reference_search.score(X, y),
                 no_score_search.score(X, y))

    # with no scoring function given, fitting raises an error
    no_score_search = GridSearchCV(unscored_clf, param_grid)
    assert_raise_message(TypeError, "no scoring",
                         no_score_search.fit, [[1]])
Example #6
Source File: test_search.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_unsupervised_grid_search():
    # Grid search driving an unsupervised estimator (KMeans).
    X, y = make_blobs(random_state=0)
    km = KMeans(random_state=0)
    candidates = dict(n_clusters=[2, 3, 4])

    # Multi-metric evaluation: refit on each metric in turn.
    scoring = ['adjusted_rand_score', 'fowlkes_mallows_score']
    for refit in ['adjusted_rand_score', 'fowlkes_mallows_score']:
        search = GridSearchCV(km, param_grid=candidates,
                              scoring=scoring, refit=refit)
        search.fit(X, y)
        # Both ARI and FMS are expected to select 3 clusters.
        assert_equal(search.best_params_["n_clusters"], 3)

    # Single-metric evaluation.
    search = GridSearchCV(km, param_grid=candidates,
                          scoring='fowlkes_mallows_score')
    search.fit(X, y)
    assert_equal(search.best_params_["n_clusters"], 3)

    # No scoring and no y: the expected best value becomes 4.
    search = GridSearchCV(km, param_grid=candidates)
    search.fit(X)
    assert_equal(search.best_params_["n_clusters"], 4)
Example #7
Source File: test_search.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_deprecated_grid_search_iid():
    # Check when fitting a GridSearchCV emits the `iid` deprecation warning,
    # depending on how evenly the cv splitter divides the 54 samples.
    depr_message = ("The default of the `iid` parameter will change from True "
                    "to False in version 0.22")
    X, y = make_blobs(n_samples=54, random_state=0, centers=2)

    def fit_with(cv):
        # Fresh search for the given cv setting; return its bound fit method.
        search = GridSearchCV(SVC(gamma='scale', random_state=0),
                              param_grid={'C': [10]}, cv=cv)
        return search.fit

    # cv=3: equally sized test sets, so no warning
    assert_no_warnings(fit_with(3), X, y)

    # cv=5: warning because 54 % 5 != 0
    assert_warns_message(DeprecationWarning, depr_message,
                         fit_with(5), X, y)

    # cv=2: warning because of stratification into two classes
    # and 27 % 2 != 0
    assert_warns_message(DeprecationWarning, depr_message,
                         fit_with(2), X, y)

    # KFold(2): no stratification and 54 % 2 == 0, so no warning
    assert_no_warnings(fit_with(KFold(2)), X, y)
Example #8
Source File: test_discriminant_analysis.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_lda_coefs():
    # The coefficients found by the svd, lsqr and eigen solvers should
    # agree to one decimal place.
    n_features = 2
    n_classes = 2
    n_samples = 1000
    X, y = make_blobs(n_samples=n_samples, n_features=n_features,
                      centers=n_classes, random_state=11)

    # Fit one model per solver, in the same order as the comparisons below.
    coefs = {}
    for solver in ("svd", "lsqr", "eigen"):
        model = LinearDiscriminantAnalysis(solver=solver)
        model.fit(X, y)
        coefs[solver] = model.coef_

    assert_array_almost_equal(coefs["svd"], coefs["lsqr"], 1)
    assert_array_almost_equal(coefs["svd"], coefs["eigen"], 1)
    assert_array_almost_equal(coefs["eigen"], coefs["lsqr"], 1)
Example #9
Source File: test_birch.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_partial_fit():
    # A single fit should match several partial_fit calls on the same data.
    X, y = make_blobs(n_samples=100)

    full_model = Birch(n_clusters=3)
    full_model.fit(X)

    chunked_model = Birch(n_clusters=None)
    for chunk in (X[:50], X[50:]):
        chunked_model.partial_fit(chunk)
    assert_array_almost_equal(chunked_model.subcluster_centers_,
                              full_model.subcluster_centers_)

    # After setting n_clusters, partial_fit(None) should yield the same
    # subcluster labels as the fully fitted model.
    chunked_model.set_params(n_clusters=3)
    chunked_model.partial_fit(None)
    assert_array_equal(chunked_model.subcluster_labels_,
                       full_model.subcluster_labels_)
Example #10
Source File: test_birch.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_n_clusters():
    # Exercise the different kinds of value accepted by Birch's n_clusters.
    X, y = make_blobs(n_samples=100, centers=10)

    # Integer n_clusters: more than 10 subclusters, exactly 10 labels.
    int_model = Birch(n_clusters=10)
    int_model.fit(X)
    assert_greater(len(int_model.subcluster_centers_), 10)
    assert_equal(len(np.unique(int_model.labels_)), 10)

    # An AgglomerativeClustering instance as n_clusters must give the
    # same results as the integer form.
    global_clusterer = AgglomerativeClustering(n_clusters=10)
    estimator_model = Birch(n_clusters=global_clusterer)
    estimator_model.fit(X)
    assert_array_equal(int_model.subcluster_labels_,
                       estimator_model.subcluster_labels_)
    assert_array_equal(int_model.labels_, estimator_model.labels_)

    # A wrong global clustering step (here an ElasticNet) raises an error.
    invalid_model = Birch(n_clusters=ElasticNet())
    assert_raises(ValueError, invalid_model.fit, X)

    # A small number of clusters (huge threshold) raises a warning.
    coarse_model = Birch(threshold=10000.)
    assert_warns(ConvergenceWarning, coarse_model.fit, X)
Example #11
Source File: test_birch.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_branching_factor():
    # Nodes must hold at most branching_factor subclusters.
    X, y = make_blobs()
    branching_factor = 9

    # The threshold is deliberately low to maximize the subclusters.
    # Run the check both without and with a global clustering step.
    for n_clusters in (None, 3):
        model = Birch(n_clusters=n_clusters,
                      branching_factor=branching_factor,
                      threshold=0.01)
        model.fit(X)
        check_branching_factor(model.root_, branching_factor)

    # A branching_factor of one must raise an error on fit.
    model = Birch(n_clusters=None, branching_factor=1, threshold=0.01)
    assert_raises(ValueError, model.fit, X)
Example #12
Source File: test_kmeans.py From dislib with Apache License 2.0 | 6 votes |
def test_fit_predict(self):
    """ Checks fit_predict centers and labels on unevenly sized blobs. """
    x, y = make_blobs(n_samples=1500, random_state=170)

    # Keep a different number of samples from each of the three blobs.
    keep_per_label = ((0, 500), (1, 100), (2, 10))
    x_filtered = np.vstack(
        [x[y == label][:count] for label, count in keep_per_label])

    x_train = ds.array(x_filtered, block_size=(300, 2))

    kmeans = KMeans(n_clusters=3, random_state=170)
    labels = kmeans.fit_predict(x_train).collect()

    skmeans = SKMeans(n_clusters=3, random_state=170)
    sklabels = skmeans.fit_predict(x_filtered)

    expected_centers = np.array([[-8.941375656533449, -5.481371322614891],
                                 [-4.524023204953875, 0.06235042593214654],
                                 [2.332994701667008, 0.37681003933082696]])

    self.assertTrue(np.allclose(expected_centers, kmeans.centers))
    self.assertTrue(np.allclose(labels, sklabels))
Example #13
Source File: sequential_minimum_optimization.py From Python with MIT License | 6 votes |
def test_linear_kernel(ax, cost):
    # Fit a linear-kernel SmoSVM on two scaled blobs and plot its partition
    # boundary on `ax`; `cost` is forwarded to SmoSVM.
    n_samples = 500
    train_x, train_y = make_blobs(
        n_samples=n_samples, centers=2, n_features=2, random_state=1
    )
    # Relabel class 0 as -1.
    train_y[train_y == 0] = -1

    train_x_scaled = StandardScaler().fit_transform(train_x, train_y)

    # First column holds the labels, the remaining ones the scaled features.
    label_column = train_y.reshape(n_samples, 1)
    train_data = np.hstack((label_column, train_x_scaled))

    linear_kernel = Kernel(kernel="linear", degree=5, coef0=1, gamma=0.5)
    model = SmoSVM(
        train=train_data,
        kernel_func=linear_kernel,
        cost=cost,
        tolerance=0.001,
        auto_norm=False,
    )
    model.fit()
    plot_partition_boundary(model, train_data, ax=ax)
Example #14
Source File: test_umap_trustworthiness.py From umap with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_string_metric_supervised_umap_trustworthiness():
    # Supervised UMAP with string labels (target_metric="string") must give
    # an embedding whose trustworthiness at 10 neighbours is at least 0.95.
    data, labels = make_blobs(50, cluster_std=0.5, random_state=42)
    # Map the integer labels to strings.
    labels = np.array(["this", "that", "other"])[labels]
    embedding = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        target_metric="string",
        target_weight=0.8,
        n_epochs=100,
        random_state=42,
    ).fit_transform(data, labels)
    trust = trustworthiness(data, embedding, 10)
    # The two literals are concatenated, so the first one needs a trailing
    # space; otherwise the message reads "...embedding forblobs dataset".
    assert_greater_equal(
        trust,
        0.95,
        "Insufficiently trustworthy embedding for "
        "blobs dataset: {}".format(trust),
    )
Example #15
Source File: test_dbscan.py From dislib with Apache License 2.0 | 6 votes |
def test_sparse(self):
    """ Checks that DBSCAN labels agree for sparse and dense inputs. """
    n_samples = 1500
    x, y = make_blobs(n_samples=n_samples, random_state=170)

    dbscan = DBSCAN(n_regions=1, eps=.15)

    # Apply a linear transformation to the blobs, then standardize.
    transformation = [[0.6, -0.6], [-0.4, 0.8]]
    x = StandardScaler().fit_transform(np.dot(x, transformation))

    block_size = (300, 2)
    dense = ds.array(x, block_size=block_size)
    sparse = ds.array(csr_matrix(x), block_size=block_size)

    y_dense = dbscan.fit_predict(dense).collect()
    y_sparse = dbscan.fit_predict(sparse).collect()

    self.assertTrue(np.array_equal(y_dense, y_sparse))
Example #16
Source File: test_dbscan.py From dislib with Apache License 2.0 | 6 votes |
def test_n_clusters_aniso_dimensions(self):
    """ Checks the cluster count and sizes found by DBSCAN when
    dimensions is given explicitly. """
    n_samples = 1500
    x, y = make_blobs(n_samples=n_samples, random_state=170)

    dbscan = DBSCAN(n_regions=5, dimensions=[1], eps=.15)

    # Apply a linear transformation to the blobs, then standardize.
    transformation = [[0.6, -0.6], [-0.4, 0.8]]
    x = StandardScaler().fit_transform(np.dot(x, transformation))

    ds_x = ds.array(x, block_size=(300, 2))
    y_pred = dbscan.fit_predict(ds_x).collect()

    true_sizes = {19, 496, 491, 488, 6}
    # Number of samples carrying each label from -1 through 3.
    cluster_sizes = {y_pred[y_pred == label].size for label in range(-1, 4)}

    self.assertEqual(dbscan.n_clusters, 4)
    self.assertEqual(true_sizes, cluster_sizes)
Example #17
Source File: test_dbscan.py From dislib with Apache License 2.0 | 6 votes |
def test_n_clusters_aniso_grid(self):
    """ Checks the cluster count and sizes found by DBSCAN with
    n_regions > 1 on anisotropically distributed data. """
    n_samples = 1500
    x, y = make_blobs(n_samples=n_samples, random_state=170)

    dbscan = DBSCAN(n_regions=4, eps=.15, max_samples=500)

    # Apply a linear transformation to the blobs, then standardize.
    transformation = [[0.6, -0.6], [-0.4, 0.8]]
    x = StandardScaler().fit_transform(np.dot(x, transformation))

    ds_x = ds.array(x, block_size=(300, 2))
    y_pred = dbscan.fit_predict(ds_x).collect()

    true_sizes = {19, 496, 491, 488, 6}
    # Number of samples carrying each label from -1 through 3.
    cluster_sizes = {y_pred[y_pred == label].size for label in range(-1, 4)}

    self.assertEqual(dbscan.n_clusters, 4)
    self.assertEqual(true_sizes, cluster_sizes)
Example #18
Source File: test_dbscan.py From dislib with Apache License 2.0 | 6 votes |
def test_n_clusters_aniso_max_samples(self):
    """ Checks the cluster count and sizes found by DBSCAN with
    max_samples set on anisotropically distributed data. """
    n_samples = 1500
    x, y = make_blobs(n_samples=n_samples, random_state=170)

    dbscan = DBSCAN(n_regions=1, eps=.15, max_samples=500)

    # Apply a linear transformation to the blobs, then standardize.
    transformation = [[0.6, -0.6], [-0.4, 0.8]]
    x = StandardScaler().fit_transform(np.dot(x, transformation))

    ds_x = ds.array(x, block_size=(300, 2))
    y_pred = dbscan.fit_predict(ds_x).collect()

    true_sizes = {19, 496, 491, 488, 6}
    # Number of samples carrying each label from -1 through 3.
    cluster_sizes = {y_pred[y_pred == label].size for label in range(-1, 4)}

    self.assertEqual(dbscan.n_clusters, 4)
    self.assertEqual(true_sizes, cluster_sizes)
Example #19
Source File: test_dbscan.py From dislib with Apache License 2.0 | 6 votes |
def test_n_clusters_aniso(self):
    """ Checks the cluster count and sizes found by DBSCAN on
    anisotropically distributed data. """
    n_samples = 1500
    x, y = make_blobs(n_samples=n_samples, random_state=170)

    dbscan = DBSCAN(n_regions=1, eps=.15)

    # Apply a linear transformation to the blobs, then standardize.
    transformation = [[0.6, -0.6], [-0.4, 0.8]]
    x = StandardScaler().fit_transform(np.dot(x, transformation))

    ds_x = ds.array(x, block_size=(300, 2))
    y_pred = dbscan.fit_predict(ds_x).collect()

    true_sizes = {19, 496, 491, 488, 6}
    # Number of samples carrying each label from -1 through 3.
    cluster_sizes = {y_pred[y_pred == label].size for label in range(-1, 4)}

    self.assertEqual(dbscan.n_clusters, 4)
    self.assertEqual(true_sizes, cluster_sizes)
Example #20
Source File: test_preproc.py From dislib with Apache License 2.0 | 6 votes |
def test_irregular(self):
    """ Checks StandardScaler on a row slice of a ds-array against
    the SKScaler result on the same rows. """
    n_samples = 1500
    x, y = make_blobs(n_samples=n_samples, random_state=170)
    transformation = [[0.6, -0.6], [-0.4, 0.8]]
    x = np.dot(x, transformation)

    # Slice the distributed and the plain array over the same rows.
    rows = slice(297, 602)
    ds_arr = ds.array(x, block_size=(300, 2))[rows]
    x = x[rows]

    reference_scaler = SKScaler()
    scaled_x = reference_scaler.fit_transform(x)
    ds_scaler = StandardScaler()
    ds_scaled = ds_scaler.fit_transform(ds_arr)

    # Scaled values and fitted statistics must match the reference.
    self.assertTrue(np.allclose(scaled_x, ds_scaled.collect()))
    self.assertTrue(
        np.allclose(reference_scaler.mean_, ds_scaler.mean_.collect()))
    self.assertTrue(
        np.allclose(reference_scaler.var_, ds_scaler.var_.collect()))

    # The recorded top-left shape must match the actual first block.
    first_block = compss_wait_on(ds_scaled._blocks[0][0])
    self.assertEqual(ds_scaled._top_left_shape, first_block.shape)

    # The block layout of the input must be carried over to the output.
    self.assertEqual(ds_arr._reg_shape, ds_scaled._reg_shape)
    self.assertEqual(ds_arr._top_left_shape, ds_scaled._top_left_shape)
    self.assertEqual(ds_arr.shape, ds_scaled.shape)
    self.assertEqual(ds_arr._n_blocks, ds_scaled._n_blocks)
Example #21
Source File: test_preproc.py From dislib with Apache License 2.0 | 6 votes |
def test_fit_transform(self):
    """ Checks StandardScaler.fit_transform on a ds-array against
    the SKScaler result on the same data. """
    n_samples = 1500
    x, y = make_blobs(n_samples=n_samples, random_state=170)
    transformation = [[0.6, -0.6], [-0.4, 0.8]]
    x = np.dot(x, transformation)
    ds_arr = ds.array(x, block_size=(300, 2))

    reference_scaler = SKScaler()
    scaled_x = reference_scaler.fit_transform(x)
    ds_scaler = StandardScaler()
    ds_scaled = ds_scaler.fit_transform(ds_arr)

    # Scaled values and fitted statistics must match the reference.
    self.assertTrue(np.allclose(scaled_x, ds_scaled.collect()))
    self.assertTrue(
        np.allclose(reference_scaler.mean_, ds_scaler.mean_.collect()))
    self.assertTrue(
        np.allclose(reference_scaler.var_, ds_scaler.var_.collect()))

    # The recorded top-left shape must match the first block's shape.
    first_block = ds_scaled._blocks[0][0]
    self.assertEqual(ds_scaled._top_left_shape, first_block.shape)

    # The block layout of the input must be carried over to the output.
    self.assertEqual(ds_arr._reg_shape, ds_scaled._reg_shape)
    self.assertEqual(ds_arr._top_left_shape, ds_scaled._top_left_shape)
    self.assertEqual(ds_arr.shape, ds_scaled.shape)
    self.assertEqual(ds_arr._n_blocks, ds_scaled._n_blocks)
Example #22
Source File: estimator_checks.py From Splunking-Crime with GNU Affero General Public License v3.0 | 6 votes |
def check_decision_proba_consistency(name, estimator_orig):
    # For an estimator exposing both decision_function and predict_proba,
    # check that the two outputs rank the test samples identically.

    centers = [(2, 2), (4, 4)]
    # NOTE(review): the centers are 2-D and X_test below has 2 columns, so
    # n_features=4 is presumably overridden by `centers` -- confirm against
    # the make_blobs documentation.
    X, y = make_blobs(n_samples=100, random_state=0, n_features=4,
                      centers=centers, cluster_std=1.0, shuffle=True)
    X_test = np.random.randn(20, 2) + 4
    estimator = clone(estimator_orig)

    has_decision = hasattr(estimator, "decision_function")
    if not has_decision or not hasattr(estimator, "predict_proba"):
        # Nothing to compare unless both methods exist.
        return

    estimator.fit(X, y)
    proba_positive = estimator.predict_proba(X_test)[:, 1]
    decision = estimator.decision_function(X_test)
    assert_array_equal(rankdata(proba_positive), rankdata(decision))
Example #23
Source File: estimator_checks.py From Splunking-Crime with GNU Affero General Public License v3.0 | 6 votes |
def check_estimators_partial_fit_n_features(name, estimator_orig):
    # partial_fit must raise ValueError when a later call receives a
    # different number of features than the first one.
    if not hasattr(estimator_orig, 'partial_fit'):
        return
    estimator = clone(estimator_orig)
    X, y = make_blobs(n_samples=50, random_state=1)
    X -= X.min()

    try:
        # Classifiers are additionally given the set of classes.
        extra = {}
        if is_classifier(estimator):
            extra['classes'] = np.unique(y)
        estimator.partial_fit(X, y, **extra)
    except NotImplementedError:
        return

    # Second call with the last feature column dropped.
    fewer_features = X[:, :-1]
    assert_raises(ValueError, estimator.partial_fit, fewer_features, y)
Example #24
Source File: test_umap_trustworthiness.py From umap with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_discrete_metric_supervised_umap_trustworthiness():
    # Supervised UMAP with target_metric="ordinal" must give an embedding
    # whose trustworthiness at 10 neighbours is at least 0.95.
    data, labels = make_blobs(50, cluster_std=0.5, random_state=42)
    embedding = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        target_metric="ordinal",
        target_weight=0.8,
        n_epochs=100,
        random_state=42,
    ).fit_transform(data, labels)
    trust = trustworthiness(data, embedding, 10)
    # The two literals are concatenated, so the first one needs a trailing
    # space; otherwise the message reads "...embedding forblobs dataset".
    assert_greater_equal(
        trust,
        0.95,
        "Insufficiently trustworthy embedding for "
        "blobs dataset: {}".format(trust),
    )
Example #25
Source File: test_umap_trustworthiness.py From umap with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_metric_supervised_umap_trustworthiness():
    # Supervised UMAP with target_metric="l1" must give an embedding whose
    # trustworthiness at 10 neighbours is at least 0.95.
    data, labels = make_blobs(50, cluster_std=0.5, random_state=42)
    embedding = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        target_metric="l1",
        target_weight=0.8,
        n_epochs=100,
        random_state=42,
    ).fit_transform(data, labels)
    trust = trustworthiness(data, embedding, 10)
    # The two literals are concatenated, so the first one needs a trailing
    # space; otherwise the message reads "...embedding forblobs dataset".
    assert_greater_equal(
        trust,
        0.95,
        "Insufficiently trustworthy embedding for "
        "blobs dataset: {}".format(trust),
    )
Example #26
Source File: estimator_checks.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def check_estimators_fit_returns_self(name, estimator_orig):
    """Verify that fit hands back the estimator it was called on."""
    X, y = make_blobs(random_state=0, n_samples=9, n_features=4)
    # shift so the minimum is zero; some estimators want non-negative input
    X -= X.min()

    estimator = clone(estimator_orig)
    y = multioutput_estimator_convert_y_2d(estimator, y)
    set_random_state(estimator)

    fitted = estimator.fit(X, y)
    assert_true(fitted is estimator)
Example #27
Source File: test_t_sne.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_optimization_minimizes_kl_divergence():
    """KL divergence must not increase as t-SNE runs more iterations."""
    random_state = check_random_state(0)
    X, _ = make_blobs(n_features=3, random_state=random_state)

    def kl_after(n_iter):
        # Fit a fresh TSNE for n_iter iterations and report its divergence.
        tsne = TSNE(n_components=2, perplexity=10, learning_rate=100.0,
                    n_iter=n_iter, random_state=0)
        tsne.fit_transform(X)
        return tsne.kl_divergence_

    kl_divergences = [kl_after(n_iter) for n_iter in [250, 300, 350]]

    # Each longer run must score no worse than the one before it.
    for shorter, longer in zip(kl_divergences, kl_divergences[1:]):
        assert_less_equal(longer, shorter)
Example #28
Source File: estimator_checks.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def check_clusterer_compute_labels_predict(name, clusterer_orig):
    """Verify that predictions do not depend on the compute_labels setting."""
    X, y = make_blobs(n_samples=20, random_state=0)
    clusterer = clone(clusterer_orig)

    if not hasattr(clusterer, "compute_labels"):
        # Only clusterers with this parameter (MiniBatchKMeans) are checked.
        return

    if hasattr(clusterer, "random_state"):
        clusterer.set_params(random_state=0)

    # Predict once with the cloned setting, once with compute_labels off.
    pred_before = clusterer.fit(X).predict(X)
    clusterer.set_params(compute_labels=False)
    pred_after = clusterer.fit(X).predict(X)
    assert_array_equal(pred_before, pred_after)
Example #29
Source File: benchmarks.py From kepler-mapper with MIT License | 5 votes |
def profile():
    # Profile KeplerMapper's fit_transform + map over freshly generated
    # blob datasets and print the "kmapper" entries of the statistics.
    num_sets = 100
    blob_size = 1000
    nr_cubes = 10
    overlap = 0.2

    # Generate all datasets up front so generation is not profiled.
    blob_list = [datasets.make_blobs(blob_size)[0] for _ in range(num_sets)]

    mapper = KeplerMapper(verbose=0)

    profiler = cProfile.Profile()
    profiler.enable()
    for data in blob_list:
        lens = mapper.fit_transform(data)
        mapper.map(lens, data, nr_cubes=nr_cubes, overlap_perc=overlap)
    profiler.disable()

    report = io.StringIO()
    stats = pstats.Stats(profiler, stream=report)
    stats = stats.strip_dirs().sort_stats("cumulative")
    stats.print_stats("kmapper")

    summary = "Ran {} blobs of size {} with params (nr_cubes:{}\toverlap:{})"
    print(summary.format(num_sets, blob_size, nr_cubes, overlap))
    print(report.getvalue())
Example #30
Source File: estimator_checks.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def check_classifiers_classes(name, classifier_orig):
    # Fit a clone of the classifier on string labels (both the numpy string
    # dtype and object dtype) and check the predicted label set; a
    # mismatching classes_ attribute is only reported via print.
    X, y = make_blobs(n_samples=30, random_state=0, cluster_std=0.1)
    X, y = shuffle(X, y, random_state=7)
    X = StandardScaler().fit_transform(X)
    # Shift the data so it is non negative, for things like NMF.
    X -= X.min() - .1
    y_names = np.array(["one", "two", "three"])[y]

    # These estimators are fit on the integer labels instead.
    # TODO some complication with -1 label
    use_int_labels = name in ["LabelPropagation", "LabelSpreading"]

    for label_variant in (y_names, y_names.astype('O')):
        y_ = y if use_int_labels else label_variant

        classes = np.unique(y_)
        classifier = clone(classifier_orig)
        if name == 'BernoulliNB':
            classifier.set_params(binarize=X.mean())
        set_random_state(classifier)

        # fit, then predict on the training set
        classifier.fit(X, y_)
        y_pred = classifier.predict(X)

        # training set performance: every class must be predicted
        assert_array_equal(classes, np.unique(y_pred))

        if np.any(classifier.classes_ != classes):
            print("Unexpected classes_ attribute for %r: "
                  "expected %s, got %s" %
                  (classifier, classes, classifier.classes_))