Python sklearn.datasets.make_moons() Examples

The following are 30 code examples of sklearn.datasets.make_moons(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.datasets, or try the search function.
Example #1
Source File: tests.py    From numpy_neural_net with MIT License 7 votes vote down vote up
def noise():
    """Plot training-loss curves for several ``make_moons`` noise levels.

    For every noise value a fresh 200-sample dataset is generated, a model
    is created with ``build_model(X, 32, 2)`` and trained, and the losses
    returned by ``train`` are printed. All loss curves are then drawn on
    one figure, one labelled line per noise value.
    """
    noise_values = [0.01, 0.1, 0.2, 0.3, 0.4]
    learning_rate = 0.01  # learning rate for gradient descent
    reg_lambda = 0.01  # regularization strength
    losses_store = []
    for noise_value in noise_values:
        X, y = datasets.make_moons(200, noise=noise_value)
        model = build_model(X, 32, 2)
        model, losses = train(model, X, y, reg_lambda=reg_lambda,
                              learning_rate=learning_rate)
        losses_store.append(losses)
        # Call form: same output as the old `print losses` statement on
        # Python 2, and valid syntax on Python 3.
        print(losses)
    # NOTE(review): the x axis is fixed at 30 points, so this presumably
    # relies on train() returning exactly 30 loss values -- confirm.
    x = np.linspace(0, 145, 30)
    for noise_value, losses in zip(noise_values, losses_store):
        plt.plot(x, losses, label='noise_value = ' + str(noise_value))
    plt.legend()
    plt.show()
Example #2
Source File: mini.py    From SymJAX with Apache License 2.0 7 votes vote down vote up
def load_mini(N=1000):
    """Build the "mini" dataset from a moons sample and a circles sample.

    ``N`` is forwarded as the sample count to both ``make_moons`` and
    ``make_circles``. The circles are moved up by 2.0 along the second
    coordinate and their labels are offset by 2 before the two samples
    are concatenated. The inputs are then centred per column and divided
    by the per-column maximum.

    Returns a ``Dataset`` whose "inputs/train_set" entry holds the float32
    points and whose "outputs/train_set" entry holds the int32 labels.
    """
    moon_points, moon_labels = make_moons(N, noise=0.035, random_state=20)
    ring_points, ring_labels = make_circles(N, noise=0.02, random_state=20)

    # Displace the circles and give them their own label values.
    ring_points[:, 1] += 2.0
    ring_labels += 2

    points = np.concatenate([moon_points, ring_points], axis=0)
    labels = np.concatenate([moon_labels, ring_labels])

    # Centre each column, then scale it by its maximum.
    points -= points.mean(0, keepdims=True)
    points /= points.max(0, keepdims=True)

    dataset = Dataset(
        datum_shape=(2,),
        n_classes=4,
        name="mini",
        classes=[str(u) for u in range(4)],
    )
    dataset["inputs/train_set"] = points.astype("float32")
    dataset["outputs/train_set"] = labels.astype("int32")
    return dataset
Example #3
Source File: test_hierarchical.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_single_linkage_clustering():
    """Single-linkage clustering must recover the labels of moons and circles."""
    # Check that we get the correct result in two emblematic cases
    emblematic_cases = [
        make_moons(noise=0.05, random_state=42),
        make_circles(factor=0.5, noise=0.025, random_state=42),
    ]
    for points, true_labels in emblematic_cases:
        model = AgglomerativeClustering(n_clusters=2, linkage='single')
        model.fit(points)
        score = normalized_mutual_info_score(model.labels_, true_labels)
        assert_almost_equal(score, 1)
Example #4
Source File: tests.py    From numpy_neural_net with MIT License 6 votes vote down vote up
def test_num_nodes():
    """Plot training-loss curves for several ``build_model`` size values.

    One 400-sample moons dataset is generated once. For each entry of
    ``node_vals`` a model is created with ``build_model(X, val, 2)`` and
    trained; the losses returned by ``train`` are printed and finally
    plotted, one line per value, labelled ``n_nodes = <val>``.
    """
    X, y = datasets.make_moons(400, noise=0.2)
    learning_rate = 0.01  # learning rate for gradient descent
    reg_lambda = 0.01  # regularization strength
    node_vals = [4, 8, 16, 32, 64, 128]
    losses_store = []
    for val in node_vals:
        model = build_model(X, val, 2)
        model, losses = train(model, X, y, reg_lambda=reg_lambda,
                              learning_rate=learning_rate)
        losses_store.append(losses)
        # Call form: same output as the old `print losses` statement on
        # Python 2, and valid syntax on Python 3.
        print(losses)
    # NOTE(review): the x axis is fixed at 30 points, so this presumably
    # relies on train() returning exactly 30 loss values -- confirm.
    x = np.linspace(0, 145, 30)
    for val, losses in zip(node_vals, losses_store):
        plt.plot(x, losses, label='n_nodes = ' + str(val))
    plt.legend()
    plt.show()
Example #5
Source File: tests.py    From numpy_neural_net with MIT License 6 votes vote down vote up
def reg():
    """Plot training-loss curves for several regularization strengths.

    For every value in ``reg_values`` a fresh 200-sample moons dataset is
    generated, a model is created with ``build_model(X, 32, 2)`` and
    trained with that value as ``reg_lambda``. The losses returned by
    ``train`` are printed and then plotted, one labelled line per value.
    """
    reg_values = [0.00, 0.01, 0.1, 0.2, 0.3]
    learning_rate = 0.01  # learning rate for gradient descent
    losses_store = []
    for reg_lambda in reg_values:  # regularization strength
        X, y = datasets.make_moons(200, noise=0.2)
        model = build_model(X, 32, 2)
        model, losses = train(model, X, y, reg_lambda=reg_lambda,
                              learning_rate=learning_rate)
        losses_store.append(losses)
        # Call form: same output as the old `print losses` statement on
        # Python 2, and valid syntax on Python 3.
        print(losses)
    # NOTE(review): the x axis is fixed at 30 points, so this presumably
    # relies on train() returning exactly 30 loss values -- confirm.
    x = np.linspace(0, 145, 30)
    for reg_value, losses in zip(reg_values, losses_store):
        plt.plot(x, losses, label='regularization_value = ' + str(reg_value))
    plt.legend()
    plt.show()
Example #6
Source File: tests.py    From numpy_neural_net with MIT License 6 votes vote down vote up
def num_observations():
    """Plot training-loss curves for several dataset sizes.

    For every size in ``obs_values`` a moons dataset with that many
    samples is generated, a model is created with
    ``build_model(X, 32, 2)`` and trained. The losses returned by
    ``train`` are printed and then plotted, one labelled line per size.
    """
    obs_values = [10, 100, 1000]
    learning_rate = 0.01  # learning rate for gradient descent
    reg_lambda = 0.01  # regularization strength
    losses_store = []
    for n_obs in obs_values:
        X, y = datasets.make_moons(n_obs, noise=0.1)
        model = build_model(X, 32, 2)
        model, losses = train(model, X, y, reg_lambda=reg_lambda,
                              learning_rate=learning_rate)
        losses_store.append(losses)
        # Call form: same output as the old `print losses` statement on
        # Python 2, and valid syntax on Python 3.
        print(losses)
    # NOTE(review): the x axis is fixed at 30 points, so this presumably
    # relies on train() returning exactly 30 loss values -- confirm.
    x = np.linspace(0, 145, 30)
    for n_obs, losses in zip(obs_values, losses_store):
        plt.plot(x, losses, label='n_observations = ' + str(n_obs))
    plt.legend()
    plt.show()
Example #7
Source File: kernel_pca.py    From intro_ds with Apache License 2.0 6 votes vote down vote up
def runKernelPCA():
    """
    Reduce the dimensionality of the data with kernel PCA
    """
    data, labels = make_moons(n_samples=100, noise=0.05)
    fig = plt.figure(figsize=(10, 10), dpi=80)

    def first_component(fitted):
        # Keep only the first transformed column and pair it with a column
        # of zeros so the result can still be passed as two-column data.
        projected = fitted.transform(data)[:, 0]
        return np.c_[projected, [0] * len(projected)]

    # Visualize the original data
    visualizeKernelPCA(fig.add_subplot(2, 2, 1), data, labels)

    # Reduce the data with PCA and visualize the result
    panel = fig.add_subplot(2, 2, 2)
    visualizeKernelPCA(panel, first_component(trainPCA(data)), labels)

    # Reduce the data with kernel PCA and visualize the result
    panel = fig.add_subplot(2, 2, 3)
    kernel_model = trainKernelPCA(data)
    visualizeKernelPCA(panel, first_component(kernel_model), labels)

    # Show the kernel PCA result on the first and second principal components
    panel = fig.add_subplot(2, 2, 4)
    visualizeKernelPCA(panel, kernel_model.transform(data), labels)
    plt.show()
Example #8
Source File: experiments_moons.py    From domain_adversarial_neural_network with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def make_trans_moons(theta=40, nb=100, noise=.05):
    """Return a moons sample and a second, rotated moons sample.

    Two samples of ``nb`` points are drawn with seeds 1 and 2. Both are
    translated by the negated mean of the first sample and doubled; the
    second one is then multiplied by the transpose of a 2x2 matrix built
    from the cosine and sine of ``-theta`` degrees.

    Returns ``(X, y, Xt, yt)``: the first sample with its labels, then the
    transformed second sample with its labels.
    """
    from math import cos, sin, pi

    source_X, source_y = make_moons(nb, noise=noise, random_state=1)
    target_X, target_y = make_moons(nb, noise=noise, random_state=2)

    # Both samples use the shift computed from the first sample.
    shift = -np.mean(source_X, axis=0)
    source_X = 2 * (source_X + shift)
    target_X = 2 * (target_X + shift)

    angle = -theta * pi / 180
    c, s = cos(angle), sin(angle)
    rotation = np.array([[c, s], [-s, c]])
    target_X = np.dot(target_X, rotation.T)

    return source_X, source_y, target_X, target_y
Example #9
Source File: test_samples_generator.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_make_moons():
    """Points from ``make_moons(3, shuffle=False)`` lie on their unit circle."""
    points, labels = make_moons(3, shuffle=False)
    for point, label in zip(points, labels):
        # Label 0 is measured from the origin, any other label from (1, 0.5).
        if label == 0:
            center = [0.0, 0.0]
        else:
            center = [1.0, 0.5]
        squared_distance = ((point - center) ** 2).sum()
        assert_almost_equal(squared_distance, 1.0,
                            err_msg="Point is not on expected unit circle")
Example #10
Source File: dbscan.py    From ML-From-Scratch with MIT License 5 votes vote down vote up
def main():
    """Run DBSCAN on a moons sample and plot predicted and actual labels."""
    # Load the dataset
    points, true_labels = datasets.make_moons(
        n_samples=300, noise=0.08, shuffle=False)

    # Cluster the data using DBSCAN
    predicted_labels = DBSCAN(eps=0.17, min_samples=5).predict(points)

    # Project the data onto the 2 primary principal components
    plotter = Plot()
    plotter.plot_in_2d(points, predicted_labels, title="DBSCAN")
    plotter.plot_in_2d(points, true_labels, title="Actual Clustering")
Example #11
Source File: three_layer_network.py    From numpy_neural_net with MIT License 5 votes vote down vote up
def main():
    """Train a model on a 16-point moons sample and compute predictions.

    The predictions are the per-row argmax of element 3 of the value
    returned by ``feed_forward``.
    """
    # toy dataset
    X, y = datasets.make_moons(16, noise=0.10)
    learning_rate = 0.01  # learning rate for gradient descent
    reg_lambda = 0.01  # regularization strength
    model = build_model(X, 20, 2)
    model, losses = train(
        model, X, y,
        reg_lambda=reg_lambda,
        learning_rate=learning_rate,
    )
    output = feed_forward(model, X)
    preds = np.argmax(output[3], axis=1)
Example #12
Source File: four_layer_network.py    From numpy_neural_net with MIT License 5 votes vote down vote up
def main():
    """Train a model on a 16-point moons sample and compute predictions.

    The predictions are the per-row argmax of element 3 of the value
    returned by ``feed_forward``.
    """
    # toy dataset
    X, y = datasets.make_moons(16, noise=0.10)
    # gradient-descent learning rate and regularization strength
    training_options = {'reg_lambda': 0.01, 'learning_rate': 0.01}
    model = build_model(X, 20, 2)
    model, losses = train(model, X, y, **training_options)
    output = feed_forward(model, X)
    preds = np.argmax(output[3], axis=1)
Example #13
Source File: classification_example.py    From intro_ds with Apache License 2.0 5 votes vote down vote up
def generateData(n):
    """
    Randomly generate training data

    Returns a DataFrame with columns "y", "x1" and "x2" built from an
    ``n``-sample moons dataset (seed 2046).
    """
    features, target = make_moons(n_samples=n, noise=0.05, random_state=2046)
    # Put the label first, followed by the two feature columns.
    table = np.column_stack((target, features))
    return pd.DataFrame(table, columns=["y", "x1", "x2"])
Example #14
Source File: kernel_pca.py    From intro_ds with Apache License 2.0 5 votes vote down vote up
def generateData(n):
    """
    Generate linear and nonlinear data

    Returns ``(linear, nonLinear)``: ``n`` points of ``y = x + noise`` with
    ``x`` evenly spaced on [-5, 5], and the points of an ``n``-sample
    moons dataset.
    """
    x = np.linspace(-5, 5, n)
    error = np.random.randn(n)
    linear = np.column_stack((x, 1 * x + error))
    nonLinear = make_moons(n_samples=n, noise=0.05)[0]
    return linear, nonLinear
Example #15
Source File: gmm_vs_spectral.py    From intro_ds with Apache License 2.0 5 votes vote down vote up
def generateMoons(n):
    """
    Generate crescent-shaped (moons) data

    Only the points are returned; the labels are discarded.
    """
    return make_moons(n_samples=n, noise=0.08)[0]
Example #16
Source File: classification_example.py    From intro_ds with Apache License 2.0 5 votes vote down vote up
def generateData(n):
    """
    Generate four standardized two-column datasets of ``n`` samples each.

    Returns ``(blobs, circles, moons, blocks)``; every element is a
    ``(features, labels)`` pair whose features went through
    ``StandardScaler.fit_transform``.
    """
    np.random.seed(12046)
    blobs = make_blobs(n_samples=n, centers=[[-2, -2], [2, 2]])
    circles = make_circles(n_samples=n, factor=.4, noise=.05)
    moons = make_moons(n_samples=n, noise=.05)
    uniform_points = np.random.rand(n, 2) - 0.5
    # Label is 1 where the two coordinates have opposite signs, else 0.
    sign_labels = (uniform_points[:, 0] * uniform_points[:, 1] < 0) + 0
    blocks = (uniform_points, sign_labels)
    # Neural networks are not stable under linear transformations of the
    # data, so the data is normalized
    scaler = StandardScaler()
    blobs, circles, moons, blocks = [
        (scaler.fit_transform(features), labels)
        for features, labels in (blobs, circles, moons, blocks)
    ]
    return blobs, circles, moons, blocks
Example #17
Source File: moons.py    From pytorch-flows with MIT License 5 votes vote down vote up
def load_data():
    """Return three consecutive slices of a 30000-point moons sample.

    The slices hold rows 0-23999, 24000-26999 and 27000 onwards.
    """
    points = ds.make_moons(n_samples=30000, shuffle=True, noise=0.05)[0]
    first_cut, second_cut = 24000, 27000
    head = points[:first_cut]
    middle = points[first_cut:second_cut]
    tail = points[second_cut:]
    return head, middle, tail
Example #18
Source File: test_dbscan.py    From dislib with Apache License 2.0 5 votes vote down vote up
def test_n_clusters_moons_grid(self):
        """ Tests that DBSCAN finds the correct number of clusters when
        setting n_regions > 1 with moon data.
        """
        points, _ = make_moons(n_samples=1500, noise=.05)
        scaled_points = StandardScaler().fit_transform(points)
        estimator = DBSCAN(n_regions=4, eps=.3, max_samples=600)
        estimator.fit(ds.array(scaled_points, block_size=(300, 2)))
        self.assertEqual(estimator.n_clusters, 2)
Example #19
Source File: test_dbscan.py    From dislib with Apache License 2.0 5 votes vote down vote up
def test_n_clusters_moons_max_samples(self):
        """ Tests that DBSCAN finds the correct number of clusters when
        defining max_samples with moon data.
        """
        points, _ = make_moons(n_samples=1500, noise=.05)
        scaled_points = StandardScaler().fit_transform(points)
        estimator = DBSCAN(n_regions=1, eps=.3, max_samples=500)
        estimator.fit(ds.array(scaled_points, block_size=(300, 2)))
        self.assertEqual(estimator.n_clusters, 2)
Example #20
Source File: test_dbscan.py    From dislib with Apache License 2.0 5 votes vote down vote up
def test_n_clusters_moons(self):
        """ Tests that DBSCAN finds the correct number of clusters with
        moon data.
        """
        points, _ = make_moons(n_samples=1500, noise=.05)
        scaled_points = StandardScaler().fit_transform(points)
        estimator = DBSCAN(n_regions=1, eps=.3)
        estimator.fit(ds.array(scaled_points, block_size=(300, 2)))
        self.assertEqual(estimator.n_clusters, 2)
Example #21
Source File: test_dbcb.py    From DBCV with MIT License 5 votes vote down vote up
def data():
    """Return the points of a 60-sample moons dataset (noise .05, seed 1)."""
    return datasets.make_moons(n_samples=60, noise=.05, random_state=1)[0]
Example #22
Source File: profiler.py    From DBCV with MIT License 5 votes vote down vote up
def generate_data(n_samples=300, noise=0.05):
    """Return the points of a moons dataset; the labels are discarded."""
    return datasets.make_moons(n_samples=n_samples, noise=noise)[0]
Example #23
Source File: app.py    From dash-svm with MIT License 5 votes vote down vote up
def generate_data(n_samples, dataset, noise):
    """Generate the synthetic dataset selected by ``dataset``.

    ``dataset`` must be 'moons', 'circles' or 'linear'. The result is the
    pair returned by the matching sklearn generator; for 'linear' the
    features additionally get ``noise`` times a uniform sample added.

    Raises:
        ValueError: if ``dataset`` is none of the three known names.
    """
    if dataset == 'moons':
        return datasets.make_moons(
            n_samples=n_samples, noise=noise, random_state=0)

    if dataset == 'circles':
        return datasets.make_circles(
            n_samples=n_samples, noise=noise, factor=0.5, random_state=1)

    if dataset != 'linear':
        raise ValueError(
            'Data type incorrectly specified. Please choose an existing '
            'dataset.')

    features, labels = datasets.make_classification(
        n_samples=n_samples,
        n_features=2,
        n_redundant=0,
        n_informative=2,
        random_state=2,
        n_clusters_per_class=1,
    )
    # Perturb the features with a separately seeded uniform sample.
    jitter_rng = np.random.RandomState(2)
    features += noise * jitter_rng.uniform(size=features.shape)
    return features, labels
Example #24
Source File: test_examples.py    From gplearn with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_classifier_comparison():
    """Test the classifier comparison example works"""

    X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                               random_state=1, n_clusters_per_class=1)
    rng = np.random.RandomState(2)
    X += 2 * rng.uniform(size=X.shape)
    problems = [
        make_moons(noise=0.3, random_state=0),
        make_circles(noise=0.2, factor=0.5, random_state=1),
        (X, y),
    ]

    def holdout_score(features, labels):
        # Standardize, hold out 40% of the rows, fit and score on the rest.
        features = StandardScaler().fit_transform(features)
        X_train, X_test, y_train, y_test = train_test_split(
            features, labels, test_size=.4, random_state=42)
        clf = SymbolicClassifier(random_state=0)
        clf.fit(X_train, y_train)
        return ('%.2f' % clf.score(X_test, y_test)).lstrip('0')

    scores = [holdout_score(features, labels) for features, labels in problems]

    assert_equal(scores, ['.95', '.93', '.95'])
Example #25
Source File: half_moon.py    From auxiliary-deep-generative-models with MIT License 5 votes vote down vote up
def _download():
    """Generate the train, test and validation half-moon splits.

    Each split is a 10000-sample shuffled ``make_moons`` draw with noise
    0.2. Every split is then shifted by the absolute value of its own
    global minimum.

    Returns:
        ``(train_set, test_set, valid_set)``, each an ``(x, t)`` tuple of
        points and labels.
    """
    # Each split needs its own seed. With one shared seed and size, the
    # three draws were the same data, so test and validation repeated the
    # training points. The training split keeps the original seed 1234.
    train_x, train_t = make_moons(n_samples=10000, shuffle=True, noise=0.2, random_state=1234)
    test_x, test_t = make_moons(n_samples=10000, shuffle=True, noise=0.2, random_state=1235)
    valid_x, valid_t = make_moons(n_samples=10000, shuffle=True, noise=0.2, random_state=1236)

    # NOTE(review): each split is shifted by its own minimum, as before.
    # Now that the splits differ, the offsets can differ slightly too --
    # confirm whether a shared offset is wanted.
    train_x += np.abs(train_x.min())
    test_x += np.abs(test_x.min())
    valid_x += np.abs(valid_x.min())

    train_set = (train_x, train_t)
    test_set = (test_x, test_t)
    valid_set = (valid_x, valid_t)

    return train_set, test_set, valid_set
Example #26
Source File: test_base.py    From carl with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_as_classifier():
    """``as_classifier`` must fit -1/+1 labels and reject a third label value."""
    X, y = make_moons(n_samples=100, random_state=1)
    y = 2 * y - 1  # use -1/+1 labels

    wrapped = as_classifier(DecisionTreeRegressor())
    wrapped.fit(X, y)
    probas = wrapped.predict_proba(X)
    predictions = wrapped.predict(X)

    assert_array_equal(probas.shape, (len(X), 2))
    assert_array_equal(predictions, y)

    # Introduce a label value other than -1/+1; fitting must then fail.
    y[-1] = 2
    fresh = as_classifier(DecisionTreeRegressor())
    assert_raises(ValueError, fresh.fit, X, y)
Example #27
Source File: test_samples_generator.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_make_moons():
    """Points from ``make_moons(3, shuffle=False)`` lie on their unit circle."""
    samples, targets = make_moons(3, shuffle=False)
    for sample, target in zip(samples, targets):
        # Label 0 is measured from the origin, any other label from (1, 0.5).
        center = [1.0, 0.5] if target != 0 else [0.0, 0.0]
        offset = sample - center
        assert_almost_equal((offset ** 2).sum(), 1.0,
                            err_msg="Point is not on expected unit circle")
Example #28
Source File: test_blas.py    From FATE with Apache License 2.0 4 votes vote down vote up
def test_plain_lr():
    """Run gradient-descent logistic regression on moons data and print the AUC.

    500 ``make_moons`` samples are stored row by row in two session tables
    and wrapped as ``TensorInEgg``; the weights and bias are wrapped as
    ``TensorInPy``. The loop runs ``max_iter`` updates using the sigmoid of
    ``X @ w + b``, then the model is scored on a 50-sample moons set with
    ``roc_auc_score``. Nothing is asserted; results are only printed.
    """
    from sklearn.datasets import make_moons
    import functools
    # Change the flow_id, otherwise the in-memory table may be overwritten
    session.init(mode=0)
    # A fresh uuid is used as the second argument of both tables below.
    ns = str(uuid.uuid1())

    X = session.table('testX7', ns, partition=2)
    Y = session.table('testY7', ns, partition=2)

    b = np.array([0])
    eta = 1.2  # step size applied to both gradients
    max_iter = 10

    total_num = 500

    _x, _y = make_moons(total_num, noise=0.25,random_state=12345)
    # Store each sample and its label under the row index as key.
    for i in range(np.shape(_y)[0]):
        X.put(i, _x[i])
        Y.put(i, _y[i])

    print(len([y for y in Y.collect()]))

    # Wall-clock time in whole milliseconds.
    current_milli_time = lambda: int(round(time.time() * 1000))

    start = current_milli_time()
    #shape_w = [1, np.shape(_x)[1]]
    # One weight per feature column, initialised to 1.
    shape_w = [np.shape(_x)[1]]
    w = np.ones(shape_w)

    print(w)
    X = TensorInEgg(None,None,X)
    Y = TensorInEgg(None,None,Y)
    w = TensorInPy(None,None,w)
    b = TensorInPy(None, None, b)

    # lr = LR(shape_w)
    # lr.train(X, Y)
    itr = 0
    while itr < max_iter:
        # NOTE(review): this value is overwritten on the next line;
        # presumably a leftover or a smoke test of division -- confirm.
        H = 1 / X
        # Sigmoid of the linear score: 1 / (1 + exp(-(X @ w + b))).
        H = 1.0 / (1 + ((X @ w + b) * -1).map(np.exp))
        # Residual between prediction and label.
        R = H - Y

        gradient_w = (R * X).sum() / total_num
        gradient_b = R.sum() / total_num
        w = w - eta * gradient_w
        b = b - eta * gradient_b
        print("aaa",w,b)
        # self.plot(itr)
        itr += 1

    print("train total time: {}".format(current_milli_time() - start))
    # Score on a separate 50-sample draw (same seed, default noise).
    _x_test, _y_test = make_moons(50,random_state=12345)
    _x_test = TensorInPy(None,None, _x_test)
    y_pred = 1.0 / (1 + ((_x_test @ w + b) * -1).map(np.exp))
    from sklearn import metrics

    auc = metrics.roc_auc_score(_y_test, y_pred.store.reshape(50))
    print("auc: {}".format(auc)) 
Example #29
Source File: icnn.py    From icnn with Apache License 2.0 4 votes vote down vote up
def main():
    """Parse the command line, build a synthetic dataset and train a model.

    The output directory ``<save>/<model>.<dataset>`` is deleted if it
    already exists and then recreated. The dataset chosen with
    ``--dataset`` is generated, its labels are reshaped to a float32
    column, and a ``Model`` is trained inside a TensorFlow session.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--save', type=str, default='work')
    parser.add_argument('--nEpoch', type=int, default=100)
    # parser.add_argument('--testBatchSz', type=int, default=2048)
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--model', type=str, default="picnn",
                        choices=['picnn', 'ficnn'])
    parser.add_argument('--dataset', type=str, default="moons",
                        choices=['moons', 'circles', 'linear'])
    parser.add_argument('--noncvx', action='store_true')
    args = parser.parse_args()

    # Seed both NumPy and TensorFlow from the same option.
    npr.seed(args.seed)
    tf.set_random_seed(args.seed)

    process_title = 'bamos.icnn.synthetic.{}.{}'.format(args.model, args.dataset)
    setproctitle.setproctitle(process_title)

    run_name = "{}.{}".format(args.model, args.dataset)
    save_dir = os.path.join(os.path.expanduser(args.save), run_name)
    # Start from an empty output directory.
    if os.path.isdir(save_dir):
        shutil.rmtree(save_dir)
    os.makedirs(save_dir, exist_ok=True)

    if args.dataset == "moons":
        dataX, dataY = make_moons(noise=0.3, random_state=0)
    elif args.dataset == "circles":
        dataX, dataY = make_circles(noise=0.2, factor=0.5, random_state=0)
        # Swap the two label values.
        dataY = 1. - dataY
    elif args.dataset == "linear":
        dataX, dataY = make_classification(
            n_features=2, n_redundant=0, n_informative=2,
            random_state=1, n_clusters_per_class=1)
        rng = np.random.RandomState(2)
        dataX += 2 * rng.uniform(size=dataX.shape)
    else:
        assert False

    dataY = dataY.reshape((-1, 1)).astype(np.float32)

    nFeatures = dataX.shape[1]
    nLabels = 1

    session_config = tf.ConfigProto()  # log_device_placement=False)
    session_config.gpu_options.allow_growth = True
    with tf.Session(config=session_config) as sess:
        model = Model(nFeatures, nLabels, sess, args.model, nGdIter=30)
        model.train(args, dataX, dataY)
Example #30
Source File: test_clustering.py    From nussl with MIT License 4 votes vote down vote up
def cluster_data():
    """Yield five named toy datasets together with a dict of base parameters.

    The yielded value is ``(data, default_base)``. ``data`` is a list of
    ``(name, dataset, params)`` tuples; ``params`` is a dict whose keys
    also appear in ``default_base`` (presumably per-dataset overrides --
    confirm against the consumer).
    """
    # Seed the global NumPy generator before any data is created.
    np.random.seed(0)

    # ============
    # Generate datasets. We choose the size big enough to see the scalability
    # of the algorithms, but not too big to avoid too long running times
    # ============
    n_samples = 1500
    noisy_circles = datasets.make_circles(n_samples=n_samples, factor=.5,
                                          noise=.05)
    noisy_moons = datasets.make_moons(n_samples=n_samples, noise=.05)
    blobs = datasets.make_blobs(n_samples=n_samples, random_state=8)

    # Anisotropicly distributed data
    random_state = 170
    X, y = datasets.make_blobs(n_samples=n_samples, random_state=random_state)
    # Apply a fixed 2x2 linear map to the blob points.
    transformation = [[0.6, -0.6], [-0.4, 0.8]]
    X_aniso = np.dot(X, transformation)
    aniso = (X_aniso, y)

    # blobs with varied variances
    varied = datasets.make_blobs(n_samples=n_samples,
                                 cluster_std=[1.0, 2.5, 0.5],
                                 random_state=random_state)

    # Base parameter values yielded alongside the datasets.
    default_base = {'quantile': .3,
                    'eps': .3,
                    'damping': .9,
                    'preference': -200,
                    'n_neighbors': 10,
                    'n_clusters': 3,
                    'min_samples': 20,
                    'xi': 0.05,
                    'min_cluster_size': 0.1}

    # One (name, dataset, params) entry per dataset.
    data = [
        ('noisy_circles', noisy_circles, {'damping': .77, 'preference': -240,
                                          'quantile': .2, 'n_clusters': 2,
                                          'min_samples': 20, 'xi': 0.25}),
        ('noisy_moons', noisy_moons, {'damping': .75, 'preference': -220, 'n_clusters': 2}),
        ('varied', varied, {'eps': .18, 'n_neighbors': 2,
                            'min_samples': 5, 'xi': 0.035, 'min_cluster_size': .2}),
        ('aniso', aniso, {'eps': .15, 'n_neighbors': 2,
                          'min_samples': 20, 'xi': 0.1, 'min_cluster_size': .2}),
        ('blobs', blobs, {}),
    ]
    yield data, default_base