Python sklearn.datasets.make_moons() Examples
The following are 30 code examples of sklearn.datasets.make_moons(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.datasets, or try the search function.
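Before the project examples, here is a minimal sketch of the function itself: make_moons() returns an (n_samples, 2) array of points forming two interleaving half-circles, plus a binary label array. The n_samples, noise, and random_state values below are arbitrary choices for illustration, not taken from any of the projects.

import matplotlib.pyplot as plt
from sklearn.datasets import make_moons

# Two interleaving half-moons; noise adds Gaussian jitter to the points,
# random_state makes the draw reproducible (values here are arbitrary).
X, y = make_moons(n_samples=200, noise=0.1, random_state=0)
print(X.shape, y.shape)  # (200, 2) (200,)

# Scatter the two classes to see the moon shapes
plt.scatter(X[:, 0], X[:, 1], c=y, s=15)
plt.title('make_moons(n_samples=200, noise=0.1)')
plt.show()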
Example #1
Source File: tests.py From numpy_neural_net with MIT License | 7 votes |
def noise():
    noise_values = [0.01, 0.1, 0.2, 0.3, 0.4]
    nn_input_dim = 2  # input layer dimensionality
    nn_output_dim = 2  # output layer dimensionality
    learning_rate = 0.01  # learning rate for gradient descent
    reg_lambda = 0.01  # regularization strength
    losses_store = []
    for i in noise_values:
        X, y = datasets.make_moons(200, noise=i)
        num_examples = len(X)  # training set size
        model = build_model(X, 32, 2)
        model, losses = train(model, X, y, reg_lambda=reg_lambda,
                              learning_rate=learning_rate)
        losses_store.append(losses)
        print(losses)
    x = np.linspace(0, 145, 30)
    for i in range(len(losses_store)):
        lab = 'noise_value = ' + str(noise_values[i])
        plt.plot(x, losses_store[i], label=lab)
    plt.legend()
    plt.show()
Example #2
Source File: mini.py From SymJAX with Apache License 2.0 | 7 votes |
def load_mini(N=1000):
    X, y = make_moons(N, noise=0.035, random_state=20)
    x_, y_ = make_circles(N, noise=0.02, random_state=20)
    x_[:, 1] += 2.0
    y_ += 2
    X = np.concatenate([X, x_], axis=0)
    y = np.concatenate([y, y_])
    X -= X.mean(0, keepdims=True)
    X /= X.max(0, keepdims=True)
    X = X.astype("float32")
    y = y.astype("int32")
    dict_init = [
        ("datum_shape", (2,)),
        ("n_classes", 4),
        ("name", "mini"),
        ("classes", [str(u) for u in range(4)]),
    ]
    dataset = Dataset(**dict(dict_init))
    dataset["inputs/train_set"] = X
    dataset["outputs/train_set"] = y
    return dataset
Example #3
Source File: test_hierarchical.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_single_linkage_clustering():
    # Check that we get the correct result in two emblematic cases
    moons, moon_labels = make_moons(noise=0.05, random_state=42)
    clustering = AgglomerativeClustering(n_clusters=2, linkage='single')
    clustering.fit(moons)
    assert_almost_equal(
        normalized_mutual_info_score(clustering.labels_, moon_labels), 1)

    circles, circle_labels = make_circles(factor=0.5, noise=0.025,
                                          random_state=42)
    clustering = AgglomerativeClustering(n_clusters=2, linkage='single')
    clustering.fit(circles)
    assert_almost_equal(
        normalized_mutual_info_score(clustering.labels_, circle_labels), 1)
Example #4
Source File: tests.py From numpy_neural_net with MIT License | 6 votes |
def test_num_nodes():
    X, y = datasets.make_moons(400, noise=0.2)
    num_examples = len(X)  # training set size
    nn_input_dim = 2  # input layer dimensionality
    nn_output_dim = 2  # output layer dimensionality
    learning_rate = 0.01  # learning rate for gradient descent
    reg_lambda = 0.01  # regularization strength
    node_vals = [4, 8, 16, 32, 64, 128]
    losses_store = []
    for val in node_vals:
        model = build_model(X, val, 2)
        model, losses = train(model, X, y, reg_lambda=reg_lambda,
                              learning_rate=learning_rate)
        losses_store.append(losses)
        print(losses)
    x = np.linspace(0, 145, 30)
    for i in range(len(losses_store)):
        lab = 'n_nodes = ' + str(node_vals[i])
        plt.plot(x, losses_store[i], label=lab)
    plt.legend()
    plt.show()
Example #5
Source File: tests.py From numpy_neural_net with MIT License | 6 votes |
def reg():
    reg_values = [0.00, 0.01, 0.1, 0.2, 0.3]
    nn_input_dim = 2  # input layer dimensionality
    nn_output_dim = 2  # output layer dimensionality
    learning_rate = 0.01  # learning rate for gradient descent
    losses_store = []
    for i in reg_values:
        reg_lambda = i  # regularization strength
        X, y = datasets.make_moons(200, noise=0.2)
        num_examples = len(X)  # training set size
        model = build_model(X, 32, 2)
        model, losses = train(model, X, y, reg_lambda=reg_lambda,
                              learning_rate=learning_rate)
        losses_store.append(losses)
        print(losses)
    x = np.linspace(0, 145, 30)
    for i in range(len(losses_store)):
        lab = 'regularization_value = ' + str(reg_values[i])
        plt.plot(x, losses_store[i], label=lab)
    plt.legend()
    plt.show()
Example #6
Source File: tests.py From numpy_neural_net with MIT License | 6 votes |
def num_observations():
    obs_values = [10, 100, 1000]
    nn_input_dim = 2  # input layer dimensionality
    nn_output_dim = 2  # output layer dimensionality
    learning_rate = 0.01  # learning rate for gradient descent
    reg_lambda = 0.01  # regularization strength
    losses_store = []
    for i in obs_values:
        X, y = datasets.make_moons(i, noise=0.1)
        num_examples = len(X)  # training set size
        model = build_model(X, 32, 2)
        model, losses = train(model, X, y, reg_lambda=reg_lambda,
                              learning_rate=learning_rate)
        losses_store.append(losses)
        print(losses)
    x = np.linspace(0, 145, 30)
    for i in range(len(losses_store)):
        lab = 'n_observations = ' + str(obs_values[i])
        plt.plot(x, losses_store[i], label=lab)
    plt.legend()
    plt.show()
Example #7
Source File: kernel_pca.py From intro_ds with Apache License 2.0 | 6 votes |
def runKernelPCA():
    """
    Reduce the data's dimensionality using kernel PCA
    """
    data, labels = make_moons(n_samples=100, noise=0.05)
    fig = plt.figure(figsize=(10, 10), dpi=80)
    # Visualize the original data
    ax = fig.add_subplot(2, 2, 1)
    visualizeKernelPCA(ax, data, labels)
    # Reduce the data with plain PCA and visualize the result
    ax = fig.add_subplot(2, 2, 2)
    model = trainPCA(data)
    x = model.transform(data)[:, 0]
    visualizeKernelPCA(ax, np.c_[x, [0] * len(x)], labels)
    # Reduce the data with kernel PCA and visualize the result
    ax = fig.add_subplot(2, 2, 3)
    model = trainKernelPCA(data)
    x = model.transform(data)[:, 0]
    visualizeKernelPCA(ax, np.c_[x, [0] * len(x)], labels)
    # Show the data projected onto the first and second kernel PCA components
    ax = fig.add_subplot(2, 2, 4)
    visualizeKernelPCA(ax, model.transform(data), labels)
    plt.show()
Example #8
Source File: experiments_moons.py From domain_adversarial_neural_network with BSD 2-Clause "Simplified" License | 5 votes |
def make_trans_moons(theta=40, nb=100, noise=.05):
    from math import cos, sin, pi

    X, y = make_moons(nb, noise=noise, random_state=1)
    Xt, yt = make_moons(nb, noise=noise, random_state=2)

    trans = -np.mean(X, axis=0)
    X = 2 * (X + trans)
    Xt = 2 * (Xt + trans)

    theta = -theta * pi / 180
    rotation = np.array([[cos(theta), sin(theta)],
                         [-sin(theta), cos(theta)]])
    Xt = np.dot(Xt, rotation.T)

    return X, y, Xt, yt
Example #9
Source File: test_samples_generator.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_make_moons():
    X, y = make_moons(3, shuffle=False)
    for x, label in zip(X, y):
        center = [0.0, 0.0] if label == 0 else [1.0, 0.5]
        dist_sqr = ((x - center) ** 2).sum()
        assert_almost_equal(dist_sqr, 1.0,
                            err_msg="Point is not on expected unit circle")
Example #10
Source File: dbscan.py From ML-From-Scratch with MIT License | 5 votes |
def main():
    # Load the dataset
    X, y = datasets.make_moons(n_samples=300, noise=0.08, shuffle=False)

    # Cluster the data using DBSCAN
    clf = DBSCAN(eps=0.17, min_samples=5)
    y_pred = clf.predict(X)

    # Project the data onto the 2 primary principal components
    p = Plot()
    p.plot_in_2d(X, y_pred, title="DBSCAN")
    p.plot_in_2d(X, y, title="Actual Clustering")
Example #11
Source File: three_layer_network.py From numpy_neural_net with MIT License | 5 votes |
def main():
    # toy dataset
    X, y = datasets.make_moons(16, noise=0.10)
    num_examples = len(X)  # training set size
    nn_input_dim = 2  # input layer dimensionality
    nn_output_dim = 2  # output layer dimensionality
    learning_rate = 0.01  # learning rate for gradient descent
    reg_lambda = 0.01  # regularization strength
    model = build_model(X, 20, 2)
    model, losses = train(model, X, y, reg_lambda=reg_lambda,
                          learning_rate=learning_rate)
    output = feed_forward(model, X)
    preds = np.argmax(output[3], axis=1)
Example #12
Source File: four_layer_network.py From numpy_neural_net with MIT License | 5 votes |
def main():
    # toy dataset
    X, y = datasets.make_moons(16, noise=0.10)
    num_examples = len(X)  # training set size
    nn_input_dim = 2  # input layer dimensionality
    nn_output_dim = 2  # output layer dimensionality
    learning_rate = 0.01  # learning rate for gradient descent
    reg_lambda = 0.01  # regularization strength
    model = build_model(X, 20, 2)
    model, losses = train(model, X, y, reg_lambda=reg_lambda,
                          learning_rate=learning_rate)
    output = feed_forward(model, X)
    preds = np.argmax(output[3], axis=1)
Example #13
Source File: classification_example.py From intro_ds with Apache License 2.0 | 5 votes |
def generateData(n):
    """
    Randomly generate training data
    """
    X, Y = make_moons(n_samples=n, noise=0.05, random_state=2046)
    data = np.concatenate((Y.reshape(-1, 1), X), axis=1)
    data = pd.DataFrame(data, columns=["y", "x1", "x2"])
    return data
Example #14
Source File: kernel_pca.py From intro_ds with Apache License 2.0 | 5 votes |
def generateData(n):
    """
    Generate linear and nonlinear data
    """
    x = np.linspace(-5, 5, n)
    error = np.random.randn(n)
    y = 1 * x + error
    linear = np.c_[x, y]
    nonLinear, _ = make_moons(n_samples=n, noise=0.05)
    return linear, nonLinear
Example #15
Source File: gmm_vs_spectral.py From intro_ds with Apache License 2.0 | 5 votes |
def generateMoons(n):
    """
    Generate moon-shaped data
    """
    data, _ = make_moons(n_samples=n, noise=0.08)
    return data
Example #16
Source File: classification_example.py From intro_ds with Apache License 2.0 | 5 votes |
def generateData(n):
    """
    Generate four toy datasets: blobs, circles, moons, and blocks
    """
    np.random.seed(12046)
    blobs = make_blobs(n_samples=n, centers=[[-2, -2], [2, 2]])
    circles = make_circles(n_samples=n, factor=.4, noise=.05)
    moons = make_moons(n_samples=n, noise=.05)
    blocks = np.random.rand(n, 2) - 0.5
    y = (blocks[:, 0] * blocks[:, 1] < 0) + 0
    blocks = (blocks, y)
    # Neural networks are unstable under linear transformations of the data,
    # so standardize it
    scaler = StandardScaler()
    blobs = (scaler.fit_transform(blobs[0]), blobs[1])
    circles = (scaler.fit_transform(circles[0]), circles[1])
    moons = (scaler.fit_transform(moons[0]), moons[1])
    blocks = (scaler.fit_transform(blocks[0]), blocks[1])
    return blobs, circles, moons, blocks
Example #17
Source File: moons.py From pytorch-flows with MIT License | 5 votes |
def load_data():
    x = ds.make_moons(n_samples=30000, shuffle=True, noise=0.05)[0]
    # Split the 30000 points into chunks of 24000 / 3000 / 3000
    return x[:24000], x[24000:27000], x[27000:]
Example #18
Source File: test_dbscan.py From dislib with Apache License 2.0 | 5 votes |
def test_n_clusters_moons_grid(self):
    """ Tests that DBSCAN finds the correct number of clusters when
    setting n_regions > 1 with moon data. """
    n_samples = 1500
    x, y = make_moons(n_samples=n_samples, noise=.05)
    dbscan = DBSCAN(n_regions=4, eps=.3, max_samples=600)
    x = StandardScaler().fit_transform(x)
    ds_x = ds.array(x, block_size=(300, 2))
    dbscan.fit(ds_x)
    self.assertEqual(dbscan.n_clusters, 2)
Example #19
Source File: test_dbscan.py From dislib with Apache License 2.0 | 5 votes |
def test_n_clusters_moons_max_samples(self):
    """ Tests that DBSCAN finds the correct number of clusters when
    defining max_samples with moon data. """
    n_samples = 1500
    x, y = make_moons(n_samples=n_samples, noise=.05)
    dbscan = DBSCAN(n_regions=1, eps=.3, max_samples=500)
    x = StandardScaler().fit_transform(x)
    ds_x = ds.array(x, block_size=(300, 2))
    dbscan.fit(ds_x)
    self.assertEqual(dbscan.n_clusters, 2)
Example #20
Source File: test_dbscan.py From dislib with Apache License 2.0 | 5 votes |
def test_n_clusters_moons(self):
    """ Tests that DBSCAN finds the correct number of clusters with
    moon data. """
    n_samples = 1500
    x, y = make_moons(n_samples=n_samples, noise=.05)
    dbscan = DBSCAN(n_regions=1, eps=.3)
    x = StandardScaler().fit_transform(x)
    ds_x = ds.array(x, block_size=(300, 2))
    dbscan.fit(ds_x)
    self.assertEqual(dbscan.n_clusters, 2)
Example #21
Source File: test_dbcb.py From DBCV with MIT License | 5 votes |
def data():
    n_samples = 60
    noisy_moons = datasets.make_moons(n_samples=n_samples, noise=.05,
                                      random_state=1)
    X = noisy_moons[0]
    return X
Example #22
Source File: profiler.py From DBCV with MIT License | 5 votes |
def generate_data(n_samples=300, noise=0.05):
    noisy_moons = datasets.make_moons(n_samples=n_samples, noise=noise)
    X = noisy_moons[0]
    return X
Example #23
Source File: app.py From dash-svm with MIT License | 5 votes |
def generate_data(n_samples, dataset, noise):
    if dataset == 'moons':
        return datasets.make_moons(
            n_samples=n_samples,
            noise=noise,
            random_state=0
        )
    elif dataset == 'circles':
        return datasets.make_circles(
            n_samples=n_samples,
            noise=noise,
            factor=0.5,
            random_state=1
        )
    elif dataset == 'linear':
        X, y = datasets.make_classification(
            n_samples=n_samples,
            n_features=2,
            n_redundant=0,
            n_informative=2,
            random_state=2,
            n_clusters_per_class=1
        )
        rng = np.random.RandomState(2)
        X += noise * rng.uniform(size=X.shape)
        linearly_separable = (X, y)
        return linearly_separable
    else:
        raise ValueError(
            'Data type incorrectly specified. Please choose an existing '
            'dataset.')
Example #24
Source File: test_examples.py From gplearn with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_classifier_comparison():
    """Test the classifier comparison example works"""
    X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                               random_state=1, n_clusters_per_class=1)
    rng = np.random.RandomState(2)
    X += 2 * rng.uniform(size=X.shape)
    linearly_separable = (X, y)
    datasets = [make_moons(noise=0.3, random_state=0),
                make_circles(noise=0.2, factor=0.5, random_state=1),
                linearly_separable]
    scores = []
    for ds in datasets:
        X, y = ds
        X = StandardScaler().fit_transform(X)
        X_train, X_test, y_train, y_test = \
            train_test_split(X, y, test_size=.4, random_state=42)
        clf = SymbolicClassifier(random_state=0)
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        scores.append(('%.2f' % score).lstrip('0'))
    assert_equal(scores, ['.95', '.93', '.95'])
Example #25
Source File: half_moon.py From auxiliary-deep-generative-models with MIT License | 5 votes |
def _download():
    train_x, train_t = make_moons(n_samples=10000, shuffle=True, noise=0.2,
                                  random_state=1234)
    test_x, test_t = make_moons(n_samples=10000, shuffle=True, noise=0.2,
                                random_state=1234)
    valid_x, valid_t = make_moons(n_samples=10000, shuffle=True, noise=0.2,
                                  random_state=1234)

    train_x += np.abs(train_x.min())
    test_x += np.abs(test_x.min())
    valid_x += np.abs(valid_x.min())

    train_set = (train_x, train_t)
    test_set = (test_x, test_t)
    valid_set = (valid_x, valid_t)
    return train_set, test_set, valid_set
Example #26
Source File: test_base.py From carl with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_as_classifier():
    X, y = make_moons(n_samples=100, random_state=1)
    y = 2 * y - 1  # use -1/+1 labels

    clf = as_classifier(DecisionTreeRegressor())
    clf.fit(X, y)
    probas = clf.predict_proba(X)
    predictions = clf.predict(X)
    assert_array_equal(probas.shape, (len(X), 2))
    assert_array_equal(predictions, y)

    y[-1] = 2
    clf = as_classifier(DecisionTreeRegressor())
    assert_raises(ValueError, clf.fit, X, y)
Example #27
Source File: test_samples_generator.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_make_moons():
    X, y = make_moons(3, shuffle=False)
    for x, label in zip(X, y):
        center = [0.0, 0.0] if label == 0 else [1.0, 0.5]
        dist_sqr = ((x - center) ** 2).sum()
        assert_almost_equal(dist_sqr, 1.0,
                            err_msg="Point is not on expected unit circle")
Example #28
Source File: test_blas.py From FATE with Apache License 2.0 | 4 votes |
def test_plain_lr():
    from sklearn.datasets import make_moons
    import functools

    # Change the flow_id, otherwise the in-memory table may be overwritten
    session.init(mode=0)
    ns = str(uuid.uuid1())
    X = session.table('testX7', ns, partition=2)
    Y = session.table('testY7', ns, partition=2)

    b = np.array([0])
    eta = 1.2
    max_iter = 10
    total_num = 500

    _x, _y = make_moons(total_num, noise=0.25, random_state=12345)
    for i in range(np.shape(_y)[0]):
        X.put(i, _x[i])
        Y.put(i, _y[i])
    print(len([y for y in Y.collect()]))

    current_milli_time = lambda: int(round(time.time() * 1000))
    start = current_milli_time()

    # shape_w = [1, np.shape(_x)[1]]
    shape_w = [np.shape(_x)[1]]
    w = np.ones(shape_w)
    print(w)

    X = TensorInEgg(None, None, X)
    Y = TensorInEgg(None, None, Y)
    w = TensorInPy(None, None, w)
    b = TensorInPy(None, None, b)

    # lr = LR(shape_w)
    # lr.train(X, Y)
    itr = 0
    while itr < max_iter:
        # H = 1 / X
        H = 1.0 / (1 + ((X @ w + b) * -1).map(np.exp))
        R = H - Y

        gradient_w = (R * X).sum() / total_num
        gradient_b = R.sum() / total_num
        w = w - eta * gradient_w
        b = b - eta * gradient_b
        print("aaa", w, b)
        # self.plot(itr)
        itr += 1

    print("train total time: {}".format(current_milli_time() - start))

    _x_test, _y_test = make_moons(50, random_state=12345)
    _x_test = TensorInPy(None, None, _x_test)
    y_pred = 1.0 / (1 + ((_x_test @ w + b) * -1).map(np.exp))

    from sklearn import metrics
    auc = metrics.roc_auc_score(_y_test, y_pred.store.reshape(50))
    print("auc: {}".format(auc))
Example #29
Source File: icnn.py From icnn with Apache License 2.0 | 4 votes |
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--save', type=str, default='work')
    parser.add_argument('--nEpoch', type=int, default=100)
    # parser.add_argument('--testBatchSz', type=int, default=2048)
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--model', type=str, default="picnn",
                        choices=['picnn', 'ficnn'])
    parser.add_argument('--dataset', type=str, default="moons",
                        choices=['moons', 'circles', 'linear'])
    parser.add_argument('--noncvx', action='store_true')
    args = parser.parse_args()

    npr.seed(args.seed)
    tf.set_random_seed(args.seed)

    setproctitle.setproctitle('bamos.icnn.synthetic.{}.{}'.format(
        args.model, args.dataset))

    save = os.path.join(os.path.expanduser(args.save),
                        "{}.{}".format(args.model, args.dataset))
    if os.path.isdir(save):
        shutil.rmtree(save)
    os.makedirs(save, exist_ok=True)

    if args.dataset == "moons":
        (dataX, dataY) = make_moons(noise=0.3, random_state=0)
    elif args.dataset == "circles":
        (dataX, dataY) = make_circles(noise=0.2, factor=0.5, random_state=0)
        dataY = 1. - dataY
    elif args.dataset == "linear":
        (dataX, dataY) = make_classification(n_features=2, n_redundant=0,
                                             n_informative=2, random_state=1,
                                             n_clusters_per_class=1)
        rng = np.random.RandomState(2)
        dataX += 2 * rng.uniform(size=dataX.shape)
    else:
        assert(False)

    dataY = dataY.reshape((-1, 1)).astype(np.float32)

    nData = dataX.shape[0]
    nFeatures = dataX.shape[1]
    nLabels = 1
    nXy = nFeatures + nLabels

    config = tf.ConfigProto()  # log_device_placement=False
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = Model(nFeatures, nLabels, sess, args.model, nGdIter=30)
        model.train(args, dataX, dataY)
Example #30
Source File: test_clustering.py From nussl with MIT License | 4 votes |
def cluster_data():
    np.random.seed(0)

    # ============
    # Generate datasets. We choose the size big enough to see the scalability
    # of the algorithms, but not too big to avoid too long running times
    # ============
    n_samples = 1500
    noisy_circles = datasets.make_circles(n_samples=n_samples, factor=.5,
                                          noise=.05)
    noisy_moons = datasets.make_moons(n_samples=n_samples, noise=.05)
    blobs = datasets.make_blobs(n_samples=n_samples, random_state=8)

    # Anisotropicly distributed data
    random_state = 170
    X, y = datasets.make_blobs(n_samples=n_samples, random_state=random_state)
    transformation = [[0.6, -0.6], [-0.4, 0.8]]
    X_aniso = np.dot(X, transformation)
    aniso = (X_aniso, y)

    # blobs with varied variances
    varied = datasets.make_blobs(n_samples=n_samples,
                                 cluster_std=[1.0, 2.5, 0.5],
                                 random_state=random_state)

    default_base = {'quantile': .3,
                    'eps': .3,
                    'damping': .9,
                    'preference': -200,
                    'n_neighbors': 10,
                    'n_clusters': 3,
                    'min_samples': 20,
                    'xi': 0.05,
                    'min_cluster_size': 0.1}

    data = [
        ('noisy_circles', noisy_circles, {'damping': .77, 'preference': -240,
                                          'quantile': .2, 'n_clusters': 2,
                                          'min_samples': 20, 'xi': 0.25}),
        ('noisy_moons', noisy_moons, {'damping': .75, 'preference': -220,
                                      'n_clusters': 2}),
        ('varied', varied, {'eps': .18, 'n_neighbors': 2, 'min_samples': 5,
                            'xi': 0.035, 'min_cluster_size': .2}),
        ('aniso', aniso, {'eps': .15, 'n_neighbors': 2, 'min_samples': 20,
                          'xi': 0.1, 'min_cluster_size': .2}),
        ('blobs', blobs, {}),
    ]
    yield data, default_base