Python sklearn.datasets.load_digits() Examples
The following are 30 code examples of sklearn.datasets.load_digits().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.datasets, or try the search function.
Example #1
Source File: label_digits.py From libact with BSD 2-Clause "Simplified" License | 7 votes |
def split_train_test(n_classes):
    """Build a partially labeled train/test split of the digits data.

    The data is re-split until the first ``n_labeled`` training targets
    contain every one of the ``n_classes`` classes; only those first
    ``n_labeled`` training samples keep their label, the rest are ``None``.

    Parameters
    ----------
    n_classes : int
        Number of digit classes to load (passed to ``load_digits`` as
        ``n_class``).

    Returns
    -------
    trn_ds : Dataset
        Training data with only the first ``n_labeled`` entries labeled.
    tst_ds : Dataset
        Fully labeled test data.
    digits : Bunch
        The object returned by ``load_digits``.

    Raises
    ------
    ValueError
        If ``n_classes`` exceeds the number of initially labeled samples,
        because the re-split loop could then never terminate.
    """
    from sklearn.datasets import load_digits

    n_labeled = 5
    if n_classes > n_labeled:
        # n_labeled samples can hold at most n_labeled distinct classes, so
        # the coverage condition below would never become true.
        raise ValueError(
            "n_classes (%d) must not exceed the number of initially "
            "labeled samples (%d)" % (n_classes, n_labeled))

    digits = load_digits(n_class=n_classes)  # consider binary case
    X = digits.data
    y = digits.target
    print(np.shape(X))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
    # Keep re-splitting until the labeled prefix covers every class.
    while len(np.unique(y_train[:n_labeled])) < n_classes:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33)

    partial_labels = np.concatenate(
        [y_train[:n_labeled], [None] * (len(y_train) - n_labeled)])
    trn_ds = Dataset(X_train, partial_labels)
    tst_ds = Dataset(X_test, y_test)

    return trn_ds, tst_ds, digits
Example #2
Source File: train_model.py From production-tools with BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_mnist_data():
    """Load the digits data set into memory with binarized labels.

    Despite the function name, the data comes from ``load_digits()``.

    Returns
    -------
    X : array-like, shape=[n_samples, n_features]
        Feature matrix taken from the loaded data set.

    y : array-like
        The targets after ``LabelBinarizer().fit_transform``.
    """
    bunch = load_digits()
    features = bunch.data
    labels = LabelBinarizer().fit_transform(bunch.target)
    return features, labels
Example #3
Source File: test_pca.py From mars with Apache License 2.0 | 6 votes |
def test_pca_score_with_different_solvers(self):
    """Fit PCA with every solver in ``self.solver_list`` and compare scores."""
    X_digits = mt.tensor(datasets.load_digits().data)

    models = {}
    for solver in self.solver_list:
        models[solver] = PCA(n_components=30, svd_solver=solver,
                             random_state=0)

    for model in models.values():
        model.fit(X_digits)
        # Sanity check for the noise_variance_. For more details see
        # https://github.com/scikit-learn/scikit-learn/issues/7568
        # https://github.com/scikit-learn/scikit-learn/issues/8541
        # https://github.com/scikit-learn/scikit-learn/issues/8544
        variance_gap = model.explained_variance_ - model.noise_variance_
        assert mt.all(variance_gap >= 0).to_numpy()

    # Compare scores with different svd_solvers
    scores = {}
    for solver, model in models.items():
        scores[solver] = model.score(X_digits).to_numpy()
    assert_almost_equal(scores['full'], scores['randomized'], decimal=3)
Example #4
Source File: random_forest.py From ML-From-Scratch with MIT License | 6 votes |
def main():
    """Train a random forest on the digits data, print accuracy and plot."""
    data = datasets.load_digits()
    features, labels = data.data, data.target

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.4, seed=2)

    forest = RandomForest(n_estimators=100)
    forest.fit(X_train, y_train)
    predictions = forest.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print("Accuracy:", accuracy)

    Plot().plot_in_2d(
        X_test,
        predictions,
        title="Random Forest",
        accuracy=accuracy,
        legend_labels=data.target_names,
    )
Example #5
Source File: multilayer_perceptron.py From ML-From-Scratch with MIT License | 6 votes |
def main():
    """Train a multilayer perceptron on the digits data and plot the result.

    The targets are converted with ``to_categorical`` for training; both
    predictions and test targets are mapped back to class indices with
    ``argmax`` before the accuracy is computed and printed.
    """
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # Convert the nominal y values to binary
    y = to_categorical(y)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, seed=1)

    # MLP
    clf = MultilayerPerceptron(n_hidden=16,
                               n_iterations=1000,
                               learning_rate=0.01)
    clf.fit(X_train, y_train)

    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results.
    # `y` is the to_categorical matrix at this point, so np.unique(y) would
    # give its binary indicator values rather than the digit classes; use
    # the data set's own class names for the legend instead.
    Plot().plot_in_2d(X_test, y_pred, title="Multilayer Perceptron",
                      accuracy=accuracy, legend_labels=data.target_names)
Example #6
Source File: naive_bayes.py From ML-From-Scratch with MIT License | 6 votes |
def main():
    """Fit naive Bayes on the normalized digits data, print accuracy, plot."""
    data = datasets.load_digits()
    features = normalize(data.data)
    labels = data.target

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.4)

    model = NaiveBayes()
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(
        X_test,
        predictions,
        title="Naive Bayes",
        accuracy=accuracy,
        legend_labels=data.target_names,
    )
Example #7
Source File: test_model_selection.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_train_test_split(self):
    """Check that the ModelFrame split keeps columns, sizes and target name."""
    frame = pdml.ModelFrame(datasets.load_digits())
    self.assertIsInstance(frame, pdml.ModelFrame)

    train_df, test_df = frame.model_selection.train_test_split()
    for part in (train_df, test_df):
        tm.assert_index_equal(frame.columns, part.columns)

    # Row counts must add up and the column count must be unchanged.
    self.assertEqual(len(frame), len(train_df) + len(test_df))
    for part in (train_df, test_df):
        self.assertEqual(frame.shape[1], part.shape[1])

    # The column comparison is repeated here, as in the original test.
    for part in (train_df, test_df):
        tm.assert_index_equal(frame.columns, part.columns)

    # A customised target name has to survive the split.
    renamed = pdml.ModelFrame(datasets.load_digits())
    renamed.target_name = 'xxx'
    train_df, test_df = renamed.model_selection.train_test_split()
    for part in (train_df, test_df):
        tm.assert_index_equal(renamed.columns, part.columns)
    for part in (train_df, test_df):
        self.assertEqual(part.target_name, 'xxx')
Example #8
Source File: test_model_selection.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_validation_curve(self):
    """Compare the ModelFrame validation_curve with the plain sklearn call."""
    digits = datasets.load_digits()
    df = pdml.ModelFrame(digits)

    gammas = np.logspace(-2, -1, 2)

    frame_svc = df.svm.SVC(random_state=self.random_state)
    result = df.model_selection.validation_curve(frame_svc, 'gamma', gammas)

    reference_svc = svm.SVC(random_state=self.random_state)
    expected = ms.validation_curve(reference_svc, digits.data, digits.target,
                                   'gamma', gammas)

    self.assertEqual(len(result), 2)
    for position in (0, 1):
        self.assert_numpy_array_almost_equal(result[position],
                                             expected[position])
Example #9
Source File: test_metrics.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def setup_method(self):
    """Fit a LinearSVC through the ModelFrame and directly, and cache results.

    Stores the raw data/target, the fitted ModelFrame, the reference
    predictions and decision values, and a label ordering on ``self``.
    """
    import sklearn.svm as svm

    digits = datasets.load_digits()
    self.data, self.target = digits.data, digits.target
    self.df = pdml.ModelFrame(digits)

    # Estimator obtained through the ModelFrame accessor.
    frame_estimator = self.df.svm.LinearSVC(C=1.0,
                                            random_state=self.random_state)
    self.df.fit(frame_estimator)

    # Equivalent estimator fitted directly on the arrays.
    reference = svm.LinearSVC(C=1.0, random_state=self.random_state)
    reference.fit(self.data, self.target)
    self.pred = reference.predict(self.data)
    self.decision = reference.decision_function(self.data)

    # argument for classification reports
    self.labels = np.array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])
Example #10
Source File: test_sklearn_feature_union.py From sklearn-onnx with MIT License | 6 votes |
def test_feature_union_transformer_weights_2(self):
    """Convert a float32 FeatureUnion whose weight keys are 'pca1'/'svd2'."""
    digits = load_digits()
    features = digits.data.astype(np.float32)
    X_train, X_test, *_ = train_test_split(features, digits.target,
                                           test_size=0.5, random_state=42)

    # NOTE(review): the weight keys differ from the transformer names
    # ('pca', 'svd'); presumably intentional for this test -- confirm
    # against the scikit-learn version in use.
    union = FeatureUnion(
        [('pca', PCA()), ('svd', TruncatedSVD())],
        transformer_weights={'pca1': 10, 'svd2': 3},
    )
    model = union.fit(X_train)

    input_types = [('input', FloatTensorType([None, X_test.shape[1]]))]
    model_onnx = convert_sklearn(model, 'feature union', input_types)
    self.assertTrue(model_onnx is not None)

    dump_data_and_model(
        X_test,
        model,
        model_onnx,
        basename="SklearnFeatureUnionTransformerWeights2-Dec4",
        allow_failure="StrictVersion("
        "onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
Example #11
Source File: test_sklearn_feature_union.py From sklearn-onnx with MIT License | 6 votes |
def test_feature_union_transformer_weights_1(self):
    """Convert an int64 FeatureUnion with weights keyed by transformer name."""
    digits = load_digits()
    features = digits.data.astype(np.int64)
    X_train, X_test, *_ = train_test_split(features, digits.target,
                                           test_size=0.5, random_state=42)

    union = FeatureUnion(
        [('pca', PCA()), ('svd', TruncatedSVD())],
        transformer_weights={'pca': 10, 'svd': 3},
    )
    model = union.fit(X_train)

    input_types = [('input', Int64TensorType([None, X_test.shape[1]]))]
    model_onnx = convert_sklearn(model, 'feature union', input_types)
    self.assertTrue(model_onnx is not None)

    dump_data_and_model(
        X_test,
        model,
        model_onnx,
        basename="SklearnFeatureUnionTransformerWeights1-Dec4",
        allow_failure="StrictVersion("
        "onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
Example #12
Source File: test_sklearn_calibrated_classifier_cv_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_calibrated_classifier_cv_int(self):
    """Convert a sigmoid-calibrated MultinomialNB declared with int64 input."""
    digits = load_digits()
    features, targets = digits.data, digits.target

    base_clf = MultinomialNB().fit(features, targets)
    calibrated = CalibratedClassifierCV(base_clf, cv=2, method="sigmoid")
    model = calibrated.fit(features, targets)

    input_types = [("input", Int64TensorType([None, features.shape[1]]))]
    model_onnx = convert_sklearn(
        model,
        "scikit-learn CalibratedClassifierCVMNB",
        input_types,
        target_opset=TARGET_OPSET
    )
    self.assertTrue(model_onnx is not None)

    dump_data_and_model(
        features.astype(np.int64),
        model,
        model_onnx,
        basename="SklearnCalibratedClassifierCVInt-Dec4",
        allow_failure="StrictVersion(onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
Example #13
Source File: test_sklearn_k_means_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_batchkmeans_clustering_int(self):
    """Convert a 4-cluster MiniBatchKMeans declared with int64 input."""
    features = load_digits().data

    model = MiniBatchKMeans(n_clusters=4)
    model.fit(features)

    input_types = [("input", Int64TensorType([None, features.shape[1]]))]
    model_onnx = convert_sklearn(model, "kmeans", input_types,
                                 target_opset=TARGET_OPSET)
    self.assertIsNotNone(model_onnx)

    # Only rows 40..59 are dumped and compared.
    sample = features.astype(numpy.int64)[40:60]
    dump_data_and_model(
        sample,
        model,
        model_onnx,
        basename="SklearnBatchKMeansInt-Dec4",
        allow_failure="StrictVersion(onnx.__version__)"
        " < StrictVersion('1.2') or "
        "StrictVersion(onnxruntime.__version__) "
        "<= StrictVersion('0.2.1')",
    )
Example #14
Source File: test_sklearn_k_means_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_kmeans_clustering_int(self):
    """Convert a 4-cluster KMeans declared with int64 input."""
    features = load_digits().data

    model = KMeans(n_clusters=4)
    model.fit(features)

    input_types = [("input", Int64TensorType([None, features.shape[1]]))]
    model_onnx = convert_sklearn(model, "kmeans", input_types,
                                 target_opset=TARGET_OPSET)
    self.assertIsNotNone(model_onnx)

    # Only rows 40..59 are dumped and compared.
    sample = features.astype(numpy.int64)[40:60]
    dump_data_and_model(
        sample,
        model,
        model_onnx,
        basename="SklearnKMeansInt-Dec4",
        # Operator gemm is not implemented in onnxruntime
        allow_failure="StrictVersion(onnx.__version__)"
        " < StrictVersion('1.2') or "
        "StrictVersion(onnxruntime.__version__) "
        "<= StrictVersion('0.2.1')",
    )
Example #15
Source File: test_topology_prune.py From sklearn-onnx with MIT License | 6 votes |
def test_dummy_identity(self):
    """Check that a two-step identity pipeline yields two Identity nodes.

    The pipeline is fitted on the first 20 samples of a 6-class digits
    subset, dummy converters are registered for both steps, and the
    converted graph is expected to contain exactly two ``Identity`` nodes.
    """
    digits = datasets.load_digits(n_class=6)
    Xd = digits.data[:20]
    yd = digits.target[:20]

    idtr = make_pipeline(IdentityTransformer(), identity())
    idtr.fit(Xd, yd)

    # Register the dummy shape calculator/converter for both custom steps.
    update_registered_converter(IdentityTransformer, "IdentityTransformer",
                                dummy_shape_calculator, dummy_converter)
    update_registered_converter(identity, "identity",
                                dummy_shape_calculator, dummy_converter)

    model_onnx = convert_sklearn(
        idtr,
        "idtr",
        [("input", FloatTensorType([None, Xd.shape[1]]))],
        target_opset=TARGET_OPSET)

    idnode = [node for node in model_onnx.graph.node
              if node.op_type == "Identity"]
    assert len(idnode) == 2
Example #16
Source File: test_sklearn_pca_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_pca_default_int_randomised(self):
    """Convert a randomized-solver PCA declared with int64 input."""
    digits = load_digits()
    X_train, X_test, *_ = train_test_split(
        digits.data, digits.target, test_size=0.2, random_state=42)

    pca = PCA(random_state=42, svd_solver='randomized', iterated_power=3)
    model = pca.fit(X_train)

    input_types = [("input", Int64TensorType([None, X_test.shape[1]]))]
    model_onnx = convert_sklearn(
        model,
        initial_types=input_types,
    )
    self.assertTrue(model_onnx is not None)

    dump_data_and_model(
        X_test.astype(np.int64),
        model,
        model_onnx,
        basename="SklearnPCADefaultIntRandomised",
        allow_failure="StrictVersion("
        "onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
Example #17
Source File: test_rpforest.py From rpforest with Apache License 2.0 | 6 votes |
def _get_mnist_data(seed=None):
    """Return a shuffled (train, test) split of the flattened digit images.

    The images from ``load_digits()`` are flattened to one row each and
    shuffled in place; the first 100 rows become the test set and the
    remainder the training set.

    :param seed: optional seed for the ``RandomState`` used for shuffling.
    :return: tuple ``(X_train, X_test)``.
    """
    images = load_digits()["images"]

    # RandomState(seed=None) is the same as RandomState(), so a single call
    # covers both the seeded and the unseeded case.
    rng = np.random.RandomState(seed=seed)

    no_img, rows, cols = images.shape
    flat = np.ascontiguousarray(images.reshape((no_img, rows * cols)))
    rng.shuffle(flat)

    return flat[100:], flat[:100]
Example #18
Source File: datasets.py From pyDML with GNU General Public License v3.0 | 6 votes |
def digits_reduced():
    """Return a three-feature summary of every digits image plus its label.

    Each sample is reshaped to 8x8 and reduced to three values: the results
    of ``simetria_hor`` and ``simetria_ver`` (presumably horizontal and
    vertical symmetry measures -- confirm against their definitions) and
    the mean pixel value.

    Returns
    -------
    X : ndarray, shape (n_samples, 3)
        The reduced features.
    y : ndarray
        The digit targets.
    """
    data = load_digits()
    images = data['data']
    y = data['target']

    n_samples = images.shape[0]
    images = images.reshape([n_samples, 8, 8])

    X = np.empty([n_samples, 3])
    # `range` replaces `xrange`, which is not defined on Python 3.
    for i in range(n_samples):
        X[i, 0] = simetria_hor(images[i, :, :])
        X[i, 1] = simetria_ver(images[i, :, :])
        X[i, 2] = np.mean(images[i, :])

    return X, y


### ARFF dataframes ###
Example #19
Source File: datasets.py From pyDML with GNU General Public License v3.0 | 6 votes |
def digits_reduced():
    """Return a three-feature summary of every digits image plus its label.

    Each sample is reshaped to 8x8 and reduced to three values: the results
    of ``simetria_hor`` and ``simetria_ver`` (presumably horizontal and
    vertical symmetry measures -- confirm against their definitions) and
    the mean pixel value.

    Returns
    -------
    X : ndarray, shape (n_samples, 3)
        The reduced features.
    y : ndarray
        The digit targets.
    """
    data = load_digits()
    images = data['data']
    y = data['target']

    n_samples = images.shape[0]
    images = images.reshape([n_samples, 8, 8])

    X = np.empty([n_samples, 3])
    # `range` replaces `xrange`, which is not defined on Python 3.
    for i in range(n_samples):
        X[i, 0] = simetria_hor(images[i, :, :])
        X[i, 1] = simetria_ver(images[i, :, :])
        X[i, 2] = np.mean(images[i, :])

    return X, y


### ARFF dataframes ###
Example #20
Source File: test_model_selection.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_cross_val_score(self):
    """Compare the ModelFrame cross_val_score with the plain sklearn call."""
    import sklearn.svm as svm

    digits = datasets.load_digits()
    frame = pdml.ModelFrame(digits)

    classifier = svm.SVC(kernel=str('linear'), C=1)

    result = frame.model_selection.cross_val_score(classifier, cv=5)
    expected = ms.cross_val_score(
        classifier, X=digits.data, y=digits.target, cv=5)

    self.assert_numpy_array_almost_equal(result, expected)
Example #21
Source File: datasets.py From pyDML with GNU General Public License v3.0 | 5 votes |
def digits(numbers=None):
    """Return the digits samples whose label is one of ``numbers``.

    Parameters
    ----------
    numbers : sequence of int, optional
        Labels to keep. ``None`` (the default) keeps all ten digits.

    Returns
    -------
    X : ndarray
        Rows of the data matrix whose target is in ``numbers``.
    y : ndarray
        The matching targets.
    """
    data = load_digits()  # DIGITS
    X = data['data']
    y = data['target']

    if numbers is None:
        numbers = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

    selected = np.where(np.isin(y, numbers))[0]
    # The original had a second, leftover `return X,y` after this one.
    return X[selected, :], y[selected]
Example #22
Source File: nn.py From L2L with GNU General Public License v3.0 | 5 votes |
def main():
    """Score a randomly initialised NeuralNetworkClassifier and print it.

    NOTE(review): ``fetch_mldata`` was deprecated and later removed from
    scikit-learn; on recent versions this import fails -- confirm the
    pinned scikit-learn version.
    """
    from sklearn.datasets import load_digits, fetch_mldata

    SMALL_MNIST = False

    if SMALL_MNIST:
        mnist_digits = load_digits()
        n_input = np.prod(mnist_digits.images.shape[1:])
        n_images = len(mnist_digits.images)  # 1797
        # scale pixel values by 16 -> 1797 x 64
        data_images = mnist_digits.images.reshape(n_images, -1) / 16.
        data_targets = mnist_digits.target
        # im_size_x, im_size_y = 8, 8
    else:
        mnist_digits = fetch_mldata('MNIST original')
        n_input = np.prod(mnist_digits.data.shape[1:])
        # scale pixel values by 255 -> 70000 x 784
        data_images = mnist_digits.data / 255.
        data_targets = mnist_digits.target
        # im_size_x, im_size_y = 28, 28

    n_hidden, n_output = 5, 10
    nn = NeuralNetworkClassifier(n_input, n_hidden, n_output)

    # Draw one standard-normal array per weight shape, in order.
    weights = [np.random.randn(*shape) for shape in nn.get_weights_shapes()]
    nn.set_weights(*weights)

    score = nn.score(data_images, data_targets)
    print("Score is: ", score)
Example #23
Source File: optimizee.py From L2L with GNU General Public License v3.0 | 5 votes |
def __init__(self, traj, parameters):
    """Load the image data, build the network and register the individual.

    :param traj: trajectory passed to the parent constructor; the
        individual's parameters and the seed are added to it.
    :param parameters: object providing ``use_small_mnist``, ``seed`` and
        ``n_hidden``.
    """
    super().__init__(traj)

    if parameters.use_small_mnist:
        # 8 x 8 images
        mnist_digits = load_digits()
        n_input = np.prod(mnist_digits.images.shape[1:])
        n_images = len(mnist_digits.images)  # 1797
        # -> 1797 x 64
        data_images = mnist_digits.images.reshape(n_images, -1) / 16.
        data_targets = mnist_digits.target
    else:
        # 28 x 28 images
        mnist_digits = fetch_mldata('MNIST original')
        n_input = np.prod(mnist_digits.data.shape[1:])
        data_images = mnist_digits.data / 255.  # -> 70000 x 784
        n_images = len(data_images)
        data_targets = mnist_digits.target

    self.n_images = n_images
    self.data_images = data_images
    self.data_targets = data_targets

    n_hidden = parameters.n_hidden
    seed = np.uint32(parameters.seed)
    self.random_state = np.random.RandomState(seed=seed)

    n_output = 10  # This is always true for mnist
    self.nn = NeuralNetworkClassifier(n_input, n_hidden, n_output)

    # NOTE(review): the random state is created a second time with the same
    # seed, exactly as in the original; it looks redundant -- confirm.
    self.random_state = np.random.RandomState(seed=seed)

    # create_individual can be called because __init__ is complete except
    # for traj initialization
    for key, val in self.create_individual().items():
        traj.individual.f_add_parameter(key, val)
    traj.individual.f_add_parameter('seed', seed)
Example #24
Source File: adaboost.py From ML-From-Scratch with MIT License | 5 votes |
def main():
    """Train Adaboost to separate digits 1 and 8, print accuracy and plot."""
    data = datasets.load_digits()

    digit1, digit2 = 1, 8
    targets = data.target
    first_rows = np.where(targets == digit1)[0]
    second_rows = np.where(targets == digit2)[0]
    idx = np.append(first_rows, second_rows)

    y = data.target[idx]
    # Change labels to {-1, 1}; the order matters because digit1 is 1,
    # which is also the new label given to digit2.
    y[y == digit1] = -1
    y[y == digit2] = 1
    X = data.data[idx]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification with 5 weak classifiers
    booster = Adaboost(n_clf=5)
    booster.fit(X_train, y_train)
    predictions = booster.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print("Accuracy:", accuracy)

    # Reduce dimensions to 2d using pca and plot the results
    Plot().plot_in_2d(X_test, predictions, title="Adaboost",
                      accuracy=accuracy)
Example #25
Source File: principal_component_analysis.py From ML-From-Scratch with MIT License | 5 votes |
def main():
    """Project the digits data onto two principal components and plot it.

    Demo of how to reduce the dimensionality of the data to two dimensions
    and plot the results, one scatter series per class.
    """
    # Load the dataset
    data = datasets.load_digits()
    X = data.data
    y = data.target

    # Project the data onto the 2 primary principal components
    X_trans = PCA().transform(X, 2)

    x1 = X_trans[:, 0]
    x2 = X_trans[:, 1]

    class_labels = np.unique(y)
    cmap = plt.get_cmap('viridis')
    colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]

    # Plot the different class distributions
    class_distr = []
    for i, label in enumerate(class_labels):
        in_class = y == label
        class_distr.append(
            plt.scatter(x1[in_class], x2[in_class], color=colors[i]))

    # Add a legend: one entry per class, labelled with the class value
    # (the original passed the whole per-sample target vector here).
    plt.legend(class_distr, class_labels, loc=1)

    # Axis labels
    plt.suptitle("PCA Dimensionality Reduction")
    plt.title("Digit Dataset")
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.show()
Example #26
Source File: perceptron.py From ML-From-Scratch with MIT License | 5 votes |
def main():
    """Train a perceptron on the digits data and plot the result.

    The targets are converted with ``to_categorical`` for training; both
    predictions and test targets are mapped back to class indices with
    ``argmax`` before the accuracy is computed and printed.
    """
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # One-hot encoding of nominal y-values
    y = to_categorical(y)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, seed=1)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
                     learning_rate=0.001,
                     loss=CrossEntropy,
                     activation_function=Sigmoid)
    clf.fit(X_train, y_train)

    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results.
    # `y` is one-hot encoded at this point, so np.unique(y) would give its
    # binary indicator values rather than the digit classes; use the data
    # set's own class names for the legend instead.
    Plot().plot_in_2d(X_test, y_pred, title="Perceptron",
                      accuracy=accuracy, legend_labels=data.target_names)
Example #27
Source File: test_utils.py From pyDML with GNU General Public License v3.0 | 5 votes |
def digits_data():
    """Return the ``(data, target)`` arrays of the digits data set."""
    bunch = load_digits()  # DIGITS
    return bunch['data'], bunch['target']
Example #28
Source File: datasets.py From pyDML with GNU General Public License v3.0 | 5 votes |
def digits(numbers=None):
    """Return the digits samples whose label is one of ``numbers``.

    Parameters
    ----------
    numbers : sequence of int, optional
        Labels to keep. ``None`` (the default) keeps all ten digits.

    Returns
    -------
    X : ndarray
        Rows of the data matrix whose target is in ``numbers``.
    y : ndarray
        The matching targets.
    """
    data = load_digits()  # DIGITS
    X = data['data']
    y = data['target']

    if numbers is None:
        numbers = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

    selected = np.where(np.isin(y, numbers))[0]
    # The original had a second, leftover `return X,y` after this one.
    return X[selected, :], y[selected]
Example #29
Source File: test_sparse.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_unsorted_indices():
    """Check that CSR input with unsorted indices gives the same SVC result."""
    # test that the result with sorted and unsorted indices in csr is the same
    # we use a subset of digits as iris, blobs or make_classification didn't
    # show the problem

    def make_svc():
        # Every model in this test uses the same configuration.
        return svm.SVC(kernel='linear', probability=True, random_state=0)

    digits = load_digits()
    X, y = digits.data[:50], digits.target[:50]
    X_test = sparse.csr_matrix(digits.data[50:100])
    X_sparse = sparse.csr_matrix(X)

    coef_dense = make_svc().fit(X, y).coef_
    sparse_svc = make_svc().fit(X_sparse, y)
    coef_sorted = sparse_svc.coef_
    # make sure dense and sparse SVM give the same result
    assert_array_almost_equal(coef_dense, coef_sorted.toarray())

    train_rows = np.arange(X.shape[0])
    test_rows = np.arange(X_test.shape[0])
    X_sparse_unsorted = X_sparse[train_rows]
    X_test_unsorted = X_test[test_rows]

    # make sure we scramble the indices
    assert_false(X_sparse_unsorted.has_sorted_indices)
    assert_false(X_test_unsorted.has_sorted_indices)

    unsorted_svc = make_svc().fit(X_sparse_unsorted, y)
    coef_unsorted = unsorted_svc.coef_
    # make sure unsorted indices give same result
    assert_array_almost_equal(coef_unsorted.toarray(), coef_sorted.toarray())
    assert_array_almost_equal(sparse_svc.predict_proba(X_test_unsorted),
                              sparse_svc.predict_proba(X_test))
Example #30
Source File: test_pnn.py From neupy with MIT License | 5 votes |
def test_digit_prediction(self):
    """Train a PNN on the digits data and check the test accuracy."""
    digits = datasets.load_digits()

    # NOTE(review): the split passes no random_state, so the exact accuracy
    # asserted below presumably relies on seeding done elsewhere -- confirm.
    x_train, x_test, y_train, y_test = train_test_split(
        digits.data, digits.target, test_size=0.3
    )

    network = algorithms.PNN(verbose=False, std=10)
    network.train(x_train, y_train)
    predicted = network.predict(x_test)

    accuracy = metrics.accuracy_score(y_test, predicted)
    self.assertAlmostEqual(accuracy, 0.9889, places=4)