Python sklearn.neighbors.KNeighborsClassifier() Examples
The following are 30 code examples of sklearn.neighbors.KNeighborsClassifier(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.neighbors, or try the search function.
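Before the project-specific examples below, here is a minimal, self-contained sketch of the typical KNeighborsClassifier workflow. It is not taken from any of the listed projects; it only assumes scikit-learn is installed and uses the bundled iris dataset, with illustrative variable names.

# Minimal sketch: fit a k-NN classifier on the iris dataset and report accuracy.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = KNeighborsClassifier(n_neighbors=5)   # 5 neighbors is the scikit-learn default
clf.fit(X_train, y_train)                   # k-NN simply stores the training data
y_pred = clf.predict(X_test)                # majority vote among the 5 nearest neighbors
print("Accuracy: {:.4f}".format(accuracy_score(y_test, y_pred)))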
Example #1
Source File: main.py From transferlearning with MIT License | 14 votes |
def classify_1nn(data_train, data_test):
    '''
    Classification using 1NN
    Inputs: data_train, data_test: train and test csv file path
    Outputs: yprediction and accuracy
    '''
    # numpy is imported at module level in the original project; imported here
    # so the snippet runs standalone.
    import numpy as np
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import StandardScaler
    data = {'src': np.loadtxt(data_train, delimiter=','),
            'tar': np.loadtxt(data_test, delimiter=','),
            }
    Xs, Ys, Xt, Yt = data['src'][:, :-1], data['src'][:, -1], \
        data['tar'][:, :-1], data['tar'][:, -1]
    Xs = StandardScaler(with_mean=0, with_std=1).fit_transform(Xs)
    Xt = StandardScaler(with_mean=0, with_std=1).fit_transform(Xt)
    clf = KNeighborsClassifier(n_neighbors=1)
    clf.fit(Xs, Ys)
    ypred = clf.predict(Xt)
    acc = accuracy_score(y_true=Yt, y_pred=ypred)
    print('Acc: {:.4f}'.format(acc))
    return ypred, acc
Example #2
Source File: test_run.py From nyaggle with MIT License | 7 votes |
def test_experiment_sklearn_multiclass(tmpdir_name):
    X, y = make_classification_df(n_samples=1024, n_num_features=10, n_cat_features=0,
                                  n_classes=5, random_state=0, id_column='user_id')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = {
        'n_neighbors': 10
    }

    result = run_experiment(params, X_train, y_train, X_test, tmpdir_name,
                            algorithm_type=KNeighborsClassifier, with_auto_prep=False)

    assert len(np.unique(result.oof_prediction[:, 0])) > 5  # making sure prediction is not binarized
    assert len(np.unique(result.test_prediction[:, 0])) > 5
    assert result.oof_prediction.shape == (len(y_train), 5)
    assert result.test_prediction.shape == (len(y_test), 5)

    _check_file_exists(tmpdir_name)
Example #3
Source File: testScoreWithAdapaSklearn.py From nyoka with Apache License 2.0 | 6 votes |
def test_31_knn_classifier(self):
    print("\ntest 31 (knn classifier without preprocessing) [binary-class]\n")
    X, X_test, y, features, target, test_file = self.data_utility.get_data_for_binary_classification()

    model = KNeighborsClassifier()
    pipeline_obj = Pipeline([
        ("model", model)
    ])
    pipeline_obj.fit(X, y)
    file_name = 'test31sklearn.pmml'
    skl_to_pmml(pipeline_obj, features, target, file_name)
    model_name = self.adapa_utility.upload_to_zserver(file_name)
    predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
    model_pred = pipeline_obj.predict(X_test)
    model_prob = pipeline_obj.predict_proba(X_test)
    self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
    self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True)
Example #4
Source File: test_neighbors.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_kneighbors_classifier_sparse(n_samples=40,
                                      n_features=5,
                                      n_test_pts=10,
                                      n_neighbors=5,
                                      random_state=0):
    # Test k-NN classifier on sparse matrices
    # Like the above, but with various types of sparse matrices
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    X *= X > .2
    y = ((X ** 2).sum(axis=1) < .5).astype(np.int)

    for sparsemat in SPARSE_TYPES:
        knn = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors,
                                             algorithm='auto')
        knn.fit(sparsemat(X), y)
        epsilon = 1e-5 * (2 * rng.rand(1, n_features) - 1)
        for sparsev in SPARSE_TYPES + (np.asarray,):
            X_eps = sparsev(X[:n_test_pts] + epsilon)
            y_pred = knn.predict(X_eps)
            assert_array_equal(y_pred, y[:n_test_pts])
Example #5
Source File: test_neighbors.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_neighbors_iris():
    # Sanity checks on the iris dataset
    # Puts three points of each label in the plane and performs a
    # nearest neighbor query on points near the decision boundary.

    for algorithm in ALGORITHMS:
        clf = neighbors.KNeighborsClassifier(n_neighbors=1,
                                             algorithm=algorithm)
        clf.fit(iris.data, iris.target)
        assert_array_equal(clf.predict(iris.data), iris.target)

        clf.set_params(n_neighbors=9, algorithm=algorithm)
        clf.fit(iris.data, iris.target)
        assert np.mean(clf.predict(iris.data) == iris.target) > 0.95

        rgs = neighbors.KNeighborsRegressor(n_neighbors=5, algorithm=algorithm)
        rgs.fit(iris.data, iris.target)
        assert_greater(np.mean(rgs.predict(iris.data).round() == iris.target),
                       0.95)
Example #6
Source File: test_neighbors.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_neighbors_digits():
    # Sanity check on the digits dataset
    # the 'brute' algorithm has been observed to fail if the input
    # dtype is uint8 due to overflow in distance calculations.

    X = digits.data.astype('uint8')
    Y = digits.target
    (n_samples, n_features) = X.shape
    train_test_boundary = int(n_samples * 0.8)
    train = np.arange(0, train_test_boundary)
    test = np.arange(train_test_boundary, n_samples)
    (X_train, Y_train, X_test, Y_test) = X[train], Y[train], X[test], Y[test]

    clf = neighbors.KNeighborsClassifier(n_neighbors=1, algorithm='brute')
    score_uint8 = clf.fit(X_train, Y_train).score(X_test, Y_test)
    score_float = clf.fit(X_train.astype(float, copy=False), Y_train).score(
        X_test.astype(float, copy=False), Y_test)
    assert_equal(score_uint8, score_float)
Example #7
Source File: classifier.py From stock-price-prediction with MIT License | 6 votes |
def buildModel(dataset, method, parameters):
    """
    Build final model for predicting real testing data
    """
    features = dataset.columns[0:-1]

    if method == 'RNN':
        clf = performRNNlass(dataset[features], dataset['UpDown'])
        return clf
    elif method == 'RF':
        clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)
    elif method == 'KNN':
        clf = neighbors.KNeighborsClassifier()
    elif method == 'SVM':
        c = parameters[0]
        g = parameters[1]
        clf = SVC(C=c, gamma=g)
    elif method == 'ADA':
        clf = AdaBoostClassifier()

    return clf.fit(dataset[features], dataset['UpDown'])
Example #8
Source File: testScoreWithAdapaSklearn.py From nyoka with Apache License 2.0 | 6 votes |
def test_32_knn_classifier(self):
    print("\ntest 32 (knn classifier without preprocessing) [multi-class]\n")
    X, X_test, y, features, target, test_file = self.data_utility.get_data_for_multi_class_classification()

    model = KNeighborsClassifier()
    pipeline_obj = Pipeline([
        ("model", model)
    ])
    pipeline_obj.fit(X, y)
    file_name = 'test32sklearn.pmml'
    skl_to_pmml(pipeline_obj, features, target, file_name)
    model_name = self.adapa_utility.upload_to_zserver(file_name)
    predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
    model_pred = pipeline_obj.predict(X_test)
    model_prob = pipeline_obj.predict_proba(X_test)
    self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
    self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True)
Example #9
Source File: test_neighbors.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_same_knn_parallel(algorithm):
    X, y = datasets.make_classification(n_samples=30, n_features=5,
                                        n_redundant=0, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    clf = neighbors.KNeighborsClassifier(n_neighbors=3,
                                         algorithm=algorithm)
    clf.fit(X_train, y_train)
    y = clf.predict(X_test)
    dist, ind = clf.kneighbors(X_test)
    graph = clf.kneighbors_graph(X_test, mode='distance').toarray()

    clf.set_params(n_jobs=3)
    clf.fit(X_train, y_train)
    y_parallel = clf.predict(X_test)
    dist_parallel, ind_parallel = clf.kneighbors(X_test)
    graph_parallel = \
        clf.kneighbors_graph(X_test, mode='distance').toarray()

    assert_array_equal(y, y_parallel)
    assert_array_almost_equal(dist, dist_parallel)
    assert_array_equal(ind, ind_parallel)
    assert_array_almost_equal(graph, graph_parallel)
Example #10
Source File: sklearn_tune.py From ml-parameter-optimization with MIT License | 6 votes |
def tune_params(self):
    """
    tune specified (and default) parameters
    """
    self._start_time = time.time()
    self.default_params()  # set default parameters
    self.score_init()  # set initial score
    if self.dim_reduction is not None:
        # note: a comma is required between the two pipeline steps
        knn = Pipeline([('dimred', self.dim_reduction_method()),
                        ('knn', KNeighborsClassifier(**self._params))])
        self._pipeline = True
    else:
        knn = KNeighborsClassifier(**self._params)
    self.apply_gridsearch(knn)
    self.print_progress(self._start_time)
    return self
Example #11
Source File: classifier.py From libfaceid with MIT License | 6 votes |
def __init__(self, classifier=FaceClassifierModels.DEFAULT):
    self._clf = None
    if classifier == FaceClassifierModels.LINEAR_SVM:
        self._clf = SVC(C=1.0, kernel="linear", probability=True)
    elif classifier == FaceClassifierModels.NAIVE_BAYES:
        self._clf = GaussianNB()
    elif classifier == FaceClassifierModels.RBF_SVM:
        self._clf = SVC(C=1, kernel='rbf', probability=True, gamma=2)
    elif classifier == FaceClassifierModels.NEAREST_NEIGHBORS:
        self._clf = KNeighborsClassifier(1)
    elif classifier == FaceClassifierModels.DECISION_TREE:
        self._clf = DecisionTreeClassifier(max_depth=5)
    elif classifier == FaceClassifierModels.RANDOM_FOREST:
        self._clf = RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)
    elif classifier == FaceClassifierModels.NEURAL_NET:
        self._clf = MLPClassifier(alpha=1)
    elif classifier == FaceClassifierModels.ADABOOST:
        self._clf = AdaBoostClassifier()
    elif classifier == FaceClassifierModels.QDA:
        self._clf = QuadraticDiscriminantAnalysis()
    print("classifier={}".format(FaceClassifierModels(classifier)))
Example #12
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_classification():
    # Check classification for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyClassifier(),
                           Perceptron(tol=1e-3),
                           DecisionTreeClassifier(),
                           KNeighborsClassifier(),
                           SVC(gamma="scale")]:
        for params in grid:
            BaggingClassifier(base_estimator=base_estimator,
                              random_state=rng,
                              **params).fit(X_train, y_train).predict(X_test)
Example #13
Source File: knn_missing_data.py From Generative-ConvACs with MIT License | 6 votes |
def knn_masked_data(trX, trY, missing_data_dir, input_shape, k):

    raw_im_data = np.loadtxt(join(script_dir, missing_data_dir, 'index.txt'), delimiter=' ', dtype=str)
    raw_mask_data = np.loadtxt(join(script_dir, missing_data_dir, 'index_mask.txt'), delimiter=' ', dtype=str)
    # Using 'brute' method since we only want to do one query per classifier
    # so this will be quicker as it avoids overhead of creating a search tree
    knn_m = KNeighborsClassifier(algorithm='brute', n_neighbors=k)
    prob_Y_hat = np.zeros((raw_im_data.shape[0], int(np.max(trY) + 1)))
    total_images = raw_im_data.shape[0]
    pbar = progressbar.ProgressBar(widgets=[progressbar.FormatLabel('\rProcessed %(value)d of %(max)d Images '), progressbar.Bar()],
                                   maxval=total_images, term_width=50).start()
    for i in range(total_images):
        mask_im = load_image(join(script_dir, missing_data_dir, raw_mask_data[i][0]), input_shape, 1).reshape(np.prod(input_shape))
        mask = np.logical_not(mask_im > eps)  # since mask is 1 at missing locations
        v_im = load_image(join(script_dir, missing_data_dir, raw_im_data[i][0]), input_shape, 255).reshape(np.prod(input_shape))
        rep_mask = np.tile(mask, (trX.shape[0], 1))
        # Corrupt whole training set according to the current mask
        corr_trX = np.multiply(trX, rep_mask)
        knn_m.fit(corr_trX, trY)
        prob_Y_hat[i, :] = knn_m.predict_proba(v_im.reshape(1, -1))
        pbar.update(i)
    pbar.finish()
    return prob_Y_hat
Example #14
Source File: structural_tests.py From drifter_ml with MIT License | 5 votes |
def cls_supervised_clustering(self, data):
    k_measures = []
    X = data[self.column_names]
    y = data[self.target_name]
    for k in range(2, 12):
        knn = neighbors.KNeighborsClassifier(n_neighbors=k)
        knn.fit(X, y)
        y_pred = knn.predict(X)
        k_measures.append((k, metrics.mean_squared_error(y, y_pred)))
    sorted_k_measures = sorted(k_measures, key=lambda t: t[1])
    lowest_mse = sorted_k_measures[0]
    best_k = lowest_mse[0]
    return best_k
Example #15
Source File: predictor.py From quantified-self with MIT License | 5 votes |
def __init__(self, n_neighbors=8, slackbot=None):
    self.knn = KNeighborsClassifier(n_neighbors=n_neighbors, weights="distance")

    skill_data = SkillData()
    data_X, data_y = SkillDataLoader().make_data_set(skill_data.q)
    self.knn.fit(data_X, data_y)

    if slackbot is None:
        self.slackbot = SlackerAdapter()
    else:
        self.slackbot = slackbot
Example #16
Source File: sentiment_analysis_ml.py From Sentiment_Analysis_cnn_lstm_cnnlstm_textcnn_bilstm with Apache License 2.0 | 5 votes |
def nn_classifier(n_neighbors, train_vecs, y_train, test_vecs, y_test):
    clf = neighbors.KNeighborsClassifier(n_neighbors, weights='uniform')
    clf.fit(train_vecs, y_train)
    joblib.dump(clf, storedpaths + 'model_nn.pkl')
    test_scores = clf.score(test_vecs, y_test)
    return test_scores

# Train the LogisticRegression classification algorithm
Example #17
Source File: test_nonparametric.py From numpy-ml with GNU General Public License v3.0 | 5 votes |
def test_knn_clf(N=15):
    np.random.seed(12345)
    i = 0
    while i < N:
        N = np.random.randint(2, 100)
        M = np.random.randint(2, 100)
        k = np.random.randint(1, N)
        n_classes = np.random.randint(2, 10)
        ls = np.min([np.random.randint(1, 10), N - 1])
        weights = "uniform"

        X = np.random.rand(N, M)
        X_test = np.random.rand(N, M)
        y = np.random.randint(0, n_classes, size=N)

        knn = KNN(k=k, leaf_size=ls, metric=euclidean, classifier=True, weights=weights)
        knn.fit(X, y)
        preds = knn.predict(X_test)

        gold = KNeighborsClassifier(
            p=2,
            metric="minkowski",
            leaf_size=ls,
            n_neighbors=k,
            weights=weights,
            algorithm="ball_tree",
        )
        gold.fit(X, y)
        gold_preds = gold.predict(X_test)

        for mine, theirs in zip(preds, gold_preds):
            np.testing.assert_almost_equal(mine, theirs)
        print("PASSED")
        i += 1
Example #18
Source File: _k_neighbors_classifier.py From coremltools with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _check_fitted(model):
    """Simple wrapper to check if the KNeighborsClassifier has been fitted."""
    return _sklearn_util.check_fitted(
        model, lambda m: hasattr(m, "_fit_method") or hasattr(m, "_fit_X")
    )
Example #19
Source File: KNN.py From AI_Sudoku with Creative Commons Zero v1.0 Universal | 5 votes |
def __init__(self, k):
    self.mnist = datasets.fetch_openml('mnist_784', data_home='mnist_dataset/')
    self.data, self.target = self.mnist.data, self.mnist.target
    # Make an array of indices the size of MNIST to use for making the data sets.
    # This array is in random order, so we can use it to scramble up the MNIST data
    self.indx = np.random.choice(len(self.target), 70000, replace=False)
    # Initialising the classifier
    self.classifier = KNeighborsClassifier(n_neighbors=k)

# method for building the datasets to test with
Example #20
Source File: scikitlearn.py From sia-cog with MIT License | 5 votes |
def getModels():
    result = []
    result.append("LinearRegression")
    result.append("BayesianRidge")
    result.append("ARDRegression")
    result.append("ElasticNet")
    result.append("HuberRegressor")
    result.append("Lasso")
    result.append("LassoLars")
    result.append("Rigid")
    result.append("SGDRegressor")
    result.append("SVR")
    result.append("MLPClassifier")
    result.append("KNeighborsClassifier")
    result.append("SVC")
    result.append("GaussianProcessClassifier")
    result.append("DecisionTreeClassifier")
    result.append("RandomForestClassifier")
    result.append("AdaBoostClassifier")
    result.append("GaussianNB")
    result.append("LogisticRegression")
    result.append("QuadraticDiscriminantAnalysis")
    return result
Example #21
Source File: _k_neighbors_classifier.py From coremltools with BSD 3-Clause "New" or "Revised" License | 5 votes |
def supports_output_scores(model):
    """KNeighborsClassifier models do not support output scores."""
    return False
Example #22
Source File: audio_transfer_learning.py From sklearn-audio-transfer-learning with ISC License | 5 votes |
def define_classification_model():
    """ Select and define the model you will use for the classifier. """
    if config['model_type'] == 'linearSVM':  # linearSVM can be faster than SVM
        return LinearSVC(C=1)
    elif config['model_type'] == 'SVM':  # non-linearSVM, we can use the kernel trick
        return SVC(C=1, kernel='rbf', gamma='scale')
    elif config['model_type'] == 'kNN':  # k-nearest neighbour
        return KNeighborsClassifier(n_neighbors=1, metric='cosine')
    elif config['model_type'] == 'perceptron':  # optimizes log-loss, also known as cross-entropy, with sgd
        return SGDClassifier(max_iter=600, verbose=0.5, loss='log', learning_rate='optimal')
    elif config['model_type'] == 'MLP':  # optimizes log-loss, also known as cross-entropy, with sgd
        return MLPClassifier(hidden_layer_sizes=(20,), max_iter=600, verbose=10,
                             solver='sgd', learning_rate='constant', learning_rate_init=0.001)
Example #23
Source File: utils.py From website-fingerprinting with MIT License | 5 votes |
def train(streams, labels):
    """ This function trains the classifier with the data. """
    # Shuffle the arrays.
    streams, labels = shuffle(streams, labels)

    stream_amount = len(streams)
    training_size = int(stream_amount * 0.9)

    # Get 90% of the streams for training purposes.
    training_x = streams[:training_size]
    training_y = labels[:training_size]

    # Get the remaining 10% of the streams for testing purposes.
    testing_x = streams[training_size:]
    testing_y = labels[training_size:]

    print("Training size: {}".format(training_size))
    print("Testing size: {}".format(stream_amount - training_size))

    # Initialize the classifier.
    clf = KNeighborsClassifier()

    # Now lets train our KNN classifier.
    clf = clf.fit(training_x, training_y)

    # Save a snapshot of this classifier.
    joblib.dump(clf, "./classifier-nb.dmp", compress=9)

    # Get the prediction.
    predictions = clf.predict(testing_x)

    print("Accuracy: %s%%" % (accuracy_score(testing_y, predictions) * 100,))
Example #24
Source File: data_trusting.py From lime-experiments with BSD 2-Clause "Simplified" License | 5 votes |
def get_classifier(name, vectorizer):
    if name == 'logreg':
        return linear_model.LogisticRegression(fit_intercept=True)
    if name == 'random_forest':
        return ensemble.RandomForestClassifier(n_estimators=1000, random_state=1, max_depth=5, n_jobs=10)
    if name == 'svm':
        return svm.SVC(probability=True, kernel='rbf', C=10, gamma=0.001)
    if name == 'tree':
        return tree.DecisionTreeClassifier(random_state=1)
    if name == 'neighbors':
        return neighbors.KNeighborsClassifier()
    if name == 'embforest':
        return embedding_forest.EmbeddingForest(vectorizer)
Example #25
Source File: parzen_windows.py From lime-experiments with BSD 2-Clause "Simplified" License | 5 votes |
def get_classifier(name, vectorizer):
    if name == 'logreg':
        return linear_model.LogisticRegression(fit_intercept=True)
    if name == 'random_forest':
        return ensemble.RandomForestClassifier(n_estimators=1000, random_state=1, max_depth=5, n_jobs=10)
    if name == 'svm':
        return svm.SVC(probability=True, kernel='rbf', C=10, gamma=0.001)
    if name == 'tree':
        return tree.DecisionTreeClassifier(random_state=1)
    if name == 'neighbors':
        return neighbors.KNeighborsClassifier()
    if name == 'embforest':
        return embedding_forest.EmbeddingForest(vectorizer)
Example #26
Source File: test_neighbors.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_dtype_convert():
    classifier = neighbors.KNeighborsClassifier(n_neighbors=1)
    CLASSES = 15
    X = np.eye(CLASSES)
    y = [ch for ch in 'ABCDEFGHIJKLMNOPQRSTU'[:CLASSES]]

    result = classifier.fit(X, y).predict(X)
    assert_array_equal(result, y)
Example #27
Source File: test_neighbors.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_knn_forcing_backend(backend, algorithm):
    # Non-regression test which ensure the knn methods are properly working
    # even when forcing the global joblib backend.
    with parallel_backend(backend):
        X, y = datasets.make_classification(n_samples=30, n_features=5,
                                            n_redundant=0, random_state=0)
        X_train, X_test, y_train, y_test = train_test_split(X, y)
        clf = neighbors.KNeighborsClassifier(n_neighbors=3,
                                             algorithm=algorithm,
                                             n_jobs=3)
        clf.fit(X_train, y_train)
        clf.predict(X_test)
        clf.kneighbors(X_test)
        clf.kneighbors_graph(X_test, mode='distance').toarray()
Example #28
Source File: test_neighbors.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_predict_sparse_ball_kd_tree():
    rng = np.random.RandomState(0)
    X = rng.rand(5, 5)
    y = rng.randint(0, 2, 5)
    nbrs1 = neighbors.KNeighborsClassifier(1, algorithm='kd_tree')
    nbrs2 = neighbors.KNeighborsRegressor(1, algorithm='ball_tree')
    for model in [nbrs1, nbrs2]:
        model.fit(X, y)
        assert_raises(ValueError, model.predict, csr_matrix(X))
Example #29
Source File: test_neighbors.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_metric_params_interface():
    assert_warns(SyntaxWarning, neighbors.KNeighborsClassifier,
                 metric_params={'p': 3})
Example #30
Source File: test_neighbors.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_kneighbors_classifier_predict_proba():
    # Test KNeighborsClassifier.predict_proba() method
    X = np.array([[0, 2, 0],
                  [0, 2, 1],
                  [2, 0, 0],
                  [2, 2, 0],
                  [0, 0, 2],
                  [0, 0, 1]])
    y = np.array([4, 4, 5, 5, 1, 1])
    cls = neighbors.KNeighborsClassifier(n_neighbors=3, p=1)  # cityblock dist
    cls.fit(X, y)
    y_prob = cls.predict_proba(X)
    real_prob = np.array([[0, 2. / 3, 1. / 3],
                          [1. / 3, 2. / 3, 0],
                          [1. / 3, 0, 2. / 3],
                          [0, 1. / 3, 2. / 3],
                          [2. / 3, 1. / 3, 0],
                          [2. / 3, 1. / 3, 0]])
    assert_array_equal(real_prob, y_prob)
    # Check that it also works with non integer labels
    cls.fit(X, y.astype(str))
    y_prob = cls.predict_proba(X)
    assert_array_equal(real_prob, y_prob)
    # Check that it works with weights='distance'
    cls = neighbors.KNeighborsClassifier(
        n_neighbors=2, p=1, weights='distance')
    cls.fit(X, y)
    y_prob = cls.predict_proba(np.array([[0, 2, 0], [2, 2, 2]]))
    real_prob = np.array([[0, 1, 0], [0, 0.4, 0.6]])
    assert_array_almost_equal(real_prob, y_prob)