Python sklearn.neighbors.KNeighborsClassifier() Examples

The following are 30 code examples of sklearn.neighbors.KNeighborsClassifier(), drawn from open-source projects; the source file, project, and license are noted above each example. You may also want to check out the other functions and classes available in the sklearn.neighbors module.
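Before the project-specific examples, here is a minimal fit/predict sketch (not taken from any of the listed projects; the toy arrays are for illustration only):

import numpy as np
from sklearn.neighbors import KNeighborsClassifier

# Toy data: two points per class, purely illustrative.
X = np.array([[0.0, 0.0], [0.1, 0.2], [1.0, 1.0], [0.9, 1.1]])
y = np.array([0, 0, 1, 1])

clf = KNeighborsClassifier(n_neighbors=3)
clf.fit(X, y)
print(clf.predict([[0.2, 0.1]]))        # -> [0]
print(clf.predict_proba([[0.2, 0.1]]))  # ~[[0.667, 0.333]]: 2 of the 3 neighbors are class 0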
Example #1
Source File: main.py    From transferlearning with MIT License
def classify_1nn(data_train, data_test):
    '''
    Classification using 1NN
    Inputs: data_train, data_test: paths to the train and test CSV files
            (the last column of each file holds the class label)
    Outputs: predicted labels and accuracy
    '''
    import numpy as np
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import StandardScaler
    data = {'src': np.loadtxt(data_train, delimiter=','),
            'tar': np.loadtxt(data_test, delimiter=','),
            }
    Xs, Ys = data['src'][:, :-1], data['src'][:, -1]
    Xt, Yt = data['tar'][:, :-1], data['tar'][:, -1]
    # with_mean=False (0 in the original) skips centering; with_std=True scales to unit variance
    Xs = StandardScaler(with_mean=False, with_std=True).fit_transform(Xs)
    Xt = StandardScaler(with_mean=False, with_std=True).fit_transform(Xt)
    clf = KNeighborsClassifier(n_neighbors=1)
    clf.fit(Xs, Ys)
    ypred = clf.predict(Xt)
    acc = accuracy_score(y_true=Yt, y_pred=ypred)
    print('Acc: {:.4f}'.format(acc))
    return ypred, acc 
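A hypothetical invocation, assuming two CSV files whose last column holds the class label (the paths below are placeholders, not from the original project):

# Placeholder paths; each file: feature columns followed by a label column.
ypred, acc = classify_1nn('data/source_domain.csv', 'data/target_domain.csv')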
Example #2
Source File: test_run.py    From nyaggle with MIT License
def test_experiment_sklearn_multiclass(tmpdir_name):
    X, y = make_classification_df(n_samples=1024, n_num_features=10, n_cat_features=0,
                                  n_classes=5, random_state=0, id_column='user_id')

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = {
        'n_neighbors': 10
    }

    result = run_experiment(params, X_train, y_train, X_test, tmpdir_name, algorithm_type=KNeighborsClassifier,
                            with_auto_prep=False)

    assert len(np.unique(result.oof_prediction[:, 0])) > 5  # making sure prediction is not binarized
    assert len(np.unique(result.test_prediction[:, 0])) > 5
    assert result.oof_prediction.shape == (len(y_train), 5)
    assert result.test_prediction.shape == (len(y_test), 5)

    _check_file_exists(tmpdir_name) 
Example #3
Source File: testScoreWithAdapaSklearn.py    From nyoka with Apache License 2.0
def test_31_knn_classifier(self):
        print("\ntest 31 (knn classifier without preprocessing) [binary-class]\n")
        X, X_test, y, features, target, test_file = self.data_utility.get_data_for_binary_classification()

        model = KNeighborsClassifier()
        pipeline_obj = Pipeline([
            ("model", model)
        ])
        pipeline_obj.fit(X,y)
        file_name = 'test31sklearn.pmml'
        
        skl_to_pmml(pipeline_obj, features, target, file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
        model_pred = pipeline_obj.predict(X_test)
        model_prob = pipeline_obj.predict_proba(X_test)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True) 
Example #4
Source File: test_neighbors.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_kneighbors_classifier_sparse(n_samples=40,
                                      n_features=5,
                                      n_test_pts=10,
                                      n_neighbors=5,
                                      random_state=0):
    # Test k-NN classifier on sparse matrices
    # Like the above, but with various types of sparse matrices
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    X *= X > .2
    y = ((X ** 2).sum(axis=1) < .5).astype(int)  # np.int alias was removed in recent NumPy

    for sparsemat in SPARSE_TYPES:
        knn = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors,
                                             algorithm='auto')
        knn.fit(sparsemat(X), y)
        epsilon = 1e-5 * (2 * rng.rand(1, n_features) - 1)
        for sparsev in SPARSE_TYPES + (np.asarray,):
            X_eps = sparsev(X[:n_test_pts] + epsilon)
            y_pred = knn.predict(X_eps)
            assert_array_equal(y_pred, y[:n_test_pts]) 
Example #5
Source File: test_neighbors.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_neighbors_iris():
    # Sanity checks on the iris dataset
    # Puts three points of each label in the plane and performs a
    # nearest neighbor query on points near the decision boundary.

    for algorithm in ALGORITHMS:
        clf = neighbors.KNeighborsClassifier(n_neighbors=1,
                                             algorithm=algorithm)
        clf.fit(iris.data, iris.target)
        assert_array_equal(clf.predict(iris.data), iris.target)

        clf.set_params(n_neighbors=9, algorithm=algorithm)
        clf.fit(iris.data, iris.target)
        assert np.mean(clf.predict(iris.data) == iris.target) > 0.95

        rgs = neighbors.KNeighborsRegressor(n_neighbors=5, algorithm=algorithm)
        rgs.fit(iris.data, iris.target)
        assert_greater(np.mean(rgs.predict(iris.data).round() == iris.target),
                       0.95) 
Example #6
Source File: test_neighbors.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_neighbors_digits():
    # Sanity check on the digits dataset
    # the 'brute' algorithm has been observed to fail if the input
    # dtype is uint8 due to overflow in distance calculations.

    X = digits.data.astype('uint8')
    Y = digits.target
    (n_samples, n_features) = X.shape
    train_test_boundary = int(n_samples * 0.8)
    train = np.arange(0, train_test_boundary)
    test = np.arange(train_test_boundary, n_samples)
    (X_train, Y_train, X_test, Y_test) = X[train], Y[train], X[test], Y[test]

    clf = neighbors.KNeighborsClassifier(n_neighbors=1, algorithm='brute')
    score_uint8 = clf.fit(X_train, Y_train).score(X_test, Y_test)
    score_float = clf.fit(X_train.astype(float, copy=False), Y_train).score(
        X_test.astype(float, copy=False), Y_test)
    assert_equal(score_uint8, score_float) 
Example #7
Source File: classifier.py    From stock-price-prediction with MIT License
def buildModel(dataset, method, parameters):
    """
    Build final model for predicting real testing data
    """
    features = dataset.columns[0:-1]

    if method == 'RNN':
        clf = performRNNlass(dataset[features], dataset['UpDown'])
        return clf

    elif method == 'RF':
        clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)

    elif method == 'KNN':
        clf = neighbors.KNeighborsClassifier()

    elif method == 'SVM':
        c = parameters[0]
        g =  parameters[1]
        clf = SVC(C=c, gamma=g)

    elif method == 'ADA':
        clf = AdaBoostClassifier()

    return clf.fit(dataset[features], dataset['UpDown']) 
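For context, a hypothetical call might look like the following; `df`, the 'UpDown' column layout, and the empty parameter list are assumptions based on the snippet above (only the SVM branch reads `parameters`):

# Hypothetical usage: `df` holds the feature columns followed by the
# binary 'UpDown' target in its last column; the KNN branch ignores `parameters`.
fitted_clf = buildModel(df, 'KNN', parameters=[])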
Example #8
Source File: testScoreWithAdapaSklearn.py    From nyoka with Apache License 2.0
def test_32_knn_classifier(self):
        print("\ntest 32 (knn classifier without preprocessing) [multi-class]\n")
        X, X_test, y, features, target, test_file = self.data_utility.get_data_for_multi_class_classification()

        model = KNeighborsClassifier()
        pipeline_obj = Pipeline([
            ("model", model)
        ])
        pipeline_obj.fit(X,y)
        file_name = 'test32sklearn.pmml'
        
        skl_to_pmml(pipeline_obj, features, target, file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
        model_pred = pipeline_obj.predict(X_test)
        model_prob = pipeline_obj.predict_proba(X_test)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True) 
Example #9
Source File: test_neighbors.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_same_knn_parallel(algorithm):
    X, y = datasets.make_classification(n_samples=30, n_features=5,
                                        n_redundant=0, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    clf = neighbors.KNeighborsClassifier(n_neighbors=3,
                                         algorithm=algorithm)
    clf.fit(X_train, y_train)
    y = clf.predict(X_test)
    dist, ind = clf.kneighbors(X_test)
    graph = clf.kneighbors_graph(X_test, mode='distance').toarray()

    clf.set_params(n_jobs=3)
    clf.fit(X_train, y_train)
    y_parallel = clf.predict(X_test)
    dist_parallel, ind_parallel = clf.kneighbors(X_test)
    graph_parallel = \
        clf.kneighbors_graph(X_test, mode='distance').toarray()

    assert_array_equal(y, y_parallel)
    assert_array_almost_equal(dist, dist_parallel)
    assert_array_equal(ind, ind_parallel)
    assert_array_almost_equal(graph, graph_parallel) 
Example #10
Source File: sklearn_tune.py    From ml-parameter-optimization with MIT License
def tune_params(self):
        """
        tune specified (and default) parameters
        """
        self._start_time = time.time()
        self.default_params() # set default parameters
        self.score_init() # set initial score
        if self.dim_reduction is not None:
            knn = Pipeline([('dimred', self.dim_reduction_method()),
                            ('knn', KNeighborsClassifier(**self._params))])
            self._pipeline = True
        else:
            knn = KNeighborsClassifier(**self._params)
        self.apply_gridsearch(knn)
        self.print_progress(self._start_time)
        return self 
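The helper methods used above (default_params, score_init, apply_gridsearch, print_progress) belong to the surrounding class and are not shown here. As a rough sketch, the grid-search step in plain scikit-learn might look like the following; the parameter grid and the X_train/y_train arrays are assumptions, not part of the original project:

from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Assumed parameter grid; tune the values to the data at hand.
param_grid = {'n_neighbors': [3, 5, 7, 9], 'weights': ['uniform', 'distance']}
search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5, scoring='accuracy')
search.fit(X_train, y_train)  # X_train / y_train are assumed to exist
print(search.best_params_, search.best_score_)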
Example #11
Source File: classifier.py    From libfaceid with MIT License
def __init__(self, classifier=FaceClassifierModels.DEFAULT):
        self._clf = None
        if classifier == FaceClassifierModels.LINEAR_SVM:
            self._clf = SVC(C=1.0, kernel="linear", probability=True)
        elif classifier == FaceClassifierModels.NAIVE_BAYES:
            self._clf = GaussianNB()
        elif classifier == FaceClassifierModels.RBF_SVM:
            self._clf = SVC(C=1, kernel='rbf', probability=True, gamma=2)
        elif classifier == FaceClassifierModels.NEAREST_NEIGHBORS:
            self._clf = KNeighborsClassifier(1)
        elif classifier == FaceClassifierModels.DECISION_TREE:
            self._clf = DecisionTreeClassifier(max_depth=5)
        elif classifier == FaceClassifierModels.RANDOM_FOREST:
            self._clf = RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)
        elif classifier == FaceClassifierModels.NEURAL_NET:
            self._clf = MLPClassifier(alpha=1)
        elif classifier == FaceClassifierModels.ADABOOST:
            self._clf = AdaBoostClassifier()
        elif classifier == FaceClassifierModels.QDA:
            self._clf = QuadraticDiscriminantAnalysis()
        print("classifier={}".format(FaceClassifierModels(classifier))) 
Example #12
Source File: test_bagging.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_classification():
    # Check classification for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyClassifier(),
                           Perceptron(tol=1e-3),
                           DecisionTreeClassifier(),
                           KNeighborsClassifier(),
                           SVC(gamma="scale")]:
        for params in grid:
            BaggingClassifier(base_estimator=base_estimator,
                              random_state=rng,
                              **params).fit(X_train, y_train).predict(X_test) 
Example #13
Source File: knn_missing_data.py    From Generative-ConvACs with MIT License
def knn_masked_data(trX,trY,missing_data_dir, input_shape, k):
    
    raw_im_data = np.loadtxt(join(script_dir,missing_data_dir,'index.txt'),delimiter=' ',dtype=str)
    raw_mask_data = np.loadtxt(join(script_dir,missing_data_dir,'index_mask.txt'),delimiter=' ',dtype=str)
    # Using 'brute' method since we only want to do one query per classifier
    # so this will be quicker as it avoids overhead of creating a search tree
    knn_m = KNeighborsClassifier(algorithm='brute',n_neighbors=k)
    prob_Y_hat = np.zeros((raw_im_data.shape[0],int(np.max(trY)+1)))
    total_images = raw_im_data.shape[0]
    pbar = progressbar.ProgressBar(widgets=[progressbar.FormatLabel('\rProcessed %(value)d of %(max)d Images '), progressbar.Bar()], maxval=total_images, term_width=50).start()
    for i in range(total_images):
        mask_im=load_image(join(script_dir,missing_data_dir,raw_mask_data[i][0]), input_shape,1).reshape(np.prod(input_shape))
        mask = np.logical_not(mask_im > eps) # since mask is 1 at missing locations
        v_im=load_image(join(script_dir,missing_data_dir,raw_im_data[i][0]), input_shape, 255).reshape(np.prod(input_shape))
        rep_mask = np.tile(mask,(trX.shape[0],1))
        # Corrupt whole training set according to the current mask
        corr_trX = np.multiply(trX, rep_mask)        
        knn_m.fit(corr_trX, trY)
        prob_Y_hat[i,:] = knn_m.predict_proba(v_im.reshape(1,-1))
        pbar.update(i)
    pbar.finish()
    return prob_Y_hat 
Example #14
Source File: structural_tests.py    From drifter_ml with MIT License
def cls_supervised_clustering(self, data):
        k_measures = []
        X = data[self.column_names]
        y = data[self.target_name]
        for k in range(2, 12):
            knn = neighbors.KNeighborsClassifier(n_neighbors=k)
            knn.fit(X, y)
            y_pred = knn.predict(X)
            k_measures.append((k, metrics.mean_squared_error(y, y_pred)))
        sorted_k_measures = sorted(k_measures, key=lambda t:t[1])
        lowest_mse = sorted_k_measures[0]
        best_k = lowest_mse[0]
        return best_k 
Example #15
Source File: predictor.py    From quantified-self with MIT License
def __init__(self, n_neighbors=8, slackbot=None):
        self.knn = KNeighborsClassifier(n_neighbors=n_neighbors, weights="distance")

        skill_data = SkillData()
        data_X, data_y = SkillDataLoader().make_data_set(skill_data.q)
        self.knn.fit(data_X, data_y)

        if slackbot is None:
            self.slackbot = SlackerAdapter()
        else:
            self.slackbot = slackbot 
Example #16
Source File: sentiment_analysis_ml.py    From Sentiment_Analysis_cnn_lstm_cnnlstm_textcnn_bilstm with Apache License 2.0
def nn_classifier(n_neighbors,train_vecs,y_train,test_vecs,y_test):
    clf = neighbors.KNeighborsClassifier(n_neighbors, weights='uniform')
    clf.fit(train_vecs,y_train)
    joblib.dump(clf,storedpaths+'model_nn.pkl')
    test_scores=clf.score(test_vecs,y_test)
    return test_scores
    
# Train the LogisticRegression classification algorithm
Example #17
Source File: test_nonparametric.py    From numpy-ml with GNU General Public License v3.0
def test_knn_clf(N=15):
    np.random.seed(12345)

    i = 0
    while i < N:
        N = np.random.randint(2, 100)
        M = np.random.randint(2, 100)
        k = np.random.randint(1, N)
        n_classes = np.random.randint(2, 10)
        ls = np.min([np.random.randint(1, 10), N - 1])
        weights = "uniform"

        X = np.random.rand(N, M)
        X_test = np.random.rand(N, M)
        y = np.random.randint(0, n_classes, size=N)

        knn = KNN(k=k, leaf_size=ls, metric=euclidean, classifier=True, weights=weights)
        knn.fit(X, y)
        preds = knn.predict(X_test)

        gold = KNeighborsClassifier(
            p=2,
            metric="minkowski",
            leaf_size=ls,
            n_neighbors=k,
            weights=weights,
            algorithm="ball_tree",
        )
        gold.fit(X, y)
        gold_preds = gold.predict(X_test)

        for mine, theirs in zip(preds, gold_preds):
            np.testing.assert_almost_equal(mine, theirs)
        print("PASSED")
        i += 1 
Example #18
Source File: _k_neighbors_classifier.py    From coremltools with BSD 3-Clause "New" or "Revised" License
def _check_fitted(model):
    """Simple wrapper to check if the KNeighborsClassifier has been fitted."""
    return _sklearn_util.check_fitted(
        model, lambda m: hasattr(m, "_fit_method") or hasattr(m, "_fit_X")
    ) 
Example #19
Source File: KNN.py    From AI_Sudoku with Creative Commons Zero v1.0 Universal
def __init__(self, k):
        self.mnist = datasets.fetch_openml('mnist_784', data_home='mnist_dataset/')
        self.data, self.target = self.mnist.data, self.mnist.target
        # Make an array of indices the size of MNIST to use for making the data sets.
        # This array is in random order, so we can use it to scramble up the MNIST data
        self.indx = np.random.choice(len(self.target), 70000, replace=False)
        # Initialising the classifier
        self.classifier = KNeighborsClassifier(n_neighbors=k)

    # method for building the datasets to test with 
Example #20
Source File: scikitlearn.py    From sia-cog with MIT License
def getModels():
    result = []
    result.append("LinearRegression")
    result.append("BayesianRidge")
    result.append("ARDRegression")
    result.append("ElasticNet")
    result.append("HuberRegressor")
    result.append("Lasso")
    result.append("LassoLars")
    result.append("Rigid")
    result.append("SGDRegressor")
    result.append("SVR")
    result.append("MLPClassifier")
    result.append("KNeighborsClassifier")
    result.append("SVC")
    result.append("GaussianProcessClassifier")
    result.append("DecisionTreeClassifier")
    result.append("RandomForestClassifier")
    result.append("AdaBoostClassifier")
    result.append("GaussianNB")
    result.append("LogisticRegression")
    result.append("QuadraticDiscriminantAnalysis")
    return result 
Example #21
Source File: _k_neighbors_classifier.py    From coremltools with BSD 3-Clause "New" or "Revised" License
def supports_output_scores(model):
    """KNeighborsClassifier models do not support output scores."""
    return False 
Example #22
Source File: audio_transfer_learning.py    From sklearn-audio-transfer-learning with ISC License
def define_classification_model():
    """ Select and define the model you will use for the classifier. 
    """
    if config['model_type'] == 'linearSVM': # linearSVM can be faster than SVM
        return LinearSVC(C=1)
    elif config['model_type'] == 'SVM': # non-linearSVM, we can use the kernel trick
        return SVC(C=1, kernel='rbf', gamma='scale')
    elif config['model_type'] == 'kNN': # k-nearest neighbour
        return KNeighborsClassifier(n_neighbors=1, metric='cosine')
    elif config['model_type'] == 'perceptron': # optimizes log-loss, also known as cross-entropy, with SGD
        return SGDClassifier(max_iter=600, verbose=0.5, loss='log', learning_rate='optimal')
    elif config['model_type'] == 'MLP': # optimizes log-loss, also known as cross-entropy, with SGD
        return MLPClassifier(hidden_layer_sizes=(20,), max_iter=600, verbose=10, 
               solver='sgd', learning_rate='constant', learning_rate_init=0.001) 
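This function reads a module-level config dict that is defined elsewhere in the project; a minimal usage sketch under that assumption (the dict literal below is not from the original source) might be:

# Assumed minimal config; the real project builds this dict elsewhere.
config = {'model_type': 'kNN'}
model = define_classification_model()  # -> KNeighborsClassifier(n_neighbors=1, metric='cosine')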
Example #23
Source File: utils.py    From website-fingerprinting with MIT License
def train(streams, labels):
    """ This function trains the classifier with the data. """

    # Shuffle the arrays.
    streams, labels = shuffle(streams, labels)

    stream_amount = len(streams)
    training_size = int(stream_amount * 0.9)

    # Get 90% of the streams for training purposes.
    training_x = streams[:training_size]
    training_y = labels[:training_size]

    # Get the remaining 10% of the streams for testing purposes.
    testing_x = streams[training_size:]
    testing_y = labels[training_size:]

    print("Training size: {}".format(training_size))
    print("Testing size:  {}".format(stream_amount - training_size))

    # Initialize the classifier.
    clf = KNeighborsClassifier()

    # Now lets train our KNN classifier.
    clf = clf.fit(training_x, training_y)

    # Save a snapshot of this classifier.
    joblib.dump(clf, "./classifier-nb.dmp", compress=9)

    # Get the prediction.
    predictions = clf.predict(testing_x)

    print("Accuracy: %s%%" % (accuracy_score(testing_y, predictions) * 100,)) 
Example #24
Source File: data_trusting.py    From lime-experiments with BSD 2-Clause "Simplified" License
def get_classifier(name, vectorizer):
  if name == 'logreg':
    return linear_model.LogisticRegression(fit_intercept=True)
  if name == 'random_forest':
    return ensemble.RandomForestClassifier(n_estimators=1000, random_state=1, max_depth=5, n_jobs=10)
  if name == 'svm':
    return svm.SVC(probability=True, kernel='rbf', C=10,gamma=0.001)
  if name == 'tree':
    return tree.DecisionTreeClassifier(random_state=1)
  if name == 'neighbors':
    return neighbors.KNeighborsClassifier()
  if name == 'embforest':
    return embedding_forest.EmbeddingForest(vectorizer) 
Example #25
Source File: parzen_windows.py    From lime-experiments with BSD 2-Clause "Simplified" License
def get_classifier(name, vectorizer):
  if name == 'logreg':
    return linear_model.LogisticRegression(fit_intercept=True)
  if name == 'random_forest':
    return ensemble.RandomForestClassifier(n_estimators=1000, random_state=1, max_depth=5, n_jobs=10)
  if name == 'svm':
    return svm.SVC(probability=True, kernel='rbf', C=10,gamma=0.001)
  if name == 'tree':
    return tree.DecisionTreeClassifier(random_state=1)
  if name == 'neighbors':
    return neighbors.KNeighborsClassifier()
  if name == 'embforest':
    return embedding_forest.EmbeddingForest(vectorizer) 
Example #26
Source File: test_neighbors.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_dtype_convert():
    classifier = neighbors.KNeighborsClassifier(n_neighbors=1)
    CLASSES = 15
    X = np.eye(CLASSES)
    y = [ch for ch in 'ABCDEFGHIJKLMNOPQRSTU'[:CLASSES]]

    result = classifier.fit(X, y).predict(X)
    assert_array_equal(result, y) 
Example #27
Source File: test_neighbors.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_knn_forcing_backend(backend, algorithm):
    # Non-regression test which ensure the knn methods are properly working
    # even when forcing the global joblib backend.
    with parallel_backend(backend):
        X, y = datasets.make_classification(n_samples=30, n_features=5,
                                            n_redundant=0, random_state=0)
        X_train, X_test, y_train, y_test = train_test_split(X, y)

        clf = neighbors.KNeighborsClassifier(n_neighbors=3,
                                             algorithm=algorithm,
                                             n_jobs=3)
        clf.fit(X_train, y_train)
        clf.predict(X_test)
        clf.kneighbors(X_test)
        clf.kneighbors_graph(X_test, mode='distance').toarray() 
Example #28
Source File: test_neighbors.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_predict_sparse_ball_kd_tree():
    rng = np.random.RandomState(0)
    X = rng.rand(5, 5)
    y = rng.randint(0, 2, 5)
    nbrs1 = neighbors.KNeighborsClassifier(1, algorithm='kd_tree')
    nbrs2 = neighbors.KNeighborsRegressor(1, algorithm='ball_tree')
    for model in [nbrs1, nbrs2]:
        model.fit(X, y)
        assert_raises(ValueError, model.predict, csr_matrix(X)) 
Example #29
Source File: test_neighbors.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_metric_params_interface():
    assert_warns(SyntaxWarning, neighbors.KNeighborsClassifier,
                 metric_params={'p': 3}) 
Example #30
Source File: test_neighbors.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_kneighbors_classifier_predict_proba():
    # Test KNeighborsClassifier.predict_proba() method
    X = np.array([[0, 2, 0],
                  [0, 2, 1],
                  [2, 0, 0],
                  [2, 2, 0],
                  [0, 0, 2],
                  [0, 0, 1]])
    y = np.array([4, 4, 5, 5, 1, 1])
    cls = neighbors.KNeighborsClassifier(n_neighbors=3, p=1)  # cityblock dist
    cls.fit(X, y)
    y_prob = cls.predict_proba(X)
    real_prob = np.array([[0, 2. / 3, 1. / 3],
                          [1. / 3, 2. / 3, 0],
                          [1. / 3, 0, 2. / 3],
                          [0, 1. / 3, 2. / 3],
                          [2. / 3, 1. / 3, 0],
                          [2. / 3, 1. / 3, 0]])
    assert_array_equal(real_prob, y_prob)
    # Check that it also works with non integer labels
    cls.fit(X, y.astype(str))
    y_prob = cls.predict_proba(X)
    assert_array_equal(real_prob, y_prob)
    # Check that it works with weights='distance'
    cls = neighbors.KNeighborsClassifier(
        n_neighbors=2, p=1, weights='distance')
    cls.fit(X, y)
    y_prob = cls.predict_proba(np.array([[0, 2, 0], [2, 2, 2]]))
    real_prob = np.array([[0, 1, 0], [0, 0.4, 0.6]])
    assert_array_almost_equal(real_prob, y_prob)
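As a worked check (not part of the original test) of where the 2/3 and 1/3 values come from: for the first query row with n_neighbors=3 and p=1, the three cityblock-nearest training points carry labels 4, 4 and 5, so over the sorted classes [1, 4, 5] the expected probabilities are [0, 2/3, 1/3].

# Worked check: cityblock distances from [0, 2, 0] to the six training points
# are 0, 1, 4, 2, 4, 3, so the three nearest carry labels 4, 4, 5 and the
# class probabilities over the sorted classes [1, 4, 5] are:
print([0 / 3, 2 / 3, 1 / 3])  # matches real_prob[0]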