Python sklearn.neighbors.KNeighborsClassifier() Examples
The following are 30 code examples of sklearn.neighbors.KNeighborsClassifier(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.neighbors, or try the search function.
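Before the project-specific examples below, here is a minimal, self-contained sketch of the typical KNeighborsClassifier workflow. It is not taken from any of the listed projects; it only assumes scikit-learn is installed and uses the bundled iris dataset, with illustrative variable names.

# Minimal sketch: fit a k-NN classifier on the iris dataset and report accuracy.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = KNeighborsClassifier(n_neighbors=5)   # 5 neighbors is the scikit-learn default
clf.fit(X_train, y_train)                   # k-NN simply stores the training data
y_pred = clf.predict(X_test)                # majority vote among the 5 nearest neighbors
print("Accuracy: {:.4f}".format(accuracy_score(y_test, y_pred)))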
Example #1
Source File: main.py From transferlearning with MIT License | 14 votes |
def classify_1nn(data_train, data_test):
    '''
    Classification using 1NN
    Inputs: data_train, data_test: train and test csv file path
    Outputs: yprediction and accuracy
    '''
    # numpy is imported at module level in the original project; imported here
    # so the snippet runs standalone.
    import numpy as np
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import StandardScaler
    data = {'src': np.loadtxt(data_train, delimiter=','),
            'tar': np.loadtxt(data_test, delimiter=','),
            }
    Xs, Ys, Xt, Yt = data['src'][:, :-1], data['src'][:, -1], \
        data['tar'][:, :-1], data['tar'][:, -1]
    Xs = StandardScaler(with_mean=0, with_std=1).fit_transform(Xs)
    Xt = StandardScaler(with_mean=0, with_std=1).fit_transform(Xt)
    clf = KNeighborsClassifier(n_neighbors=1)
    clf.fit(Xs, Ys)
    ypred = clf.predict(Xt)
    acc = accuracy_score(y_true=Yt, y_pred=ypred)
    print('Acc: {:.4f}'.format(acc))
    return ypred, acc
Example #2
Source File: test_run.py From nyaggle with MIT License | 7 votes |
def test_experiment_sklearn_multiclass(tmpdir_name):
    X, y = make_classification_df(n_samples=1024, n_num_features=10, n_cat_features=0,
                                  n_classes=5, random_state=0, id_column='user_id')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = {
        'n_neighbors': 10
    }

    result = run_experiment(params, X_train, y_train, X_test, tmpdir_name,
                            algorithm_type=KNeighborsClassifier, with_auto_prep=False)

    assert len(np.unique(result.oof_prediction[:, 0])) > 5  # making sure prediction is not binarized
    assert len(np.unique(result.test_prediction[:, 0])) > 5
    assert result.oof_prediction.shape == (len(y_train), 5)
    assert result.test_prediction.shape == (len(y_test), 5)

    _check_file_exists(tmpdir_name)
Example #3
Source File: testScoreWithAdapaSklearn.py From nyoka with Apache License 2.0 | 6 votes |
def test_31_knn_classifier(self):
    print("\ntest 31 (knn classifier without preprocessing) [binary-class]\n")
    X, X_test, y, features, target, test_file = self.data_utility.get_data_for_binary_classification()

    model = KNeighborsClassifier()
    pipeline_obj = Pipeline([
        ("model", model)
    ])
    pipeline_obj.fit(X, y)
    file_name = 'test31sklearn.pmml'
    skl_to_pmml(pipeline_obj, features, target, file_name)
    model_name = self.adapa_utility.upload_to_zserver(file_name)
    predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
    model_pred = pipeline_obj.predict(X_test)
    model_prob = pipeline_obj.predict_proba(X_test)
    self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
    self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True)
Example #4
Source File: test_neighbors.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_kneighbors_classifier_sparse(n_samples=40,
                                      n_features=5,
                                      n_test_pts=10,
                                      n_neighbors=5,
                                      random_state=0):
    # Test k-NN classifier on sparse matrices
    # Like the above, but with various types of sparse matrices
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    X *= X > .2
    y = ((X ** 2).sum(axis=1) < .5).astype(np.int)

    for sparsemat in SPARSE_TYPES:
        knn = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors,
                                             algorithm='auto')
        knn.fit(sparsemat(X), y)
        epsilon = 1e-5 * (2 * rng.rand(1, n_features) - 1)
        for sparsev in SPARSE_TYPES + (np.asarray,):
            X_eps = sparsev(X[:n_test_pts] + epsilon)
            y_pred = knn.predict(X_eps)
            assert_array_equal(y_pred, y[:n_test_pts])
Example #5
Source File: test_neighbors.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_neighbors_iris():
    # Sanity checks on the iris dataset
    # Puts three points of each label in the plane and performs a
    # nearest neighbor query on points near the decision boundary.

    for algorithm in ALGORITHMS:
        clf = neighbors.KNeighborsClassifier(n_neighbors=1,
                                             algorithm=algorithm)
        clf.fit(iris.data, iris.target)
        assert_array_equal(clf.predict(iris.data), iris.target)

        clf.set_params(n_neighbors=9, algorithm=algorithm)
        clf.fit(iris.data, iris.target)
        assert np.mean(clf.predict(iris.data) == iris.target) > 0.95

        rgs = neighbors.KNeighborsRegressor(n_neighbors=5, algorithm=algorithm)
        rgs.fit(iris.data, iris.target)
        assert_greater(np.mean(rgs.predict(iris.data).round() == iris.target),
                       0.95)
Example #6
Source File: test_neighbors.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_neighbors_digits():
    # Sanity check on the digits dataset
    # the 'brute' algorithm has been observed to fail if the input
    # dtype is uint8 due to overflow in distance calculations.

    X = digits.data.astype('uint8')
    Y = digits.target
    (n_samples, n_features) = X.shape
    train_test_boundary = int(n_samples * 0.8)
    train = np.arange(0, train_test_boundary)
    test = np.arange(train_test_boundary, n_samples)
    (X_train, Y_train, X_test, Y_test) = X[train], Y[train], X[test], Y[test]

    clf = neighbors.KNeighborsClassifier(n_neighbors=1, algorithm='brute')
    score_uint8 = clf.fit(X_train, Y_train).score(X_test, Y_test)
    score_float = clf.fit(X_train.astype(float, copy=False), Y_train).score(
        X_test.astype(float, copy=False), Y_test)
    assert_equal(score_uint8, score_float)
Example #7
Source File: classifier.py From stock-price-prediction with MIT License | 6 votes |
def buildModel(dataset, method, parameters):
    """
    Build final model for predicting real testing data
    """
    features = dataset.columns[0:-1]

    if method == 'RNN':
        clf = performRNNlass(dataset[features], dataset['UpDown'])
        return clf
    elif method == 'RF':
        clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)
    elif method == 'KNN':
        clf = neighbors.KNeighborsClassifier()
    elif method == 'SVM':
        c = parameters[0]
        g = parameters[1]
        clf = SVC(C=c, gamma=g)
    elif method == 'ADA':
        clf = AdaBoostClassifier()

    return clf.fit(dataset[features], dataset['UpDown'])
Example #8
Source File: testScoreWithAdapaSklearn.py From nyoka with Apache License 2.0 | 6 votes |
def test_32_knn_classifier(self):
    print("\ntest 32 (knn classifier without preprocessing) [multi-class]\n")
    X, X_test, y, features, target, test_file = self.data_utility.get_data_for_multi_class_classification()

    model = KNeighborsClassifier()
    pipeline_obj = Pipeline([
        ("model", model)
    ])
    pipeline_obj.fit(X, y)
    file_name = 'test32sklearn.pmml'
    skl_to_pmml(pipeline_obj, features, target, file_name)
    model_name = self.adapa_utility.upload_to_zserver(file_name)
    predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
    model_pred = pipeline_obj.predict(X_test)
    model_prob = pipeline_obj.predict_proba(X_test)
    self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
    self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True)
Example #9
Source File: test_neighbors.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_same_knn_parallel(algorithm):
    X, y = datasets.make_classification(n_samples=30, n_features=5,
                                        n_redundant=0, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    clf = neighbors.KNeighborsClassifier(n_neighbors=3,
                                         algorithm=algorithm)
    clf.fit(X_train, y_train)
    y = clf.predict(X_test)
    dist, ind = clf.kneighbors(X_test)
    graph = clf.kneighbors_graph(X_test, mode='distance').toarray()

    clf.set_params(n_jobs=3)
    clf.fit(X_train, y_train)
    y_parallel = clf.predict(X_test)
    dist_parallel, ind_parallel = clf.kneighbors(X_test)
    graph_parallel = \
        clf.kneighbors_graph(X_test, mode='distance').toarray()

    assert_array_equal(y, y_parallel)
    assert_array_almost_equal(dist, dist_parallel)
    assert_array_equal(ind, ind_parallel)
    assert_array_almost_equal(graph, graph_parallel)
Example #10
Source File: sklearn_tune.py From ml-parameter-optimization with MIT License | 6 votes |
def tune_params(self):
    """
    tune specified (and default) parameters
    """
    self._start_time = time.time()
    self.default_params()  # set default parameters
    self.score_init()  # set initial score
    if self.dim_reduction is not None:
        # note: a comma is required between the two pipeline steps
        knn = Pipeline([('dimred', self.dim_reduction_method()),
                        ('knn', KNeighborsClassifier(**self._params))])
        self._pipeline = True
    else:
        knn = KNeighborsClassifier(**self._params)
    self.apply_gridsearch(knn)
    self.print_progress(self._start_time)
    return self
Example #11
Source File: classifier.py From libfaceid with MIT License | 6 votes |
def __init__(self, classifier=FaceClassifierModels.DEFAULT):
    self._clf = None
    if classifier == FaceClassifierModels.LINEAR_SVM:
        self._clf = SVC(C=1.0, kernel="linear", probability=True)
    elif classifier == FaceClassifierModels.NAIVE_BAYES:
        self._clf = GaussianNB()
    elif classifier == FaceClassifierModels.RBF_SVM:
        self._clf = SVC(C=1, kernel='rbf', probability=True, gamma=2)
    elif classifier == FaceClassifierModels.NEAREST_NEIGHBORS:
        self._clf = KNeighborsClassifier(1)
    elif classifier == FaceClassifierModels.DECISION_TREE:
        self._clf = DecisionTreeClassifier(max_depth=5)
    elif classifier == FaceClassifierModels.RANDOM_FOREST:
        self._clf = RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)
    elif classifier == FaceClassifierModels.NEURAL_NET:
        self._clf = MLPClassifier(alpha=1)
    elif classifier == FaceClassifierModels.ADABOOST:
        self._clf = AdaBoostClassifier()
    elif classifier == FaceClassifierModels.QDA:
        self._clf = QuadraticDiscriminantAnalysis()
    print("classifier={}".format(FaceClassifierModels(classifier)))
Example #12
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_classification():
    # Check classification for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyClassifier(),
                           Perceptron(tol=1e-3),
                           DecisionTreeClassifier(),
                           KNeighborsClassifier(),
                           SVC(gamma="scale")]:
        for params in grid:
            BaggingClassifier(base_estimator=base_estimator,
                              random_state=rng,
                              **params).fit(X_train, y_train).predict(X_test)
Example #13
Source File: knn_missing_data.py From Generative-ConvACs with MIT License | 6 votes |
def knn_masked_data(trX, trY, missing_data_dir, input_shape, k):

    raw_im_data = np.loadtxt(join(script_dir, missing_data_dir, 'index.txt'), delimiter=' ', dtype=str)
    raw_mask_data = np.loadtxt(join(script_dir, missing_data_dir, 'index_mask.txt'), delimiter=' ', dtype=str)
    # Using 'brute' method since we only want to do one query per classifier
    # so this will be quicker as it avoids overhead of creating a search tree
    knn_m = KNeighborsClassifier(algorithm='brute', n_neighbors=k)
    prob_Y_hat = np.zeros((raw_im_data.shape[0], int(np.max(trY) + 1)))
    total_images = raw_im_data.shape[0]
    pbar = progressbar.ProgressBar(widgets=[progressbar.FormatLabel('\rProcessed %(value)d of %(max)d Images '), progressbar.Bar()],
                                   maxval=total_images, term_width=50).start()
    for i in range(total_images):
        mask_im = load_image(join(script_dir, missing_data_dir, raw_mask_data[i][0]), input_shape, 1).reshape(np.prod(input_shape))
        mask = np.logical_not(mask_im > eps)  # since mask is 1 at missing locations
        v_im = load_image(join(script_dir, missing_data_dir, raw_im_data[i][0]), input_shape, 255).reshape(np.prod(input_shape))
        rep_mask = np.tile(mask, (trX.shape[0], 1))
        # Corrupt whole training set according to the current mask
        corr_trX = np.multiply(trX, rep_mask)
        knn_m.fit(corr_trX, trY)
        prob_Y_hat[i, :] = knn_m.predict_proba(v_im.reshape(1, -1))
        pbar.update(i)
    pbar.finish()
    return prob_Y_hat
Example #14
Source File: structural_tests.py From drifter_ml with MIT License | 5 votes |
def cls_supervised_clustering(self, data):
    k_measures = []
    X = data[self.column_names]
    y = data[self.target_name]
    for k in range(2, 12):
        knn = neighbors.KNeighborsClassifier(n_neighbors=k)
        knn.fit(X, y)
        y_pred = knn.predict(X)
        k_measures.append((k, metrics.mean_squared_error(y, y_pred)))
    sorted_k_measures = sorted(k_measures, key=lambda t: t[1])
    lowest_mse = sorted_k_measures[0]
    best_k = lowest_mse[0]
    return best_k
Example #15
Source File: predictor.py From quantified-self with MIT License | 5 votes |
def __init__(self, n_neighbors=8, slackbot=None):
    self.knn = KNeighborsClassifier(n_neighbors=n_neighbors, weights="distance")

    skill_data = SkillData()
    data_X, data_y = SkillDataLoader().make_data_set(skill_data.q)
    self.knn.fit(data_X, data_y)

    if slackbot is None:
        self.slackbot = SlackerAdapter()
    else:
        self.slackbot = slackbot
Example #16
Source File: sentiment_analysis_ml.py From Sentiment_Analysis_cnn_lstm_cnnlstm_textcnn_bilstm with Apache License 2.0 | 5 votes |
def nn_classifier(n_neighbors, train_vecs, y_train, test_vecs, y_test):
    clf = neighbors.KNeighborsClassifier(n_neighbors, weights='uniform')
    clf.fit(train_vecs, y_train)
    joblib.dump(clf, storedpaths + 'model_nn.pkl')
    test_scores = clf.score(test_vecs, y_test)
    return test_scores

# Train the LogisticRegression classification algorithm
Example #17
Source File: test_nonparametric.py From numpy-ml with GNU General Public License v3.0 | 5 votes |
def test_knn_clf(N=15):
    np.random.seed(12345)
    i = 0
    while i < N:
        N = np.random.randint(2, 100)
        M = np.random.randint(2, 100)
        k = np.random.randint(1, N)
        n_classes = np.random.randint(2, 10)
        ls = np.min([np.random.randint(1, 10), N - 1])
        weights = "uniform"

        X = np.random.rand(N, M)
        X_test = np.random.rand(N, M)
        y = np.random.randint(0, n_classes, size=N)

        knn = KNN(k=k, leaf_size=ls, metric=euclidean, classifier=True, weights=weights)
        knn.fit(X, y)
        preds = knn.predict(X_test)

        gold = KNeighborsClassifier(
            p=2,
            metric="minkowski",
            leaf_size=ls,
            n_neighbors=k,
            weights=weights,
            algorithm="ball_tree",
        )
        gold.fit(X, y)
        gold_preds = gold.predict(X_test)

        for mine, theirs in zip(preds, gold_preds):
            np.testing.assert_almost_equal(mine, theirs)
        print("PASSED")
        i += 1
Example #18
Source File: _k_neighbors_classifier.py From coremltools with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _check_fitted(model):
    """Simple wrapper to check if the KNeighborsClassifier has been fitted."""
    return _sklearn_util.check_fitted(
        model, lambda m: hasattr(m, "_fit_method") or hasattr(m, "_fit_X")
    )
Example #19
Source File: KNN.py From AI_Sudoku with Creative Commons Zero v1.0 Universal | 5 votes |
def __init__(self, k):
    self.mnist = datasets.fetch_openml('mnist_784', data_home='mnist_dataset/')
    self.data, self.target = self.mnist.data, self.mnist.target
    # Make an array of indices the size of MNIST to use for making the data sets.
    # This array is in random order, so we can use it to scramble up the MNIST data
    self.indx = np.random.choice(len(self.target), 70000, replace=False)
    # Initialising the classifier
    self.classifier = KNeighborsClassifier(n_neighbors=k)

# method for building the datasets to test with
Example #20
Source File: scikitlearn.py From sia-cog with MIT License | 5 votes |
def getModels():
    result = []
    result.append("LinearRegression")
    result.append("BayesianRidge")
    result.append("ARDRegression")
    result.append("ElasticNet")
    result.append("HuberRegressor")
    result.append("Lasso")
    result.append("LassoLars")
    result.append("Rigid")
    result.append("SGDRegressor")
    result.append("SVR")
    result.append("MLPClassifier")
    result.append("KNeighborsClassifier")
    result.append("SVC")
    result.append("GaussianProcessClassifier")
    result.append("DecisionTreeClassifier")
    result.append("RandomForestClassifier")
    result.append("AdaBoostClassifier")
    result.append("GaussianNB")
    result.append("LogisticRegression")
    result.append("QuadraticDiscriminantAnalysis")
    return result
Example #21
Source File: _k_neighbors_classifier.py From coremltools with BSD 3-Clause "New" or "Revised" License | 5 votes |
def supports_output_scores(model):
    """KNeighborsClassifier models do not support output scores."""
    return False
Example #22
Source File: audio_transfer_learning.py From sklearn-audio-transfer-learning with ISC License | 5 votes |
def define_classification_model():
    """ Select and define the model you will use for the classifier. """
    if config['model_type'] == 'linearSVM':  # linearSVM can be faster than SVM
        return LinearSVC(C=1)
    elif config['model_type'] == 'SVM':  # non-linearSVM, we can use the kernel trick
        return SVC(C=1, kernel='rbf', gamma='scale')
    elif config['model_type'] == 'kNN':  # k-nearest neighbour
        return KNeighborsClassifier(n_neighbors=1, metric='cosine')
    elif config['model_type'] == 'perceptron':  # optimizes log-loss, also known as cross-entropy, with sgd
        return SGDClassifier(max_iter=600, verbose=0.5, loss='log', learning_rate='optimal')
    elif config['model_type'] == 'MLP':  # optimizes log-loss, also known as cross-entropy, with sgd
        return MLPClassifier(hidden_layer_sizes=(20,), max_iter=600, verbose=10,
                             solver='sgd', learning_rate='constant', learning_rate_init=0.001)
Example #23
Source File: utils.py From website-fingerprinting with MIT License | 5 votes |
def train(streams, labels):
    """ This function trains the classifier with the data. """
    # Shuffle the arrays.
    streams, labels = shuffle(streams, labels)

    stream_amount = len(streams)
    training_size = int(stream_amount * 0.9)

    # Get 90% of the streams for training purposes.
    training_x = streams[:training_size]
    training_y = labels[:training_size]

    # Get the remaining 10% of the streams for testing purposes.
    testing_x = streams[training_size:]
    testing_y = labels[training_size:]

    print("Training size: {}".format(training_size))
    print("Testing size: {}".format(stream_amount - training_size))

    # Initialize the classifier.
    clf = KNeighborsClassifier()

    # Now lets train our KNN classifier.
    clf = clf.fit(training_x, training_y)

    # Save a snapshot of this classifier.
    joblib.dump(clf, "./classifier-nb.dmp", compress=9)

    # Get the prediction.
    predictions = clf.predict(testing_x)

    print("Accuracy: %s%%" % (accuracy_score(testing_y, predictions) * 100,))
Example #24
Source File: data_trusting.py From lime-experiments with BSD 2-Clause "Simplified" License | 5 votes |
def get_classifier(name, vectorizer):
    if name == 'logreg':
        return linear_model.LogisticRegression(fit_intercept=True)
    if name == 'random_forest':
        return ensemble.RandomForestClassifier(n_estimators=1000, random_state=1, max_depth=5, n_jobs=10)
    if name == 'svm':
        return svm.SVC(probability=True, kernel='rbf', C=10, gamma=0.001)
    if name == 'tree':
        return tree.DecisionTreeClassifier(random_state=1)
    if name == 'neighbors':
        return neighbors.KNeighborsClassifier()
    if name == 'embforest':
        return embedding_forest.EmbeddingForest(vectorizer)
Example #25
Source File: parzen_windows.py From lime-experiments with BSD 2-Clause "Simplified" License | 5 votes |
def get_classifier(name, vectorizer):
    if name == 'logreg':
        return linear_model.LogisticRegression(fit_intercept=True)
    if name == 'random_forest':
        return ensemble.RandomForestClassifier(n_estimators=1000, random_state=1, max_depth=5, n_jobs=10)
    if name == 'svm':
        return svm.SVC(probability=True, kernel='rbf', C=10, gamma=0.001)
    if name == 'tree':
        return tree.DecisionTreeClassifier(random_state=1)
    if name == 'neighbors':
        return neighbors.KNeighborsClassifier()
    if name == 'embforest':
        return embedding_forest.EmbeddingForest(vectorizer)
Example #26
Source File: test_neighbors.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_dtype_convert():
    classifier = neighbors.KNeighborsClassifier(n_neighbors=1)
    CLASSES = 15
    X = np.eye(CLASSES)
    y = [ch for ch in 'ABCDEFGHIJKLMNOPQRSTU'[:CLASSES]]

    result = classifier.fit(X, y).predict(X)
    assert_array_equal(result, y)
Example #27
Source File: test_neighbors.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_knn_forcing_backend(backend, algorithm):
    # Non-regression test which ensure the knn methods are properly working
    # even when forcing the global joblib backend.
    with parallel_backend(backend):
        X, y = datasets.make_classification(n_samples=30, n_features=5,
                                            n_redundant=0, random_state=0)
        X_train, X_test, y_train, y_test = train_test_split(X, y)
        clf = neighbors.KNeighborsClassifier(n_neighbors=3,
                                             algorithm=algorithm,
                                             n_jobs=3)
        clf.fit(X_train, y_train)
        clf.predict(X_test)
        clf.kneighbors(X_test)
        clf.kneighbors_graph(X_test, mode='distance').toarray()
Example #28
Source File: test_neighbors.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_predict_sparse_ball_kd_tree():
    rng = np.random.RandomState(0)
    X = rng.rand(5, 5)
    y = rng.randint(0, 2, 5)
    nbrs1 = neighbors.KNeighborsClassifier(1, algorithm='kd_tree')
    nbrs2 = neighbors.KNeighborsRegressor(1, algorithm='ball_tree')
    for model in [nbrs1, nbrs2]:
        model.fit(X, y)
        assert_raises(ValueError, model.predict, csr_matrix(X))
Example #29
Source File: test_neighbors.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_metric_params_interface():
    assert_warns(SyntaxWarning, neighbors.KNeighborsClassifier,
                 metric_params={'p': 3})
Example #30
Source File: test_neighbors.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_kneighbors_classifier_predict_proba():
    # Test KNeighborsClassifier.predict_proba() method
    X = np.array([[0, 2, 0],
                  [0, 2, 1],
                  [2, 0, 0],
                  [2, 2, 0],
                  [0, 0, 2],
                  [0, 0, 1]])
    y = np.array([4, 4, 5, 5, 1, 1])
    cls = neighbors.KNeighborsClassifier(n_neighbors=3, p=1)  # cityblock dist
    cls.fit(X, y)
    y_prob = cls.predict_proba(X)
    real_prob = np.array([[0, 2. / 3, 1. / 3],
                          [1. / 3, 2. / 3, 0],
                          [1. / 3, 0, 2. / 3],
                          [0, 1. / 3, 2. / 3],
                          [2. / 3, 1. / 3, 0],
                          [2. / 3, 1. / 3, 0]])
    assert_array_equal(real_prob, y_prob)
    # Check that it also works with non integer labels
    cls.fit(X, y.astype(str))
    y_prob = cls.predict_proba(X)
    assert_array_equal(real_prob, y_prob)
    # Check that it works with weights='distance'
    cls = neighbors.KNeighborsClassifier(
        n_neighbors=2, p=1, weights='distance')
    cls.fit(X, y)
    y_prob = cls.predict_proba(np.array([[0, 2, 0], [2, 2, 2]]))
    real_prob = np.array([[0, 1, 0], [0, 0.4, 0.6]])
    assert_array_almost_equal(real_prob, y_prob)