Python sklearn.neighbors Examples
The following are 22 code examples of the sklearn.neighbors module.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn, or try the search function.
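Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) showing the most common entry point of the module, KNeighborsClassifier, on toy data:

# Minimal illustrative sketch of sklearn.neighbors.KNeighborsClassifier.
import sklearn.neighbors

X_train = [[0.0], [1.0], [2.0], [3.0]]   # toy 1-D features
y_train = [0, 0, 1, 1]                   # toy labels

clf = sklearn.neighbors.KNeighborsClassifier(n_neighbors=3)
clf.fit(X_train, y_train)

print(clf.predict([[1.1]]))        # majority vote of the 3 nearest points -> [0]
print(clf.predict_proba([[0.9]]))  # per-class vote fractions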
Example #1
Source File: cub_utils.py From omgh with MIT License | 6 votes |
def _pre_calculate(self, force=False):
    if self.final_storage.check_exists(self.final_storage.instance_path) and not force:
        self.NNS = self.final_storage.load_instance(self.final_storage.instance_path)
    else:
        self.ssfeature_loader.setup()
        self.Xtrain = self.ssfeature_loader.load_train()
        self.Xtest = self.ssfeature_loader.load_test()
        if self.normalize:
            self.Xtrain = utils.l2_feat_norm(self.Xtrain)
            self.Xtest = utils.l2_feat_norm(self.Xtest)
        self.nn_model = sklearn.neighbors.NearestNeighbors(
            n_neighbors=self.n_neighbors, algorithm='ball_tree', metric='minkowski', p=2)
        self.nn_model.fit(self.Xtrain)
        self.NNS = self.nn_model.kneighbors(self.Xtest, self.n_neighbors, return_distance=False)
        self.final_storage.save_instance(self.final_storage.instance_path, self.NNS)

    # this needs change for larger n_neighbors
    if self.n_neighbors == 1:
        self.NNS = self.NNS.T[0]
    else:
        pass
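The method above caches, for every test sample, the indices of its nearest training samples. A stand-alone sketch of the same pattern outside the class context (data and names are illustrative, not from the omgh project):

import numpy as np
import sklearn.neighbors

Xtrain = np.random.RandomState(0).rand(100, 8)
Xtest = np.random.RandomState(1).rand(10, 8)

nn_model = sklearn.neighbors.NearestNeighbors(
    n_neighbors=1, algorithm='ball_tree', metric='minkowski', p=2)
nn_model.fit(Xtrain)

# return_distance=False yields only the index array, shape (n_test, n_neighbors).
NNS = nn_model.kneighbors(Xtest, 1, return_distance=False)
nearest_train_rows = NNS.T[0]  # same flattening as the example when n_neighbors == 1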
Example #2
Source File: embeddings.py From stanza-old with Apache License 2.0 | 6 votes |
def k_nearest_approx(self, vec, k):
    """Get the k nearest neighbors of a vector (in terms of cosine similarity).

    :param (np.array) vec: query vector
    :param (int) k: number of top neighbors to return

    :return (list[tuple[str, float]]): a list of (word, cosine similarity) pairs, in descending order
    """
    if not hasattr(self, 'lshf'):
        self.lshf = self._init_lsh_forest()
    # TODO(kelvin): make this inner product score, to be consistent with k_nearest
    distances, neighbors = self.lshf.kneighbors([vec], n_neighbors=k, return_distance=True)
    scores = np.subtract(1, distances)
    nbr_score_pairs = self._word_to_score(np.squeeze(neighbors), np.squeeze(scores))
    return sorted(nbr_score_pairs.items(), key=lambda x: x[1], reverse=True)
Example #3
Source File: models.py From jh-kaggle-util with Apache License 2.0 | 5 votes |
def run_sklearn():
    n_trees = 100
    n_folds = 3

    # https://www.analyticsvidhya.com/blog/2015/06/tuning-random-forest-model/
    alg_list = [
        ['rforest', RandomForestClassifier(n_estimators=1000, n_jobs=-1, verbose=1, max_depth=3)],
        ['extree', ExtraTreesClassifier(n_estimators=1000, max_depth=3, n_jobs=-1)],
        ['adaboost', AdaBoostClassifier(base_estimator=None, n_estimators=600, learning_rate=1.0)],
        ['knn', sklearn.neighbors.KNeighborsClassifier(n_neighbors=5, n_jobs=-1)]
    ]

    start_time = time.time()

    for name, alg in alg_list:
        train = jhkaggle.train_sklearn.TrainSKLearn("1", name, alg, False)
        train.run()
        train = None
Example #4
Source File: baselines.py From rmnist with MIT License | 5 votes |
def baselines(n):
    td, vd, ts = data_loader.load_data(n)
    classifiers = [
        sklearn.svm.SVC(C=1000),
        sklearn.svm.SVC(kernel="linear", C=0.1),
        sklearn.neighbors.KNeighborsClassifier(1),
        sklearn.tree.DecisionTreeClassifier(),
        sklearn.ensemble.RandomForestClassifier(max_depth=10, n_estimators=500, max_features=1),
        sklearn.neural_network.MLPClassifier(alpha=1, hidden_layer_sizes=(500, 100))
    ]
    for clf in classifiers:
        clf.fit(td[0], td[1])
        print "\n{}: {}".format(type(clf).__name__, round(clf.score(vd[0], vd[1])*100, 2))
Example #5
Source File: transfer.py From rmnist with MIT License | 5 votes |
def transfer(n):
    td, vd, ts = data_loader.load_data(n, abstract=True, expanded=expanded)
    classifiers = [
        #sklearn.svm.SVC(),
        #sklearn.svm.SVC(kernel="linear", C=0.1),
        #sklearn.neighbors.KNeighborsClassifier(1),
        #sklearn.tree.DecisionTreeClassifier(),
        #sklearn.ensemble.RandomForestClassifier(max_depth=10, n_estimators=500, max_features=1),
        sklearn.neural_network.MLPClassifier(alpha=1.0, hidden_layer_sizes=(300,), max_iter=500)
    ]
    for clf in classifiers:
        clf.fit(td[0], td[1])
        print "\n{}: {}".format(type(clf).__name__, round(clf.score(vd[0], vd[1])*100, 2))
Example #6
Source File: embeddings.py From stanza-old with Apache License 2.0 | 5 votes |
def _init_lsh_forest(self):
    """Construct an LSH forest for nearest neighbor search."""
    import sklearn.neighbors
    lshf = sklearn.neighbors.LSHForest()
    lshf.fit(self.array)
    return lshf
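Note that LSHForest was deprecated in scikit-learn 0.19 and removed in 0.21, so this example only runs on older releases. On current versions a rough substitute is exact nearest-neighbor search with cosine distance; a sketch (not part of the stanza code, data is a stand-in for self.array):

import numpy as np
import sklearn.neighbors

array = np.random.RandomState(0).rand(1000, 50)   # stand-in for self.array

# Exact (not approximate) cosine nearest neighbors as an LSHForest replacement.
nn = sklearn.neighbors.NearestNeighbors(n_neighbors=10, metric='cosine')
nn.fit(array)
distances, neighbors = nn.kneighbors(array[:1], n_neighbors=5)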
Example #7
Source File: embeddings.py From stanza-old with Apache License 2.0 | 5 votes |
def k_nearest(self, vec, k):
    """Get the k nearest neighbors of a vector (in terms of highest inner products).

    :param (np.array) vec: query vector
    :param (int) k: number of top neighbors to return

    :return (list[tuple[str, float]]): a list of (word, score) pairs, in descending order
    """
    nbr_score_pairs = self.inner_products(vec)
    return sorted(nbr_score_pairs.items(), key=lambda x: x[1], reverse=True)[:k]
Example #8
Source File: test_cli.py From mlflow with Apache License 2.0 | 5 votes |
def sk_model(iris_data):
    x, y = iris_data
    knn_model = sklearn.neighbors.KNeighborsClassifier()
    knn_model.fit(x, y)
    return knn_model
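In the mlflow test, iris_data is a pytest fixture. A self-contained equivalent using the iris dataset bundled with scikit-learn (the fixture itself is assumed, not shown in the source):

import sklearn.datasets
import sklearn.neighbors

x, y = sklearn.datasets.load_iris(return_X_y=True)

knn_model = sklearn.neighbors.KNeighborsClassifier()
knn_model.fit(x, y)
print(knn_model.score(x, y))   # training accuracy, just to show the model is fitted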
Example #9
Source File: k_neighbors_classifier.py From lale with Apache License 2.0 | 5 votes |
def __init__(self, **hyperparams):
    self._hyperparams = hyperparams
    self._wrapped_model = sklearn.neighbors.KNeighborsClassifier(**self._hyperparams)
Example #10
Source File: utils.py From NeuroKit with MIT License | 5 votes |
def _get_embedded(signal, delay=1, dimension=2, r="default", distance="chebyshev", approximate=True, fuzzy=False):
    """Examples
    ----------
    >>> import neurokit2 as nk
    >>>
    >>> signal = nk.signal_simulate(duration=2, frequency=5)
    >>> delay = nk.complexity_delay(signal)
    >>>
    >>> embbeded, count = _get_embedded(signal, delay, r=0.2 * np.std(signal, ddof=1), dimension=2,
    ...                                 distance='chebyshev', approximate=False)
    """
    # Sanity checks
    if distance not in sklearn.neighbors.KDTree.valid_metrics:
        raise ValueError(
            "NeuroKit error: _get_embedded(): The given metric (%s) is not valid."
            "The valid metric names are: %s" % (distance, sklearn.neighbors.KDTree.valid_metrics)
        )

    # Get embedded
    embedded = complexity_embedding(signal, delay=delay, dimension=dimension)
    if approximate is False:
        embedded = embedded[:-1]  # Removes the last line

    if fuzzy is False:
        # Get neighbors count
        count = _get_count(embedded, r=r, distance=distance)
    else:
        # FuzzyEn: Remove the local baselines of vectors
        embedded -= np.mean(embedded, axis=1, keepdims=True)
        count = _get_count_fuzzy(embedded, r=r, distance=distance, n=1)
    return embedded, count


# =============================================================================
# Get Count
# =============================================================================
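The sanity check above relies on KDTree.valid_metrics, the list of metric names a KDTree accepts (how that list is exposed has shifted slightly across scikit-learn releases, so treat the attribute access as version-dependent). A small stand-alone sketch of building and querying a KDTree with the Chebyshev metric, which is what _get_count does further down:

import numpy as np
import sklearn.neighbors

embedded = np.random.RandomState(0).rand(200, 2)   # stand-in for the embedded signal

kdtree = sklearn.neighbors.KDTree(embedded, metric="chebyshev")
# Count, for every point, how many points lie within radius r of it.
counts = kdtree.query_radius(embedded, r=0.2, count_only=True)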
Example #11
Source File: graph.py From TextCategorization with MIT License | 5 votes |
def distance_lshforest(z, k=4, metric='cosine'):
    """Return an approximation of the k-nearest cosine distances."""
    assert metric is 'cosine'
    lshf = sklearn.neighbors.LSHForest()
    lshf.fit(z)
    dist, idx = lshf.kneighbors(z, n_neighbors=k + 1)
    assert dist.min() < 1e-10
    dist[dist < 0] = 0
    return dist, idx

# TODO: other ANNs s.a. NMSLIB, EFANNA, FLANN, Annoy, sklearn neighbors, PANN
Example #12
Source File: field_based_ml_field_detection.py From lexpredict-contraxsuite with GNU Affero General Public License v3.0 | 5 votes |
def init_classifier_impl(field_code: str, init_script: str):
    if init_script is not None:
        init_script = init_script.strip()

    if not init_script:
        from sklearn import tree as sklearn_tree
        return sklearn_tree.DecisionTreeClassifier()

    from sklearn import tree as sklearn_tree
    from sklearn import neural_network as sklearn_neural_network
    from sklearn import neighbors as sklearn_neighbors
    from sklearn import svm as sklearn_svm
    from sklearn import gaussian_process as sklearn_gaussian_process
    from sklearn.gaussian_process import kernels as sklearn_gaussian_process_kernels
    from sklearn import ensemble as sklearn_ensemble
    from sklearn import naive_bayes as sklearn_naive_bayes
    from sklearn import discriminant_analysis as sklearn_discriminant_analysis
    from sklearn import linear_model as sklearn_linear_model

    eval_locals = {
        'sklearn_linear_model': sklearn_linear_model,
        'sklearn_tree': sklearn_tree,
        'sklearn_neural_network': sklearn_neural_network,
        'sklearn_neighbors': sklearn_neighbors,
        'sklearn_svm': sklearn_svm,
        'sklearn_gaussian_process': sklearn_gaussian_process,
        'sklearn_gaussian_process_kernels': sklearn_gaussian_process_kernels,
        'sklearn_ensemble': sklearn_ensemble,
        'sklearn_naive_bayes': sklearn_naive_bayes,
        'sklearn_discriminant_analysis': sklearn_discriminant_analysis
    }
    return eval_script('classifier init script of field {0}'.format(field_code), init_script, eval_locals)
Example #13
Source File: graph.py From gconvRNN with MIT License | 5 votes |
def distance_lshforest(z, k=4, metric='cosine'):
    """Return an approximation of the k-nearest cosine distances."""
    assert metric is 'cosine'
    lshf = sklearn.neighbors.LSHForest()
    lshf.fit(z)
    dist, idx = lshf.kneighbors(z, n_neighbors=k+1)
    assert dist.min() < 1e-10
    dist[dist < 0] = 0
    return dist, idx

# TODO: other ANNs s.a. NMSLIB, EFANNA, FLANN, Annoy, sklearn neighbors, PANN
Example #14
Source File: similarity_encoder.py From dirty_cat with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_kmeans_prototypes(X, n_prototypes, hashing_dim=128, ngram_range=(3, 3), sparse=False,
                          sample_weight=None, random_state=None):
    """
    Computes prototypes based on:
      - dimensionality reduction (via hashing n-grams)
      - k-means clustering
      - nearest neighbor
    """
    vectorizer = HashingVectorizer(analyzer='char', norm=None, alternate_sign=False,
                                   ngram_range=ngram_range, n_features=hashing_dim)
    projected = vectorizer.transform(X)
    if not sparse:
        projected = projected.toarray()
    kmeans = KMeans(n_clusters=n_prototypes, random_state=random_state)
    kmeans.fit(projected, sample_weight=sample_weight)
    centers = kmeans.cluster_centers_
    neighbors = NearestNeighbors()
    neighbors.fit(projected)
    indexes_prototypes = np.unique(neighbors.kneighbors(centers, 1)[-1])
    if indexes_prototypes.shape[0] < n_prototypes:
        warnings.warn('Final number of unique prototypes is lower than '
                      'n_prototypes (expected)')
    return np.sort(X[indexes_prototypes])
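The key trick above is snapping each k-means center to the closest real sample, so that the returned prototypes are actual entries of X rather than synthetic centroids. A reduced sketch of just that step on numeric data (names and data are illustrative, not the dirty_cat API):

import numpy as np
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors

projected = np.random.RandomState(0).rand(500, 16)   # stand-in for hashed n-gram features

kmeans = KMeans(n_clusters=10, random_state=0).fit(projected)
centers = kmeans.cluster_centers_

neighbors = NearestNeighbors().fit(projected)
# kneighbors(centers, 1) -> (distances, indices); [-1] keeps the index array.
prototype_rows = np.unique(neighbors.kneighbors(centers, 1)[-1])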
Example #15
Source File: knn_matting.py From knn-matting with MIT License | 5 votes |
def knn_matte(img, trimap, mylambda=100):
    [m, n, c] = img.shape
    img, trimap = img/255.0, trimap/255.0
    foreground = (trimap > 0.99).astype(int)
    background = (trimap < 0.01).astype(int)
    all_constraints = foreground + background

    print('Finding nearest neighbors')
    a, b = np.unravel_index(np.arange(m*n), (m, n))
    feature_vec = np.append(np.transpose(img.reshape(m*n, c)), [a, b]/np.sqrt(m*m + n*n), axis=0).T
    nbrs = sklearn.neighbors.NearestNeighbors(n_neighbors=10, n_jobs=4).fit(feature_vec)
    knns = nbrs.kneighbors(feature_vec)[1]

    # Compute Sparse A
    print('Computing sparse A')
    row_inds = np.repeat(np.arange(m*n), 10)
    col_inds = knns.reshape(m*n*10)
    vals = 1 - np.linalg.norm(feature_vec[row_inds] - feature_vec[col_inds], axis=1)/(c+2)
    A = scipy.sparse.coo_matrix((vals, (row_inds, col_inds)), shape=(m*n, m*n))

    D_script = scipy.sparse.diags(np.ravel(A.sum(axis=1)))
    L = D_script - A
    D = scipy.sparse.diags(np.ravel(all_constraints[:, :, 0]))
    v = np.ravel(foreground[:, :, 0])
    c = 2*mylambda*np.transpose(v)
    H = 2*(L + mylambda*D)

    print('Solving linear system for alpha')
    warnings.filterwarnings('error')
    alpha = []
    try:
        alpha = np.minimum(np.maximum(scipy.sparse.linalg.spsolve(H, c), 0), 1).reshape(m, n)
    except Warning:
        x = scipy.sparse.linalg.lsqr(H, c)
        alpha = np.minimum(np.maximum(x[0], 0), 1).reshape(m, n)
    return alpha
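The matting code builds the sparse affinity matrix A by hand from the kneighbors output. For the plain "sparse k-NN graph" part of that construction, scikit-learn also provides sklearn.neighbors.kneighbors_graph; a hedged sketch (the weights here are raw distances, not the 1 - ||.||/(c+2) similarity used above, and the data is a stand-in for the pixel features):

import numpy as np
import sklearn.neighbors

feature_vec = np.random.RandomState(0).rand(1000, 5)   # stand-in for the per-pixel feature vectors

# Sparse (n_samples, n_samples) matrix holding the distances to the 10 nearest neighbors.
G = sklearn.neighbors.kneighbors_graph(feature_vec, n_neighbors=10, mode='distance', n_jobs=4)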
Example #16
Source File: advanced_supvervised_model_trainer.py From healthcareai-py with MIT License | 5 votes |
def knn(self, scoring_metric='roc_auc', hyperparameter_grid=None, randomized_search=True,
        number_iteration_samples=10):
    """
    A light wrapper for Sklearn's knn classifier that performs randomized search over an overridable default
    hyperparameter grid.

    Args:
        scoring_metric (str): Any sklearn scoring metric appropriate for classification
        hyperparameter_grid (dict): hyperparameters by name
        randomized_search (bool): True for randomized search (default)
        number_iteration_samples (int): Number of models to train during the randomized search for exploring the
            hyperparameter space. More may lead to a better model, but will take longer.

    Returns:
        TrainedSupervisedModel:
    """
    self.validate_classification('KNN')
    if hyperparameter_grid is None:
        neighbors = list(range(5, 26))
        hyperparameter_grid = {'n_neighbors': neighbors, 'weights': ['uniform', 'distance']}
        number_iteration_samples = 10

        print('KNN Grid: {}'.format(hyperparameter_grid))
    algorithm = get_algorithm(KNeighborsClassifier,
                              scoring_metric,
                              hyperparameter_grid,
                              randomized_search,
                              number_iteration_samples=number_iteration_samples)

    trained_supervised_model = self._create_trained_supervised_model(algorithm)

    return trained_supervised_model
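get_algorithm is a healthcareai helper; the same idea can be reproduced with scikit-learn's RandomizedSearchCV directly. A sketch on assumed toy data (not the healthcareai API; accuracy is used as the scorer here because iris is multiclass):

import sklearn.datasets
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neighbors import KNeighborsClassifier

x, y = sklearn.datasets.load_iris(return_X_y=True)

hyperparameter_grid = {'n_neighbors': list(range(5, 26)),
                       'weights': ['uniform', 'distance']}

# Sample 10 hyperparameter combinations at random and keep the best by CV score.
search = RandomizedSearchCV(KNeighborsClassifier(),
                            param_distributions=hyperparameter_grid,
                            n_iter=10,
                            scoring='accuracy',
                            cv=3)
search.fit(x, y)
print(search.best_params_)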
Example #17
Source File: mutual_information.py From NeuroKit with MIT License | 5 votes |
def _entropy(X, k=1):
    """Returns the entropy of X.

    From https://gist.github.com/GaelVaroquaux/ead9898bd3c973c40429.

    Parameters
    ----------
    X : array-like or shape (n_samples, n_features)
        The data the entropy of which is computed
    k : int (optional)
        number of nearest neighbors for density estimation

    Returns
    -------
    float
        entropy of X.

    Notes
    -----
    - Kozachenko, L. F. & Leonenko, N. N. 1987 Sample estimate of entropy of a random vector.
      Probl. Inf. Transm. 23, 95-101.
    - Evans, D. 2008 A computationally efficient estimator for mutual information,
      Proc. R. Soc. A 464 (2093), 1203-1215.
    - Kraskov A, Stogbauer H, Grassberger P. (2004). Estimating mutual information.
      Phys Rev E 69(6 Pt 2):066138.
    """
    # Distance to kth nearest neighbor
    r = _nearest_distances(X, k)  # squared distances
    n, d = X.shape
    volume_unit_ball = (np.pi ** (0.5 * d)) / scipy.special.gamma(0.5 * d + 1)

    # Perez-Cruz et al. (2008). Estimation of Information Theoretic Measures for
    # Continuous Random Variables, suggests returning:
    # return d*mean(log(r))+log(volume_unit_ball)+log(n-1)-log(k)
    return (
        d * np.mean(np.log(r + np.finfo(X.dtype).eps))
        + np.log(volume_unit_ball)
        + scipy.special.psi(n)
        - scipy.special.psi(k)
    )
Example #18
Source File: mutual_information.py From NeuroKit with MIT License | 5 votes |
def _nearest_distances(X, k=1):
    """From https://gist.github.com/GaelVaroquaux/ead9898bd3c973c40429

    X = array(N,M)
    N = number of points
    M = number of dimensions

    returns the distance to the kth nearest neighbor for every point in X
    """
    knn = sklearn.neighbors.NearestNeighbors(n_neighbors=k + 1)
    knn.fit(X)
    d, _ = knn.kneighbors(X)  # the first nearest neighbor is itself
    return d[:, -1]  # returns the distance to the kth nearest neighbor
Example #19
Source File: utils.py From NeuroKit with MIT License | 5 votes |
def _get_count_fuzzy(embedded, r, distance="chebyshev", n=1):
    dist = sklearn.neighbors.DistanceMetric.get_metric(distance)
    dist = dist.pairwise(embedded)

    if n > 1:
        sim = np.exp(-(dist ** n) / r)
    else:
        sim = np.exp(-dist / r)
    # Return the count
    return np.sum(sim, axis=0)


# =============================================================================
# Get R
# =============================================================================
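sklearn.neighbors.DistanceMetric is the import path used by this NeuroKit version; newer scikit-learn releases expose the same class as sklearn.metrics.DistanceMetric and deprecate the neighbors path, so treat the exact import as version-dependent. A minimal sketch of the pairwise call on stand-in data:

import numpy as np
from sklearn.metrics import DistanceMetric   # sklearn.neighbors.DistanceMetric on older versions

embedded = np.random.RandomState(0).rand(50, 3)

dist = DistanceMetric.get_metric("chebyshev")
pairwise = dist.pairwise(embedded)   # (50, 50) matrix of Chebyshev distances
sim = np.exp(-pairwise / 0.2)        # fuzzy membership, as in _get_count_fuzzy with n=1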
Example #20
Source File: utils.py From NeuroKit with MIT License | 5 votes |
def _get_count(embedded, r, distance="chebyshev"):
    kdtree = sklearn.neighbors.KDTree(embedded, metric=distance)
    # Return the count
    return kdtree.query_radius(embedded, r, count_only=True).astype(np.float64)
Example #21
Source File: runDBSCAN.py From simsearch with MIT License | 4 votes |
def findEps(ssearch):
    """
    Find a good epsilon value to use.
    """

    ###########################################################################
    # Calculate nearest neighbors
    ###########################################################################

    # Create a nearest neighbors model--we need 2 nearest neighbors since the
    # nearest neighbor to a point is going to be itself.
    nbrs_model = NearestNeighbors(n_neighbors=2, algorithm='brute', metric='cosine').fit(ssearch.index.index)

    t0 = time.time()

    # Find nearest neighbors.
    distances, indices = nbrs_model.kneighbors(ssearch.index.index)

    elapsed = time.time() - t0
    print 'Took %.2f seconds' % elapsed

    distances = [d[1] for d in distances]
    indeces = [ind[1] for ind in indices]

    ###########################################################################
    # Histogram the nearest neighbor distances.
    ###########################################################################

    import matplotlib.pyplot as plt

    counts, bins, patches = plt.hist(distances, bins=16)
    plt.title("Nearest neighbor distances")
    plt.xlabel("Distance")
    plt.ylabel("Frequency")

    print '\n%d bins:' % len(counts)

    countAcc = 0
    num_points = len(ssearch.index.index)

    for i in range(0, len(counts)):
        countAcc += counts[i]

        # Calculate the percentage of values which fall below the upper limit
        # of this bin.
        prcnt = float(countAcc) / float(num_points) * 100.0

        print '  %.2f%% < %.2f' % (prcnt, bins[i + 1])
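The snippet above is Python 2 (print statements). The core idea, looking at every point's distance to its nearest other point and picking DBSCAN's eps from the "elbow" of that distribution, can be sketched in Python 3 as follows (ssearch.index.index is replaced by an assumed feature matrix):

import numpy as np
from sklearn.neighbors import NearestNeighbors

X = np.random.RandomState(0).rand(500, 20)   # stand-in for ssearch.index.index

# 2 neighbors: the closest "neighbor" of each point is the point itself.
nbrs_model = NearestNeighbors(n_neighbors=2, algorithm='brute', metric='cosine').fit(X)
distances, indices = nbrs_model.kneighbors(X)

# Distance to the nearest *other* point, sorted for a k-distance ("elbow") plot.
k_distances = np.sort(distances[:, 1])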
Example #22
Source File: runDBSCAN.py From simsearch with MIT License | 4 votes |
def findMinPts(ssearch, eps):
    """
    Find a good value for MinPts.
    """

    ###########################################################################
    # Count neighbors within threshold
    ###########################################################################

    print 'Calculating pair-wise distances...'

    # Calculate pair-wise cosine distance for all documents.
    t0 = time.time()

    DD = sklearn.metrics.pairwise.cosine_distances(ssearch.index.index)

    elapsed = time.time() - t0
    print '    Took %.2f seconds' % elapsed

    print 'Counting number of neighbors...'
    t0 = time.time()

    # Create a list to hold the number of neighbors for each point.
    numNeighbors = [0]*len(DD)

    for i in range(0, len(DD)):
        dists = DD[i]

        count = 0
        for j in range(0, len(DD)):
            if (dists[j] < eps):
                count += 1

        numNeighbors[i] = count

    elapsed = time.time() - t0
    print '    Took %.2f seconds' % elapsed

    ###############################################################################
    # Histogram the nearest neighbor distances.
    ###############################################################################

    import matplotlib.pyplot as plt

    counts, bins, patches = plt.hist(numNeighbors, bins=60)
    plt.title("Number of neighbors")
    plt.xlabel("Number of neighbors")
    plt.ylabel("Frequency")

    print '\n%d bins:' % (len(bins) - 1)

    binsStr = ''
    for b in bins:
        binsStr += '  %0.2f' % b

    print binsStr
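The double loop over the full distance matrix can also be expressed with the neighbors API itself: radius_neighbors returns, for each point, the indices of all points within eps, and the lengths of those arrays are the neighbor counts. A hedged sketch using the same kind of stand-in data as above:

import numpy as np
from sklearn.neighbors import NearestNeighbors

X = np.random.RandomState(0).rand(500, 20)   # stand-in for ssearch.index.index
eps = 0.3

nn = NearestNeighbors(metric='cosine', algorithm='brute').fit(X)
neighborhoods = nn.radius_neighbors(X, radius=eps, return_distance=False)

# Number of neighbors within eps for each point (includes the point itself).
numNeighbors = np.array([len(ind) for ind in neighborhoods])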