Python umap.UMAP Examples
The following are 30 code examples of umap.UMAP().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module umap, or try the search function.
Example #1
Source File: helper.py From BERMUDA with MIT License | 6 votes |
def cal_UMAP(code, pca_dim=50, n_neighbors=30, min_dist=0.1, n_components=2, metric='cosine'):
    """Embed ``code`` with UMAP, compressing it with PCA first when it is wide.

    Args:
        code: num_cells * num_features
        pca_dim: when ``code`` has more than ``pca_dim`` columns, it is first
            reduced to ``pca_dim`` principal components
        n_neighbors: forwarded to ``umap.UMAP``
        min_dist: forwarded to ``umap.UMAP``
        n_components: forwarded to ``umap.UMAP``
        metric: forwarded to ``umap.UMAP``
    Returns:
        umap_code: num_cells * n_components
    """
    features = code
    if features.shape[1] > pca_dim:
        features = PCA(n_components=pca_dim).fit_transform(features)
    # random_state is pinned to 0, so the same seed is used on every call.
    reducer = umap.UMAP(
        n_neighbors=n_neighbors,
        min_dist=min_dist,
        n_components=n_components,
        metric=metric,
        random_state=0,
    )
    return reducer.fit_transform(features)
Example #2
Source File: EmbeddingsResolver.py From scattertext with Apache License 2.0 | 6 votes |
def project_embeddings(self, projection_model=None, x_dim=0, y_dim=1):
    '''
    Project the embeddings and restrict the corpus to the terms that received coordinates.

    :param projection_model: sklearn unsupervised model (e.g., PCA) by default the recommended model is umap.UMAP,
    which requires UMAP to be installed
    :param x_dim: int, default 0, dimension of transformation matrix for x-axis
    :param y_dim: int, default 1, dimension of transformation matrix for y-axis
    :return: tuple of (the updated corpus, DataFrame indexed by term with 'x' and 'y' columns)
    '''
    axes = self.project(projection_model)
    coords = pd.DataFrame({
        'term': list(self.vocab_),
        'x': axes.T[x_dim],
        'y': axes.T[y_dim],
    })
    coords = coords.set_index('term')
    # Align on the corpus terms; terms without a projected vector become NaN rows and are dropped.
    coords = coords.reindex(pd.Series(self.corpus_.get_terms()))
    coords = coords.dropna()
    unplaced_terms = set(self.corpus_.get_terms()) - set(coords.index)
    self.corpus_ = self.corpus_.remove_terms(unplaced_terms)
    # Re-align once more so the rows follow the term order of the trimmed corpus.
    coords = coords.reindex(self.corpus_.get_terms()).dropna()
    return self.corpus_, coords
Example #3
Source File: EmbeddingsResolver.py From scattertext with Apache License 2.0 | 6 votes |
def project(self, projection_model=None):
    '''
    Run the embeddings through a projection model and return the result.

    :param projection_model: sklearn unsupervised model (e.g., PCA) by default the recommended model is umap.UMAP,
    which requires UMAP to be installed
    :return: whatever ``projection_model.fit_transform`` returns for the stored embeddings
    :raises Exception: if no embeddings have been set, or if the default model is requested
    and umap cannot be imported
    '''
    if self.embeddings_ is None:
        raise Exception("Run set_embeddings_model or set_embeddings to get embeddings")
    if projection_model is None:
        try:
            import umap
        except ImportError as err:
            # Only a failed import means "not installed"; a bare except would also
            # swallow KeyboardInterrupt/SystemExit and hide the real cause.
            raise Exception(
                "Please install umap (pip install umap-learn) to use the default projection_model."
            ) from err
        projection_model = umap.UMAP(min_dist=0.5, metric='cosine')
    return projection_model.fit_transform(self.embeddings_)
Example #4
Source File: test_umap_trustworthiness.py From umap with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_discrete_metric_supervised_umap_trustworthiness():
    """Supervised UMAP with an "ordinal" target metric must embed blobs with trustworthiness >= 0.95."""
    data, labels = make_blobs(50, cluster_std=0.5, random_state=42)
    embedding = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        target_metric="ordinal",
        target_weight=0.8,
        n_epochs=100,
        random_state=42,
    ).fit_transform(data, labels)
    trust = trustworthiness(data, embedding, 10)
    assert_greater_equal(
        trust,
        0.95,
        # Adjacent literals are joined verbatim, so the separating space must be explicit.
        "Insufficiently trustworthy embedding for "
        "blobs dataset: {}".format(trust),
    )
Example #5
Source File: train.py From B-SOID with GNU General Public License v3.0 | 6 votes |
def main(train_folders: list):
    """
    Run the training pipeline: load data, extract features, embed with UMAP,
    cluster with HDBSCAN and train the neural-net classifier. When the module-level
    PLOT flag is set, the cluster plot is saved under OUTPUT_PATH and accuracy is plotted.

    :param train_folders: list, training data folders
    :return f_10fps: 2D array, features
    :return f_10fps_sc: second output of bsoid_feats (fed to the UMAP embedding)
    :return umap_embeddings: 2D array, embedded UMAP space
    :return hdb_assignments: HDBSCAN assignments
    :return soft_assignments: second-to-last output of bsoid_hdbscan's soft clustering
    :return soft_clusters: soft cluster output of bsoid_hdbscan
    :return nn_classifier: obj, MLP classifier
    :return scores: 1D array, cross-validated accuracy
    :return nn_assignments: neural net predictions
    """
    import bsoid_umap.utils.likelihoodprocessing
    filenames, training_data, perc_rect = bsoid_umap.utils.likelihoodprocessing.main(train_folders)
    f_10fps, f_10fps_sc = bsoid_feats(training_data)
    trained_umap, umap_embeddings = bsoid_umap_embed(f_10fps_sc)
    hdb_assignments, soft_clusters, soft_assignments = bsoid_hdbscan(umap_embeddings)
    nn_classifier, scores, nn_assignments = bsoid_nn(f_10fps, soft_assignments)
    if PLOT:
        timestr = time.strftime("_%Y%m%d_%H%M")
        # Only points with a non-negative HDBSCAN assignment are plotted.
        clustered = hdb_assignments >= 0
        fig1 = plot_classes(umap_embeddings[clustered], hdb_assignments[clustered])
        my_file1 = 'hdb_soft_assignments'
        svg_name = ''.join((my_file1, timestr, '.svg'))
        fig1.savefig(os.path.join(OUTPUT_PATH, svg_name))
        plot_accuracy(scores)
    return (
        f_10fps,
        f_10fps_sc,
        umap_embeddings,
        hdb_assignments,
        soft_assignments,
        soft_clusters,
        nn_classifier,
        scores,
        nn_assignments,
    )
Example #6
Source File: test_umap_on_iris.py From umap with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_umap_sparse_transform_on_iris(iris, iris_selection):
    """Fit on the selected sparse iris rows, transform the remaining rows, and require trustworthiness >= 0.80."""
    data = sparse.csr_matrix(iris.data[iris_selection])
    assert sparse.issparse(data)
    fitter = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        random_state=42,
        n_epochs=100,
        force_approximation_algorithm=True,
    ).fit(data)

    # The rows left out of the fit are the ones pushed through transform().
    new_data = sparse.csr_matrix(iris.data[~iris_selection])
    assert sparse.issparse(new_data)
    embedding = fitter.transform(new_data)
    trust = trustworthiness(new_data, embedding, 10)
    assert_greater_equal(
        trust,
        0.80,
        # Adjacent literals are joined verbatim, so the separating space must be explicit.
        "Insufficiently trustworthy transform for "
        "iris dataset: {}".format(trust),
    )


# UMAP Clusterability on Iris
# ---------------------------
Example #7
Source File: test_umap_trustworthiness.py From umap with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_string_metric_supervised_umap_trustworthiness():
    """Supervised UMAP with string labels and a "string" target metric must embed blobs with trustworthiness >= 0.95."""
    data, labels = make_blobs(50, cluster_std=0.5, random_state=42)
    # Map the integer blob labels onto strings so the string target metric is exercised.
    labels = np.array(["this", "that", "other"])[labels]
    embedding = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        target_metric="string",
        target_weight=0.8,
        n_epochs=100,
        random_state=42,
    ).fit_transform(data, labels)
    trust = trustworthiness(data, embedding, 10)
    assert_greater_equal(
        trust,
        0.95,
        # Adjacent literals are joined verbatim, so the separating space must be explicit.
        "Insufficiently trustworthy embedding for "
        "blobs dataset: {}".format(trust),
    )
Example #8
Source File: test_ingest.py From scanpy with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_ingest_map_embedding_umap():
    """Ingest's UMAP mapping of the new data must match transforming it with a UMAP fitted directly on X."""
    reference = sc.AnnData(X)
    query = sc.AnnData(T)

    sc.pp.neighbors(reference, method='umap', use_rep='X', n_neighbors=4, random_state=0)
    sc.tl.umap(reference, random_state=0)

    ingest = sc.tl.Ingest(reference)
    ingest.fit(query)
    ingest.map_embedding(method='umap')

    # Reference result: fit UMAP on X with matching settings, then transform T.
    direct = UMAP(min_dist=0.5, random_state=0, n_neighbors=4)
    direct.fit(X)
    expected = direct.transform(T)

    assert np.allclose(ingest._obsm['X_umap'], expected)
Example #9
Source File: test_umap_trustworthiness.py From umap with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_metric_supervised_umap_trustworthiness():
    """Supervised UMAP with an "l1" target metric must embed blobs with trustworthiness >= 0.95."""
    data, labels = make_blobs(50, cluster_std=0.5, random_state=42)
    embedding = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        target_metric="l1",
        target_weight=0.8,
        n_epochs=100,
        random_state=42,
    ).fit_transform(data, labels)
    trust = trustworthiness(data, embedding, 10)
    assert_greater_equal(
        trust,
        0.95,
        # Adjacent literals are joined verbatim, so the separating space must be explicit.
        "Insufficiently trustworthy embedding for "
        "blobs dataset: {}".format(trust),
    )
Example #10
Source File: train.py From B-SOID with GNU General Public License v3.0 | 6 votes |
def bsoid_hdbscan(umap_embeddings, hdbscan_params=HDBSCAN_PARAMS):
    """
    Trains HDBSCAN (unsupervised) given learned UMAP space.

    Fits one model per candidate minimum cluster size (0.6% to 2.0% of the number
    of instances, in 0.1% steps) and keeps the first model that yields the largest
    number of distinct labels.

    :param umap_embeddings: 2D array, embedded UMAP space
    :param hdbscan_params: dict, HDBSCAN params in GLOBAL_CONFIG
    :return assignments: HDBSCAN labels of the selected model
    :return soft_clusters: membership vectors from hdbscan.all_points_membership_vectors
    :return soft_assignments: argmax of soft_clusters along axis 1
    """
    # np.infty was removed in NumPy 2.0; np.inf exists in every NumPy release.
    highest_numulab = -np.inf
    min_cluster_range = range(6, 21)
    logging.info('Running HDBSCAN on {} instances in {} D space...'.format(*umap_embeddings.shape))
    for min_c in min_cluster_range:
        trained_classifier = hdbscan.HDBSCAN(
            prediction_data=True,
            min_cluster_size=int(round(0.001 * min_c * umap_embeddings.shape[0])),
            **hdbscan_params
        ).fit(umap_embeddings)
        n_labels = len(np.unique(trained_classifier.labels_))
        # Strict comparison: on ties the earlier (smaller) cluster size is kept.
        if n_labels > highest_numulab:
            logging.info('Adjusting minimum cluster size to maximize cluster number...')
            highest_numulab = n_labels
            best_clf = trained_classifier
    assignments = best_clf.labels_
    soft_clusters = hdbscan.all_points_membership_vectors(best_clf)
    soft_assignments = np.argmax(soft_clusters, axis=1)
    logging.info('Done predicting labels for {} instances in {} D space...'.format(*umap_embeddings.shape))
    return assignments, soft_clusters, soft_assignments
Example #11
Source File: train.py From B-SOID with GNU General Public License v3.0 | 6 votes |
def bsoid_umap_embed(f_10fps_sc, umap_params=UMAP_PARAMS):
    """
    Fit UMAP (unsupervised) on the transposed feature matrix and return the fitted model with its embedding.

    :param f_10fps_sc: 2D array, standardized/session features
    :param umap_params: dict, UMAP params in GLOBAL_CONFIG
    :return trained_umap: object, trained UMAP transformer
    :return umap_embeddings: 2D array, embedded UMAP space
    """
    feats_train = f_10fps_sc.T
    n_instances = feats_train.shape[0]
    n_dims = feats_train.shape[1]
    logging.info('Transforming all {} instances from {} D into {} D'.format(
        n_instances, n_dims, umap_params.get('n_components')))
    # power law: the neighbourhood size is the rounded square root of the instance count
    n_neighbors = int(round(np.sqrt(n_instances)))
    trained_umap = umap.UMAP(n_neighbors=n_neighbors, **umap_params).fit(feats_train)
    umap_embeddings = trained_umap.embedding_
    logging.info('Done non-linear transformation with UMAP from {} D into {} D.'.format(
        n_dims, umap_embeddings.shape[1]))
    return trained_umap, umap_embeddings
Example #12
Source File: umap_reconstruction.py From scikit-lego with MIT License | 6 votes |
def fit(self, X, y=None):
    """
    Fit the model using X as training data.

    :param X: array-like training data, validated with ``check_array``
        (presumably shape (n_samples, n_features) - confirm against callers).
    :param y: optional; forwarded unchanged to the underlying UMAP ``fit``.
    :return: Returns an instance of self.
    :raises ValueError: if ``n_components`` is below two or ``threshold`` is ``None``.
    """
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    if self.n_components < 2:
        raise ValueError("Number of components must be at least two.")
    # Test for None explicitly: a plain truthiness check would also reject a
    # threshold of 0 while reporting that the value is None.
    if self.threshold is None:
        raise ValueError("The `threshold` value cannot be `None`.")

    self.umap_ = umap.UMAP(
        n_components=self.n_components,
        n_neighbors=self.n_neighbors,
        min_dist=self.min_dist,
        metric=self.metric,
        random_state=self.random_state,
    )
    self.umap_.fit(X, y)
    self.offset_ = -self.threshold
    return self
Example #13
Source File: test_umap_validation_params.py From umap with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_umap_bad_init(nn_data):
    """fit() must raise ValueError for the unrecognised init string "foobar"."""
    model = UMAP(init="foobar")
    assert_raises(ValueError, model.fit, nn_data)
Example #14
Source File: test_umap_validation_params.py From umap with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_umap_negative_sample_rate(nn_data):
    """fit() must raise ValueError when negative_sample_rate is -1."""
    model = UMAP(negative_sample_rate=-1)
    assert_raises(ValueError, model.fit, nn_data)
Example #15
Source File: test_umap_validation_params.py From umap with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_umap_bad_metric(nn_data):
    """fit() must raise ValueError when metric is the integer 45."""
    model = UMAP(metric=45)
    assert_raises(ValueError, model.fit, nn_data)
Example #16
Source File: test_umap_validation_params.py From umap with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_umap_bad_numeric_init(nn_data):
    """fit() must raise ValueError when init is the bare number 42."""
    model = UMAP(init=42)
    assert_raises(ValueError, model.fit, nn_data)
Example #17
Source File: test_umap_validation_params.py From umap with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_umap_bad_matrix_init(nn_data):
    """fit() must raise ValueError when init is a 2x3 all-zero matrix."""
    bad_init = np.array([[0, 0, 0], [0, 0, 0]])
    model = UMAP(init=bad_init)
    assert_raises(ValueError, model.fit, nn_data)
Example #18
Source File: umap_view.py From phy with BSD 3-Clause "New" or "Revised" License | 5 votes |
def umap(x):
    """Reduce the dimension of the array x with a default-configured UMAP."""
    # Imported inside the function, so the package is only loaded when the function runs.
    from umap import UMAP

    reducer = UMAP()
    return reducer.fit_transform(x)
Example #19
Source File: visAnnos.py From scMatch with MIT License | 5 votes |
def CalCords(savefolder, em, visMethod):
    """Return 2-D coordinates for the columns of ``em``, cached as a CSV file.

    If ``<savefolder>/cords_<visMethod>.csv`` already exists it is loaded and
    returned without recomputing. Otherwise the transposed values of ``em`` are
    standardised, reduced to 10 principal components, projected to two
    dimensions with the requested method, written to that file and returned.

    Args:
        savefolder: directory holding the cached coordinates file.
        em: object exposing ``.T.values`` and ``.columns`` (presumably a pandas
            DataFrame - confirm against callers).
        visMethod: one of 'PCA', 'tSNE' or 'UMAP'.

    Returns:
        DataFrame with columns 'x' and 'y'; a freshly computed frame is indexed
        by ``em.columns`` with the index named 'barcode'.

    Raises:
        ValueError: if nothing is cached and ``visMethod`` is not supported.
    """
    cordFile = os.path.join(savefolder, 'cords_%s.csv' % visMethod)
    if os.path.exists(cordFile):
        return pd.read_csv(cordFile, index_col=0, header=0)

    # Reject unknown methods before the expensive scaling/PCA; previously they
    # fell through the if/elif chain and failed with an UnboundLocalError.
    if visMethod not in ('PCA', 'tSNE', 'UMAP'):
        raise ValueError(
            "Unsupported visMethod %r; expected 'PCA', 'tSNE' or 'UMAP'" % (visMethod,)
        )

    from sklearn.decomposition import PCA
    from sklearn.preprocessing import StandardScaler

    x = StandardScaler().fit_transform(em.T.values)
    principalComponents = PCA(n_components=10).fit_transform(x)

    if visMethod == 'PCA':
        components = principalComponents[:, :2]
    elif visMethod == 'tSNE':
        from sklearn.manifold import TSNE
        components = TSNE(n_components=2).fit_transform(principalComponents)
    else:  # 'UMAP'
        import umap
        components = umap.UMAP(n_components=2).fit_transform(principalComponents)

    cords = pd.DataFrame(data=components, columns=['x', 'y'], index=em.columns)
    cords.index.name = 'barcode'
    cords.to_csv(cordFile, index=True, header=True)
    return cords
Example #20
Source File: umap_view.py From phy with BSD 3-Clause "New" or "Revised" License | 5 votes |
def attach_to_controller(self, controller):
    """Register a creator for ``WaveformUMAPView`` in ``controller.view_creator``."""

    def coords(cluster_ids):
        """Must return a Bunch object with pos, spike_ids, spike_clusters."""
        # Select 200 spikes from the selected clusters.
        # WARNING: lasso and split will work but will *only split the shown subselection* of
        # spikes. You should use the `load_all` keyword argument to `coords()` to load all
        # spikes before computing the spikes inside the lasso, however (1) this could be
        # prohibitively long with UMAP, and (2) the coordinates will change when reperforming
        # the dimension reduction on all spikes, so the splitting would be meaningless anyway.
        # A warning is displayed when trying to split on a view that does not accept the
        # `load_all` keyword argument, because it means that all relevant spikes (even not
        # shown ones) are not going to be split.
        spike_ids = controller.selector(200, cluster_ids)

        # Cluster ids corresponding to the chosen spikes.
        spike_clusters = controller.supervisor.clustering.spike_clusters[spike_ids]

        # Waveforms of the spikes across all channels, so that every cluster uses
        # the same dimensions.
        waveforms = controller.model.get_waveforms(spike_ids, None)

        # Flatten to a 2D (n_spikes, n_channels * n_samples) array for the UMAP reduction.
        n_spikes, n_samples, n_channels = waveforms.shape
        by_channel = waveforms.transpose((0, 2, 1))  # (n_spikes, n_channels, n_samples)
        flattened = by_channel.reshape((n_spikes, n_samples * n_channels))

        # Perform the dimension reduction.
        return Bunch(pos=umap(flattened), spike_ids=spike_ids, spike_clusters=spike_clusters)

    def create_view():
        """Create and return a WaveformUMAPView fed by the cached ``coords`` function."""
        return WaveformUMAPView(coords=controller.context.cache(coords))

    # Maps a view name to a function that returns a view
    # when called with no argument.
    controller.view_creator['WaveformUMAPView'] = create_view
Example #21
Source File: test_umap_validation_params.py From umap with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_umap_negative_learning_rate(nn_data):
    """fit() must raise ValueError when learning_rate is -1.5."""
    model = UMAP(learning_rate=-1.5)
    assert_raises(ValueError, model.fit, nn_data)
Example #22
Source File: test_umap_validation_params.py From umap with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_umap_too_many_neighbors_warns(nn_data):
    """Fit on the first 100 rows with n_neighbors=2000 and check the supplied a and b are kept."""
    model = UMAP(a=1.2, b=1.75, n_neighbors=2000, n_epochs=11, init="random")
    subset = nn_data[:100, ]
    model.fit(subset)
    assert_equal(model._a, 1.2)
    assert_equal(model._b, 1.75)
Example #23
Source File: test_umap_validation_params.py From umap with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_umap_negative_n_neighbours(nn_data):
    """fit() must raise ValueError when n_neighbors is -1."""
    model = UMAP(n_neighbors=-1)
    assert_raises(ValueError, model.fit, nn_data)
Example #24
Source File: test_umap_validation_params.py From umap with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_umap_too_small_n_neighbours(nn_data):
    """fit() must raise ValueError when n_neighbors is 0.5."""
    model = UMAP(n_neighbors=0.5)
    assert_raises(ValueError, model.fit, nn_data)
Example #25
Source File: test_umap_validation_params.py From umap with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_umap_non_integer_n_components(nn_data):
    """fit() must raise ValueError when n_components is the non-integer 1.5."""
    model = UMAP(n_components=1.5)
    assert_raises(ValueError, model.fit, nn_data)
Example #26
Source File: test_umap_validation_params.py From umap with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_umap_negative_min_dist(nn_data):
    """fit() must raise ValueError when min_dist is -1."""
    model = UMAP(min_dist=-1)
    assert_raises(ValueError, model.fit, nn_data)
Example #27
Source File: test_umap_validation_params.py From umap with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_umap_bad_too_large_min_dist(nn_data):
    """fit() must raise ValueError when min_dist is 2.0."""
    model = UMAP(min_dist=2.0)
    # A RuntimeWarning about division by zero in the a, b curve fitting is
    # expected here; it is caught and ignored for this test.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        assert_raises(ValueError, model.fit, nn_data)
Example #28
Source File: test_umap_validation_params.py From umap with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_umap_too_large_op(nn_data):
    """fit() must raise ValueError when set_op_mix_ratio is 1.5."""
    model = UMAP(set_op_mix_ratio=1.5)
    assert_raises(ValueError, model.fit, nn_data)
Example #29
Source File: test_umap_validation_params.py From umap with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_umap_negative_op(nn_data):
    """fit() must raise ValueError when set_op_mix_ratio is -1.0."""
    model = UMAP(set_op_mix_ratio=-1.0)
    assert_raises(ValueError, model.fit, nn_data)
Example #30
Source File: test_umap_repeated_data.py From umap with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_repeated_points_large_n(repetition_dense):
    """With unique=True and n_neighbors=5, the fitted model must report _n_neighbors == 3 on this fixture."""
    fitted = UMAP(n_neighbors=5, unique=True).fit(repetition_dense)
    effective_neighbors = fitted._n_neighbors
    assert_equal(effective_neighbors, 3)