Python scipy.cluster.hierarchy.to_tree() Examples
The following are 11
code examples of scipy.cluster.hierarchy.to_tree().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module scipy.cluster.hierarchy, or try the search function.
Example #1
Source File: From KitNET-py with MIT License | 5 votes |
def cluster(self, maxClust):
    """Hierarchically cluster the features and split the dendrogram.

    A linkage matrix is built from the strict upper triangle of the matrix
    returned by ``self.corrDist()``; the resulting tree is then handed to
    ``self.__breakClust__`` together with the clamped ``maxClust``.

    Parameters
    ----------
    maxClust : int
        Maximum number of elements allowed per cluster. Values below 1 are
        raised to 1 and values above ``self.n`` are lowered to ``self.n``.

    Returns
    -------
    The value returned by ``self.__breakClust__`` for the dendrogram root.
    """
    D = self.corrDist()
    # linkage() takes the condensed form: upper triangle, diagonal excluded.
    Z = linkage(D[np.triu_indices(self.n, 1)])  # create a linkage matrix based on the distance matrix
    if maxClust < 1:
        maxClust = 1
    if maxClust > self.n:
        maxClust = self.n
    # Named ``cluster_map`` rather than ``map`` so the builtin is not shadowed.
    cluster_map = self.__breakClust__(to_tree(Z), maxClust)
    return cluster_map

# a recursive helper function which breaks down the dendrogram branches until all clusters have no more than maxClust elements
Example #2
Source File: From anvio with GNU General Public License v3.0 | 5 votes |
def get_clustering_as_tree(vectors, linkage=constants.linkage_method_default, distance=constants.distance_metric_default, progress=progress):
    """Cluster ``vectors`` hierarchically and return the result as a tree.

    Parameters
    ----------
    vectors
        Observations passed straight to ``hierarchy.linkage``.
    linkage
        Linkage method name, forwarded as ``method=``.
    distance
        Distance metric name, forwarded as ``metric=``.
    progress
        Object whose ``update(str)`` method is called with status messages.

    Returns
    -------
    The root node produced by ``hierarchy.to_tree(..., rd=False)``.
    """
    is_distance_and_linkage_compatible(distance, linkage)

    progress.update('Clustering data with "%s" linkage using "%s" distance' % (linkage, distance))
    # Keep the matrix in its own name instead of rebinding the ``linkage``
    # parameter, which holds the method name.
    linkage_matrix = hierarchy.linkage(vectors, metric=distance, method=linkage)

    progress.update('Recovering the tree from the clustering result')
    tree = hierarchy.to_tree(linkage_matrix, rd=False)

    return tree
Example #3
Source File: From Kitsune-py with MIT License | 5 votes |
def cluster(self, maxClust):
    """Build a dendrogram from the correlation distances and break it up.

    The strict upper triangle of ``self.corrDist()`` is fed to ``linkage``,
    the linkage matrix is converted to a tree, and ``self.__breakClust__``
    is called on that tree with the clamped ``maxClust``.

    Parameters
    ----------
    maxClust : int
        Maximum number of elements allowed per cluster; clamped into the
        range ``[1, self.n]`` before use.

    Returns
    -------
    The value returned by ``self.__breakClust__`` for the dendrogram root.
    """
    D = self.corrDist()
    # Condensed distance vector: entries above the diagonal only.
    Z = linkage(D[np.triu_indices(self.n, 1)])  # create a linkage matrix based on the distance matrix
    if maxClust < 1:
        maxClust = 1
    if maxClust > self.n:
        maxClust = self.n
    # Avoid the name ``map`` so the builtin stays usable inside this method.
    cluster_map = self.__breakClust__(to_tree(Z), maxClust)
    return cluster_map

# a recursive helper function which breaks down the dendrogram branches until all clusters have no more than maxClust elements
Example #4
Source File: From scedar with MIT License | 5 votes |
def hct_from_lkg(hac_z):
    """Return an ``HClustTree`` wrapping the tree form of linkage matrix ``hac_z``."""
    root_node = sch.to_tree(hac_z)
    return HClustTree(root_node)
Example #5
Source File: From scedar with MIT License | 5 votes |
def test_mirac_wrong_args(self):
    """Check that ``cluster.MIRAC`` raises ``ValueError`` on invalid arguments."""
    x = np.zeros((10, 10))
    # wrong min_cl_n
    with pytest.raises(ValueError):
        cluster.MIRAC(x, metric='euclidean', min_cl_n=-0.1)

    # wrong cl_mdl_scale_factor
    with pytest.raises(ValueError):
        cluster.MIRAC(x, metric='euclidean', cl_mdl_scale_factor=-0.1)

    # wrong encode type
    with pytest.raises(ValueError):
        cluster.MIRAC(x, metric='euclidean', encode_type='1')

    with pytest.raises(ValueError):
        cluster.MIRAC(x, metric='euclidean', encode_type=1)

    with pytest.raises(ValueError):
        cluster.MIRAC(x, metric='euclidean', dim_reduct_method='NONN')

    # hac tree n_leaves different from n_samples
    z = sch.linkage([[0], [5], [6], [8], [9], [12]],
                    method='single', optimal_ordering=True)
    hct = eda.HClustTree(sch.to_tree(z))
    with pytest.raises(ValueError):
        cluster.MIRAC(x, metric='euclidean', hac_tree=hct)

    # no specific purpose. Just to exhaust the coverage
    # NOTE(review): the listing ends here; any statements that followed this
    # comment upstream are not shown and must be restored from the original.
Example #6
Source File: From scedar with MIT License | 5 votes |
def test_bi_partition_min_no_spl(self):
    """Bi-partition a 4-leaf tree with ``soft_min_subtree_size=2``.

    Verifies the returned labels, sample ids and subtrees, and that the
    tree's own left/right leaf ids reflect the partition afterwards.
    """
    # Diagram spacing reconstructed from a whitespace-collapsed listing.
    #  ____|____ 6
    #  |    ___|____ 5
    #  |    |    __|___ 4
    #  |    |    |     |
    #  3    2    1     0
    z = sch.linkage([[0, 0], [1, 1], [3, 3], [6, 6]],
                    metric='euclidean', method='complete',
                    optimal_ordering=True)
    hct = eda.HClustTree(sch.to_tree(z))
    assert hct.leaf_ids() == [3, 2, 1, 0]
    labs, sids, lst, rst = hct.bi_partition(
        soft_min_subtree_size=2, return_subtrees=True)
    assert labs == [0, 0, 1, 1]
    assert sids == [3, 2, 1, 0]
    # hct should be changed accordingly
    assert hct.leaf_ids() == [3, 2, 1, 0]
    assert hct.left_leaf_ids() == [3, 2]
    assert hct.right_leaf_ids() == [1, 0]
    # subtrees
    assert lst.leaf_ids() == [3, 2]
    assert rst.leaf_ids() == [1, 0]
    # prev
    assert lst._prev is hct
    assert rst._prev is hct
    # ids
    # TODO(review): the left-hand operands of the next three assertions were
    # lost from this listing; restore them from the upstream test.
    #   assert <missing> == 5
    #   assert <missing> == 3
    #   assert <missing> == 2
    # ids
    # TODO(review): same as above, operands missing.
    #   assert <missing> == 4
    #   assert <missing> == 1
    #   assert <missing> == 0
Example #7
Source File: From scedar with MIT License | 5 votes |
def test_bi_partition_min_no_spl_lr_rev(self):
    """Bi-partition a 4-leaf tree whose root children were swapped.

    Same data as the non-reversed case, but the root's left and right
    subtrees are exchanged before the tree is wrapped in ``HClustTree``.
    """
    # left right reversed
    # Diagram spacing reconstructed from a whitespace-collapsed listing.
    #  ____|____ 6
    #  |    ___|____ 5
    #  |    |    __|___ 4
    #  |    |    |     |
    #  3    2    1     0
    z = sch.linkage([[0, 0], [1, 1], [3, 3], [6, 6]],
                    metric='euclidean', method='complete',
                    optimal_ordering=True)
    root = sch.to_tree(z)
    # reverse left right subtree
    root_left = root.left
    root.left = root.right
    root.right = root_left
    hct = eda.HClustTree(root)
    assert hct.leaf_ids() == [2, 1, 0, 3]
    labs, sids, lst, rst = hct.bi_partition(
        soft_min_subtree_size=2, return_subtrees=True)
    assert labs == [0, 0, 1, 1]
    assert sids == [2, 1, 0, 3]
    # hct should be changed accordingly
    assert hct.leaf_ids() == [2, 1, 0, 3]
    assert hct.left_leaf_ids() == [2, 1]
    assert hct.right_leaf_ids() == [0, 3]
    # subtrees
    assert lst.leaf_ids() == [2, 1]
    assert rst.leaf_ids() == [0, 3]
    # prev
    assert lst._prev is hct
    assert rst._prev is hct
    assert hct._left is lst._node
    assert hct._right is rst._node
    # ids
    # TODO(review): the left-hand operands of the next three assertions were
    # lost from this listing; restore them from the upstream test.
    #   assert <missing> == 4
    #   assert <missing> == 0
    #   assert <missing> == 3
    # ids
    # TODO(review): same as above, operands missing.
    #   assert <missing> == 5
    #   assert <missing> == 2
    #   assert <missing> == 1
Example #8
Source File: From scedar with MIT License | 5 votes |
def test_bi_partition_min_spl(self):
    """Bi-partition a 5-leaf tree with ``soft_min_subtree_size=2``.

    Before partitioning the left subtree holds the single leaf 4; afterwards
    the test expects left leaves ``[4, 3, 2]`` and right leaves ``[1, 0]``.
    """
    # Diagram spacing reconstructed from a whitespace-collapsed listing.
    #  _____|_____
    #  |     ____|____
    #  |   __|__    __|__
    #  |   |   |    |   |
    #  4   3   2    1   0
    z = sch.linkage([[0, 0], [1, 1], [3, 3], [4, 4], [10, 10]],
                    metric='euclidean', method='complete',
                    optimal_ordering=True)
    hct = eda.HClustTree(sch.to_tree(z))
    assert hct.leaf_ids() == [4, 3, 2, 1, 0]
    assert hct.left_leaf_ids() == [4]
    assert hct.right().left().leaf_ids() == [3, 2]
    assert hct.right().right().leaf_ids() == [1, 0]
    labs, sids, lst, rst = hct.bi_partition(
        soft_min_subtree_size=2, return_subtrees=True)
    assert labs == [0, 0, 0, 1, 1]
    assert sids == [4, 3, 2, 1, 0]
    # hct should be changed accordingly
    assert hct.leaf_ids() == [4, 3, 2, 1, 0]
    assert hct.left_leaf_ids() == [4, 3, 2]
    assert hct.right_leaf_ids() == [1, 0]
    # left
    assert lst._prev is hct
    # TODO(review): the left-hand operands of the next three assertions were
    # lost from this listing; restore them from the upstream test.
    #   assert <missing> == 4
    #   assert <missing> == 3
    #   assert <missing> == 2
    # right
    assert rst._prev is hct
    # TODO(review): same as above, operands missing.
    #   assert <missing> == 1
    #   assert <missing> == 0
Example #9
Source File: From scedar with MIT License | 5 votes |
def test_bi_partition_min_switch_spl(self):
    """Bi-partition a 6-leaf single-linkage tree with ``soft_min_subtree_size=3``.

    Expects an even 3/3 split with left leaves ``[0] + [1, 2]`` and right
    leaves ``[3] + [4, 5]``.
    """
    # Diagram spacing reconstructed from a whitespace-collapsed listing.
    #  _______|________
    #  |          _____|_____
    #  |      ____|____      |
    #  |    __|__    __|__   |
    #  |    |   |    |   |   |
    #  0    1   2    3   4   5
    # round 1: ( ((0, (1, 2)), (3, 4)), (5) )
    # round 2: ( (0, (1, 2), (3, (4, 5)) )
    z = sch.linkage([[0], [5], [6], [8], [9], [12]],
                    method='single', optimal_ordering=True)
    root = sch.to_tree(z)
    # TODO(review): the left-hand operands of the next six assertions were
    # lost from this listing; restore them from the upstream test.
    #   assert <missing> == 0
    #   assert <missing> == 5
    #   assert <missing> == 1
    #   assert <missing> == 2
    #   assert <missing> == 3
    #   assert <missing> == 4
    hct = eda.HClustTree(root)
    labs, sids, lst, rst = hct.bi_partition(
        soft_min_subtree_size=3, return_subtrees=True)
    assert labs == [0, 0, 0, 1, 1, 1]
    assert sids == [0, 1, 2, 3, 4, 5]
    # lst
    assert hct._left is lst._node
    assert lst._prev is hct
    assert lst.left_leaf_ids() == [0]
    assert lst.right_leaf_ids() == [1, 2]
    # rst
    assert hct._right is rst._node
    assert rst._prev is hct
    assert rst.left_leaf_ids() == [3]
    assert rst.right_leaf_ids() == [4, 5]
Example #10
Source File: From CompareM with GNU General Public License v3.0 | 4 votes |
def run(self, pairwise_value_file, method, similarity, max_sim_value, name_col1, name_col2, value_col, output_tree):
    """Perform hierarchical clustering on pairwise value files.

    Parameters
    ----------
    pairwise_value_file : str
        File with pairwise similarity or dissimilarity values.
    method : str
        Clustering method to use.
    similarity : boolean
        Flag indicating file contain similarity values.
    max_sim_value : float
        Maximum value of similarity scores.
    name_col1 : int
        Index of first column with genome names.
    name_col2 : int
        Index of second column with genome names.
    value_col : int
        Index of column with similarity or dissimilarity values.
    output_tree : str
        Path of the file the Newick string is written to.
    """

    diss_vector, genome_labels = self._parse_data(pairwise_value_file,
                                                  name_col1,
                                                  name_col2,
                                                  value_col,
                                                  similarity,
                                                  max_sim_value)

    clusters = hierarchy.linkage(diss_vector, method=method)
    tree = hierarchy.to_tree(clusters)
    newick_str = self._save_newick(tree, "", tree.dist, genome_labels)

    # Context manager guarantees the handle is closed even if write() raises.
    with open(output_tree, 'w') as fout:
        fout.write(newick_str + '\n')
Example #11
Source File: From SqueezeMeta with GNU General Public License v3.0 | 4 votes |
def run(self, pairwise_value_file, method, similarity, max_sim_value, name_col1, name_col2, value_col, output_tree):
    """Perform hierarchical clustering on pairwise value files.

    Parameters
    ----------
    pairwise_value_file : str
        File with pairwise similarity or dissimilarity values.
    method : str
        Clustering method to use.
    similarity : boolean
        Flag indicating file contain similarity values.
    max_sim_value : float
        Maximum value of similarity scores.
    name_col1 : int
        Index of first column with genome names.
    name_col2 : int
        Index of second column with genome names.
    value_col : int
        Index of column with similarity or dissimilarity values.
    output_tree : str
        Path of the file the Newick string is written to.
    """

    diss_vector, genome_labels = self._parse_data(pairwise_value_file,
                                                  name_col1,
                                                  name_col2,
                                                  value_col,
                                                  similarity,
                                                  max_sim_value)

    clusters = hierarchy.linkage(diss_vector, method=method)
    tree = hierarchy.to_tree(clusters)
    newick_str = self._save_newick(tree, "", tree.dist, genome_labels)

    # Use a context manager so the file is closed even when the write fails.
    with open(output_tree, 'w') as fout:
        fout.write(newick_str + '\n')