Python scipy.cluster.hierarchy.to_tree() Examples

The following are 11 code examples of scipy.cluster.hierarchy.to_tree(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.cluster.hierarchy, or try the search function.
Example #1
Source File: corClust.py    From KitNET-py with MIT License 5 votes vote down vote up
def cluster(self,maxClust):
        """Cluster the features hierarchically and split the dendrogram.

        The result of ``self.corrDist()`` is indexed with the strict upper
        triangle for ``self.n`` items, linked, converted to a tree and handed
        to ``self.__breakClust__`` together with the clamped cluster size.

        Parameters
        ----------
        maxClust :
            Requested maximum cluster size; values below 1 become 1 and
            values above ``self.n`` become ``self.n``.

        Returns
        -------
        Whatever ``self.__breakClust__`` returns for the dendrogram root.
        """
        distances = self.corrDist()
        # the strict upper triangle is the condensed form linkage() accepts
        upper_triangle = np.triu_indices(self.n, 1)
        link_matrix = linkage(distances[upper_triangle])
        # clamp the requested size: lower bound first, then the upper bound
        size_limit = min(max(maxClust, 1), self.n)
        dendrogram_root = to_tree(link_matrix)
        return self.__breakClust__(dendrogram_root, size_limit)

    # a recursive helper function which breaks down the dendrogram branches until all clusters have no more than maxClust elements 
Example #2
Source File: clustering.py    From anvio with GNU General Public License v3.0 5 votes vote down vote up
def get_clustering_as_tree(vectors, linkage=constants.linkage_method_default, distance=constants.distance_metric_default, progress=progress):
    """Cluster `vectors` hierarchically and return the result as a tree.

    The distance/linkage pair is first passed to
    `is_distance_and_linkage_compatible`; then the linkage is computed with
    `hierarchy.linkage` and converted with `hierarchy.to_tree(..., rd=False)`.
    Progress messages are reported through `progress.update`.
    """
    is_distance_and_linkage_compatible(distance, linkage)

    status = 'Clustering data with "%s" linkage using "%s" distance' % (linkage, distance)
    progress.update(status)
    # keep the method name and the resulting matrix in separate variables
    linkage_matrix = hierarchy.linkage(vectors, metric=distance, method=linkage)

    progress.update('Recovering the tree from the clustering result')
    return hierarchy.to_tree(linkage_matrix, rd=False)
Example #3
Source File: corClust.py    From Kitsune-py with MIT License 5 votes vote down vote up
def cluster(self,maxClust):
        """Build a dendrogram over the features and break it into clusters.

        ``self.corrDist()`` supplies the distances; its strict upper triangle
        (for ``self.n`` items) is passed to ``linkage``. The tree from
        ``to_tree`` is then given to ``self.__breakClust__`` along with
        ``maxClust`` clamped to the range ``[1, self.n]``.

        Returns the value produced by ``self.__breakClust__``.
        """
        dist = self.corrDist()
        # condensed distances: entries strictly above the diagonal
        condensed = dist[np.triu_indices(self.n, 1)]
        Z = linkage(condensed)
        # keep the cluster size limit within [1, self.n]
        bounded = max(maxClust, 1)
        bounded = min(bounded, self.n)
        feature_map = self.__breakClust__(to_tree(Z), bounded)
        return feature_map

    # a recursive helper function which breaks down the dendrogram branches until all clusters have no more than maxClust elements 
Example #4
Source File: sdm.py    From scedar with MIT License 5 votes vote down vote up
def hct_from_lkg(hac_z):
        """Build an ``HClustTree`` from the tree ``sch.to_tree`` makes of ``hac_z``.

        ``hac_z`` is passed straight to ``sch.to_tree``, so it is presumably
        a linkage matrix -- confirm against callers.
        """
        root_node = sch.to_tree(hac_z)
        return HClustTree(root_node)
Example #5
Source File: test_mirac.py    From scedar with MIT License 5 votes vote down vote up
def test_mirac_wrong_args(self):
        """Each invalid argument to ``cluster.MIRAC`` must raise ValueError."""
        x = np.zeros((10, 10))
        # One entry per rejected keyword combination, in the order checked.
        invalid_kwargs = [
            # wrong min_cl_n (checked twice, as in the original test)
            {'min_cl_n': -0.1},
            {'min_cl_n': -0.1},
            # wrong cl_mdl_scale_factor
            {'cl_mdl_scale_factor': -0.1},
            # wrong encode type
            {'encode_type': '1'},
            {'encode_type': 1},
            # wrong dimension reduction method
            {'dim_reduct_method': 'NONN'},
        ]
        for bad_kwargs in invalid_kwargs:
            with pytest.raises(ValueError):
                cluster.MIRAC(x, metric='euclidean', **bad_kwargs)

        # hac tree n_leaves different from n_samples:
        # the tree has 6 leaves while x has 10 rows
        six_points = [[0], [5], [6], [8], [9], [12]]
        linkage_mat = sch.linkage(six_points, method='single',
                                  optimal_ordering=True)
        mismatched_tree = eda.HClustTree(sch.to_tree(linkage_mat))
        with pytest.raises(ValueError):
            cluster.MIRAC(x, metric='euclidean', hac_tree=mismatched_tree)

    # no specific purpose. Just to exhaust the coverage
Example #6
Source File: test_dense_sdm.py    From scedar with MIT License 5 votes vote down vote up
def test_bi_partition_min_no_spl(self):
        """bi_partition with soft_min_subtree_size=2 on a four-leaf tree."""
        # ____|____ 6
        # |    ___|____ 5
        # |    |    __|___ 4
        # |    |    |    |
        # 3    2    1    0
        points = [[0, 0], [1, 1], [3, 3], [6, 6]]
        linkage_mat = sch.linkage(points, metric='euclidean',
                                  method='complete', optimal_ordering=True)
        tree = eda.HClustTree(sch.to_tree(linkage_mat))
        assert tree.leaf_ids() == [3, 2, 1, 0]

        labels, sample_ids, left_sub, right_sub = tree.bi_partition(
            soft_min_subtree_size=2, return_subtrees=True)
        assert labels == [0, 0, 1, 1]
        assert sample_ids == [3, 2, 1, 0]

        # the tree itself should be changed accordingly
        assert tree.leaf_ids() == [3, 2, 1, 0]
        assert tree.left_leaf_ids() == [3, 2]
        assert tree.right_leaf_ids() == [1, 0]

        # returned subtrees hold the two halves
        assert left_sub.leaf_ids() == [3, 2]
        assert right_sub.leaf_ids() == [1, 0]

        # both subtrees point back to the partitioned tree
        assert left_sub._prev is tree
        assert right_sub._prev is tree

        # node ids of the left subtree
        assert left_sub._node.id == 5
        assert left_sub._node.left.id == 3
        assert left_sub._node.right.id == 2

        # node ids of the right subtree
        assert right_sub._node.id == 4
        assert right_sub._node.left.id == 1
        assert right_sub._node.right.id == 0
Example #7
Source File: test_dense_sdm.py    From scedar with MIT License 5 votes vote down vote up
def test_bi_partition_min_no_spl_lr_rev(self):
        """Same as the no-split case, but with the root's children swapped."""
        # left right reversed
        # ____|____ 6
        # |    ___|____ 5
        # |    |    __|___ 4
        # |    |    |    |
        # 3    2    1    0
        points = [[0, 0], [1, 1], [3, 3], [6, 6]]
        linkage_mat = sch.linkage(points, metric='euclidean',
                                  method='complete', optimal_ordering=True)
        root = sch.to_tree(linkage_mat)
        # reverse left right subtree
        root.left, root.right = root.right, root.left
        tree = eda.HClustTree(root)
        assert tree.leaf_ids() == [2, 1, 0, 3]

        labels, sample_ids, left_sub, right_sub = tree.bi_partition(
            soft_min_subtree_size=2, return_subtrees=True)
        assert labels == [0, 0, 1, 1]
        assert sample_ids == [2, 1, 0, 3]

        # the tree itself should be changed accordingly
        assert tree.leaf_ids() == [2, 1, 0, 3]
        assert tree.left_leaf_ids() == [2, 1]
        assert tree.right_leaf_ids() == [0, 3]

        # returned subtrees hold the two halves
        assert left_sub.leaf_ids() == [2, 1]
        assert right_sub.leaf_ids() == [0, 3]

        # back references and shared nodes
        assert left_sub._prev is tree
        assert right_sub._prev is tree
        assert tree._left is left_sub._node
        assert tree._right is right_sub._node

        # node ids of the right subtree
        assert right_sub._node.id == 4
        assert right_sub._node.left.id == 0
        assert right_sub._node.right.id == 3

        # node ids of the left subtree
        assert left_sub._node.id == 5
        assert left_sub._node.left.id == 2
        assert left_sub._node.right.id == 1
Example #8
Source File: test_dense_sdm.py    From scedar with MIT License 5 votes vote down vote up
def test_bi_partition_min_spl(self):
        """bi_partition with soft_min_subtree_size=2 on a five-leaf tree."""
        # _____|_____
        # |     ____|____
        # |   __|__   __|__
        # |   |   |   |   |
        # 4   3   2   1   0
        points = [[0, 0], [1, 1], [3, 3], [4, 4], [10, 10]]
        linkage_mat = sch.linkage(points, metric='euclidean',
                                  method='complete', optimal_ordering=True)
        tree = eda.HClustTree(sch.to_tree(linkage_mat))

        # shape before partitioning
        assert tree.leaf_ids() == [4, 3, 2, 1, 0]
        assert tree.left_leaf_ids() == [4]
        assert tree.right().left().leaf_ids() == [3, 2]
        assert tree.right().right().leaf_ids() == [1, 0]

        labels, sample_ids, left_sub, right_sub = tree.bi_partition(
            soft_min_subtree_size=2, return_subtrees=True)
        assert labels == [0, 0, 0, 1, 1]
        assert sample_ids == [4, 3, 2, 1, 0]

        # the tree itself should be changed accordingly
        assert tree.leaf_ids() == [4, 3, 2, 1, 0]
        assert tree.left_leaf_ids() == [4, 3, 2]
        assert tree.right_leaf_ids() == [1, 0]

        # left subtree
        assert left_sub._prev is tree
        assert left_sub._node.left.left.id == 4
        assert left_sub._node.left.right.id == 3
        assert left_sub._node.right.id == 2

        # right subtree
        assert right_sub._prev is tree
        assert right_sub._node.left.id == 1
        assert right_sub._node.right.id == 0
Example #9
Source File: test_dense_sdm.py    From scedar with MIT License 5 votes vote down vote up
def test_bi_partition_min_switch_spl(self):
        """bi_partition with soft_min_subtree_size=3 on a six-leaf tree."""
        # _______|________
        # |         _____|_____
        # |     ____|____     |
        # |   __|__   __|__   |
        # |   |   |   |   |   |
        # 0   1   2   3   4   5
        # round 1: ( ((0, (1, 2)), (3, 4)), (5) )
        # round 2: ( (0, (1, 2)), (3, (4, 5)) )
        points = [[0], [5], [6], [8], [9], [12]]
        linkage_mat = sch.linkage(points, method='single',
                                  optimal_ordering=True)
        root = sch.to_tree(linkage_mat)

        # shape of the scipy tree before partitioning
        assert root.left.id == 0
        assert root.right.right.id == 5
        assert root.right.left.left.left.id == 1
        assert root.right.left.left.right.id == 2
        assert root.right.left.right.left.id == 3
        assert root.right.left.right.right.id == 4

        tree = eda.HClustTree(root)
        labels, sample_ids, left_sub, right_sub = tree.bi_partition(
            soft_min_subtree_size=3, return_subtrees=True)
        assert labels == [0, 0, 0, 1, 1, 1]
        assert sample_ids == [0, 1, 2, 3, 4, 5]

        # left subtree
        assert tree._left is left_sub._node
        assert left_sub._prev is tree
        assert left_sub.left_leaf_ids() == [0]
        assert left_sub.right_leaf_ids() == [1, 2]

        # right subtree
        assert tree._right is right_sub._node
        assert right_sub._prev is tree
        assert right_sub.left_leaf_ids() == [3]
        assert right_sub.right_leaf_ids() == [4, 5]
Example #10
Source File: hierarchical_clustering.py    From CompareM with GNU General Public License v3.0 4 votes vote down vote up
def run(self, pairwise_value_file,
                    method, 
                    similarity,
                    max_sim_value,
                    name_col1,
                    name_col2,
                    value_col,
                    output_tree):
        """Perform hierarchical clustering on pairwise value files.

        Parameters
        ----------
        pairwise_value_file : str
            File with pairwise similarity or dissimilarity values.
        method : str
            Clustering method to use.
        similarity : boolean
            Flag indicating file contain similarity values.
        max_sim_value : float   
            Maximum value of similarity scores.
        name_col1 : int
            Index of first column with genome names.
        name_col2 : int
            Index of second column with genome names.
        value_col : int
            Index of column with similarity or dissimilarity values.
        output_tree : str
            Path of the file the resulting tree string is written to,
            followed by a newline.
        """

        diss_vector, genome_labels = self._parse_data(pairwise_value_file,
                                                        name_col1,
                                                        name_col2,
                                                        value_col,
                                                        similarity,
                                                        max_sim_value)

        clusters = hierarchy.linkage(diss_vector, method=method)

        tree = hierarchy.to_tree(clusters)
        newick_str = self._save_newick(tree, "", tree.dist, genome_labels)

        # The context manager closes the file even if the write fails.
        with open(output_tree, 'w') as fout:
            fout.write(newick_str + '\n')
Example #11
Source File: hierarchical_clustering.py    From SqueezeMeta with GNU General Public License v3.0 4 votes vote down vote up
def run(self, pairwise_value_file,
                    method, 
                    similarity,
                    max_sim_value,
                    name_col1,
                    name_col2,
                    value_col,
                    output_tree):
        """Perform hierarchical clustering on pairwise value files.

        Parameters
        ----------
        pairwise_value_file : str
            File with pairwise similarity or dissimilarity values.
        method : str
            Clustering method to use.
        similarity : boolean
            Flag indicating file contain similarity values.
        max_sim_value : float   
            Maximum value of similarity scores.
        name_col1 : int
            Index of first column with genome names.
        name_col2 : int
            Index of second column with genome names.
        value_col : int
            Index of column with similarity or dissimilarity values.
        output_tree : str
            Path of the file the resulting tree string is written to,
            followed by a newline.
        """

        diss_vector, genome_labels = self._parse_data(pairwise_value_file,
                                                        name_col1,
                                                        name_col2,
                                                        value_col,
                                                        similarity,
                                                        max_sim_value)

        clusters = hierarchy.linkage(diss_vector, method=method)

        tree = hierarchy.to_tree(clusters)
        newick_str = self._save_newick(tree, "", tree.dist, genome_labels)

        # Using a with-block guarantees the handle is released on any error.
        with open(output_tree, 'w') as fout:
            fout.write(newick_str + '\n')