Python Examples of scipy.cluster.hierarchy.to_tree

Source File: corClust.py From KitNET-py with MIT License

5 votes

def cluster(self, maxClust):
        """Build a dendrogram from the distance matrix and split it by size.

        The square matrix returned by ``self.corrDist()`` is reduced to its
        strict upper triangle, linked hierarchically, converted to a tree and
        passed to ``self.__breakClust__`` along with the size limit.

        ``maxClust`` is first raised to at least 1 and then capped at
        ``self.n`` before it is forwarded.  The return value is whatever
        ``self.__breakClust__`` returns.
        """
        dist_matrix = self.corrDist()
        # linkage() takes the condensed form: upper triangle without the diagonal
        upper_idx = np.triu_indices(self.n, 1)
        link_matrix = linkage(dist_matrix[upper_idx])
        # same order as two sequential checks: floor at 1 first, then cap at n
        size_limit = min(max(maxClust, 1), self.n)
        return self.__breakClust__(to_tree(link_matrix), size_limit)

    # a recursive helper function which breaks down the dendrogram branches until all clusters have no more than maxClust elements

Source File: clustering.py From anvio with GNU General Public License v3.0

5 votes

def get_clustering_as_tree(vectors, linkage=constants.linkage_method_default, distance=constants.distance_metric_default, progress=progress):
    """Cluster ``vectors`` hierarchically and return the result as a tree.

    The distance/linkage pair is checked with
    ``is_distance_and_linkage_compatible`` before any work is done.  Progress
    messages are reported through ``progress.update``.  The tree is obtained
    from ``hierarchy.to_tree`` with ``rd=False``.
    """
    is_distance_and_linkage_compatible(distance, linkage)

    clustering_msg = 'Clustering data with "%s" linkage using "%s" distance' % (linkage, distance)
    progress.update(clustering_msg)
    # keep the method name and the resulting matrix under separate names
    linkage_matrix = hierarchy.linkage(vectors, metric=distance, method=linkage)

    progress.update('Recovering the tree from the clustering result')
    return hierarchy.to_tree(linkage_matrix, rd=False)

Source File: corClust.py From Kitsune-py with MIT License

5 votes

def cluster(self, maxClust):
        """Link the distance matrix hierarchically and break the tree by size.

        ``self.corrDist()`` supplies a square distance matrix; only its strict
        upper triangle is fed to ``linkage``.  The requested ``maxClust`` is
        bumped up to 1 if smaller and then limited to ``self.n``.  The tree
        and the adjusted limit go to ``self.__breakClust__``, whose result is
        returned unchanged.
        """
        distances = self.corrDist()
        # condensed distance vector: entries above the main diagonal only
        condensed = distances[np.triu_indices(self.n, 1)]
        linkage_matrix = linkage(condensed)
        # floor first, cap second, matching two sequential range checks
        limit = min(max(maxClust, 1), self.n)
        root = to_tree(linkage_matrix)
        return self.__breakClust__(root, limit)

    # a recursive helper function which breaks down the dendrogram branches until all clusters have no more than maxClust elements

Source File: sdm.py From scedar with MIT License

5 votes

def hct_from_lkg(hac_z):
        """Return an ``HClustTree`` wrapping the tree built from linkage ``hac_z``."""
        root_node = sch.to_tree(hac_z)
        return HClustTree(root_node)

Source File: test_mirac.py From scedar with MIT License

5 votes

def test_mirac_wrong_args(self):
        """Each invalid ``MIRAC`` argument below must raise ``ValueError``."""
        x = np.zeros((10, 10))
        # One entry per constructor call; the min_cl_n case is exercised twice.
        invalid_kwargs = (
            {'min_cl_n': -0.1},             # wrong min_cl_n
            {'min_cl_n': -0.1},
            {'cl_mdl_scale_factor': -0.1},  # wrong cl_mdl_scale_factor
            {'encode_type': '1'},           # wrong encode type
            {'encode_type': 1},
            {'dim_reduct_method': 'NONN'},
        )
        for bad_kwargs in invalid_kwargs:
            with pytest.raises(ValueError):
                cluster.MIRAC(x, metric='euclidean', **bad_kwargs)

        # hac tree n_leaves different from n_samples:
        # the tree below has 6 leaves while x has 10 rows
        six_points = [[0], [5], [6], [8], [9], [12]]
        linkage_z = sch.linkage(six_points, method='single',
                                optimal_ordering=True)
        mismatched_tree = eda.HClustTree(sch.to_tree(linkage_z))
        with pytest.raises(ValueError):
            cluster.MIRAC(x, metric='euclidean', hac_tree=mismatched_tree)

    # no specific purpose. Just to exhaust the coverage

Source File: test_dense_sdm.py From scedar with MIT License

5 votes

def test_bi_partition_min_no_spl(self):
        """Check ``bi_partition`` with ``soft_min_subtree_size=2`` on a 4-leaf tree.

        The tree is built by complete-linkage clustering of four 2-D points.
        The asserts pin the returned labels and sample ids, the leaf order of
        the tree and of both returned subtrees, their ``_prev`` links, and the
        node ids found under each subtree.
        """
        # Expected dendrogram (internal node ids on the right, leaf ids below):
        # ____|____ 6
        # |    ___|____ 5
        # |    |    __|___ 4
        # |    |    |    |
        # 3    2    1    0
        z = sch.linkage([[0, 0], [1, 1], [3, 3], [6, 6]],
                        metric='euclidean', method='complete',
                        optimal_ordering=True)
        hct = eda.HClustTree(sch.to_tree(z))
        assert hct.leaf_ids() == [3, 2, 1, 0]
        labs, sids, lst, rst = hct.bi_partition(
            soft_min_subtree_size=2, return_subtrees=True)
        assert labs == [0, 0, 1, 1]
        assert sids == [3, 2, 1, 0]
        # hct should be changed accordingly
        assert hct.leaf_ids() == [3, 2, 1, 0]
        assert hct.left_leaf_ids() == [3, 2]
        assert hct.right_leaf_ids() == [1, 0]
        # subtrees
        assert lst.leaf_ids() == [3, 2]
        assert rst.leaf_ids() == [1, 0]
        # prev: both subtrees point back to the partitioned tree
        assert lst._prev is hct
        assert rst._prev is hct
        # node ids under the left subtree
        assert lst._node.id == 5
        assert lst._node.left.id == 3
        assert lst._node.right.id == 2
        # node ids under the right subtree
        assert rst._node.id == 4
        assert rst._node.left.id == 1
        assert rst._node.right.id == 0

Source File: test_dense_sdm.py From scedar with MIT License

5 votes

def test_bi_partition_min_no_spl_lr_rev(self):
        """Check ``bi_partition`` after the root's children have been swapped.

        Same four 2-D points and complete linkage as the diagram below, but
        ``root.left`` and ``root.right`` are exchanged before the tree is
        wrapped, so the leaf order starts as ``[2, 1, 0, 3]``.  The asserts
        pin labels, sample ids, subtree leaves, parent/child links and node
        ids after ``bi_partition(soft_min_subtree_size=2)``.
        """
        # left right reversed
        # ____|____ 6
        # |    ___|____ 5
        # |    |    __|___ 4
        # |    |    |    |
        # 3    2    1    0
        z = sch.linkage([[0, 0], [1, 1], [3, 3], [6, 6]],
                        metric='euclidean', method='complete',
                        optimal_ordering=True)
        root = sch.to_tree(z)
        # reverse left right subtree
        root_left = root.left
        root.left = root.right
        root.right = root_left
        hct = eda.HClustTree(root)
        assert hct.leaf_ids() == [2, 1, 0, 3]
        labs, sids, lst, rst = hct.bi_partition(
            soft_min_subtree_size=2, return_subtrees=True)
        assert labs == [0, 0, 1, 1]
        assert sids == [2, 1, 0, 3]
        # hct should be changed accordingly
        assert hct.leaf_ids() == [2, 1, 0, 3]
        assert hct.left_leaf_ids() == [2, 1]
        assert hct.right_leaf_ids() == [0, 3]
        # subtrees
        assert lst.leaf_ids() == [2, 1]
        assert rst.leaf_ids() == [0, 3]
        # prev: both subtrees point back to the partitioned tree,
        # and the tree's children are the subtrees' own nodes
        assert lst._prev is hct
        assert rst._prev is hct
        assert hct._left is lst._node
        assert hct._right is rst._node
        # node ids under the right subtree
        assert rst._node.id == 4
        assert rst._node.left.id == 0
        assert rst._node.right.id == 3
        # node ids under the left subtree
        assert lst._node.id == 5
        assert lst._node.left.id == 2
        assert lst._node.right.id == 1

Source File: test_dense_sdm.py From scedar with MIT License

5 votes

def test_bi_partition_min_spl(self):
        """Check ``bi_partition`` with ``soft_min_subtree_size=2`` on a 5-leaf tree.

        Before the call the left side of the root holds only leaf 4; after
        the call the asserts expect the left side to hold ``[4, 3, 2]`` and
        the right side ``[1, 0]``, with the leaf order of the whole tree
        unchanged.
        """
        # Expected dendrogram before partitioning (leaf ids below):
        # _____|_____
        # |     ____|____
        # |   __|__   __|__
        # |   |   |   |   |
        # 4   3   2   1   0
        z = sch.linkage([[0, 0], [1, 1], [3, 3], [4, 4], [10, 10]],
                        metric='euclidean', method='complete',
                        optimal_ordering=True)
        hct = eda.HClustTree(sch.to_tree(z))
        assert hct.leaf_ids() == [4, 3, 2, 1, 0]
        assert hct.left_leaf_ids() == [4]
        assert hct.right().left().leaf_ids() == [3, 2]
        assert hct.right().right().leaf_ids() == [1, 0]
        labs, sids, lst, rst = hct.bi_partition(
            soft_min_subtree_size=2, return_subtrees=True)
        assert labs == [0, 0, 0, 1, 1]
        assert sids == [4, 3, 2, 1, 0]
        # hct should be changed accordingly
        assert hct.leaf_ids() == [4, 3, 2, 1, 0]
        assert hct.left_leaf_ids() == [4, 3, 2]
        assert hct.right_leaf_ids() == [1, 0]
        # left: leaf 4 is now grouped with leaves 3 and 2
        assert lst._prev is hct
        assert lst._node.left.left.id == 4
        assert lst._node.left.right.id == 3
        assert lst._node.right.id == 2
        # right
        assert rst._prev is hct
        assert rst._node.left.id == 1
        assert rst._node.right.id == 0

Source File: test_dense_sdm.py From scedar with MIT License

5 votes

def test_bi_partition_min_switch_spl(self):
        """Check ``bi_partition`` with ``soft_min_subtree_size=3`` on a 6-leaf tree.

        The tree is built by single-linkage clustering of six 1-D points.  Its
        initial shape is asserted directly on the SciPy nodes; after the call
        the asserts expect two subtrees of three leaves each, ``[0, 1, 2]``
        and ``[3, 4, 5]``.
        """
        # Expected dendrogram before partitioning (leaf ids below):
        # _______|________
        # |         _____|_____
        # |     ____|____     |
        # |   __|__   __|__   |
        # |   |   |   |   |   |
        # 0   1   2   3   4   5
        # round 1: ( ((0, (1, 2)), (3, 4)), (5) )
        # round 2: ( (0, (1, 2), (3, (4, 5)) )
        z = sch.linkage([[0], [5], [6], [8], [9], [12]],
                        method='single', optimal_ordering=True)
        root = sch.to_tree(z)
        # shape of the raw SciPy tree before it is wrapped and partitioned
        assert root.left.id == 0
        assert root.right.right.id == 5
        assert root.right.left.left.left.id == 1
        assert root.right.left.left.right.id == 2
        assert root.right.left.right.left.id == 3
        assert root.right.left.right.right.id == 4
        hct = eda.HClustTree(root)
        labs, sids, lst, rst = hct.bi_partition(
            soft_min_subtree_size=3, return_subtrees=True)
        assert labs == [0, 0, 0, 1, 1, 1]
        assert sids == [0, 1, 2, 3, 4, 5]
        # lst
        assert hct._left is lst._node
        assert lst._prev is hct
        assert lst.left_leaf_ids() == [0]
        assert lst.right_leaf_ids() == [1, 2]
        # rst
        assert hct._right is rst._node
        assert rst._prev is hct
        assert rst.left_leaf_ids() == [3]
        assert rst.right_leaf_ids() == [4, 5]

Source File: hierarchical_clustering.py From CompareM with GNU General Public License v3.0

4 votes

def run(self, pairwise_value_file,
                    method,
                    similarity,
                    max_sim_value,
                    name_col1,
                    name_col2,
                    value_col,
                    output_tree):
        """Perform hierarchical clustering on pairwise value files.

        The dissimilarity vector and labels come from ``self._parse_data``.
        The vector is linked with ``hierarchy.linkage`` and converted to a
        tree, which ``self._save_newick`` serializes.  The resulting string
        plus a trailing newline is written to ``output_tree``.

        Parameters
        ----------
        pairwise_value_file : str
            File with pairwise similarity or dissimilarity values.
        method : str
            Clustering method to use.
        similarity : boolean
            Flag indicating file contain similarity values.
        max_sim_value : float
            Maximum value of similarity scores.
        name_col1 : int
            Index of first column with genome names.
        name_col2 : int
            Index of second column with genome names.
        value_col : int
            Index of column with similarity or dissimilarity values.
        output_tree : str
            Path of the file the serialized tree is written to
            (opened in ``'w'`` mode, so existing content is replaced).
        """

        diss_vector, genome_labels = self._parse_data(pairwise_value_file,
                                                        name_col1,
                                                        name_col2,
                                                        value_col,
                                                        similarity,
                                                        max_sim_value)

        clusters = hierarchy.linkage(diss_vector, method=method)

        tree = hierarchy.to_tree(clusters)
        newick_str = self._save_newick(tree, "", tree.dist, genome_labels)

        # The context manager closes the handle even if the write raises,
        # which the previous open()/close() pair did not guarantee.
        with open(output_tree, 'w') as fout:
            fout.write(newick_str + '\n')

Source File: hierarchical_clustering.py From SqueezeMeta with GNU General Public License v3.0

4 votes

def run(self, pairwise_value_file,
                    method,
                    similarity,
                    max_sim_value,
                    name_col1,
                    name_col2,
                    value_col,
                    output_tree):
        """Perform hierarchical clustering on pairwise value files.

        ``self._parse_data`` supplies the dissimilarity vector and labels.
        ``hierarchy.linkage`` and ``hierarchy.to_tree`` turn the vector into
        a tree, and ``self._save_newick`` produces the string that is written
        to ``output_tree``, followed by a newline.

        Parameters
        ----------
        pairwise_value_file : str
            File with pairwise similarity or dissimilarity values.
        method : str
            Clustering method to use.
        similarity : boolean
            Flag indicating file contain similarity values.
        max_sim_value : float
            Maximum value of similarity scores.
        name_col1 : int
            Index of first column with genome names.
        name_col2 : int
            Index of second column with genome names.
        value_col : int
            Index of column with similarity or dissimilarity values.
        output_tree : str
            Path of the file the serialized tree is written to
            (opened in ``'w'`` mode, so existing content is replaced).
        """

        diss_vector, genome_labels = self._parse_data(pairwise_value_file,
                                                        name_col1,
                                                        name_col2,
                                                        value_col,
                                                        similarity,
                                                        max_sim_value)

        clusters = hierarchy.linkage(diss_vector, method=method)

        tree = hierarchy.to_tree(clusters)
        newick_str = self._save_newick(tree, "", tree.dist, genome_labels)

        # Use a context manager so the file is closed even when the write
        # fails; a bare open()/close() pair would leak the handle then.
        with open(output_tree, 'w') as fout:
            fout.write(newick_str + '\n')

Python scipy.cluster.hierarchy.to_tree() Examples