Python sklearn.mixture Examples
The following are 13 code examples of the sklearn.mixture module. Each example is drawn from an open-source project; the original project, source file, and license are noted above it. You may also want to check out all of the available functions and classes of the sklearn module.
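Before the project examples, here is a minimal, self-contained sketch of the module's most common entry point, sklearn.mixture.GaussianMixture; the toy data and parameter values are illustrative only:

import numpy as np
import sklearn.mixture

# Two well-separated 1-D clusters of toy data
rng = np.random.RandomState(0)
data = np.concatenate([rng.normal(-3, 1, 200),
                       rng.normal(3, 1, 200)]).reshape(-1, 1)

# Fit a two-component Gaussian mixture with EM
gmm = sklearn.mixture.GaussianMixture(n_components=2,
                                      covariance_type='full',
                                      random_state=0)
gmm.fit(data)

print(gmm.weights_)              # mixing proportions
print(gmm.means_.ravel())        # component means, close to -3 and 3
labels = gmm.predict(data)       # hard cluster assignments
probs = gmm.predict_proba(data)  # posterior responsibilities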
Example #1
Source File: gmm.py From vampyre with MIT License
def est_init(self, return_cost=False, ind_out=None, avg_var_cost=True):
    """
    Initial estimator.

    See the base class :class:`vampyre.estim.base.Estim` for a complete
    description.

    :param Boolean return_cost: Flag indicating if :code:`cost` is to be
        returned
    :returns: :code:`zmean, zvar, [cost]` which are the prior mean and
        variance
    """
    # Delegate to the underlying mixture estimator
    return self.mix.est_init(return_cost, ind_out, avg_var_cost)
Example #2
Source File: GMM.py From sprocket with MIT License
def __init__(self, n_mix=32, n_iter=100, covtype='full'):
    self.n_mix = n_mix
    self.n_iter = n_iter
    self.covtype = covtype
    self.random_state = np.random.mtrand._rand

    # construct GMM parameter
    if self.covtype == 'full':
        self.param = sklearn.mixture.GaussianMixture(
            n_components=self.n_mix, covariance_type=self.covtype,
            max_iter=self.n_iter)
    elif self.covtype == 'block_diag':
        self.param = BlockDiagonalGaussianMixture(
            n_mix=self.n_mix, n_iter=self.n_iter)
    else:
        raise ValueError('Covariance type should be full or block_diag')
Example #3
Source File: GMM.py From sprocket with MIT License
def _gmmmap(self, sddata):
    # parameters of the sequential data
    T, sddim = sddata.shape

    # estimate the posterior sequence
    wseq = self.pX.predict_proba(sddata)

    # estimate the mixture sequence
    cseq = np.argmax(wseq, axis=1)

    mseq = np.zeros((T, sddim))
    covseq = np.zeros((T, sddim, sddim))
    for t in range(T):
        # read the maximum-likelihood mixture component in frame t
        m = cseq[t]

        # conditional mean vector sequence
        mseq[t] = self.meanY[m] + self.A[m] @ (sddata[t] - self.meanX[m])

        # conditional covariance sequence
        covseq[t] = self.cond_cov_inv[m]

    return cseq, wseq, mseq, covseq
Example #4
Source File: signal_binarize.py From NeuroKit with MIT License
def _signal_binarize(signal, method="threshold", threshold="auto"):
    method = method.lower()  # remove capitalised letters
    if method == "threshold":
        binary = _signal_binarize_threshold(signal, threshold=threshold)
    elif method == "mixture":
        binary = _signal_binarize_mixture(signal, threshold=threshold)
    else:
        raise ValueError(
            "NeuroKit error: signal_binarize(): 'method' should be "
            "one of 'threshold' or 'mixture'."
        )
    return binary


# =============================================================================
# Methods
# =============================================================================
Example #5
Source File: signal_binarize.py From NeuroKit with MIT License
def _signal_binarize_mixture(signal, threshold="auto"):
    if threshold == "auto":
        threshold = 0.5

    # fit a Gaussian Mixture Model with two components
    clf = sklearn.mixture.GaussianMixture(n_components=2, random_state=333)
    clf = clf.fit(signal.reshape(-1, 1))

    # Get the predicted probability of the component with the larger mean
    probability = clf.predict_proba(signal.reshape(-1, 1))[:, np.argmax(clf.means_[:, 0])]

    binary = np.zeros(len(signal))
    binary[probability >= threshold] = 1
    return binary
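As a quick sanity check, the helper above can be exercised on synthetic data. A minimal sketch, assuming _signal_binarize_mixture is defined as shown and numpy and sklearn.mixture are imported as in the NeuroKit source:

import numpy as np
import sklearn.mixture

# A slow cosine produces two clearly separated amplitude states
signal = np.cos(np.linspace(start=0, stop=20, num=1000))
binary = _signal_binarize_mixture(signal, threshold="auto")

# `binary` is 1 wherever the sample most likely belongs to the
# higher-mean Gaussian component
print(binary[:5], binary.mean())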
Example #6
Source File: gmm.py From vampyre with MIT License
def set_gmm_param(self, probc, meanc, varc):
    """
    Sets the GMM parameters for the mixture estimator
    """
    nc = len(probc)
    if self.mix is None:
        # If the mixture estimator does not exist, create it.
        # First, create the component Gaussian estimators
        est_list = []
        for i in range(nc):
            esti = GaussEst(meanc[i], varc[i], self.shape,
                            var_axes=self.var_axes, zmean_axes='all',
                            is_complex=self.is_complex, map_est=self.map_est)
            est_list.append(esti)

        # Create the mixture
        self.mix = MixEst(est_list, w=probc)
    else:
        # If the mixture distribution is already created,
        # set the parameters of the mixture estimator
        self.probc = probc
        self.mix.w = np.copy(probc)
        for i in range(nc):
            esti = self.mix.est_list[i]
            if not self.mean_fix[i]:
                esti.zmean = meanc[i]
            if not self.var_fix[i]:
                esti.zvar = np.copy(varc[i])
Example #7
Source File: gmm.py From vampyre with MIT License
def update_gmm_em(self):
    """
    Updates the GMM parameters using EM estimation
    """
    # Get the posterior probabilities, means, and variances from the
    # mixture estimator. The lists have one element for each component
    # in the mixture.
    prob_list = self.mix.prob
    zmean_list = self.mix.zmean_list
    zvar_list = self.mix.zvar_list

    # Compute the new cluster probabilities, means, and variances
    nc = len(prob_list)
    probc = np.zeros(nc)
    meanc = np.zeros(nc)
    varc = []
    for i in range(nc):
        probc[i] = np.mean(prob_list[i])
        meanc[i] = np.mean(prob_list[i] * zmean_list[i]) / probc[i]
        dsq = zvar_list[i] + np.abs(zmean_list[i] - meanc[i]) ** 2
        varci = np.mean((prob_list[i] * dsq) / probc[i], axis=self.var_axes)
        varci = np.maximum(varci, self.zvarmin)
        varc.append(varci)

    # Set the parameters
    self.set_gmm_param(probc, meanc, varc)
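This mirrors the classic GMM M-step: a component's weight is its mean responsibility, and its mean and variance are responsibility-weighted averages. A generic numpy sketch of those update formulas for scalar data (the function name and shapes are illustrative, not vampyre's estimator API):

import numpy as np

def gmm_m_step(x, resp):
    # x: (N,) samples; resp: (N, K) responsibilities, each row sums to 1
    nk = resp.sum(axis=0)                  # effective sample count per component
    weights = nk / len(x)                  # new mixing proportions
    means = (resp * x[:, None]).sum(axis=0) / nk
    variances = (resp * (x[:, None] - means) ** 2).sum(axis=0) / nk
    return weights, means, variances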
Example #8
Source File: GMM.py From sprocket with MIT License
def train_singlepath(self, tar_jnt):
    """Fit the GMM parameters based on single-path training

    M-step: update the GMM parameters using `self.log_resp` and `tar_jnt`

    Parameters
    ----------
    tar_jnt : array, shape (`T`, `tar_dim`)
        Joint feature vector of the original and target feature vectors,
        consisting of static and delta components, which will be modeled.

    Returns
    -------
    param :
        Sklearn-based model parameters of the GMM
    """
    if self.covtype == 'full':
        single_param = sklearn.mixture.GaussianMixture(
            n_components=self.n_mix, covariance_type=self.covtype,
            max_iter=1)
    elif self.covtype == 'block_diag':
        single_param = BlockDiagonalGaussianMixture(
            n_mix=self.n_mix, n_iter=self.n_iter)
    else:
        raise ValueError('Covariance type should be full or block_diag')

    # initialize the target single-path parameters
    single_param._initialize_parameters(tar_jnt, self.random_state)

    # perform the M-step
    single_param._m_step(tar_jnt, self.log_resp)

    return single_param
Example #9
Source File: GMM.py From sprocket with MIT License
def _set_Ab(self):
    # calculate A and b from self.jmean and self.jcov
    sddim = self.jmean.shape[1] // 2

    # calculate the inverse of covariance XX in each mixture component
    self.covXXinv = np.zeros((self.n_mix, sddim, sddim))
    for m in range(self.n_mix):
        self.covXXinv[m] = np.linalg.inv(self.covXX[m])

    # calculate A, b, and the conditional covariance given X
    self.A = np.zeros((self.n_mix, sddim, sddim))
    self.b = np.zeros((self.n_mix, sddim))
    self.cond_cov_inv = np.zeros((self.n_mix, sddim, sddim))
    for m in range(self.n_mix):
        # calculate A (i.e., A = yxcov_m * xxcov_m^-1)
        self.A[m] = self.covYX[m] @ self.covXXinv[m]

        # calculate b (i.e., b = mean^Y - A * mean^X)
        self.b[m] = self.meanY[m] - self.A[m] @ self.meanX[m]

        # calculate the conditional covariance
        # (i.e., cov^(Y|X)^-1 = (yycov - A * xycov)^-1)
        self.cond_cov_inv[m] = np.linalg.inv(
            self.covYY[m] - self.A[m] @ self.covXY[m])

    return
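The A and b computed above are the standard conditional-Gaussian terms: for a joint Gaussian over (X, Y), E[Y | X = x] = mean^Y + cov^YX (cov^XX)^-1 (x - mean^X). A minimal numpy sketch of that algebra for a single component, with illustrative toy values:

import numpy as np

# Toy joint Gaussian over 2-D X and 2-D Y
meanX = np.array([0.0, 0.0])
meanY = np.array([1.0, -1.0])
covXX = np.array([[1.0, 0.2], [0.2, 1.0]])
covYX = np.array([[0.5, 0.0], [0.0, 0.3]])

# A = cov(Y, X) @ cov(X, X)^-1 ; b = meanY - A @ meanX
A = covYX @ np.linalg.inv(covXX)
b = meanY - A @ meanX

x = np.array([0.5, -0.5])
print(A @ x + b)  # E[Y | X = x]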
Example #10
Source File: GMM.py From sprocket with MIT License
def _set_pX(self):
    # probability density function of X
    self.pX = sklearn.mixture.GaussianMixture(
        n_components=self.n_mix, covariance_type='full')
    self.pX.weights_ = self.w
    self.pX.means_ = self.meanX
    self.pX.covariances_ = self.covXX

    # the following call is required to estimate the posterior
    self.pX.precisions_cholesky_ = _compute_precision_cholesky(
        self.covXX, 'full')
    return
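_compute_precision_cholesky is a private sklearn helper; in recent releases it lives in sklearn.mixture._gaussian_mixture, though the module path may differ across versions. A hedged sketch of the same pattern, building a scorable GaussianMixture from known parameters instead of fitting it; the parameter values are illustrative:

import numpy as np
import sklearn.mixture
# private helper; its module path may vary with the sklearn version
from sklearn.mixture._gaussian_mixture import _compute_precision_cholesky

# Known 1-D, two-component parameters (toy values)
gm = sklearn.mixture.GaussianMixture(n_components=2, covariance_type='full')
gm.weights_ = np.array([0.4, 0.6])
gm.means_ = np.array([[-2.0], [2.0]])
gm.covariances_ = np.array([[[1.0]], [[0.5]]])
gm.precisions_cholesky_ = _compute_precision_cholesky(gm.covariances_, 'full')

# With the Cholesky factors set, the posterior can be evaluated without fit()
print(gm.predict_proba(np.array([[-1.5], [1.8]])))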
Example #11
Source File: scdv.py From redshells with MIT License
def __init__(self, documents: List[List[str]], cluster_size: int, sparsity_percentage: float,
             gaussian_mixture_kwargs: Dict[Any, Any], dictionary: gensim.corpora.Dictionary,
             w2v: Union[FastText, Word2Vec]) -> None:
    """
    :param documents: documents for training.
    :param cluster_size: word cluster size.
    :param sparsity_percentage: sparsity percentage. This must be in [0, 1].
    :param gaussian_mixture_kwargs: Arguments to build `sklearn.mixture.GaussianMixture` except cluster_size.
        Please see `sklearn.mixture.GaussianMixture.__init__` for details.
    :param dictionary: `gensim.corpora.Dictionary`.
    """
    logger.info('_build_dictionary...')
    self._dictionary = dictionary
    vocabulary_size = len(self._dictionary.token2id)
    embedding_size = w2v.wv.vector_size

    logger.info('_build_word_embeddings...')
    self._word_embeddings = self._build_word_embeddings(self._dictionary, w2v)
    assert self._word_embeddings.shape == (vocabulary_size, embedding_size)

    logger.info('_build_word_cluster_probabilities...')
    self._word_cluster_probabilities = self._build_word_cluster_probabilities(
        self._word_embeddings, cluster_size, gaussian_mixture_kwargs)
    assert self._word_cluster_probabilities.shape == (vocabulary_size, cluster_size)

    logger.info('_build_idf...')
    self._idf = self._build_idf(self._dictionary)
    assert self._idf.shape == (vocabulary_size,)

    logger.info('_build_word_cluster_vectors...')
    word_cluster_vectors = self._build_word_cluster_vectors(
        self._word_embeddings, self._word_cluster_probabilities)
    assert word_cluster_vectors.shape == (vocabulary_size, cluster_size, embedding_size)

    logger.info('_build_word_topic_vectors...')
    word_topic_vectors = self._build_word_topic_vectors(self._idf, word_cluster_vectors)
    assert word_topic_vectors.shape == (vocabulary_size, cluster_size * embedding_size)

    logger.info('_build_sparsity_threshold...')
    self._sparse_threshold = self._build_sparsity_threshold(
        word_topic_vectors, self._dictionary, documents, sparsity_percentage)
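The word-cluster probabilities at the heart of SCDV come from fitting a GaussianMixture over the word-embedding matrix and taking predict_proba. A minimal sketch of that single step, with random embeddings standing in for the real w2v vectors:

import numpy as np
import sklearn.mixture

vocabulary_size, embedding_size, cluster_size = 1000, 50, 10
word_embeddings = np.random.RandomState(0).randn(vocabulary_size, embedding_size)

# soft-cluster every word: one probability per (word, cluster) pair
gmm = sklearn.mixture.GaussianMixture(n_components=cluster_size, random_state=0)
gmm.fit(word_embeddings)
word_cluster_probabilities = gmm.predict_proba(word_embeddings)
assert word_cluster_probabilities.shape == (vocabulary_size, cluster_size)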
Example #12
Source File: distribution.py From L2L with GNU General Public License v3.0
def __init__(self, n_components=2, **kwargs):
    self.random_state = None
    self.bayesian_mixture = sklearn.mixture.BayesianGaussianMixture(
        n_components,
        weight_concentration_prior_type='dirichlet_distribution',
        random_state=self.random_state, **kwargs)

    # taken from the check_fitted function of BayesianGaussianMixture
    # in the sklearn repository
    self.parametrization = ('covariances_', 'means_', 'weight_concentration_',
                            'weights_', 'mean_precision_',
                            'degrees_of_freedom_', 'precisions_',
                            'precisions_cholesky_')
    self.n_components = n_components
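Unlike GaussianMixture, BayesianGaussianMixture places a prior on the mixing weights, so components the data does not support are driven toward zero weight. A quick illustrative fit on toy data (not L2L code):

import numpy as np
import sklearn.mixture

rng = np.random.RandomState(0)
data = np.concatenate([rng.normal(-3, 1, 300),
                       rng.normal(3, 1, 300)]).reshape(-1, 1)

# deliberately ask for more components than the data supports
bgm = sklearn.mixture.BayesianGaussianMixture(
    n_components=5,
    weight_concentration_prior_type='dirichlet_distribution',
    random_state=0)
bgm.fit(data)
print(np.round(bgm.weights_, 3))  # most weight concentrates on ~2 components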
Example #13
Source File: signal_binarize.py From NeuroKit with MIT License
def signal_binarize(signal, method="threshold", threshold="auto"):
    """Binarize a continuous signal.

    Convert a continuous signal into zeros and ones depending on a given threshold.

    Parameters
    ----------
    signal : Union[list, np.array, pd.Series]
        The signal (i.e., a time series) in the form of a vector of values.
    method : str
        The algorithm used to discriminate between the two states. Can be one of
        'threshold' (default) or 'mixture'. If 'mixture', will use a Gaussian
        Mixture Model to categorize between the two states. If 'threshold', will
        consider as activated all points whose value is greater than the threshold.
    threshold : float
        If `method` is 'mixture', then it corresponds to the minimum probability
        required to be considered as activated (if 'auto', then 0.5). If `method`
        is 'threshold', then it corresponds to the minimum amplitude to detect as
        onset. If 'auto', takes the value halfway between the max and the min.

    Returns
    -------
    list
        A list or array depending on the type passed.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> import neurokit2 as nk
    >>>
    >>> signal = np.cos(np.linspace(start=0, stop=20, num=1000))
    >>> binary = nk.signal_binarize(signal)
    >>> fig = pd.DataFrame({"Raw": signal, "Binary": binary}).plot()
    >>> fig  #doctest: +SKIP

    """
    # Return the appropriate type
    if isinstance(signal, list):
        binary = _signal_binarize(np.array(signal), method=method, threshold=threshold)
        signal = list(binary)
    elif isinstance(signal, pd.Series):
        signal = signal.copy()  # Avoid annoying pandas warning
        binary = _signal_binarize(signal.values, method=method, threshold=threshold)
        signal[:] = binary
    else:
        signal = _signal_binarize(signal, method=method, threshold=threshold)
    return signal