Python sklearn.mixture.GaussianMixture() Examples

The following are 28 code examples of sklearn.mixture.GaussianMixture(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.mixture, or try the search function.
Example #1
Source File: clusterings.py    From parcellation_fragmenter with BSD 3-Clause "New" or "Revised" License 6 votes
def gmm(n_clusters, samples):

    """
    Run GMM clustering on vertex coordinates.

    Parameters:
    - - - - -
    n_clusters : int
        number of clusters to generate
    samples : array
        Euclidean-space coordinates of vertices
    """

    # Fit Gaussian Mixture Model
    gmm = mixture.GaussianMixture(
        n_components=n_clusters, covariance_type='tied', max_iter=1000,
        init_params='kmeans', verbose=0)
    gmm.fit(samples)

    labels = gmm.predict(samples)
    labels = labels.astype(np.int32)+1

    return labels 
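A minimal usage sketch for the function above, assuming `mixture` is `sklearn.mixture` and `np` is `numpy` (the imports the snippet implies), on random stand-in coordinates:

import numpy as np
from sklearn import mixture

# 200 random 3-D "vertex coordinates" as stand-in data
samples = np.random.rand(200, 3)
labels = gmm(n_clusters=4, samples=samples)  # gmm() as defined above
print(labels.min(), labels.max())            # labels are shifted to start at 1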
Example #2
Source File: posterior.py    From scVI with MIT License 6 votes
def clustering_scores(self, prediction_algorithm: str = "knn") -> Tuple:
        if self.gene_dataset.n_labels > 1:
            latent, _, labels = self.get_latent()
            if prediction_algorithm == "knn":
                labels_pred = KMeans(
                    self.gene_dataset.n_labels, n_init=200
                ).fit_predict(
                    latent
                )  # n_jobs>1 ?
            elif prediction_algorithm == "gmm":
                gmm = GMM(self.gene_dataset.n_labels)
                gmm.fit(latent)
                labels_pred = gmm.predict(latent)

            asw_score = silhouette_score(latent, labels)
            nmi_score = NMI(labels, labels_pred)
            ari_score = ARI(labels, labels_pred)
            uca_score = unsupervised_clustering_accuracy(labels, labels_pred)[0]
            logger.debug(
                "Clustering Scores:\nSilhouette: %.4f\nNMI: %.4f\nARI: %.4f\nUCA: %.4f"
                % (asw_score, nmi_score, ari_score, uca_score)
            )
            return asw_score, nmi_score, ari_score, uca_score 
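In the snippet above, `GMM`, `NMI` and `ARI` evidently alias `sklearn.mixture.GaussianMixture`, `sklearn.metrics.normalized_mutual_info_score` and `sklearn.metrics.adjusted_rand_score`. A self-contained sketch of the same GMM-based scoring on stand-in data:

import numpy as np
from sklearn.mixture import GaussianMixture
from sklearn.metrics import (silhouette_score,
                             normalized_mutual_info_score,
                             adjusted_rand_score)

latent = np.random.rand(300, 10)         # stand-in for the model's latent space
labels = np.random.randint(0, 3, 300)    # stand-in ground-truth labels

labels_pred = GaussianMixture(n_components=3).fit(latent).predict(latent)
print('ASW: %.4f' % silhouette_score(latent, labels))
print('NMI: %.4f' % normalized_mutual_info_score(labels, labels_pred))
print('ARI: %.4f' % adjusted_rand_score(labels, labels_pred))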
Example #3
Source File: differential_entropies.py    From geosketch with MIT License 6 votes
def differential_entropies(X, labels):
    n_samples, n_features = X.shape
    
    labels = np.array(labels)
    names = sorted(set(labels))

    entropies = []
    
    for name in names:
        name_idx = np.where(labels == name)[0]

        gm = GaussianMixture().fit(X[name_idx, :])

        mn = multivariate_normal(
            mean=gm.means_.flatten(),
            cov=gm.covariances_.reshape(n_features, n_features)
        )

        entropies.append(mn.entropy())

    probs = softmax(entropies)

    for name, entropy, prob in zip(names, entropies, probs):
        #print('{}\t{}\t{}'.format(name, entropy, prob))
        print('{}\t{}'.format(name, entropy)) 
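`mn.entropy()` returns the differential entropy of the fitted Gaussian, which for a d-dimensional normal has the closed form H = d/2 · (1 + ln 2π) + ½ · ln det Σ. A quick sketch verifying that identity:

import numpy as np
from scipy.stats import multivariate_normal

d = 3
cov = 0.5 * np.eye(d)
mn = multivariate_normal(mean=np.zeros(d), cov=cov)

# closed form: H = d/2 * (1 + ln(2*pi)) + 0.5 * ln det(cov)
h = 0.5 * d * (1 + np.log(2 * np.pi)) + 0.5 * np.log(np.linalg.det(cov))
assert np.isclose(mn.entropy(), h)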
Example #4
Source File: alp_gmm.py    From teachDeepRL with MIT License 6 votes
def sample_task(self):
        if (len(self.tasks) < self.nb_random) or (np.random.random() < self.random_task_ratio):
            # Random task sampling
            new_task = self.random_task_generator.sample()
        else:
            # ALP-based task sampling

            # 1 - Retrieve the mean ALP value of each Gaussian in the GMM
            self.alp_means = []
            for pos, _, w in zip(self.gmm.means_, self.gmm.covariances_, self.gmm.weights_):
                self.alp_means.append(pos[-1])

            # 2 - Sample Gaussian proportionally to its mean ALP
            idx = proportional_choice(self.alp_means, eps=0.0)

            # 3 - Sample task in Gaussian, without forgetting to remove ALP dimension
            new_task = np.random.multivariate_normal(self.gmm.means_[idx], self.gmm.covariances_[idx])[:-1]
            new_task = np.clip(new_task, self.mins, self.maxs).astype(np.float32)

        return new_task 
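`proportional_choice` is a teachDeepRL helper not shown here. A hypothetical stand-in with the contract the code implies (sample an index with probability proportional to its value) might look like:

import numpy as np

def proportional_choice(v, eps=0.0):
    """Hypothetical stand-in: pick index i with probability proportional to v[i] + eps."""
    v = np.asarray(v, dtype=np.float64) + eps
    if v.sum() == 0:
        return np.random.randint(len(v))
    return np.random.choice(len(v), p=v / v.sum())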
Example #5
Source File: gmm.py    From Speech_Signal_Processing_and_Classification with MIT License 6 votes
def determineComponents(data):
	X,Y = preparingData(data)
	n_components = np.arange(1,10)
	bic = np.zeros(n_components.shape)

	for i,n in enumerate(n_components):
		#fit gmm to data for each value of components
		gmm = GaussianMixture(n_components=n,max_iter=200, covariance_type='diag' ,n_init=3)
		gmm.fit(X)
		#store BIC scores
		bic[i] = gmm.bic(X)

	# The Bayesian Information Criterion (BIC) is a cost function with two terms:
	# the negative log-likelihood and a model-complexity penalty. BIC favours models
	# that fit well while staying simple, so the model with the smallest BIC wins.
	#plot the results
	plt.plot(bic)
	plt.show() 
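Since the model with the smallest BIC wins, a self-contained sketch that selects the winner explicitly instead of reading it off the plot:

import numpy as np
from sklearn.mixture import GaussianMixture

X = np.random.rand(500, 2)   # stand-in for preparingData(data)
ks = np.arange(1, 10)
bic = [GaussianMixture(n_components=k, max_iter=200,
                       covariance_type='diag', n_init=3).fit(X).bic(X)
       for k in ks]
print('lowest BIC at n_components = %d' % ks[int(np.argmin(bic))])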
Example #6
Source File: predicting.py    From ImageSetCleaner with GNU General Public License v3.0 6 votes
def detection_with_gaussian_mixture(image_set):
    """

    :param image_set: The bottleneck values of the relevant images.
    :return: Predictions vector
    """

    # Might achieve better results by initializing weights or means, given we know when we introduce noisy labels
    clf = mixture.GaussianMixture(n_components=2)

    clf.fit(image_set)

    predictions = clf.predict(image_set)
    predictions = normalize_predictions(predictions)

    return predictions 
Example #7
Source File: gmmfense.py    From platform-resource-manager with Apache License 2.0 6 votes
def __init__(self, data, max_mixture=10, threshold=0.1):
        """
        Class constructor, arguments include:
            data - data to build GMM model
            max_mixture - max number of Gaussian mixtures
            threshold - probability threshold to determine the fence
        """
        self.data = data
        self.thresh = threshold
        lowest_bic = np.infty
        components = 1
        bic = []
        n_components_range = range(1, max_mixture + 1)
        for n_components in n_components_range:
            # Fit a Gaussian mixture with EM
            gmm = mixture.GaussianMixture(n_components=n_components,
                                          random_state=1005)
            gmm.fit(data)
            bic.append(gmm.bic(data))
            if bic[-1] < lowest_bic:
                lowest_bic = bic[-1]
                best_gmm = gmm
                components = n_components
        log.debug('best gmm components number: %d, bic %f ', components, lowest_bic)
        self.gmm = best_gmm 
Example #8
Source File: utils.py    From scanobjectnn with MIT License 6 votes
def get_2d_grid_gmm(subdivisions=[5, 5], variance=0.04):
    """
    Compute the weight, mean and covariance of a 2D gmm placed on a 2D grid

    :param subdivisions: 2 element list of the number of subdivisions of the 2D space in each axis to form the grid
    :param variance: scalar variance of each spherical Gaussian
    :return gmm: instance of a sklearn GaussianMixture (GMM) object
    """
    # n_gaussians = reduce(lambda x, y: x*y,subdivisions)
    n_gaussians = np.prod(np.array(subdivisions))
    step = [1.0/(subdivisions[0]),  1.0/(subdivisions[1])]

    means = np.mgrid[step[0]-1: 1.0-step[0]: complex(0, subdivisions[0]),
            step[1]-1: 1.0-step[1]: complex(0, subdivisions[1])]
    means = np.reshape(means, [2,-1]).T
    covariances = variance*np.ones_like(means)
    weights = (1.0/n_gaussians)*np.ones(n_gaussians)
    gmm = GaussianMixture(n_components=n_gaussians, covariance_type='diag')
    gmm.weights_ = weights
    gmm.covariances_ = covariances
    gmm.means_ = means
    from sklearn.mixture.gaussian_mixture import _compute_precision_cholesky
    gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances, 'diag')
    return gmm 
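Two caveats worth noting: scikit-learn 0.22 moved the private module above to `sklearn.mixture._gaussian_mixture`, and the hand-assembled model is directly usable because `score_samples` only reads the attributes set above. A version-tolerant sketch:

import numpy as np
try:  # scikit-learn >= 0.22
    from sklearn.mixture._gaussian_mixture import _compute_precision_cholesky
except ImportError:  # older releases, as in the snippet
    from sklearn.mixture.gaussian_mixture import _compute_precision_cholesky

gmm = get_2d_grid_gmm(subdivisions=[5, 5], variance=0.04)  # function above
print(gmm.score_samples(np.random.rand(10, 2)))            # per-point log-likelihoods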
Example #9
Source File: statistical.py    From kenchi with BSD 3-Clause "New" or "Revised" License 6 votes
def _fit(self, X):
        self.estimator_     = GaussianMixture(
            covariance_type = self.covariance_type,
            init_params     = self.init_params,
            max_iter        = self.max_iter,
            means_init      = self.means_init,
            n_components    = self.n_components,
            n_init          = self.n_init,
            precisions_init = self.precisions_init,
            random_state    = self.random_state,
            reg_covar       = self.reg_covar,
            tol             = self.tol,
            warm_start      = self.warm_start,
            weights_init    = self.weights_init
        ).fit(X)

        return self 
Example #10
Source File: utils.py    From scanobjectnn with MIT License 6 votes
def get_3d_grid_gmm(subdivisions=[5,5,5], variance=0.04):
    """
    Compute the weight, mean and covariance of a gmm placed on a 3D grid
    :param subdivisions: 3 element list of the number of subdivisions of the 3D space in each axis to form the grid
    :param variance: scalar variance of each spherical Gaussian
    :return gmm: instance of a sklearn GaussianMixture (GMM) object
    """
    # n_gaussians = reduce(lambda x, y: x*y,subdivisions)
    n_gaussians = np.prod(np.array(subdivisions))
    step = [1.0/(subdivisions[0]),  1.0/(subdivisions[1]),  1.0/(subdivisions[2])]

    means = np.mgrid[ step[0]-1: 1.0-step[0]: complex(0, subdivisions[0]),
                      step[1]-1: 1.0-step[1]: complex(0, subdivisions[1]),
                      step[2]-1: 1.0-step[2]: complex(0, subdivisions[2])]
    means = np.reshape(means, [3, -1]).T
    covariances = variance*np.ones_like(means)
    weights = (1.0/n_gaussians)*np.ones(n_gaussians)
    gmm = GaussianMixture(n_components=n_gaussians, covariance_type='diag')
    gmm.weights_ = weights
    gmm.covariances_ = covariances
    gmm.means_ = means
    from sklearn.mixture.gaussian_mixture import _compute_precision_cholesky
    gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances, 'diag')
    return gmm 
Example #11
Source File: Train_webvision.py    From DivideMix with MIT License 6 votes
def eval_train(model,all_loss):    
    model.eval()
    num_iter = (len(eval_loader.dataset)//eval_loader.batch_size)+1
    losses = torch.zeros(len(eval_loader.dataset))    
    with torch.no_grad():
        for batch_idx, (inputs, targets, index) in enumerate(eval_loader):
            inputs, targets = inputs.cuda(), targets.cuda() 
            outputs = model(inputs) 
            loss = CE(outputs, targets)  
            for b in range(inputs.size(0)):
                losses[index[b]]=loss[b]       
            sys.stdout.write('\r')
            sys.stdout.write('| Evaluating loss Iter[%3d/%3d]\t' %(batch_idx,num_iter)) 
            sys.stdout.flush()    
                                    
    losses = (losses-losses.min())/(losses.max()-losses.min())    
    all_loss.append(losses)

    # fit a two-component GMM to the loss
    input_loss = losses.reshape(-1,1)
    gmm = GaussianMixture(n_components=2,max_iter=10,tol=1e-2,reg_covar=5e-4)
    gmm.fit(input_loss)
    prob = gmm.predict_proba(input_loss) 
    prob = prob[:,gmm.means_.argmin()]         
    return prob,all_loss 
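The idea here (from DivideMix) is that clean samples accumulate lower training loss than mislabeled ones, so the posterior of the low-mean component acts as a per-sample probability of being clean. A self-contained sketch on synthetic losses:

import numpy as np
from sklearn.mixture import GaussianMixture

# synthetic bimodal "per-sample loss": clean (low) vs noisy (high)
losses = np.concatenate([np.random.normal(0.2, 0.05, 800),
                         np.random.normal(0.8, 0.05, 200)]).reshape(-1, 1)

gmm = GaussianMixture(n_components=2, max_iter=10, tol=1e-2, reg_covar=5e-4)
gmm.fit(losses)
prob_clean = gmm.predict_proba(losses)[:, gmm.means_.argmin()]
print(prob_clean[:5])  # high for low-loss (likely clean) samples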
Example #12
Source File: SCDV.py    From SCDV with MIT License 6 votes
def cluster_GMM(num_clusters, word_vectors):
    # Initialize a GMM object and use it for clustering.
    clf = GaussianMixture(n_components=num_clusters,
                          covariance_type="tied", init_params='kmeans', max_iter=50)
    # Get cluster assignments.
    clf.fit(word_vectors)
    idx = clf.predict(word_vectors)
    print("Clustering Done...", time.time() - start, "seconds")
    # Get probabilities of cluster assignments.
    idx_proba = clf.predict_proba(word_vectors)
    # Dump cluster assignments and probability of cluster assignments.
    joblib.dump(idx, 'gmm_latestclusmodel_len2alldata.pkl')
    print("Cluster Assignments Saved...")

    joblib.dump(idx_proba, 'gmm_prob_latestclusmodel_len2alldata.pkl')
    print("Probabilities of Cluster Assignments Saved...")
    return (idx, idx_proba) 
Example #13
Source File: utils.py    From SDGym with MIT License 6 votes
def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
        self.meta = self.get_metadata(data, categorical_columns, ordinal_columns)
        model = []

        self.output_info = []
        self.output_dim = 0
        for id_, info in enumerate(self.meta):
            if info['type'] == CONTINUOUS:
                gm = GaussianMixture(self.n_clusters)
                gm.fit(data[:, id_].reshape([-1, 1]))
                model.append(gm)
                self.output_info += [(1, 'tanh'), (self.n_clusters, 'softmax')]
                self.output_dim += 1 + self.n_clusters
            else:
                model.append(None)
                self.output_info += [(info['size'], 'softmax')]
                self.output_dim += info['size']

        self.model = model 
Example #14
Source File: evaluate.py    From SDGym with MIT License 6 votes
def _evaluate_gmm_likelihood(train, test, metadata, components=[10, 30]):
    results = list()
    for n_components in components:
        gmm = GaussianMixture(n_components, covariance_type='diag')
        LOGGER.info('Evaluating using %s', gmm)
        gmm.fit(test)
        l1 = gmm.score(train)

        gmm.fit(train)
        l2 = gmm.score(test)

        results.append({
            "name": repr(gmm),
            "syn_likelihood": l1,
            "test_likelihood": l2,
        })

    return pd.DataFrame(results) 
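The two likelihood numbers are comparable because `GaussianMixture.score` returns the average per-sample log-likelihood. A quick sketch of that equivalence:

import numpy as np
from sklearn.mixture import GaussianMixture

train, test = np.random.rand(500, 2), np.random.rand(100, 2)
gmm = GaussianMixture(10, covariance_type='diag').fit(train)
# score() is simply the mean of score_samples()
assert np.isclose(gmm.score(test), gmm.score_samples(test).mean())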
Example #15
Source File: BaseNNMixtureEstimator.py    From Conditional_Density_Estimation with MIT License 6 votes
def _sample_rows_same(self, X):
    """ uses efficient sklearn implementation to sample from gaussian mixture -> only works if all rows of X are the same"""
    weights, locs, scales = self._get_mixture_components(np.expand_dims(X[0], axis=0))

    # normalize so that the weights sum to 1
    weights = weights.astype(np.float64)
    weights = weights / np.sum(weights)

    gmm = GaussianMixture(n_components=self.n_centers, covariance_type='diag', max_iter=5, tol=1e-1)
    gmm.fit(np.random.normal(size=(100,self.ndim_y)))  # dummy fit so sklearn treats the model as fitted
    # override the GMM parameters with our own
    gmm.converged_ = True
    gmm.weights_ = weights[0]
    gmm.means_ = locs[0]
    gmm.covariances_ = scales[0]
    y_sample, _ = gmm.sample(X.shape[0])
    assert y_sample.shape == (X.shape[0], self.ndim_y)
    return X, y_sample 
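The same "dummy fit, then override" pattern in isolation, as a sketch with diagonal covariances (the component weights, means and variances below are made-up values):

import numpy as np
from sklearn.mixture import GaussianMixture

gmm = GaussianMixture(n_components=3, covariance_type='diag', max_iter=5, tol=1e-1)
gmm.fit(np.random.normal(size=(100, 2)))  # dummy fit so sklearn treats the model as fitted

gmm.weights_ = np.array([0.5, 0.3, 0.2])                # must sum to 1
gmm.means_ = np.array([[0., 0.], [5., 5.], [-5., 5.]])
gmm.covariances_ = 0.1 * np.ones((3, 2))                # per-dimension variances
samples, component_ids = gmm.sample(10)                 # draws from the overridden mixture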
Example #16
Source File: vade.py    From UnsupervisedDeepLearning-Pytorch with MIT License 6 votes
def initialize_gmm(self, dataloader):
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            self.cuda()

        self.eval()
        data = []
        for batch_idx, (inputs, _) in enumerate(dataloader):
            inputs = inputs.view(inputs.size(0), -1).float()
            if use_cuda:
                inputs = inputs.cuda()
            inputs = Variable(inputs)
            z, outputs, mu, logvar = self.forward(inputs)
            data.append(z.data.cpu().numpy())
        data = np.concatenate(data)
        gmm = GaussianMixture(n_components=self.n_centroids,covariance_type='diag')
        gmm.fit(data)
        self.u_p.data.copy_(torch.from_numpy(gmm.means_.T.astype(np.float32)))
        self.lambda_p.data.copy_(torch.from_numpy(gmm.covariances_.T.astype(np.float32))) 
Example #17
Source File: clustering.py    From celeb-detection-oss with Mozilla Public License 2.0 6 votes
def clusterize(points, n_components=2, covariance_type='tied',
               centers=None, weights=None, output=None, random_state=1000):
    if centers is not None:
        n_components = len(centers)

    if output is None:
        output = points

    if len(points) < 2:
        return [list(output)]

    gmm = GaussianMixture(n_components=n_components,
                          covariance_type=covariance_type,
                          means_init=centers,
                          weights_init=weights,
                          random_state=random_state)
    gmm.fit(points)
    labels = gmm.predict(points)

    clusters = defaultdict(list)
    for label, point in zip(labels, output):
        clusters[label].append(point)

    return sorted(clusters.values(), key=lambda x: len(x), reverse=True) 
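A usage sketch for `clusterize`, assuming `GaussianMixture` (sklearn) and `defaultdict` (collections) are imported as the snippet implies:

import numpy as np

points = np.vstack([np.random.normal(0, 0.5, (50, 2)),
                    np.random.normal(5, 0.5, (50, 2))])
clusters = clusterize(points, n_components=2)  # function defined above
print([len(c) for c in clusters])              # largest cluster first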
Example #18
Source File: Train_webvision_parallel.py    From DivideMix with MIT License 6 votes
def eval_train(eval_loader,model,device,whichnet,queue):   
    CE = nn.CrossEntropyLoss(reduction='none')
    model.eval()
    num_iter = (len(eval_loader.dataset)//eval_loader.batch_size)+1
    losses = torch.zeros(len(eval_loader.dataset))    
    with torch.no_grad():
        for batch_idx, (inputs, targets, index) in enumerate(eval_loader):
            inputs, targets = inputs.to(device), targets.to(device,non_blocking=True) 
            outputs = model(inputs) 
            loss = CE(outputs, targets)  
            for b in range(inputs.size(0)):
                losses[index[b]]=loss[b]       
            sys.stdout.write('\n')
            sys.stdout.write('|%s Evaluating loss Iter[%3d/%3d]\t' %(whichnet,batch_idx,num_iter)) 
            sys.stdout.flush()    
                                    
    losses = (losses-losses.min())/(losses.max()-losses.min())    

    # fit a two-component GMM to the loss
    input_loss = losses.reshape(-1,1)
    gmm = GaussianMixture(n_components=2,max_iter=10,tol=1e-2,reg_covar=1e-3)
    gmm.fit(input_loss)
    prob = gmm.predict_proba(input_loss) 
    prob = prob[:,gmm.means_.argmin()]         
    queue.put(prob) 
Example #19
Source File: independent.py    From SDGym with MIT License 5 votes
def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
        self.dtype = data.dtype
        self.meta = Transformer.get_metadata(data, categorical_columns, ordinal_columns)

        self.models = []
        for id_, info in enumerate(self.meta):
            if info['type'] == CONTINUOUS:
                model = GaussianMixture(self.gmm_n)
                model.fit(data[:, [id_]])
                self.models.append(model)
            else:
                nomial = np.bincount(data[:, id_].astype('int'), minlength=info['size'])
                nomial = nomial / np.sum(nomial)
                self.models.append(nomial) 
Example #20
Source File: graph_cuts.py    From pyImSegm with BSD 3-Clause "New" or "Revised" License 5 votes
def estim_class_model_gmm(features, nb_classes, init='kmeans'):
    """ from all features estimate Gaussian Mixture Model and assuming
    each cluster is a single class compute probability that each feature
    belongs to each class

    :param [[float]] features: list of features per segment
    :param int nb_classes: number of classes
    :param str init: initialisation method, e.g. 'kmeans'
    :return [[float]]: probabilities that each feature belongs to each class

    >>> np.random.seed(0)
    >>> fts = np.row_stack([np.random.random((50, 3)) - 1,
    ...                     np.random.random((50, 3)) + 1])
    >>> mm = estim_class_model_gmm(fts, 2)
    >>> mm.predict_proba(fts).shape
    (100, 2)
    """
    logging.debug('estimate GMM for all given features %r and %i components',
                  features.shape, nb_classes)
    # http://scikit-learn.org/stable/modules/generated/sklearn.mixture.GMM.html
    gmm = mixture.GaussianMixture(n_components=nb_classes,
                                  covariance_type='full', max_iter=99)
    if init == 'kmeans':
        # http://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html
        kmeans = cluster.KMeans(n_clusters=nb_classes, init='k-means++',
                                n_jobs=-1)
        y = kmeans.fit_predict(features)
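        # note: GaussianMixture.fit ignores its y argument (it exists only for
        # scikit-learn API consistency), so these k-means labels do not seed EM;
        # the default init_params='kmeans' is what actually governs initialisation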
        gmm.fit(features, y)
    else:
        gmm.fit(features)
    logging.info('compute probability of each feature to all component')
    return gmm 
Example #21
Source File: region_growing.py    From pyImSegm with BSD 3-Clause "New" or "Revised" License 5 votes
def transform_rays_model_sets_mean_cdf_mixture(list_rays, nb_components=5, slic_size=15):
    """ compute the mixture model and transform it into cumulative distribution

    :param list(list(int)) list_rays: list ray features (distances)
    :param int nb_components: number components in mixture model
    :param int slic_size: superpixel size
    :return tuple(any,list(list(int))):  mixture model, list of stat/param of models

    >>> np.random.seed(0)
    >>> list_rays = [[9, 4, 9], [4, 9, 7], [9, 7, 11], [10, 8, 10],
    ...              [9, 11, 8], [4, 8, 5], [8, 10, 6], [9, 7, 11]]
    >>> mm, mean_cdf = transform_rays_model_sets_mean_cdf_mixture(list_rays, 2)
    >>> len(mean_cdf)
    2
    """
    rays = np.array(list_rays)
    # mm = mixture.GaussianMixture(n_components=nb_components,
    #                                      covariance_type='diag')
    mm = mixture.BayesianGaussianMixture(n_components=nb_components,
                                         covariance_type='diag')
    mm.fit(rays)
    logging.debug('Mixture model found %i components with weights: %r',
                  len(mm.weights_), mm.weights_)

    list_mean_cdf = []
    # stds = mm.covariances_[:, np.eye(mm.means_.shape[1], dtype=bool)]
    # stds = mm.covariances_  # for covariance_type='diag'
    # diff_means = np.max(mm.means_, axis=0) - np.min(mm.means_, axis=0)
    for mean, covar in zip(mm.means_, mm.covariances_):
        std = np.sqrt(covar + 1) * 2 + slic_size
        mean = ndimage.gaussian_filter1d(mean, 1)
        std = ndimage.gaussian_filter1d(std, 1)
        max_dist = np.max(mean + 2 * std)
        cdist = compute_cumulative_distrib(np.array([mean]), np.array([std]),
                                           np.array([1]), max_dist)
        list_mean_cdf.append((mean.tolist(), cdist))

    return mm, list_mean_cdf 
Example #22
Source File: covar_gmm.py    From teachDeepRL with MIT License 5 votes
def __init__(self, mins, maxs, seed=None, params=dict()):
        self.seed = seed
        if not seed:
            self.seed = np.random.randint(42,424242)
        np.random.seed(self.seed)

        # Task space boundaries
        self.mins = np.array(mins)
        self.maxs = np.array(maxs)

        # Range of number of Gaussians to try when fitting the GMM
        self.potential_ks = np.arange(2, 11, 1) if "potential_ks" not in params else params["potential_ks"]
        # Ratio of randomly sampled tasks VS tasks sampling using GMM
        self.random_task_ratio = 0.2 if "random_task_ratio" not in params else params["random_task_ratio"]
        self.random_task_generator = Box(self.mins, self.maxs, dtype=np.float32)

        # Number of episodes between two fit of the GMM
        self.fit_rate = 250 if "fit_rate" not in params else params['fit_rate']
        self.nb_random = self.fit_rate  # Number of bootstrapping episodes

        # The original version does not use absolute LP, only LP.
        self.absolute_lp = False if "absolute_lp" not in params else params['absolute_lp']

        self.tasks = []
        self.tasks_times_rewards = []
        self.all_times = np.arange(0, 1, 1/self.fit_rate)

        # boring book-keeping
        self.bk = {'weights': [], 'covariances': [], 'means': [], 'tasks_lps': [], 'episodes': []} 
Example #23
Source File: cellularity_detection_superpixels.py    From HistomicsTK with Apache License 2.0 5 votes
def set_superpixel_assignment(self):
        """Fit gaussian mixture model to features and get assignment."""
        mmodel = GaussianMixture(n_components=self.cd.n_gaussian_components)
        self.spixel_labels = mmodel.fit_predict(self.fdata.values) + 1

    # ========================================================================= 
Example #24
Source File: gmm_choose_k.py    From intro_ds with Apache License 2.0 5 votes
def trainModel(data, clusterNum, covType):
    """
    Train a model using a Gaussian mixture.
    """
    model = GaussianMixture(n_components=clusterNum, covariance_type=covType)
    model.fit(data)
    return model 
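A sketch of how this wrapper might be used to compare the four covariance structures by BIC, assuming the snippet's implied `GaussianMixture` import and `numpy` available as `np`:

import numpy as np

data = np.random.rand(300, 4)
for cov_type in ('full', 'tied', 'diag', 'spherical'):
    model = trainModel(data, clusterNum=3, covType=cov_type)
    print(cov_type, model.bic(data))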
Example #25
Source File: gmm.py    From intro_ds with Apache License 2.0 5 votes
def trainModel(data, clusterNum):
    """
    Cluster the data using a Gaussian mixture.
    """
    model = GaussianMixture(n_components=clusterNum, covariance_type="full")
    model.fit(data)
    return model 
Example #26
Source File: test_gmm_hmm.py    From hmmlearn with BSD 3-Clause "New" or "Revised" License 5 votes
def create_random_gmm(n_mix, n_features, covariance_type, prng=0):
    prng = check_random_state(prng)
    g = GaussianMixture(n_mix, covariance_type=covariance_type)
    g.means_ = prng.randint(-20, 20, (n_mix, n_features))
    g.covars_ = make_covar_matrix(covariance_type, n_mix, n_features)
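    # note: scikit-learn's GaussianMixture exposes fitted covariances as
    # `covariances_`; `covars_` here is the attribute name the surrounding
    # hmmlearn test helpers expect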
    g.weights_ = normalized(prng.rand(n_mix))
    return g 
Example #27
Source File: gmm_vs_spectral.py    From intro_ds with Apache License 2.0 5 votes
def trainGMM(data, clusterNum):
    """
    Train a Gaussian mixture model.
    """
    model = GaussianMixture(n_components=clusterNum, covariance_type='full')
    model.fit(data)
    return model 
Example #28
Source File: gmm.py    From ml-ids with MIT License 5 votes
def main():
    """Run the IDS using GMM experiment."""
    week3Data = _parseTrainingData()

    # Scale the training data (ignore the timestamp column)
    scaler = preprocessing.RobustScaler().fit(week3Data[:, 1:])
    X_train = scaler.transform(week3Data[:, 1:])
    del week3Data

    try:
        gmm = pickle.load(open("data/gmm.pkl", "rb"))
        print("Loading pre-trained GMM...")
    except IOError:
        print("Training the Gaussian Mixture...")
        gmm = GaussianMixture(n_components=16,
                              covariance_type='full',
                              #  reg_covar=1,
                              verbose=1,
                              verbose_interval=2).fit(X_train)
        pickle.dump(gmm, open("data/gmm.pkl", "wb"))
    del X_train

    X_orig = _parseTestingData()
    print("Scaling the test data...")
    X_test = scaler.transform(X_orig[:, 1:])

    print("Calculating prosterior probabilies of test data...")
    probs = gmm.predict_proba(X_test)
    del X_test

    scores = _score(probs)
    del probs

    results = np.hstack((X_orig, scores.reshape((scores.shape[0], 1))))

    _outputToCSV(results, "data/gmm_results_max.csv")
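`_parseTrainingData`, `_parseTestingData`, `_score` and `_outputToCSV` are project helpers not shown here. Purely as an illustration (not the project's actual definition), a `_score` built on the posterior matrix might flag samples whose best-component posterior is low; `gmm.score_samples` would be the density-based alternative:

import numpy as np

def _score(probs):
    """Hypothetical stand-in: low maximum posterior = ambiguous sample."""
    return 1.0 - probs.max(axis=1)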