Python sklearn.mixture.GaussianMixture() Examples
The following are 30 code examples of sklearn.mixture.GaussianMixture(). Each example comes from an open-source project; the source file and project are noted above each snippet. You may also want to check out the other available functions and classes of the sklearn.mixture module.
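Before the individual examples, here is a minimal, self-contained sketch of the fit/predict pattern that most of the snippets below share. The synthetic data and parameter values are illustrative only and are not taken from any of the listed projects.

import numpy as np
from sklearn.mixture import GaussianMixture

# Illustrative data: two well-separated 2-D blobs
rng = np.random.RandomState(0)
X = np.vstack([rng.normal(-2.0, 0.5, size=(100, 2)),
               rng.normal(2.0, 0.5, size=(100, 2))])

# Fit a two-component GMM with EM, then query hard and soft assignments
gmm = GaussianMixture(n_components=2, covariance_type='full', random_state=0)
gmm.fit(X)
labels = gmm.predict(X)        # hard cluster assignments
probs = gmm.predict_proba(X)   # posterior probabilities per component
print(gmm.means_)              # fitted component means
print(gmm.bic(X))              # BIC, used in several examples below to pick n_components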
Example #1
Source File: clusterings.py From parcellation_fragmenter with BSD 3-Clause "New" or "Revised" License

def gmm(n_clusters, samples):
    """
    Run GMM clustering on vertex coordinates.

    Parameters:
    - - - - -
    n_clusters : int
        number of clusters to generate
    samples : array
        Euclidean-space coordinates of vertices
    """

    # Fit Gaussian Mixture Model
    gmm = mixture.GaussianMixture(
        n_components=n_clusters, covariance_type='tied',
        max_iter=1000, init_params='kmeans', verbose=0)

    gmm.fit(samples)
    labels = gmm.predict(samples)
    labels = labels.astype(np.int32) + 1

    return labels
Example #2
Source File: posterior.py From scVI with MIT License

def clustering_scores(self, prediction_algorithm: str = "knn") -> Tuple:
    if self.gene_dataset.n_labels > 1:
        latent, _, labels = self.get_latent()
        if prediction_algorithm == "knn":
            labels_pred = KMeans(
                self.gene_dataset.n_labels, n_init=200
            ).fit_predict(latent)  # n_jobs>1 ?
        elif prediction_algorithm == "gmm":
            gmm = GMM(self.gene_dataset.n_labels)
            gmm.fit(latent)
            labels_pred = gmm.predict(latent)

        asw_score = silhouette_score(latent, labels)
        nmi_score = NMI(labels, labels_pred)
        ari_score = ARI(labels, labels_pred)
        uca_score = unsupervised_clustering_accuracy(labels, labels_pred)[0]
        logger.debug(
            "Clustering Scores:\nSilhouette: %.4f\nNMI: %.4f\nARI: %.4f\nUCA: %.4f"
            % (asw_score, nmi_score, ari_score, uca_score)
        )
        return asw_score, nmi_score, ari_score, uca_score
Example #3
Source File: differential_entropies.py From geosketch with MIT License

def differential_entropies(X, labels):
    n_samples, n_features = X.shape

    labels = np.array(labels)
    names = sorted(set(labels))

    entropies = []

    for name in names:
        name_idx = np.where(labels == name)[0]

        gm = GaussianMixture().fit(X[name_idx, :])

        mn = multivariate_normal(
            mean=gm.means_.flatten(),
            cov=gm.covariances_.reshape(n_features, n_features)
        )

        entropies.append(mn.entropy())

    probs = softmax(entropies)

    for name, entropy, prob in zip(names, entropies, probs):
        #print('{}\t{}\t{}'.format(name, entropy, prob))
        print('{}\t{}'.format(name, entropy))
Example #4
Source File: alp_gmm.py From teachDeepRL with MIT License

def sample_task(self):
    if (len(self.tasks) < self.nb_random) or (np.random.random() < self.random_task_ratio):
        # Random task sampling
        new_task = self.random_task_generator.sample()
    else:
        # ALP-based task sampling

        # 1 - Retrieve the mean ALP value of each Gaussian in the GMM
        self.alp_means = []
        for pos, _, w in zip(self.gmm.means_, self.gmm.covariances_, self.gmm.weights_):
            self.alp_means.append(pos[-1])

        # 2 - Sample Gaussian proportionally to its mean ALP
        idx = proportional_choice(self.alp_means, eps=0.0)

        # 3 - Sample task in Gaussian, without forgetting to remove ALP dimension
        new_task = np.random.multivariate_normal(self.gmm.means_[idx], self.gmm.covariances_[idx])[:-1]
        new_task = np.clip(new_task, self.mins, self.maxs).astype(np.float32)

    return new_task
Example #5
Source File: gmm.py From Speech_Signal_Processing_and_Classification with MIT License

def determineComponents(data):
    X, Y = preparingData(data)
    n_components = np.arange(1, 10)
    bic = np.zeros(n_components.shape)

    for i, n in enumerate(n_components):
        # Fit a GMM to the data for each number of components
        gmm = GaussianMixture(n_components=n, max_iter=200, covariance_type='diag', n_init=3)
        gmm.fit(X)
        # Store the BIC score
        bic[i] = gmm.bic(X)

    # The Bayesian Information Criterion (BIC) is a cost function with two terms:
    # 1) the negative log-likelihood and 2) a model-complexity penalty. BIC prefers
    # models that fit the data well while the complexity stays small, so the model
    # with the smallest BIC is the winner.

    # Plot the results
    plt.plot(bic)
    plt.show()
Example #6
Source File: predicting.py From ImageSetCleaner with GNU General Public License v3.0

def detection_with_gaussian_mixture(image_set):
    """
    :param image_set: The bottleneck values of the relevant images.
    :return: Predictions vector
    """

    # Might achieve better results by initializing weights or means, given we know when we introduce noisy labels
    clf = mixture.GaussianMixture(n_components=2)

    clf.fit(image_set)

    predictions = clf.predict(image_set)
    predictions = normalize_predictions(predictions)

    return predictions
Example #7
Source File: gmmfense.py From platform-resource-manager with Apache License 2.0

def __init__(self, data, max_mixture=10, threshold=0.1):
    """
    Class constructor, arguments include:
        data - data to build GMM model
        max_mixture - max number of Gaussian mixtures
        threshold - probability threshold to determine fense
    """
    self.data = data
    self.thresh = threshold

    lowest_bic = np.infty
    components = 1
    bic = []
    n_components_range = range(1, max_mixture + 1)
    for n_components in n_components_range:
        # Fit a Gaussian mixture with EM
        gmm = mixture.GaussianMixture(n_components=n_components,
                                      random_state=1005)
        gmm.fit(data)
        bic.append(gmm.bic(data))
        if bic[-1] < lowest_bic:
            lowest_bic = bic[-1]
            best_gmm = gmm
            components = n_components

    log.debug('best gmm components number: %d, bic %f ', components, lowest_bic)
    self.gmm = best_gmm
Example #8
Source File: utils.py From scanobjectnn with MIT License

def get_2d_grid_gmm(subdivisions=[5, 5], variance=0.04):
    """
    Compute the weight, mean and covariance of a 2D GMM placed on a 2D grid
    :param subdivisions: 2-element list of the number of subdivisions of the 2D space in each axis to form the grid
    :param variance: scalar variance of the spherical GMM components
    :return gmm: instance of sklearn GaussianMixture (GMM) object
    """
    # n_gaussians = reduce(lambda x, y: x*y, subdivisions)
    n_gaussians = np.prod(np.array(subdivisions))
    step = [1.0 / (subdivisions[0]), 1.0 / (subdivisions[1])]

    means = np.mgrid[step[0] - 1: 1.0 - step[0]: complex(0, subdivisions[0]),
                     step[1] - 1: 1.0 - step[1]: complex(0, subdivisions[1])]
    means = np.reshape(means, [2, -1]).T
    covariances = variance * np.ones_like(means)
    weights = (1.0 / n_gaussians) * np.ones(n_gaussians)
    gmm = GaussianMixture(n_components=n_gaussians, covariance_type='diag')
    gmm.weights_ = weights
    gmm.covariances_ = covariances
    gmm.means_ = means
    from sklearn.mixture.gaussian_mixture import _compute_precision_cholesky
    gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances, 'diag')
    return gmm
Example #9
Source File: statistical.py From kenchi with BSD 3-Clause "New" or "Revised" License

def _fit(self, X):
    self.estimator_ = GaussianMixture(
        covariance_type = self.covariance_type,
        init_params     = self.init_params,
        max_iter        = self.max_iter,
        means_init      = self.means_init,
        n_components    = self.n_components,
        n_init          = self.n_init,
        precisions_init = self.precisions_init,
        random_state    = self.random_state,
        reg_covar       = self.reg_covar,
        tol             = self.tol,
        warm_start      = self.warm_start,
        weights_init    = self.weights_init
    ).fit(X)

    return self
Example #10
Source File: utils.py From scanobjectnn with MIT License

def get_3d_grid_gmm(subdivisions=[5, 5, 5], variance=0.04):
    """
    Compute the weight, mean and covariance of a GMM placed on a 3D grid
    :param subdivisions: 3-element list of the number of subdivisions of the 3D space in each axis to form the grid
    :param variance: scalar variance of the spherical GMM components
    :return gmm: instance of sklearn GaussianMixture (GMM) object
    """
    # n_gaussians = reduce(lambda x, y: x*y, subdivisions)
    n_gaussians = np.prod(np.array(subdivisions))
    step = [1.0 / (subdivisions[0]), 1.0 / (subdivisions[1]), 1.0 / (subdivisions[2])]

    means = np.mgrid[step[0] - 1: 1.0 - step[0]: complex(0, subdivisions[0]),
                     step[1] - 1: 1.0 - step[1]: complex(0, subdivisions[1]),
                     step[2] - 1: 1.0 - step[2]: complex(0, subdivisions[2])]
    means = np.reshape(means, [3, -1]).T
    covariances = variance * np.ones_like(means)
    weights = (1.0 / n_gaussians) * np.ones(n_gaussians)
    gmm = GaussianMixture(n_components=n_gaussians, covariance_type='diag')
    gmm.weights_ = weights
    gmm.covariances_ = covariances
    gmm.means_ = means
    from sklearn.mixture.gaussian_mixture import _compute_precision_cholesky
    gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances, 'diag')
    return gmm
Example #11
Source File: Train_webvision.py From DivideMix with MIT License

def eval_train(model, all_loss):
    model.eval()
    num_iter = (len(eval_loader.dataset)//eval_loader.batch_size)+1
    losses = torch.zeros(len(eval_loader.dataset))
    with torch.no_grad():
        for batch_idx, (inputs, targets, index) in enumerate(eval_loader):
            inputs, targets = inputs.cuda(), targets.cuda()
            outputs = model(inputs)
            loss = CE(outputs, targets)
            for b in range(inputs.size(0)):
                losses[index[b]] = loss[b]
            sys.stdout.write('\r')
            sys.stdout.write('| Evaluating loss Iter[%3d/%3d]\t' % (batch_idx, num_iter))
            sys.stdout.flush()

    losses = (losses-losses.min())/(losses.max()-losses.min())
    all_loss.append(losses)

    # fit a two-component GMM to the loss
    input_loss = losses.reshape(-1, 1)
    gmm = GaussianMixture(n_components=2, max_iter=10, tol=1e-2, reg_covar=5e-4)
    gmm.fit(input_loss)
    prob = gmm.predict_proba(input_loss)
    prob = prob[:, gmm.means_.argmin()]
    return prob, all_loss
Example #12
Source File: gmmfense.py From platform-resource-manager with Apache License 2.0

def __init__(self, data, max_mixture=10, threshold=0.1):
    """
    Class constructor, arguments include:
        data - data to build GMM model
        max_mixture - max number of Gaussian mixtures
        threshold - probability threshold to determine fense
    """
    self.data = data
    self.thresh = threshold

    lowest_bic = np.infty
    components = 1
    bic = []
    n_components_range = range(1, max_mixture + 1)
    for n_components in n_components_range:
        # Fit a Gaussian mixture with EM
        gmm = mixture.GaussianMixture(n_components=n_components,
                                      random_state=1005)
        gmm.fit(data)
        bic.append(gmm.bic(data))
        if bic[-1] < lowest_bic:
            lowest_bic = bic[-1]
            best_gmm = gmm
            components = n_components

    log.debug('best gmm components number: %d, bic %f ', components, lowest_bic)
    self.gmm = best_gmm
Example #13
Source File: SCDV.py From SCDV with MIT License

def cluster_GMM(num_clusters, word_vectors):
    # Initialize a GMM object and use it for clustering.
    clf = GaussianMixture(n_components=num_clusters,
                          covariance_type="tied",
                          init_params='kmeans',
                          max_iter=50)

    # Get cluster assignments.
    clf.fit(word_vectors)
    idx = clf.predict(word_vectors)
    print("Clustering Done...", time.time() - start, "seconds")

    # Get probabilities of cluster assignments.
    idx_proba = clf.predict_proba(word_vectors)

    # Dump cluster assignments and probability of cluster assignments.
    joblib.dump(idx, 'gmm_latestclusmodel_len2alldata.pkl')
    print("Cluster Assignments Saved...")

    joblib.dump(idx_proba, 'gmm_prob_latestclusmodel_len2alldata.pkl')
    print("Probabilities of Cluster Assignments Saved...")
    return (idx, idx_proba)
Example #14
Source File: utils.py From SDGym with MIT License

def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
    self.meta = self.get_metadata(data, categorical_columns, ordinal_columns)
    model = []

    self.output_info = []
    self.output_dim = 0
    for id_, info in enumerate(self.meta):
        if info['type'] == CONTINUOUS:
            gm = GaussianMixture(self.n_clusters)
            gm.fit(data[:, id_].reshape([-1, 1]))
            model.append(gm)
            self.output_info += [(1, 'tanh'), (self.n_clusters, 'softmax')]
            self.output_dim += 1 + self.n_clusters
        else:
            model.append(None)
            self.output_info += [(info['size'], 'softmax')]
            self.output_dim += info['size']

    self.model = model
Example #15
Source File: SCDV.py From SCDV with MIT License

def cluster_GMM(num_clusters, word_vectors):
    # Initialize a GMM object and use it for clustering.
    clf = GaussianMixture(n_components=num_clusters,
                          covariance_type="tied",
                          init_params='kmeans',
                          max_iter=50)

    # Get cluster assignments.
    clf.fit(word_vectors)
    idx = clf.predict(word_vectors)
    print("Clustering Done...", time.time() - start, "seconds")

    # Get probabilities of cluster assignments.
    idx_proba = clf.predict_proba(word_vectors)

    # Dump cluster assignments and probability of cluster assignments.
    joblib.dump(idx, 'gmm_latestclusmodel_len2alldata.pkl')
    print("Cluster Assignments Saved...")

    joblib.dump(idx_proba, 'gmm_prob_latestclusmodel_len2alldata.pkl')
    print("Probabilities of Cluster Assignments Saved...")
    return (idx, idx_proba)
Example #16
Source File: evaluate.py From SDGym with MIT License

def _evaluate_gmm_likelihood(train, test, metadata, components=[10, 30]):
    results = list()
    for n_components in components:
        gmm = GaussianMixture(n_components, covariance_type='diag')
        LOGGER.info('Evaluating using %s', gmm)
        gmm.fit(test)
        l1 = gmm.score(train)

        gmm.fit(train)
        l2 = gmm.score(test)

        results.append({
            "name": repr(gmm),
            "syn_likelihood": l1,
            "test_likelihood": l2,
        })

    return pd.DataFrame(results)
Example #17
Source File: BaseNNMixtureEstimator.py From Conditional_Density_Estimation with MIT License

def _sample_rows_same(self, X):
    """ uses efficient sklearn implementation to sample from gaussian mixture -> only works if all rows of X are the same"""
    weights, locs, scales = self._get_mixture_components(np.expand_dims(X[0], axis=0))

    # make sure that sum of weights < 1
    weights = weights.astype(np.float64)
    weights = weights / np.sum(weights)

    gmm = GaussianMixture(n_components=self.n_centers, covariance_type='diag', max_iter=5, tol=1e-1)
    gmm.fit(np.random.normal(size=(100, self.ndim_y)))  # just pretending a fit

    # overriding the GMM parameters with own params
    gmm.converged_ = True
    gmm.weights_ = weights[0]
    gmm.means_ = locs[0]
    gmm.covariances_ = scales[0]
    y_sample, _ = gmm.sample(X.shape[0])
    assert y_sample.shape == (X.shape[0], self.ndim_y)
    return X, y_sample
Example #18
Source File: vade.py From UnsupervisedDeepLearning-Pytorch with MIT License

def initialize_gmm(self, dataloader):
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        self.cuda()

    self.eval()
    data = []
    for batch_idx, (inputs, _) in enumerate(dataloader):
        inputs = inputs.view(inputs.size(0), -1).float()
        if use_cuda:
            inputs = inputs.cuda()
        inputs = Variable(inputs)
        z, outputs, mu, logvar = self.forward(inputs)
        data.append(z.data.cpu().numpy())
    data = np.concatenate(data)

    gmm = GaussianMixture(n_components=self.n_centroids, covariance_type='diag')
    gmm.fit(data)
    self.u_p.data.copy_(torch.from_numpy(gmm.means_.T.astype(np.float32)))
    self.lambda_p.data.copy_(torch.from_numpy(gmm.covariances_.T.astype(np.float32)))
Example #19
Source File: clustering.py From celeb-detection-oss with Mozilla Public License 2.0

def clusterize(points, n_components=2, covariance_type='tied', centers=None,
               weights=None, output=None, random_state=1000):
    if centers is not None:
        n_components = len(centers)

    if output is None:
        output = points

    if len(points) < 2:
        return [list(output)]

    gmm = GaussianMixture(n_components=n_components,
                          covariance_type=covariance_type,
                          means_init=centers,
                          weights_init=weights,
                          random_state=random_state)
    gmm.fit(points)
    labels = gmm.predict(points)

    clusters = defaultdict(list)
    for label, point in zip(labels, output):
        clusters[label].append(point)

    return sorted(clusters.values(), key=lambda x: len(x), reverse=True)
Example #20
Source File: Train_webvision_parallel.py From DivideMix with MIT License

def eval_train(eval_loader, model, device, whichnet, queue):
    CE = nn.CrossEntropyLoss(reduction='none')
    model.eval()
    num_iter = (len(eval_loader.dataset)//eval_loader.batch_size)+1
    losses = torch.zeros(len(eval_loader.dataset))
    with torch.no_grad():
        for batch_idx, (inputs, targets, index) in enumerate(eval_loader):
            inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)
            outputs = model(inputs)
            loss = CE(outputs, targets)
            for b in range(inputs.size(0)):
                losses[index[b]] = loss[b]
            sys.stdout.write('\n')
            sys.stdout.write('|%s Evaluating loss Iter[%3d/%3d]\t' % (whichnet, batch_idx, num_iter))
            sys.stdout.flush()

    losses = (losses-losses.min())/(losses.max()-losses.min())

    # fit a two-component GMM to the loss
    input_loss = losses.reshape(-1, 1)
    gmm = GaussianMixture(n_components=2, max_iter=10, tol=1e-2, reg_covar=1e-3)
    gmm.fit(input_loss)
    prob = gmm.predict_proba(input_loss)
    prob = prob[:, gmm.means_.argmin()]

    queue.put(prob)
Example #21
Source File: independent.py From SDGym with MIT License

def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
    self.dtype = data.dtype
    self.meta = Transformer.get_metadata(data, categorical_columns, ordinal_columns)

    self.models = []
    for id_, info in enumerate(self.meta):
        if info['type'] == CONTINUOUS:
            model = GaussianMixture(self.gmm_n)
            model.fit(data[:, [id_]])
            self.models.append(model)
        else:
            nomial = np.bincount(data[:, id_].astype('int'), minlength=info['size'])
            nomial = nomial / np.sum(nomial)
            self.models.append(nomial)
Example #22
Source File: graph_cuts.py From pyImSegm with BSD 3-Clause "New" or "Revised" License

def estim_class_model_gmm(features, nb_classes, init='kmeans'):
    """ from all features estimate Gaussian Mixture Model and assuming
    each cluster is a single class compute probability that each feature
    belongs to each class

    :param [[float]] features: list of features per segment
    :param int nb_classes: number of classes
    :param int init: initialisation
    :return [[float]]: probabilities that each feature belongs to each class

    >>> np.random.seed(0)
    >>> fts = np.row_stack([np.random.random((50, 3)) - 1,
    ...                     np.random.random((50, 3)) + 1])
    >>> mm = estim_class_model_gmm(fts, 2)
    >>> mm.predict_proba(fts).shape
    (100, 2)
    """
    logging.debug('estimate GMM for all given features %r and %i component',
                  features.shape, nb_classes)
    # http://scikit-learn.org/stable/modules/generated/sklearn.mixture.GMM.html
    gmm = mixture.GaussianMixture(n_components=nb_classes,
                                  covariance_type='full',
                                  max_iter=99)
    if init == 'kmeans':
        # http://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html
        kmeans = cluster.KMeans(n_clusters=nb_classes, init='k-means++', n_jobs=-1)
        y = kmeans.fit_predict(features)
        gmm.fit(features, y)
    else:
        gmm.fit(features)
    logging.info('compute probability of each feature to all component')
    return gmm
Example #23
Source File: region_growing.py From pyImSegm with BSD 3-Clause "New" or "Revised" License

def transform_rays_model_sets_mean_cdf_mixture(list_rays, nb_components=5, slic_size=15):
    """ compute the mixture model and transform it into cumulative distribution

    :param list(list(int)) list_rays: list ray features (distances)
    :param int nb_components: number components in mixture model
    :param int slic_size: superpixel size
    :return tuple(any,list(list(int))): mixture model, list of stat/param of models

    >>> np.random.seed(0)
    >>> list_rays = [[9, 4, 9], [4, 9, 7], [9, 7, 11], [10, 8, 10],
    ...              [9, 11, 8], [4, 8, 5], [8, 10, 6], [9, 7, 11]]
    >>> mm, mean_cdf = transform_rays_model_sets_mean_cdf_mixture(list_rays, 2)
    >>> len(mean_cdf)
    2
    """
    rays = np.array(list_rays)
    # mm = mixture.GaussianMixture(n_components=nb_components,
    #                              covariance_type='diag')
    mm = mixture.BayesianGaussianMixture(n_components=nb_components,
                                         covariance_type='diag')
    mm.fit(rays)
    logging.debug('Mixture model found %i components with weights: %r',
                  len(mm.weights_), mm.weights_)

    list_mean_cdf = []
    # stds = mm.covariances_[:, np.eye(mm.means_.shape[1], dtype=bool)]
    # stds = mm.covariances_  # for covariance_type='diag'
    # diff_means = np.max(mm.means_, axis=0) - np.min(mm.means_, axis=0)
    for mean, covar in zip(mm.means_, mm.covariances_):
        std = np.sqrt(covar + 1) * 2 + slic_size
        mean = ndimage.gaussian_filter1d(mean, 1)
        std = ndimage.gaussian_filter1d(std, 1)
        max_dist = np.max(mean + 2 * std)
        cdist = compute_cumulative_distrib(np.array([mean]), np.array([std]),
                                           np.array([1]), max_dist)
        list_mean_cdf.append((mean.tolist(), cdist))

    return mm, list_mean_cdf
Example #24
Source File: covar_gmm.py From teachDeepRL with MIT License

def __init__(self, mins, maxs, seed=None, params=dict()):
    self.seed = seed
    if not seed:
        self.seed = np.random.randint(42, 424242)
    np.random.seed(self.seed)

    # Task space boundaries
    self.mins = np.array(mins)
    self.maxs = np.array(maxs)

    # Range of number of Gaussians to try when fitting the GMM
    self.potential_ks = np.arange(2, 11, 1) if "potential_ks" not in params else params["potential_ks"]
    # Ratio of randomly sampled tasks VS tasks sampled using the GMM
    self.random_task_ratio = 0.2 if "random_task_ratio" not in params else params["random_task_ratio"]
    self.random_task_generator = Box(self.mins, self.maxs, dtype=np.float32)
    # Number of episodes between two fits of the GMM
    self.fit_rate = 250 if "fit_rate" not in params else params['fit_rate']
    self.nb_random = self.fit_rate  # Number of bootstrapping episodes

    # Original version does not use absolute LP, only LP.
    self.absolute_lp = False if "absolute_lp" not in params else params['absolute_lp']

    self.tasks = []
    self.tasks_times_rewards = []
    self.all_times = np.arange(0, 1, 1/self.fit_rate)

    # boring book-keeping
    self.bk = {'weights': [], 'covariances': [], 'means': [], 'tasks_lps': [], 'episodes': []}
Example #25
Source File: cellularity_detection_superpixels.py From HistomicsTK with Apache License 2.0

def set_superpixel_assignment(self):
    """Fit gaussian mixture model to features and get assignment."""
    mmodel = GaussianMixture(n_components=self.cd.n_gaussian_components)
    self.spixel_labels = mmodel.fit_predict(self.fdata.values) + 1

# =========================================================================
Example #26
Source File: gmm_choose_k.py From intro_ds with Apache License 2.0

def trainModel(data, clusterNum, covType):
    """
    Train the model using a Gaussian mixture.
    """
    model = GaussianMixture(n_components=clusterNum, covariance_type=covType)
    model.fit(data)
    return model
Example #27
Source File: gmm.py From intro_ds with Apache License 2.0

def trainModel(data, clusterNum):
    """
    Cluster the data using a Gaussian mixture.
    """
    model = GaussianMixture(n_components=clusterNum, covariance_type="full")
    model.fit(data)
    return model
Example #28
Source File: test_gmm_hmm.py From hmmlearn with BSD 3-Clause "New" or "Revised" License

def create_random_gmm(n_mix, n_features, covariance_type, prng=0):
    prng = check_random_state(prng)
    g = GaussianMixture(n_mix, covariance_type=covariance_type)
    g.means_ = prng.randint(-20, 20, (n_mix, n_features))
    g.covars_ = make_covar_matrix(covariance_type, n_mix, n_features)
    g.weights_ = normalized(prng.rand(n_mix))
    return g
Example #29
Source File: gmm_vs_spectral.py From intro_ds with Apache License 2.0

def trainGMM(data, clusterNum):
    """
    Train the Gaussian mixture model.
    """
    model = GaussianMixture(n_components=clusterNum, covariance_type='full')
    model.fit(data)
    return model
Example #30
Source File: gmm.py From ml-ids with MIT License

def main():
    """Run the IDS using GMM experiment."""
    week3Data = _parseTrainingData()

    # Scale the training data (ignore the timestamp column)
    scaler = preprocessing.RobustScaler().fit(week3Data[:, 1:])
    X_train = scaler.transform(week3Data[:, 1:])
    del week3Data

    try:
        gmm = pickle.load(open("data/gmm.pkl", "rb"))
        print("Loading pre-trained GMM...")
    except IOError:
        print("Training the Gaussian Mixture...")
        gmm = GaussianMixture(n_components=16,
                              covariance_type='full',
                              # reg_covar=1,
                              verbose=1,
                              verbose_interval=2).fit(X_train)
        pickle.dump(gmm, open("data/gmm.pkl", "wb"))

    del X_train

    X_orig = _parseTestingData()

    print("Scaling the test data...")
    X_test = scaler.transform(X_orig[:, 1:])

    print("Calculating posterior probabilities of test data...")
    probs = gmm.predict_proba(X_test)
    del X_test

    scores = _score(probs)
    del probs

    results = np.hstack((X_orig, scores.reshape((scores.shape[0], 1))))
    _outputToCSV(results, "data/gmm_results_max.csv")