Python sklearn.mixture.GaussianMixture() Examples
The following are 30 code examples of sklearn.mixture.GaussianMixture(). Each example comes from an open-source project; the source file and project are noted above each snippet. You may also want to check out the other available functions and classes of the sklearn.mixture module.
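Before the individual examples, here is a minimal, self-contained sketch of the fit/predict pattern that most of the snippets below share. The synthetic data and parameter values are illustrative only and are not taken from any of the listed projects.

import numpy as np
from sklearn.mixture import GaussianMixture

# Illustrative data: two well-separated 2-D blobs
rng = np.random.RandomState(0)
X = np.vstack([rng.normal(-2.0, 0.5, size=(100, 2)),
               rng.normal(2.0, 0.5, size=(100, 2))])

# Fit a two-component GMM with EM, then query hard and soft assignments
gmm = GaussianMixture(n_components=2, covariance_type='full', random_state=0)
gmm.fit(X)
labels = gmm.predict(X)        # hard cluster assignments
probs = gmm.predict_proba(X)   # posterior probabilities per component
print(gmm.means_)              # fitted component means
print(gmm.bic(X))              # BIC, used in several examples below to pick n_components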
Example #1
Source File: clusterings.py From parcellation_fragmenter with BSD 3-Clause "New" or "Revised" License

def gmm(n_clusters, samples):
    """
    Run GMM clustering on vertex coordinates.

    Parameters:
    - - - - -
    n_clusters : int
        number of clusters to generate
    samples : array
        Euclidean-space coordinates of vertices
    """

    # Fit Gaussian Mixture Model
    gmm = mixture.GaussianMixture(
        n_components=n_clusters, covariance_type='tied',
        max_iter=1000, init_params='kmeans', verbose=0)

    gmm.fit(samples)
    labels = gmm.predict(samples)
    labels = labels.astype(np.int32) + 1

    return labels
Example #2
Source File: posterior.py From scVI with MIT License

def clustering_scores(self, prediction_algorithm: str = "knn") -> Tuple:
    if self.gene_dataset.n_labels > 1:
        latent, _, labels = self.get_latent()
        if prediction_algorithm == "knn":
            labels_pred = KMeans(
                self.gene_dataset.n_labels, n_init=200
            ).fit_predict(latent)  # n_jobs>1 ?
        elif prediction_algorithm == "gmm":
            gmm = GMM(self.gene_dataset.n_labels)
            gmm.fit(latent)
            labels_pred = gmm.predict(latent)

        asw_score = silhouette_score(latent, labels)
        nmi_score = NMI(labels, labels_pred)
        ari_score = ARI(labels, labels_pred)
        uca_score = unsupervised_clustering_accuracy(labels, labels_pred)[0]
        logger.debug(
            "Clustering Scores:\nSilhouette: %.4f\nNMI: %.4f\nARI: %.4f\nUCA: %.4f"
            % (asw_score, nmi_score, ari_score, uca_score)
        )
        return asw_score, nmi_score, ari_score, uca_score
Example #3
Source File: differential_entropies.py From geosketch with MIT License

def differential_entropies(X, labels):
    n_samples, n_features = X.shape

    labels = np.array(labels)
    names = sorted(set(labels))

    entropies = []

    for name in names:
        name_idx = np.where(labels == name)[0]

        gm = GaussianMixture().fit(X[name_idx, :])

        mn = multivariate_normal(
            mean=gm.means_.flatten(),
            cov=gm.covariances_.reshape(n_features, n_features)
        )

        entropies.append(mn.entropy())

    probs = softmax(entropies)

    for name, entropy, prob in zip(names, entropies, probs):
        #print('{}\t{}\t{}'.format(name, entropy, prob))
        print('{}\t{}'.format(name, entropy))
Example #4
Source File: alp_gmm.py From teachDeepRL with MIT License

def sample_task(self):
    if (len(self.tasks) < self.nb_random) or (np.random.random() < self.random_task_ratio):
        # Random task sampling
        new_task = self.random_task_generator.sample()
    else:
        # ALP-based task sampling

        # 1 - Retrieve the mean ALP value of each Gaussian in the GMM
        self.alp_means = []
        for pos, _, w in zip(self.gmm.means_, self.gmm.covariances_, self.gmm.weights_):
            self.alp_means.append(pos[-1])

        # 2 - Sample Gaussian proportionally to its mean ALP
        idx = proportional_choice(self.alp_means, eps=0.0)

        # 3 - Sample task in Gaussian, without forgetting to remove ALP dimension
        new_task = np.random.multivariate_normal(self.gmm.means_[idx], self.gmm.covariances_[idx])[:-1]
        new_task = np.clip(new_task, self.mins, self.maxs).astype(np.float32)

    return new_task
Example #5
Source File: gmm.py From Speech_Signal_Processing_and_Classification with MIT License

def determineComponents(data):
    X, Y = preparingData(data)
    n_components = np.arange(1, 10)
    bic = np.zeros(n_components.shape)

    for i, n in enumerate(n_components):
        # Fit a GMM to the data for each number of components
        gmm = GaussianMixture(n_components=n, max_iter=200, covariance_type='diag', n_init=3)
        gmm.fit(X)
        # Store the BIC score
        bic[i] = gmm.bic(X)

    # The Bayesian Information Criterion (BIC) is a cost function with two terms:
    # 1) the negative log-likelihood and 2) a model-complexity penalty. BIC prefers
    # models that fit the data well while the complexity stays small, so the model
    # with the smallest BIC is the winner.

    # Plot the results
    plt.plot(bic)
    plt.show()
Example #6
Source File: predicting.py From ImageSetCleaner with GNU General Public License v3.0

def detection_with_gaussian_mixture(image_set):
    """
    :param image_set: The bottleneck values of the relevant images.
    :return: Predictions vector
    """

    # Might achieve better results by initializing weights or means, given we know when we introduce noisy labels
    clf = mixture.GaussianMixture(n_components=2)

    clf.fit(image_set)

    predictions = clf.predict(image_set)
    predictions = normalize_predictions(predictions)

    return predictions
Example #7
Source File: gmmfense.py From platform-resource-manager with Apache License 2.0

def __init__(self, data, max_mixture=10, threshold=0.1):
    """
    Class constructor, arguments include:
        data - data to build GMM model
        max_mixture - max number of Gaussian mixtures
        threshold - probability threshold to determine fense
    """
    self.data = data
    self.thresh = threshold

    lowest_bic = np.infty
    components = 1
    bic = []
    n_components_range = range(1, max_mixture + 1)
    for n_components in n_components_range:
        # Fit a Gaussian mixture with EM
        gmm = mixture.GaussianMixture(n_components=n_components,
                                      random_state=1005)
        gmm.fit(data)
        bic.append(gmm.bic(data))
        if bic[-1] < lowest_bic:
            lowest_bic = bic[-1]
            best_gmm = gmm
            components = n_components

    log.debug('best gmm components number: %d, bic %f ', components, lowest_bic)
    self.gmm = best_gmm
Example #8
Source File: utils.py From scanobjectnn with MIT License

def get_2d_grid_gmm(subdivisions=[5, 5], variance=0.04):
    """
    Compute the weight, mean and covariance of a 2D GMM placed on a 2D grid
    :param subdivisions: 2-element list of the number of subdivisions of the 2D space in each axis to form the grid
    :param variance: scalar variance of the spherical GMM components
    :return gmm: instance of sklearn GaussianMixture (GMM) object
    """
    # n_gaussians = reduce(lambda x, y: x*y, subdivisions)
    n_gaussians = np.prod(np.array(subdivisions))
    step = [1.0 / (subdivisions[0]), 1.0 / (subdivisions[1])]

    means = np.mgrid[step[0] - 1: 1.0 - step[0]: complex(0, subdivisions[0]),
                     step[1] - 1: 1.0 - step[1]: complex(0, subdivisions[1])]
    means = np.reshape(means, [2, -1]).T
    covariances = variance * np.ones_like(means)
    weights = (1.0 / n_gaussians) * np.ones(n_gaussians)
    gmm = GaussianMixture(n_components=n_gaussians, covariance_type='diag')
    gmm.weights_ = weights
    gmm.covariances_ = covariances
    gmm.means_ = means
    from sklearn.mixture.gaussian_mixture import _compute_precision_cholesky
    gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances, 'diag')
    return gmm
Example #9
Source File: statistical.py From kenchi with BSD 3-Clause "New" or "Revised" License

def _fit(self, X):
    self.estimator_ = GaussianMixture(
        covariance_type = self.covariance_type,
        init_params     = self.init_params,
        max_iter        = self.max_iter,
        means_init      = self.means_init,
        n_components    = self.n_components,
        n_init          = self.n_init,
        precisions_init = self.precisions_init,
        random_state    = self.random_state,
        reg_covar       = self.reg_covar,
        tol             = self.tol,
        warm_start      = self.warm_start,
        weights_init    = self.weights_init
    ).fit(X)

    return self
Example #10
Source File: utils.py From scanobjectnn with MIT License

def get_3d_grid_gmm(subdivisions=[5, 5, 5], variance=0.04):
    """
    Compute the weight, mean and covariance of a GMM placed on a 3D grid
    :param subdivisions: 3-element list of the number of subdivisions of the 3D space in each axis to form the grid
    :param variance: scalar variance of the spherical GMM components
    :return gmm: instance of sklearn GaussianMixture (GMM) object
    """
    # n_gaussians = reduce(lambda x, y: x*y, subdivisions)
    n_gaussians = np.prod(np.array(subdivisions))
    step = [1.0 / (subdivisions[0]), 1.0 / (subdivisions[1]), 1.0 / (subdivisions[2])]

    means = np.mgrid[step[0] - 1: 1.0 - step[0]: complex(0, subdivisions[0]),
                     step[1] - 1: 1.0 - step[1]: complex(0, subdivisions[1]),
                     step[2] - 1: 1.0 - step[2]: complex(0, subdivisions[2])]
    means = np.reshape(means, [3, -1]).T
    covariances = variance * np.ones_like(means)
    weights = (1.0 / n_gaussians) * np.ones(n_gaussians)
    gmm = GaussianMixture(n_components=n_gaussians, covariance_type='diag')
    gmm.weights_ = weights
    gmm.covariances_ = covariances
    gmm.means_ = means
    from sklearn.mixture.gaussian_mixture import _compute_precision_cholesky
    gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances, 'diag')
    return gmm
Example #11
Source File: Train_webvision.py From DivideMix with MIT License

def eval_train(model, all_loss):
    model.eval()
    num_iter = (len(eval_loader.dataset)//eval_loader.batch_size)+1
    losses = torch.zeros(len(eval_loader.dataset))
    with torch.no_grad():
        for batch_idx, (inputs, targets, index) in enumerate(eval_loader):
            inputs, targets = inputs.cuda(), targets.cuda()
            outputs = model(inputs)
            loss = CE(outputs, targets)
            for b in range(inputs.size(0)):
                losses[index[b]] = loss[b]
            sys.stdout.write('\r')
            sys.stdout.write('| Evaluating loss Iter[%3d/%3d]\t' % (batch_idx, num_iter))
            sys.stdout.flush()

    losses = (losses-losses.min())/(losses.max()-losses.min())
    all_loss.append(losses)

    # fit a two-component GMM to the loss
    input_loss = losses.reshape(-1, 1)
    gmm = GaussianMixture(n_components=2, max_iter=10, tol=1e-2, reg_covar=5e-4)
    gmm.fit(input_loss)
    prob = gmm.predict_proba(input_loss)
    prob = prob[:, gmm.means_.argmin()]
    return prob, all_loss
Example #12
Source File: gmmfense.py From platform-resource-manager with Apache License 2.0

def __init__(self, data, max_mixture=10, threshold=0.1):
    """
    Class constructor, arguments include:
        data - data to build GMM model
        max_mixture - max number of Gaussian mixtures
        threshold - probability threshold to determine fense
    """
    self.data = data
    self.thresh = threshold

    lowest_bic = np.infty
    components = 1
    bic = []
    n_components_range = range(1, max_mixture + 1)
    for n_components in n_components_range:
        # Fit a Gaussian mixture with EM
        gmm = mixture.GaussianMixture(n_components=n_components,
                                      random_state=1005)
        gmm.fit(data)
        bic.append(gmm.bic(data))
        if bic[-1] < lowest_bic:
            lowest_bic = bic[-1]
            best_gmm = gmm
            components = n_components

    log.debug('best gmm components number: %d, bic %f ', components, lowest_bic)
    self.gmm = best_gmm
Example #13
Source File: SCDV.py From SCDV with MIT License

def cluster_GMM(num_clusters, word_vectors):
    # Initialize a GMM object and use it for clustering.
    clf = GaussianMixture(n_components=num_clusters,
                          covariance_type="tied",
                          init_params='kmeans',
                          max_iter=50)

    # Get cluster assignments.
    clf.fit(word_vectors)
    idx = clf.predict(word_vectors)
    print("Clustering Done...", time.time() - start, "seconds")

    # Get probabilities of cluster assignments.
    idx_proba = clf.predict_proba(word_vectors)

    # Dump cluster assignments and probability of cluster assignments.
    joblib.dump(idx, 'gmm_latestclusmodel_len2alldata.pkl')
    print("Cluster Assignments Saved...")

    joblib.dump(idx_proba, 'gmm_prob_latestclusmodel_len2alldata.pkl')
    print("Probabilities of Cluster Assignments Saved...")
    return (idx, idx_proba)
Example #14
Source File: utils.py From SDGym with MIT License

def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
    self.meta = self.get_metadata(data, categorical_columns, ordinal_columns)
    model = []

    self.output_info = []
    self.output_dim = 0
    for id_, info in enumerate(self.meta):
        if info['type'] == CONTINUOUS:
            gm = GaussianMixture(self.n_clusters)
            gm.fit(data[:, id_].reshape([-1, 1]))
            model.append(gm)
            self.output_info += [(1, 'tanh'), (self.n_clusters, 'softmax')]
            self.output_dim += 1 + self.n_clusters
        else:
            model.append(None)
            self.output_info += [(info['size'], 'softmax')]
            self.output_dim += info['size']

    self.model = model
Example #15
Source File: SCDV.py From SCDV with MIT License

def cluster_GMM(num_clusters, word_vectors):
    # Initialize a GMM object and use it for clustering.
    clf = GaussianMixture(n_components=num_clusters,
                          covariance_type="tied",
                          init_params='kmeans',
                          max_iter=50)

    # Get cluster assignments.
    clf.fit(word_vectors)
    idx = clf.predict(word_vectors)
    print("Clustering Done...", time.time() - start, "seconds")

    # Get probabilities of cluster assignments.
    idx_proba = clf.predict_proba(word_vectors)

    # Dump cluster assignments and probability of cluster assignments.
    joblib.dump(idx, 'gmm_latestclusmodel_len2alldata.pkl')
    print("Cluster Assignments Saved...")

    joblib.dump(idx_proba, 'gmm_prob_latestclusmodel_len2alldata.pkl')
    print("Probabilities of Cluster Assignments Saved...")
    return (idx, idx_proba)
Example #16
Source File: evaluate.py From SDGym with MIT License

def _evaluate_gmm_likelihood(train, test, metadata, components=[10, 30]):
    results = list()
    for n_components in components:
        gmm = GaussianMixture(n_components, covariance_type='diag')
        LOGGER.info('Evaluating using %s', gmm)
        gmm.fit(test)
        l1 = gmm.score(train)

        gmm.fit(train)
        l2 = gmm.score(test)

        results.append({
            "name": repr(gmm),
            "syn_likelihood": l1,
            "test_likelihood": l2,
        })

    return pd.DataFrame(results)
Example #17
Source File: BaseNNMixtureEstimator.py From Conditional_Density_Estimation with MIT License

def _sample_rows_same(self, X):
    """ uses efficient sklearn implementation to sample from gaussian mixture -> only works if all rows of X are the same"""
    weights, locs, scales = self._get_mixture_components(np.expand_dims(X[0], axis=0))

    # make sure that sum of weights < 1
    weights = weights.astype(np.float64)
    weights = weights / np.sum(weights)

    gmm = GaussianMixture(n_components=self.n_centers, covariance_type='diag', max_iter=5, tol=1e-1)
    gmm.fit(np.random.normal(size=(100, self.ndim_y)))  # just pretending a fit

    # overriding the GMM parameters with own params
    gmm.converged_ = True
    gmm.weights_ = weights[0]
    gmm.means_ = locs[0]
    gmm.covariances_ = scales[0]
    y_sample, _ = gmm.sample(X.shape[0])
    assert y_sample.shape == (X.shape[0], self.ndim_y)
    return X, y_sample
Example #18
Source File: vade.py From UnsupervisedDeepLearning-Pytorch with MIT License

def initialize_gmm(self, dataloader):
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        self.cuda()

    self.eval()
    data = []
    for batch_idx, (inputs, _) in enumerate(dataloader):
        inputs = inputs.view(inputs.size(0), -1).float()
        if use_cuda:
            inputs = inputs.cuda()
        inputs = Variable(inputs)
        z, outputs, mu, logvar = self.forward(inputs)
        data.append(z.data.cpu().numpy())
    data = np.concatenate(data)

    gmm = GaussianMixture(n_components=self.n_centroids, covariance_type='diag')
    gmm.fit(data)
    self.u_p.data.copy_(torch.from_numpy(gmm.means_.T.astype(np.float32)))
    self.lambda_p.data.copy_(torch.from_numpy(gmm.covariances_.T.astype(np.float32)))
Example #19
Source File: clustering.py From celeb-detection-oss with Mozilla Public License 2.0

def clusterize(points, n_components=2, covariance_type='tied', centers=None,
               weights=None, output=None, random_state=1000):
    if centers is not None:
        n_components = len(centers)

    if output is None:
        output = points

    if len(points) < 2:
        return [list(output)]

    gmm = GaussianMixture(n_components=n_components,
                          covariance_type=covariance_type,
                          means_init=centers,
                          weights_init=weights,
                          random_state=random_state)
    gmm.fit(points)
    labels = gmm.predict(points)

    clusters = defaultdict(list)
    for label, point in zip(labels, output):
        clusters[label].append(point)

    return sorted(clusters.values(), key=lambda x: len(x), reverse=True)
Example #20
Source File: Train_webvision_parallel.py From DivideMix with MIT License

def eval_train(eval_loader, model, device, whichnet, queue):
    CE = nn.CrossEntropyLoss(reduction='none')
    model.eval()
    num_iter = (len(eval_loader.dataset)//eval_loader.batch_size)+1
    losses = torch.zeros(len(eval_loader.dataset))
    with torch.no_grad():
        for batch_idx, (inputs, targets, index) in enumerate(eval_loader):
            inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)
            outputs = model(inputs)
            loss = CE(outputs, targets)
            for b in range(inputs.size(0)):
                losses[index[b]] = loss[b]
            sys.stdout.write('\n')
            sys.stdout.write('|%s Evaluating loss Iter[%3d/%3d]\t' % (whichnet, batch_idx, num_iter))
            sys.stdout.flush()

    losses = (losses-losses.min())/(losses.max()-losses.min())

    # fit a two-component GMM to the loss
    input_loss = losses.reshape(-1, 1)
    gmm = GaussianMixture(n_components=2, max_iter=10, tol=1e-2, reg_covar=1e-3)
    gmm.fit(input_loss)
    prob = gmm.predict_proba(input_loss)
    prob = prob[:, gmm.means_.argmin()]

    queue.put(prob)
Example #21
Source File: independent.py From SDGym with MIT License

def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
    self.dtype = data.dtype
    self.meta = Transformer.get_metadata(data, categorical_columns, ordinal_columns)

    self.models = []
    for id_, info in enumerate(self.meta):
        if info['type'] == CONTINUOUS:
            model = GaussianMixture(self.gmm_n)
            model.fit(data[:, [id_]])
            self.models.append(model)
        else:
            nomial = np.bincount(data[:, id_].astype('int'), minlength=info['size'])
            nomial = nomial / np.sum(nomial)
            self.models.append(nomial)
Example #22
Source File: graph_cuts.py From pyImSegm with BSD 3-Clause "New" or "Revised" License

def estim_class_model_gmm(features, nb_classes, init='kmeans'):
    """ from all features estimate Gaussian Mixture Model and assuming
    each cluster is a single class compute probability that each feature
    belongs to each class

    :param [[float]] features: list of features per segment
    :param int nb_classes: number of classes
    :param int init: initialisation
    :return [[float]]: probabilities that each feature belongs to each class

    >>> np.random.seed(0)
    >>> fts = np.row_stack([np.random.random((50, 3)) - 1,
    ...                     np.random.random((50, 3)) + 1])
    >>> mm = estim_class_model_gmm(fts, 2)
    >>> mm.predict_proba(fts).shape
    (100, 2)
    """
    logging.debug('estimate GMM for all given features %r and %i component',
                  features.shape, nb_classes)
    # http://scikit-learn.org/stable/modules/generated/sklearn.mixture.GMM.html
    gmm = mixture.GaussianMixture(n_components=nb_classes,
                                  covariance_type='full',
                                  max_iter=99)
    if init == 'kmeans':
        # http://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html
        kmeans = cluster.KMeans(n_clusters=nb_classes, init='k-means++', n_jobs=-1)
        y = kmeans.fit_predict(features)
        gmm.fit(features, y)
    else:
        gmm.fit(features)
    logging.info('compute probability of each feature to all component')
    return gmm
Example #23
Source File: region_growing.py From pyImSegm with BSD 3-Clause "New" or "Revised" License

def transform_rays_model_sets_mean_cdf_mixture(list_rays, nb_components=5, slic_size=15):
    """ compute the mixture model and transform it into cumulative distribution

    :param list(list(int)) list_rays: list ray features (distances)
    :param int nb_components: number components in mixture model
    :param int slic_size: superpixel size
    :return tuple(any,list(list(int))): mixture model, list of stat/param of models

    >>> np.random.seed(0)
    >>> list_rays = [[9, 4, 9], [4, 9, 7], [9, 7, 11], [10, 8, 10],
    ...              [9, 11, 8], [4, 8, 5], [8, 10, 6], [9, 7, 11]]
    >>> mm, mean_cdf = transform_rays_model_sets_mean_cdf_mixture(list_rays, 2)
    >>> len(mean_cdf)
    2
    """
    rays = np.array(list_rays)
    # mm = mixture.GaussianMixture(n_components=nb_components,
    #                              covariance_type='diag')
    mm = mixture.BayesianGaussianMixture(n_components=nb_components,
                                         covariance_type='diag')
    mm.fit(rays)
    logging.debug('Mixture model found %i components with weights: %r',
                  len(mm.weights_), mm.weights_)

    list_mean_cdf = []
    # stds = mm.covariances_[:, np.eye(mm.means_.shape[1], dtype=bool)]
    # stds = mm.covariances_  # for covariance_type='diag'
    # diff_means = np.max(mm.means_, axis=0) - np.min(mm.means_, axis=0)
    for mean, covar in zip(mm.means_, mm.covariances_):
        std = np.sqrt(covar + 1) * 2 + slic_size
        mean = ndimage.gaussian_filter1d(mean, 1)
        std = ndimage.gaussian_filter1d(std, 1)
        max_dist = np.max(mean + 2 * std)
        cdist = compute_cumulative_distrib(np.array([mean]), np.array([std]),
                                           np.array([1]), max_dist)
        list_mean_cdf.append((mean.tolist(), cdist))

    return mm, list_mean_cdf
Example #24
Source File: covar_gmm.py From teachDeepRL with MIT License

def __init__(self, mins, maxs, seed=None, params=dict()):
    self.seed = seed
    if not seed:
        self.seed = np.random.randint(42, 424242)
    np.random.seed(self.seed)

    # Task space boundaries
    self.mins = np.array(mins)
    self.maxs = np.array(maxs)

    # Range of number of Gaussians to try when fitting the GMM
    self.potential_ks = np.arange(2, 11, 1) if "potential_ks" not in params else params["potential_ks"]
    # Ratio of randomly sampled tasks VS tasks sampled using the GMM
    self.random_task_ratio = 0.2 if "random_task_ratio" not in params else params["random_task_ratio"]
    self.random_task_generator = Box(self.mins, self.maxs, dtype=np.float32)
    # Number of episodes between two fits of the GMM
    self.fit_rate = 250 if "fit_rate" not in params else params['fit_rate']
    self.nb_random = self.fit_rate  # Number of bootstrapping episodes

    # Original version does not use absolute LP, only LP.
    self.absolute_lp = False if "absolute_lp" not in params else params['absolute_lp']

    self.tasks = []
    self.tasks_times_rewards = []
    self.all_times = np.arange(0, 1, 1/self.fit_rate)

    # boring book-keeping
    self.bk = {'weights': [], 'covariances': [], 'means': [], 'tasks_lps': [], 'episodes': []}
Example #25
Source File: cellularity_detection_superpixels.py From HistomicsTK with Apache License 2.0

def set_superpixel_assignment(self):
    """Fit gaussian mixture model to features and get assignment."""
    mmodel = GaussianMixture(n_components=self.cd.n_gaussian_components)
    self.spixel_labels = mmodel.fit_predict(self.fdata.values) + 1

# =========================================================================
Example #26
Source File: gmm_choose_k.py From intro_ds with Apache License 2.0

def trainModel(data, clusterNum, covType):
    """
    Train the model using a Gaussian mixture.
    """
    model = GaussianMixture(n_components=clusterNum, covariance_type=covType)
    model.fit(data)
    return model
Example #27
Source File: gmm.py From intro_ds with Apache License 2.0

def trainModel(data, clusterNum):
    """
    Cluster the data using a Gaussian mixture.
    """
    model = GaussianMixture(n_components=clusterNum, covariance_type="full")
    model.fit(data)
    return model
Example #28
Source File: test_gmm_hmm.py From hmmlearn with BSD 3-Clause "New" or "Revised" License

def create_random_gmm(n_mix, n_features, covariance_type, prng=0):
    prng = check_random_state(prng)
    g = GaussianMixture(n_mix, covariance_type=covariance_type)
    g.means_ = prng.randint(-20, 20, (n_mix, n_features))
    g.covars_ = make_covar_matrix(covariance_type, n_mix, n_features)
    g.weights_ = normalized(prng.rand(n_mix))
    return g
Example #29
Source File: gmm_vs_spectral.py From intro_ds with Apache License 2.0

def trainGMM(data, clusterNum):
    """
    Train the Gaussian mixture model.
    """
    model = GaussianMixture(n_components=clusterNum, covariance_type='full')
    model.fit(data)
    return model
Example #30
Source File: gmm.py From ml-ids with MIT License

def main():
    """Run the IDS using GMM experiment."""
    week3Data = _parseTrainingData()

    # Scale the training data (ignore the timestamp column)
    scaler = preprocessing.RobustScaler().fit(week3Data[:, 1:])
    X_train = scaler.transform(week3Data[:, 1:])
    del week3Data

    try:
        gmm = pickle.load(open("data/gmm.pkl", "rb"))
        print("Loading pre-trained GMM...")
    except IOError:
        print("Training the Gaussian Mixture...")
        gmm = GaussianMixture(n_components=16,
                              covariance_type='full',
                              # reg_covar=1,
                              verbose=1,
                              verbose_interval=2).fit(X_train)
        pickle.dump(gmm, open("data/gmm.pkl", "wb"))

    del X_train

    X_orig = _parseTestingData()

    print("Scaling the test data...")
    X_test = scaler.transform(X_orig[:, 1:])

    print("Calculating posterior probabilities of test data...")
    probs = gmm.predict_proba(X_test)
    del X_test

    scores = _score(probs)
    del probs

    results = np.hstack((X_orig, scores.reshape((scores.shape[0], 1))))
    _outputToCSV(results, "data/gmm_results_max.csv")