Python sklearn.decomposition.NMF Examples
The following are 30 code examples of sklearn.decomposition.NMF().
Example #1
Source File: From dynamic-nmf with Apache License 2.0 | 7 votes |
def rank_terms(self, topic_index, top=-1):
    """
    Rank the term indices of one topic by descending weight.

    Reads row ``topic_index`` of ``self.H`` and returns the column indices
    ordered from the largest value to the smallest. When ``top`` is below 1
    or larger than the number of indices, the full ranking is returned;
    otherwise only the first ``top`` indices are.

    Raises ValueError if ``self.H`` is None.
    """
    if self.H is None:
        raise ValueError("No results for previous run available")
    topic_weights = self.H[topic_index, :]
    # argsort is ascending, so flip it to put the heaviest terms first
    ranking = np.argsort(topic_weights)[::-1]
    keep_all = top < 1 or top > len(ranking)
    return ranking if keep_all else ranking[0:top]
Example #2
Source File: From poem_generator with Apache License 2.0 | 7 votes |
def write_topics(ftopics, fwords, ftopics_words, poem_words, n_topic, n_topic_words):
    """
    Fit an NMF topic model on ``poem_words`` and write its results to disk.

    The documents are vectorised with the module-level ``count_vect``,
    re-weighted with TF-IDF and factorised into ``n_topic`` components.
    Three files are produced:

    * ``ftopics``       - UTF-8 text, one line per topic holding its
                          ``n_topic_words`` highest-weighted words.
    * ``fwords``        - pickle of the vectoriser's feature names.
    * ``ftopics_words`` - pickle of ``nmf.components_``.

    NOTE(review): in the received snippet the three file-opening calls were
    truncated (``fw =, 'w', 'utf-8')`` / ``fw =, 'wb')``). They are restored
    here from the otherwise unused path parameters and the progress
    messages - confirm against the upstream project.
    """
    count_matrix = count_vect.fit_transform(poem_words)
    tfidf = TfidfTransformer().fit_transform(count_matrix)
    nmf = decomposition.NMF(n_components=n_topic).fit(tfidf)
    # NOTE(review): newer scikit-learn releases replace get_feature_names()
    # with get_feature_names_out(); kept as-is for the version this targets.
    feature_names = count_vect.get_feature_names()

    # Context managers close the files even if a write fails part-way.
    with open(ftopics, 'w', encoding='utf-8') as fw:
        for topic in nmf.components_:
            # Reverse slice of the ascending argsort: the n_topic_words
            # largest weights, biggest first.
            top_ids = topic.argsort()[:-n_topic_words - 1:-1]
            fw.write(' '.join([feature_names[i] for i in top_ids]) + '\n')
    print('Write topics done.')

    with open(fwords, 'wb') as fw:
        pickle.dump(feature_names, fw)
    print('Write words done.')

    with open(ftopics_words, 'wb') as fw:
        pickle.dump(nmf.components_, fw)
    print('Write topic_words done.')
Example #3
Source File: From pandas-ml with BSD 3-Clause "New" or "Revised" License | 7 votes |
def test_objectmapper(self):
    """
    Check that the ``decomposition`` accessor of an empty ModelFrame exposes
    the very same class objects as the ``decomposition`` module.
    """
    df = pdml.ModelFrame([])
    estimator_names = (
        'PCA',
        'IncrementalPCA',
        'KernelPCA',
        'FactorAnalysis',
        'FastICA',
        'TruncatedSVD',
        'NMF',
        'SparsePCA',
        'MiniBatchSparsePCA',
        'SparseCoder',
        'DictionaryLearning',
        'MiniBatchDictionaryLearning',
        'LatentDirichletAllocation',
    )
    for name in estimator_names:
        # identity, not equality: the accessor must hand back the same class
        self.assertIs(getattr(df.decomposition, name),
                      getattr(decomposition, name))
Example #4
Source File: From Python-DevOps with MIT License | 6 votes |
def train_nmf(corpus, n_topics=10, max_df=0.95, min_df=2, cleaning=clearstring, stop_words='english'):
    """
    Train an NMF topic model on a corpus of text documents.

    When ``cleaning`` is not None it is applied to every document and the
    result is written back into ``corpus`` (the caller's sequence is
    overwritten element by element). The documents are then TF-IDF
    vectorised and factorised into ``n_topics`` components.

    Returns ``TOPIC(feature_names, fitted_nmf)``.
    """
    if cleaning is not None:
        for position in range(len(corpus)):
            corpus[position] = cleaning(corpus[position])

    vectorizer = TfidfVectorizer(max_df=max_df, min_df=min_df, stop_words=stop_words)
    weighted_docs = vectorizer.fit_transform(corpus)
    vocabulary = vectorizer.get_feature_names()

    factorizer = NMF(
        n_components=n_topics,
        random_state=1,
        alpha=.1,
        l1_ratio=.5,
        init='nndsvd',
    )
    factorizer = factorizer.fit(weighted_docs)
    return TOPIC(vocabulary, factorizer)
Example #5
Source File: From atap with Apache License 2.0 | 6 votes |
def __init__(self, n_topics=50, estimator='LDA'):
    """
    Build a topic-modelling pipeline.

    n_topics is the desired number of topics.
    To use Latent Semantic Analysis, set estimator to 'LSA'.
    To use Non-Negative Matrix Factorization, set estimator to 'NMF'.
    Any other value falls back to Latent Dirichlet Allocation ('LDA').

    The chosen estimator is stored on ``self.estimator`` and is the last
    step of ``self.model``.
    """
    self.n_topics = n_topics

    if estimator == 'LSA':
        self.estimator = TruncatedSVD(n_components=self.n_topics)
    elif estimator == 'NMF':
        self.estimator = NMF(n_components=self.n_topics)
    else:
        # scikit-learn renamed LDA's `n_topics` keyword to `n_components`
        # (same spelling as the two estimators above); the old keyword is
        # rejected by current releases.
        self.estimator = LatentDirichletAllocation(n_components=self.n_topics)

    self.model = Pipeline([
        ('norm', TextNormalizer()),
        # The step is called 'tfidf' but holds a CountVectorizer; the name is
        # left untouched so lookups by step name keep working.
        ('tfidf', CountVectorizer(tokenizer=identity,
                                  preprocessor=None, lowercase=False)),
        ('model', self.estimator),
    ])
Example #6
Source File: From BoostedFactorization with GNU General Public License v3.0 | 6 votes |
# fit_and_score_NMF: fits an NMF model (random init, `self.args.dimensions`
# components, regularisation `self.args.alpha`) to `new_residuals`, computes one
# value per (self.index_1, self.index_2) pair as the dot product of the matching
# W row and H column, and returns a float32 CSR matrix of scores plus W.
# NOTE(review): this snippet arrived flattened onto a single line and the first
# argument of `np.maximum(, 0)` is missing, so it is not valid Python as shown.
# `sub_scores` is computed but never used, so the dropped expression presumably
# involved it - restore it from the upstream project before running.
def fit_and_score_NMF(self, new_residuals): """ Factorizing a residual matrix, returning the approximate target and an embedding. :param new_residuals: Input target matrix. :return scores: Approximate target matrix. :return W: Embedding matrix. """ model = NMF(n_components=self.args.dimensions, init="random", verbose=False, alpha=self.args.alpha) W = model.fit_transform(new_residuals) H = model.components_ print("Scoring started.\n") sub_scores = np.sum(np.multiply(W[self.index_1, :], H[:, self.index_2].T), axis=1) scores = np.maximum(, 0) scores = sparse.csr_matrix((scores, (self.index_1, self.index_2)), shape=self.shape, dtype=np.float32) return scores, W
Example #7
Source File: From sklearn-onnx with MIT License | 6 votes |
def nmf_to_onnx(W, H, op_version=12):
    """
    Convert an NMF described by matrices *W* and *H* (*WH* approximates the
    training data *M*) into an ONNX graph.

    The graph takes two index inputs named ``'col'`` and ``'row'``, extracts
    the matching entries of *H* and of *W* transposed, multiplies them
    element-wise and reduces the product with a sum into the output
    ``'rec'``. The indices are assumed to refer to the training data.
    """
    h_picker = OnnxArrayFeatureExtractor(H, 'col')
    w_picker = OnnxArrayFeatureExtractor(W.T, 'row')
    product = OnnxMul(h_picker, w_picker, op_version=op_version)
    reduced = OnnxReduceSum(product, output_names="rec", op_version=op_version)

    # Both inputs are declared with the same int64 index prototype.
    index_proto = np.array([0], dtype=np.int64)
    return reduced.to_onnx(
        inputs={'col': index_proto, 'row': index_proto},
        outputs=[('rec', FloatTensorType((None, 1)))],
        target_opset=op_version,
    )
Example #8
Source File: From GraphRole with MIT License | 6 votes |
def get_nmf_decomposition(
    X: np.ndarray,
    n_roles: int,
) -> FactorTuple:
    """
    Factor ``X`` with NMF (multiplicative-update solver, 'nndsvda' init).

    :param X: matrix to factor
    :param n_roles: rank of the decomposition
    :return: the pair (fit_transform(X), components_)
    """
    factorizer = NMF(n_components=n_roles, solver='mu', init='nndsvda')
    with warnings.catch_warnings():
        # Silence warnings raised while fitting (e.g. non-convergence);
        # a poor fit shows up as a large cost anyway.
        warnings.simplefilter('ignore')
        left_factor = factorizer.fit_transform(X)
    right_factor = factorizer.components_
    return left_factor, right_factor
Example #9
Source File: From role2vec with GNU General Public License v3.0 | 6 votes |
def factorize_string_matrix(self):
    """
    Create string labels for the graph nodes by factorization.

    Builds a sparse node-by-feature indicator matrix from
    ``self.binned_features``, factorizes it with NMF, clusters the factor
    rows with KMeans and returns ``{str(node): str(cluster_label)}`` for
    every node of ``self.graph``.
    """
    row_ids, col_ids = [], []
    for node, node_features in self.binned_features.items():
        for feature in node_features:
            row_ids.append(node)
            col_ids.append(int(feature))
    ones = [1] * len(col_ids)

    matrix_shape = (max(row_ids) + 1, max(col_ids) + 1)
    indicator = csr_matrix((ones, (row_ids, col_ids)), shape=matrix_shape)

    model = NMF(n_components=self.args.factors,
                init="random",
                random_state=self.args.seed,
                alpha=self.args.beta)
    factors = model.fit_transform(indicator)

    kmeans = KMeans(n_clusters=self.args.clusters, random_state=self.args.seed)
    labels = kmeans.fit(factors).labels_

    # Nodes are used directly as row positions into the label array.
    return {str(node): str(labels[node]) for node in self.graph.nodes()}
Example #10
Source File: From karateclub with GNU General Public License v3.0 | 6 votes |
# _fit_and_score_NMF: fits an NMF model (random init, `self.dimensions`
# components, regularisation `self.alpha`) to `new_residuals`, computes one value
# per (self._index_1, self._index_2) pair as the dot product of the matching W row
# and H column, and returns a float32 CSR matrix of scores plus W.
# NOTE(review): this snippet arrived flattened onto a single line and the first
# argument of `np.maximum(, 0)` is missing, so it is not valid Python as shown.
# `sub_scores` is computed but never used, so the dropped expression presumably
# involved it - restore it from the upstream project before running.
def _fit_and_score_NMF(self, new_residuals): """ Factorizing a residual matrix, returning the approximate target, and an embedding. Arg types: * **new_residuals** *(COO Scipy matrix)* - The residual matrix. Return types: * **scores** *(COO Scipy matrix)* - The residual scores. * **W** *(Numpy array)* - The embedding matrix. """ model = NMF(n_components=self.dimensions, init="random", verbose=False, alpha=self.alpha) W = model.fit_transform(new_residuals) H = model.components_ sub_scores = np.sum(np.multiply(W[self._index_1, :], H[:, self._index_2].T), axis=1) scores = np.maximum(, 0) scores = sparse.csr_matrix((scores, (self._index_1, self._index_2)), shape=self._shape, dtype=np.float32) return scores, W
Example #11
Source File: From topic-stability with Apache License 2.0 | 6 votes |
def apply(self, X, k=2):
    """
    Apply nimfa's LSNMF to the specified document-term matrix X.

    ``k`` is the factorization rank. The objective is "fro" when
    ``self.update`` is "euclidean" and "div" otherwise. After the run the
    factors are stored on ``self.W`` / ``self.H`` (densified when the
    returned objects offer ``todense()``) and the iteration count on
    ``self.n_iter``.
    """
    import nimfa

    self.W = None
    self.H = None
    objective = "fro" if self.update == "euclidean" else "div"
    lsnmf = nimfa.Lsnmf(X,
                        max_iter=self.max_iters,
                        rank=k,
                        seed=self.init_strategy,
                        update=self.update,
                        objective=objective,
                        test_conv=self.test_conv)
    res = lsnmf()

    basis = res.basis()
    coef = res.coef()
    try:
        self.W = basis.todense()
        self.H = coef.todense()
    except AttributeError:
        # The factors have no todense() (already dense): keep them as they
        # are. Only this case is caught; other errors now propagate instead
        # of being swallowed by a bare except.
        self.W = basis
        self.H = coef
    # last number of iterations
    self.n_iter = res.n_iter
Example #12
Source File: From yelp with GNU Lesser General Public License v2.1 | 6 votes |
def factorize_nmf(newsgroups_mmf_file='/Users/fpena/tmp/nmf_graphlab/newsgroups/newsgroups_matrix.mmf'):
    """
    Factorize a document-term matrix stored in Matrix Market format with NMF.

    :param newsgroups_mmf_file: path of the matrix file to read; defaults to
        the location that used to be hard-coded, so existing no-argument
        calls behave as before.
    :return: ``(document_topic_matrix, topic_term_matrix)``. Previously the
        two factors were computed and then discarded.
    """
    print('factorizing matrix')

    document_term_matrix = mmread(newsgroups_mmf_file)
    factorizer = decomposition.NMF(
        init="nndsvd",
        n_components=Constants.TOPIC_MODEL_NUM_TOPICS,
        max_iter=Constants.TOPIC_MODEL_ITERATIONS,
        alpha=Constants.NMF_REGULARIZATION,
        l1_ratio=Constants.NMF_REGULARIZATION_RATIO
    )
    document_topic_matrix = factorizer.fit_transform(document_term_matrix)
    topic_term_matrix = factorizer.components_
    return document_topic_matrix, topic_term_matrix
Example #13
Source File: From yelp with GNU Lesser General Public License v2.1 | 5 votes |
def build_stable_topic_model(self):
    """
    Build a topic model from several stacked single-run NMF models.

    ``Constants.TOPIC_MODEL_PASSES`` single models are built; their
    transposed topic-term matrices are stacked horizontally, normalized
    along axis 0 and transposed back. A final NMF is fitted on that stack
    and stored on ``self.topic_model``, with its factors on
    ``self.document_topic_matrix`` and ``self.topic_term_matrix`` (each row
    of the latter is divided by its row sum).
    """
    matrices = [
        self.build_single_topic_model().transpose()
        for _ in range(Constants.TOPIC_MODEL_PASSES)
    ]

    stack_matrix = numpy.hstack(matrices)
    stack_matrix = normalize(stack_matrix, axis=0)
    stack_matrix = stack_matrix.transpose()

    # print() call form: valid on Python 3, and with a single argument it
    # prints the same text on Python 2.
    print("Stack matrix M of size %s" % str(stack_matrix.shape))

    self.topic_model = decomposition.NMF(
        init="nndsvd",
        n_components=self.num_topics,
        max_iter=Constants.TOPIC_MODEL_ITERATIONS,
        alpha=Constants.NMF_REGULARIZATION,
        l1_ratio=Constants.NMF_REGULARIZATION_RATIO
    )

    self.document_topic_matrix = \
        self.topic_model.fit_transform(stack_matrix)
    self.topic_term_matrix = self.topic_model.components_

    # Make every topic row sum to one.
    # NOTE(review): a row that sums to zero would divide by zero here.
    row_sums = self.topic_term_matrix.sum(axis=1)
    self.topic_term_matrix /= row_sums[:, numpy.newaxis]

    print("Generated factor W of size %s and factor H of size %s" % (
        str(self.document_topic_matrix.shape),
        str(self.topic_term_matrix.shape)
    ))
Example #14
Source File: From yelp with GNU Lesser General Public License v2.1 | 5 votes |
def build_single_topic_model(self):
    """
    Fit one NMF model ('nndsvd' init) on ``self.document_term_matrix`` and
    return its ``components_`` (the topic-term matrix).
    """
    nmf_settings = {
        'init': "nndsvd",
        'n_components': self.num_topics,
        'max_iter': Constants.TOPIC_MODEL_ITERATIONS,
        'alpha': Constants.NMF_REGULARIZATION,
        'l1_ratio': Constants.NMF_REGULARIZATION_RATIO,
    }
    single_model = decomposition.NMF(**nmf_settings)
    # Only the fitted components are needed; the transformed data is dropped.
    single_model.fit_transform(self.document_term_matrix)
    return single_model.components_
Example #15
Source File: From yelp with GNU Lesser General Public License v2.1 | 5 votes |
def rank_terms(self, topic_index, top=-1):
    """
    Rank the term indices of one topic by descending weight.

    Reads row ``topic_index`` of ``self.H`` and returns the column indices
    ordered from the largest value to the smallest. When ``top`` is below 1
    or larger than the number of indices, the full ranking is returned;
    otherwise only the first ``top`` indices are.

    Raises ValueError if ``self.H`` is None.
    """
    if self.H is None:
        raise ValueError("No results for previous run available")
    topic_weights = self.H[topic_index, :]
    # argsort is ascending, so flip it to put the heaviest terms first
    ranking = np.argsort(topic_weights)[::-1]
    keep_all = top < 1 or top > len(ranking)
    return ranking if keep_all else ranking[0:top]
Example #16
Source File: From Python-DevOps with MIT License | 5 votes |
def train_nmf(corpus, n_topics=10, max_df=0.95, min_df=2, cleaning=clearstring, stop_words='english'):
    """
    Train an NMF topic model on a corpus of text documents.

    When ``cleaning`` is not None it is applied to every document and the
    result is written back into ``corpus`` (the caller's sequence is
    overwritten element by element). The documents are then TF-IDF
    vectorised and factorised into ``n_topics`` components.

    Returns ``TOPIC(feature_names, fitted_nmf)``.
    """
    if cleaning is not None:
        for position in range(len(corpus)):
            corpus[position] = cleaning(corpus[position])

    vectorizer = TfidfVectorizer(max_df=max_df, min_df=min_df, stop_words=stop_words)
    weighted_docs = vectorizer.fit_transform(corpus)
    vocabulary = vectorizer.get_feature_names()

    factorizer = NMF(
        n_components=n_topics,
        random_state=1,
        alpha=.1,
        l1_ratio=.5,
        init='nndsvd',
    )
    factorizer = factorizer.fit(weighted_docs)
    return TOPIC(vocabulary, factorizer)
Example #17
Source File: From hypers with BSD 3-Clause "New" or "Revised" License | 5 votes |
def calculate(self, n_components: int = 4, **kwargs) -> Tuple[np.ndarray, np.ndarray]:
    """
    Decompose ``self.X`` with NMF.

    :param n_components: number of components; when None, the size of the
        last axis of ``self.X`` is used.
    :param kwargs: forwarded unchanged to ``NMF``.
    :return: ``(self.ims, self.spcs)`` - the transformed data reshaped so
        that its last axis has length ``n_components``, and the transposed
        ``components_``.

    NOTE(review): the received snippet contained ``reshape([:-1] + ...)``
    with the sliced operand missing. It is restored as ``self.X.shape``,
    the attribute this method already reads for the ``None`` default -
    confirm against the upstream project.
    """
    if n_components is None:
        n_components = self.X.shape[-1]

    mdl = NMF(n_components=n_components, **kwargs)
    # Keep every leading axis of X and swap the last one for the components.
    target_shape = self.X.shape[:-1] + (n_components,)
    self.ims = mdl.fit_transform(self.X.collapse()).reshape(target_shape)
    self.spcs = mdl.components_.transpose()
    return self.ims, self.spcs
Example #18
Source File: From topic-ensemble with Apache License 2.0 | 5 votes |
def apply(self, X, k=2, init_W=None, init_H=None):
    """
    Apply NMF to the specified document-term matrix X.

    A random seed is drawn for every call. When both ``init_W`` and
    ``init_H`` are supplied the model uses ``init="custom"`` and starts from
    those factors; otherwise it uses ``self.init_strategy``. The resulting
    factors are stored on ``self.W`` and ``self.H``.
    """
    self.W = None
    self.H = None
    seed = np.random.randint(1, 100000)

    has_custom_factors = init_W is not None and init_H is not None
    model = decomposition.NMF(
        init="custom" if has_custom_factors else self.init_strategy,
        n_components=k,
        max_iter=self.max_iters,
        random_state=seed,
    )
    if has_custom_factors:
        self.W = model.fit_transform(X, W=init_W, H=init_H)
    else:
        self.W = model.fit_transform(X)
    self.H = model.components_
Example #19
Source File: From topic-ensemble with Apache License 2.0 | 5 votes |
def rank_terms(self, topic_index, top=-1):
    """
    Rank the term indices of one topic by descending weight.

    Reads row ``topic_index`` of ``self.H`` and returns the column indices
    ordered from the largest value to the smallest. When ``top`` is below 1
    or larger than the number of indices, the full ranking is returned;
    otherwise only the first ``top`` indices are.

    Raises ValueError if ``self.H`` is None.
    """
    if self.H is None:
        raise ValueError("No results for previous run available")
    topic_weights = self.H[topic_index, :]
    # argsort is ascending, so flip it to put the heaviest terms first
    ranking = np.argsort(topic_weights)[::-1]
    keep_all = top < 1 or top > len(ranking)
    return ranking if keep_all else ranking[0:top]
Example #20
Source File: From mltk-algo-contrib with Apache License 2.0 | 5 votes |
def __init__(self, options):
    """
    Configure the wrapped NMF estimator from an ``options`` dict.

    ``options['params']`` (default: empty dict) is converted by
    ``convert_params`` using the type lists below, with the option name
    ``k`` aliased to the estimator's ``n_components``; the converted
    parameters are passed to ``_NMF``.
    """
    self.handle_options(options)

    out_params = convert_params(
        options.get('params', {}),
        floats=['beta_loss', 'tol', 'alpha', 'l1_ratio'],
        strs=['init', 'solver'],
        ints=['k', 'max_iter', 'random_state'],
        # was 'versbose': the misspelling kept the `verbose` option out of
        # the list of boolean parameters
        bools=['verbose', 'shuffle'],
        aliases={'k': 'n_components'}
    )

    self.estimator = _NMF(**out_params)
Example #21
Source File: From RecSys2019_DeepLearning_Evaluation with GNU Affero General Public License v3.0 | 5 votes |
def fit(self, num_factors=100, l1_ratio=0.5, solver="multiplicative_update", init_type="random", beta_loss="frobenius", verbose=False, random_seed=None):
    """
    Compute an NMF decomposition of ``self.URM_train``.

    ``solver``, ``init_type`` and ``beta_loss`` are validated against
    ``self.SOLVER_VALUES``, ``self.INIT_VALUES`` and
    ``self.BETA_LOSS_VALUES``; ``l1_ratio`` must lie in [0, 1].

    On return ``self.ITEM_factors`` holds a transposed copy of the fitted
    ``components_`` and ``self.USER_factors`` the transform of
    ``self.URM_train``.

    Raises ValueError for an unrecognised ``solver``, ``init_type`` or
    ``beta_loss``; AssertionError for an out-of-range ``l1_ratio``.
    """
    assert l1_ratio >= 0 and l1_ratio <= 1, "{}: l1_ratio must be between 0 and 1, provided value was {}".format(self.RECOMMENDER_NAME, l1_ratio)

    if solver not in self.SOLVER_VALUES:
        raise ValueError("Value for 'solver' not recognized. Acceptable values are {}, provided was '{}'".format(self.SOLVER_VALUES.keys(), solver))

    if init_type not in self.INIT_VALUES:
        raise ValueError("Value for 'init_type' not recognized. Acceptable values are {}, provided was '{}'".format(self.INIT_VALUES, init_type))

    if beta_loss not in self.BETA_LOSS_VALUES:
        raise ValueError("Value for 'beta_loss' not recognized. Acceptable values are {}, provided was '{}'".format(self.BETA_LOSS_VALUES, beta_loss))

    self._print("Computing NMF decomposition...")

    nmf_solver = NMF(n_components=num_factors,
                     init=init_type,
                     solver=self.SOLVER_VALUES[solver],
                     beta_loss=beta_loss,
                     random_state=random_seed,
                     l1_ratio=l1_ratio,
                     shuffle=True,
                     verbose=verbose,
                     max_iter=500)

    # The estimator has to be fitted before components_ exists and before
    # transform() may be called; the snippet as received skipped this step.
    nmf_solver.fit(self.URM_train)

    self.ITEM_factors = nmf_solver.components_.copy().T
    self.USER_factors = nmf_solver.transform(self.URM_train)

    self._print("Computing NMF decomposition... Done!")
Example #22
Source File: From twitter-stock-recommendation with MIT License | 5 votes |
# test_check_estimator_clones: for each listed estimator class, builds a fresh
# instance, applies set_checking_parameters / set_random_state, hashes it with
# joblib, runs check_estimator on it and asserts the hash did not change. The
# sequence is written twice per class ("without fitting" / "with fitting").
# NOTE(review): this snippet arrived flattened onto a single line, so everything
# after the first `#` reads as a comment, and the "with fitting" step is truncated
# to `+ 10,`. `iris` is loaded but never used in what remains, so the dropped
# statement presumably fitted the estimator on it - restore from upstream.
def test_check_estimator_clones(): # check that check_estimator doesn't modify the estimator it receives from sklearn.datasets import load_iris iris = load_iris() for Estimator in [GaussianMixture, LinearRegression, RandomForestClassifier, NMF, SGDClassifier, MiniBatchKMeans]: with ignore_warnings(category=FutureWarning): # when 'est = SGDClassifier()' est = Estimator() set_checking_parameters(est) set_random_state(est) # without fitting old_hash = joblib.hash(est) check_estimator(est) assert_equal(old_hash, joblib.hash(est)) with ignore_warnings(category=FutureWarning): # when 'est = SGDClassifier()' est = Estimator() set_checking_parameters(est) set_random_state(est) # with fitting + 10, old_hash = joblib.hash(est) check_estimator(est) assert_equal(old_hash, joblib.hash(est))
Example #23
Source File: From topic-stability with Apache License 2.0 | 5 votes |
def apply(self, X, k=2):
    """
    Apply scikit-learn NMF to the specified document-term matrix X.

    ``k`` is the number of components. The factors are stored on
    ``self.W`` (transformed data) and ``self.H`` (components).
    """
    from sklearn import decomposition

    self.W, self.H = None, None
    factorizer = decomposition.NMF(
        init=self.init_strategy,
        n_components=k,
        max_iter=self.max_iters,
    )
    self.W = factorizer.fit_transform(X)
    self.H = factorizer.components_
Example #24
Source File: From topic-stability with Apache License 2.0 | 5 votes |
def rank_terms(self, topic_index, top=-1):
    """
    Rank the term indices of one topic by descending weight.

    Reads row ``topic_index`` of ``self.H`` and returns the column indices
    ordered from the largest value to the smallest. When ``top`` is below 1
    or larger than the number of indices, the full ranking is returned;
    otherwise only the first ``top`` indices are.

    Raises ValueError if ``self.H`` is None.
    """
    if self.H is None:
        raise ValueError("No results for previous run available")
    topic_weights = self.H[topic_index, :]
    # argsort is ascending, so flip it to put the heaviest terms first
    ranking = np.argsort(topic_weights)[::-1]
    keep_all = top < 1 or top > len(ranking)
    return ranking if keep_all else ranking[0:top]
Example #25
Source File: From topic-stability with Apache License 2.0 | 5 votes |
def rank_terms(self, topic_index, top=-1):
    """
    Rank the term indices of one topic by descending weight.

    Row ``topic_index`` of ``self.H`` is copied into a flat 1-D array before
    sorting. The column indices are returned from the largest value to the
    smallest; when ``top`` is below 1 or larger than the number of indices
    the full ranking is returned, otherwise only the first ``top`` indices.

    Raises ValueError if ``self.H`` is None.
    """
    if self.H is None:
        raise ValueError("No results for previous run available")
    flat_weights = np.array(self.H[topic_index, :]).ravel()
    # ascending argsort, reversed so the heaviest terms come first
    ranking = np.argsort(flat_weights)[::-1]
    if 1 <= top <= len(ranking):
        return ranking[0:top]
    return ranking
Example #26
Source File: From DANMF with GNU General Public License v3.0 | 5 votes |
def sklearn_pretrain(self, i):
    """
    Pretrain a single layer of the model with scikit-learn's NMF.

    :param i: Layer index; ``self.args.layers[i]`` gives the number of
        components.
    :return: ``(U, V)`` - the transform of ``self.Z`` and the fitted
        ``components_``.
    """
    layer_width = self.args.layers[i]
    pretrainer = NMF(
        n_components=layer_width,
        init="random",
        random_state=self.args.seed,
        max_iter=self.args.pre_iterations,
    )
    U = pretrainer.fit_transform(self.Z)
    return U, pretrainer.components_
Example #27
Source File: From scattertext with Apache License 2.0 | 5 votes |
def get_topics_from_model(
        self,
        pipe=None,
        num_terms_per_topic=10):
    '''
    Fit a topic-model pipeline on ``self.sentX`` and list the top terms of
    every topic.

    Parameters
    ----------
    pipe : Pipeline, optional
        When omitted, a fresh
        `Pipeline([
            ('tfidf', TfidfTransformer(sublinear_tf=True)),
            ('nmf', (NMF(n_components=30, alpha=.1, l1_ratio=.5, random_state=0)))])`
        is built for this call. (It used to be a single default instance
        created at definition time and therefore re-fitted and shared by
        every call.)
        The last transformer must populate a `components_` attribute when finished.
    num_terms_per_topic : int
        Maximum number of terms kept per topic.

    Returns
    -------
    dict: {term: [term1, ...], ...}
        Keys have the form "<topic index>. <top term>". Topics without any
        positively weighted term are left out and reported with a warning.
    '''
    import warnings

    if pipe is None:
        pipe = Pipeline([
            ('tfidf', TfidfTransformer(sublinear_tf=True)),
            ('nmf', (NMF(n_components=30, alpha=.1, l1_ratio=.5, random_state=0)))])

    pipe.fit_transform(self.sentX)

    topic_model = {}
    for topic_idx, topic in enumerate(pipe._final_estimator.components_):
        # Reverse slice of the ascending argsort: the highest-weighted
        # indices first; only strictly positive weights are kept.
        term_list = [self.termidxstore.getval(i)
                     for i
                     in topic.argsort()[:-num_terms_per_topic - 1:-1]
                     if topic[i] > 0]
        if len(term_list) > 0:
            topic_model['%s. %s' % (topic_idx, term_list[0])] = term_list
        else:
            # The original built a Warning object and dropped it, so nothing
            # was ever reported; emit it for real.
            warnings.warn("Topic %s has no terms with scores > 0. Omitting." % (topic_idx))
    return topic_model
Example #28
Source File: From MNIST-baselines with MIT License | 5 votes |
def skNMF(data, dim):
    """
    Reduce ``data`` to ``dim`` components with scikit-learn's NMF.

    :param data: input matrix handed to NMF.
    :param dim: number of components.
    :return: the transformed data.
    """
    model = NMF(n_components=dim)
    # The snippet as received called transform() on a model that was never
    # fitted; fit_transform learns the factorization and returns the
    # transformed data in one step.
    return model.fit_transform(data)

# Max-min norm
Example #29
Source File: From karateclub with GNU General Public License v3.0 | 5 votes |
def _sklearn_pretrain(self, i):
    """
    Pre-train a single layer of the model with scikit-learn's NMF.

    Arg types:
        * **i** *(int)* - The layer index; ``self.layers[i]`` gives the
          number of components.

    Returns ``(U, V)``: the transform of ``self._Z`` and the fitted
    ``components_``.
    """
    layer_width = self.layers[i]
    pretrainer = NMF(
        n_components=layer_width,
        init="random",
        random_state=self.seed,
        max_iter=self.pre_iterations,
    )
    U = pretrainer.fit_transform(self._Z)
    return U, pretrainer.components_
Example #30
Source File: From karateclub with GNU General Public License v3.0 | 5 votes |
def _pre_training(self):
    """
    Pre-train each NMF layer.

    Resets ``self._U_s`` and ``self._V_s`` to empty lists, then for every
    layer index below ``self._p`` calls ``_setup_z`` followed by
    ``_sklearn_pretrain`` and appends the returned pair to the two lists.
    """
    self._U_s, self._V_s = [], []
    layer = 0
    while layer < self._p:
        self._setup_z(layer)
        factors = self._sklearn_pretrain(layer)
        U, V = factors
        # Append right away: later layers can see earlier results.
        self._U_s.append(U)
        self._V_s.append(V)
        layer += 1