Python sklearn.decomposition.NMF Examples
The following are 30 code examples of sklearn.decomposition.NMF(), drawn from open-source projects and ranked by votes. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the module sklearn.decomposition.
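Before the examples, here is a minimal, self-contained sketch of the core sklearn API they all build on. The data and parameter values are illustrative only:

import numpy as np
from sklearn.decomposition import NMF

# Toy non-negative matrix standing in for a document-term matrix.
X = np.abs(np.random.RandomState(0).rand(6, 4))

model = NMF(n_components=2, init="nndsvd", max_iter=200)
W = model.fit_transform(X)   # (6, 2) document-topic style factor
H = model.components_        # (2, 4) topic-term style factor
print(W.shape, H.shape)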
Example #1
Source File: nmf.py From dynamic-nmf with Apache License 2.0 | 7 votes |
def rank_terms( self, topic_index, top = -1 ):
    """
    Return the top ranked terms for the specified topic, generated during the last NMF run.
    """
    if self.H is None:
        raise ValueError("No results for previous run available")
    # NB: reverse
    top_indices = np.argsort( self.H[topic_index,:] )[::-1]
    # truncate if necessary
    if top < 1 or top > len(top_indices):
        return top_indices
    return top_indices[0:top]
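The method assumes the surrounding class stores the NMF topic-term factor in self.H. A standalone sketch of the same ranking logic, with a hypothetical random H:

import numpy as np

H = np.abs(np.random.RandomState(1).rand(3, 8))   # stand-in for self.H (topics x terms)
topic_index, top = 0, 5
top_indices = np.argsort(H[topic_index, :])[::-1]  # descending term scores
print(top_indices[:top])  # indices of the 5 highest-weighted terms for topic 0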
Example #2
Source File: get_topic.py From poem_generator with Apache License 2.0 | 7 votes |
def write_topics(ftopics, fwords, ftopics_words, poem_words, n_topic, n_topic_words):
    count_matrix = count_vect.fit_transform(poem_words)
    tfidf = TfidfTransformer().fit_transform(count_matrix)
    nmf = decomposition.NMF(n_components=n_topic).fit(tfidf)
    feature_names = count_vect.get_feature_names()
    fw = codecs.open(ftopics, 'w', 'utf-8')
    for topic in nmf.components_:
        fw.write(' '.join([feature_names[i]
                           for i in topic.argsort()[:-n_topic_words - 1:-1]]) + '\n')
    fw.close()
    print('Write topics done.')
    fw = codecs.open(fwords, 'wb')
    pickle.dump(feature_names, fw)
    fw.close()
    print('Write words done.')
    fw = codecs.open(ftopics_words, 'wb')
    pickle.dump(nmf.components_, fw)
    fw.close()
    print('Write topic_words done.')
Example #3
Source File: test_decomposition.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 7 votes |
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.decomposition.PCA, decomposition.PCA)
    self.assertIs(df.decomposition.IncrementalPCA, decomposition.IncrementalPCA)
    self.assertIs(df.decomposition.KernelPCA, decomposition.KernelPCA)
    self.assertIs(df.decomposition.FactorAnalysis, decomposition.FactorAnalysis)
    self.assertIs(df.decomposition.FastICA, decomposition.FastICA)
    self.assertIs(df.decomposition.TruncatedSVD, decomposition.TruncatedSVD)
    self.assertIs(df.decomposition.NMF, decomposition.NMF)
    self.assertIs(df.decomposition.SparsePCA, decomposition.SparsePCA)
    self.assertIs(df.decomposition.MiniBatchSparsePCA, decomposition.MiniBatchSparsePCA)
    self.assertIs(df.decomposition.SparseCoder, decomposition.SparseCoder)
    self.assertIs(df.decomposition.DictionaryLearning, decomposition.DictionaryLearning)
    self.assertIs(df.decomposition.MiniBatchDictionaryLearning,
                  decomposition.MiniBatchDictionaryLearning)
    self.assertIs(df.decomposition.LatentDirichletAllocation,
                  decomposition.LatentDirichletAllocation)
Example #4
Source File: topic.py From Python-DevOps with MIT License | 6 votes |
def train_nmf(corpus, n_topics=10, max_df=0.95, min_df=2, cleaning=clearstring, stop_words='english'):
    if cleaning is not None:
        for i in range(len(corpus)):
            corpus[i] = cleaning(corpus[i])
    tfidf_vectorizer = TfidfVectorizer(
        max_df=max_df, min_df=min_df, stop_words=stop_words)
    tfidf = tfidf_vectorizer.fit_transform(corpus)
    tfidf_features = tfidf_vectorizer.get_feature_names()
    nmf = NMF(
        n_components=n_topics,
        random_state=1,
        alpha=.1,
        l1_ratio=.5,
        init='nndsvd').fit(tfidf)
    return TOPIC(tfidf_features, nmf)
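A hypothetical call on a toy corpus. TOPIC and clearstring are helpers defined elsewhere in the original project, so cleaning is disabled here and min_df is lowered to suit the tiny input:

corpus = ['cats and dogs', 'dogs chase cats', 'stocks and bonds', 'bonds yield interest']
topic = train_nmf(corpus, n_topics=2, min_df=1, cleaning=None)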
Example #5
Source File: topics.py From atap with Apache License 2.0 | 6 votes |
def __init__(self, n_topics=50, estimator='LDA'):
    """
    n_topics is the desired number of topics
    To use Latent Semantic Analysis, set estimator to 'LSA',
    To use Non-Negative Matrix Factorization, set estimator to 'NMF',
    otherwise, defaults to Latent Dirichlet Allocation ('LDA').
    """
    self.n_topics = n_topics
    if estimator == 'LSA':
        self.estimator = TruncatedSVD(n_components=self.n_topics)
    elif estimator == 'NMF':
        self.estimator = NMF(n_components=self.n_topics)
    else:
        self.estimator = LatentDirichletAllocation(n_topics=self.n_topics)
    self.model = Pipeline([
        ('norm', TextNormalizer()),
        ('tfidf', CountVectorizer(tokenizer=identity, preprocessor=None, lowercase=False)),
        ('model', self.estimator)
    ])
Example #6
Source File: boosted_embedding.py From BoostedFactorization with GNU General Public License v3.0 | 6 votes |
def fit_and_score_NMF(self, new_residuals):
    """
    Factorizing a residual matrix, returning the approximate target and an embedding.
    :param new_residuals: Input target matrix.
    :return scores: Approximate target matrix.
    :return W: Embedding matrix.
    """
    model = NMF(n_components=self.args.dimensions,
                init="random",
                verbose=False,
                alpha=self.args.alpha)
    W = model.fit_transform(new_residuals)
    H = model.components_
    print("Scoring started.\n")
    sub_scores = np.sum(np.multiply(W[self.index_1, :], H[:, self.index_2].T), axis=1)
    scores = np.maximum(self.residuals.data-sub_scores, 0)
    scores = sparse.csr_matrix((scores, (self.index_1, self.index_2)),
                               shape=self.shape,
                               dtype=np.float32)
    return scores, W
Example #7
Source File: plot_nmf.py From sklearn-onnx with MIT License | 6 votes |
def nmf_to_onnx(W, H, op_version=12):
    """
    The function converts an NMF described by matrices *W*, *H*
    (*WH* approximates training data *M*) into a function which takes
    two indices *(i, j)* and returns the prediction for them.
    It assumes these indices apply to the training data.
    """
    col = OnnxArrayFeatureExtractor(H, 'col')
    row = OnnxArrayFeatureExtractor(W.T, 'row')
    dot = OnnxMul(col, row, op_version=op_version)
    res = OnnxReduceSum(dot, output_names="rec", op_version=op_version)
    indices_type = np.array([0], dtype=np.int64)
    onx = res.to_onnx(inputs={'col': indices_type,
                              'row': indices_type},
                      outputs=[('rec', FloatTensorType((None, 1)))],
                      target_opset=op_version)
    return onx
Example #8
Source File: factor.py From GraphRole with MIT License | 6 votes |
def get_nmf_decomposition(
    X: np.ndarray,
    n_roles: int,
) -> FactorTuple:
    """
    Compute NMF decomposition
    :param X: matrix to factor
    :param n_roles: rank of decomposition
    """
    nmf = NMF(n_components=n_roles, solver='mu', init='nndsvda')
    with warnings.catch_warnings():
        # ignore convergence warning from NMF since
        # this will result in a large cost anyways
        warnings.simplefilter('ignore')
        G = nmf.fit_transform(X)
        F = nmf.components_
    return G, F
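An illustrative call with random non-negative data; in the original project FactorTuple is just the (G, F) pair of factors:

import numpy as np

X = np.abs(np.random.RandomState(0).rand(20, 12))
G, F = get_nmf_decomposition(X, n_roles=4)
print(G.shape, F.shape)  # (20, 4) (4, 12)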
Example #9
Source File: motif_count.py From role2vec with GNU General Public License v3.0 | 6 votes |
def factorize_string_matrix(self):
    """
    Creating string labels by factorization.
    """
    rows = [node for node, features in self.binned_features.items() for feature in features]
    columns = [int(feature) for node, features in self.binned_features.items() for feature in features]
    scores = [1 for i in range(len(columns))]
    row_number = max(rows)+1
    column_number = max(columns)+1
    features = csr_matrix((scores, (rows, columns)), shape=(row_number, column_number))
    model = NMF(n_components=self.args.factors,
                init="random",
                random_state=self.args.seed,
                alpha=self.args.beta)
    factors = model.fit_transform(features)
    kmeans = KMeans(n_clusters=self.args.clusters, random_state=self.args.seed).fit(factors)
    labels = kmeans.labels_
    features = {str(node): str(labels[node]) for node in self.graph.nodes()}
    return features
Example #10
Source File: boostne.py From karateclub with GNU General Public License v3.0 | 6 votes |
def _fit_and_score_NMF(self, new_residuals):
    """
    Factorizing a residual matrix, returning the approximate target, and an embedding.

    Arg types:
        * **new_residuals** *(COO Scipy matrix)* - The residual matrix.

    Return types:
        * **scores** *(COO Scipy matrix)* - The residual scores.
        * **W** *(Numpy array)* - The embedding matrix.
    """
    model = NMF(n_components=self.dimensions,
                init="random",
                verbose=False,
                alpha=self.alpha)
    W = model.fit_transform(new_residuals)
    H = model.components_
    sub_scores = np.sum(np.multiply(W[self._index_1, :], H[:, self._index_2].T), axis=1)
    scores = np.maximum(self._residuals.data-sub_scores, 0)
    scores = sparse.csr_matrix((scores, (self._index_1, self._index_2)),
                               shape=self._shape,
                               dtype=np.float32)
    return scores, W
Example #11
Source File: nmf.py From topic-stability with Apache License 2.0 | 6 votes |
def apply( self, X, k = 2 ):
    """
    Apply NMF to the specified document-term matrix X.
    """
    import nimfa
    self.W = None
    self.H = None
    initialize_only = self.max_iters < 1
    if self.update == "euclidean":
        objective = "fro"
    else:
        objective = "div"
    lsnmf = nimfa.Lsnmf(X, max_iter = self.max_iters, rank = k,
                        seed = self.init_strategy, update = self.update,
                        objective = objective, test_conv = self.test_conv)
    res = lsnmf()
    # TODO: fix
    try:
        self.W = res.basis().todense()
        self.H = res.coef().todense()
    except:
        self.W = res.basis()
        self.H = res.coef()
    # last number of iterations
    self.n_iter = res.n_iter
Example #12
Source File: main.py From yelp with GNU Lesser General Public License v2.1 | 6 votes |
def factorize_nmf():
    print('factorizing matrix')

    newsgroups_mmf_file = '/Users/fpena/tmp/nmf_graphlab/newsgroups/newsgroups_matrix.mmf'
    document_term_matrix = mmread(newsgroups_mmf_file)

    factorizer = decomposition.NMF(
        init="nndsvd", n_components=Constants.TOPIC_MODEL_NUM_TOPICS,
        max_iter=Constants.TOPIC_MODEL_ITERATIONS,
        alpha=Constants.NMF_REGULARIZATION,
        l1_ratio=Constants.NMF_REGULARIZATION_RATIO
    )
    document_topic_matrix = \
        factorizer.fit_transform(document_term_matrix)
    topic_term_matrix = factorizer.components_
    # mmwrite(mmf_file, small_matrix)
    # mmwrite(newsgroups_mmf_file, X)
Example #13
Source File: nmf_context_extractor.py From yelp with GNU Lesser General Public License v2.1 | 5 votes |
def build_stable_topic_model(self):
    matrices = []
    for i in range(Constants.TOPIC_MODEL_PASSES):
        topic_term_matrix = self.build_single_topic_model().transpose()
        matrices.append(topic_term_matrix)

    stack_matrix = numpy.hstack(matrices)
    stack_matrix = normalize(stack_matrix, axis=0)
    stack_matrix = stack_matrix.transpose()
    print("Stack matrix M of size %s" % str(stack_matrix.shape))

    self.topic_model = decomposition.NMF(
        init="nndsvd", n_components=self.num_topics,
        max_iter=Constants.TOPIC_MODEL_ITERATIONS,
        alpha=Constants.NMF_REGULARIZATION,
        l1_ratio=Constants.NMF_REGULARIZATION_RATIO
    )
    self.document_topic_matrix = \
        self.topic_model.fit_transform(stack_matrix)
    self.topic_term_matrix = self.topic_model.components_

    row_sums = self.topic_term_matrix.sum(axis=1)
    self.topic_term_matrix /= row_sums[:, numpy.newaxis]

    print("Generated factor W of size %s and factor H of size %s" % (
        str(self.document_topic_matrix.shape),
        str(self.topic_term_matrix.shape)
    ))
    # return model
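The stacking idea above (an ensemble for topic stability) can be sketched standalone: factor the data several times, stack the normalized topic-term matrices, and factor the stack once more. Shapes and parameters here are illustrative:

import numpy as np
from sklearn.decomposition import NMF
from sklearn.preprocessing import normalize

X = np.abs(np.random.RandomState(0).rand(40, 25))  # stand-in document-term matrix

matrices = []
for run in range(5):                                # several randomized base runs
    model = NMF(n_components=3, init="random", random_state=run)
    model.fit(X)
    matrices.append(model.components_.transpose())  # (terms x topics)

stack = normalize(np.hstack(matrices), axis=0).transpose()  # (runs*topics x terms)
final = NMF(n_components=3, init="nndsvd")
W = final.fit_transform(stack)   # meta document-topic factor
H = final.components_            # consolidated topic-term factor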
Example #14
Source File: nmf_context_extractor.py From yelp with GNU Lesser General Public License v2.1 | 5 votes |
def build_single_topic_model(self):
    # print('%s: building NMF topic model' %
    #       time.strftime("%Y/%m/%d-%H:%M:%S"))

    topic_model = decomposition.NMF(
        init="nndsvd", n_components=self.num_topics,
        max_iter=Constants.TOPIC_MODEL_ITERATIONS,
        alpha=Constants.NMF_REGULARIZATION,
        l1_ratio=Constants.NMF_REGULARIZATION_RATIO
    )
    topic_model.fit_transform(self.document_term_matrix)
    topic_term_matrix = topic_model.components_

    return topic_term_matrix
Example #15
Source File: nmf.py From yelp with GNU Lesser General Public License v2.1 | 5 votes |
def rank_terms( self, topic_index, top = -1 ):
    """
    Return the top ranked terms for the specified topic, generated during the last NMF run.
    """
    if self.H is None:
        raise ValueError("No results for previous run available")
    # NB: reverse
    top_indices = np.argsort( self.H[topic_index,:] )[::-1]
    # truncate if necessary
    if top < 1 or top > len(top_indices):
        return top_indices
    return top_indices[0:top]
Example #16
Source File: topic.py From Python-DevOps with MIT License | 5 votes |
def train_nmf(corpus, n_topics=10, max_df=0.95, min_df=2, cleaning=clearstring, stop_words='english'):
    if cleaning is not None:
        for i in range(len(corpus)):
            corpus[i] = cleaning(corpus[i])
    tfidf_vectorizer = TfidfVectorizer(max_df=max_df, min_df=min_df, stop_words=stop_words)
    tfidf = tfidf_vectorizer.fit_transform(corpus)
    tfidf_features = tfidf_vectorizer.get_feature_names()
    nmf = NMF(n_components=n_topics, random_state=1, alpha=.1, l1_ratio=.5, init='nndsvd').fit(tfidf)
    return TOPIC(tfidf_features, nmf)
Example #17
Source File: decomposition.py From hypers with BSD 3-Clause "New" or "Revised" License | 5 votes |
def calculate(self, n_components: int = 4, **kwargs) -> Tuple[np.ndarray, np.ndarray]:
    if n_components is None:
        n_components = self.X.shape[-1]
    mdl = NMF(n_components=n_components, **kwargs)
    self.ims = mdl.fit_transform(self.X.collapse()).reshape(
        self.X.data.shape[:-1] + (n_components,))
    self.spcs = mdl.components_.transpose()
    return self.ims, self.spcs
Example #18
Source File: nmf.py From topic-ensemble with Apache License 2.0 | 5 votes |
def apply( self, X, k = 2, init_W = None, init_H = None ):
    """
    Apply NMF to the specified document-term matrix X.
    """
    self.W = None
    self.H = None
    random_seed = np.random.randint( 1, 100000 )
    if not (init_W is None or init_H is None):
        model = decomposition.NMF( init="custom", n_components=k,
                                   max_iter=self.max_iters, random_state = random_seed )
        self.W = model.fit_transform( X, W=init_W, H=init_H )
    else:
        model = decomposition.NMF( init=self.init_strategy, n_components=k,
                                   max_iter=self.max_iters, random_state = random_seed )
        self.W = model.fit_transform( X )
    self.H = model.components_
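The first branch exercises sklearn's "custom" initialization, which accepts user-supplied starting factors. A standalone sketch with illustrative shapes and random starting points:

import numpy as np
from sklearn.decomposition import NMF

rng = np.random.RandomState(0)
X = np.abs(rng.rand(10, 6))
k = 2
init_W = np.abs(rng.rand(10, k))   # user-supplied starting factors
init_H = np.abs(rng.rand(k, 6))

model = NMF(n_components=k, init="custom", max_iter=200)
W = model.fit_transform(X, W=init_W, H=init_H)
H = model.components_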
Example #19
Source File: nmf.py From topic-ensemble with Apache License 2.0 | 5 votes |
def rank_terms( self, topic_index, top = -1 ):
    """
    Return the top ranked terms for the specified topic, generated during the last NMF run.
    """
    if self.H is None:
        raise ValueError("No results for previous run available")
    # NB: reverse
    top_indices = np.argsort( self.H[topic_index,:] )[::-1]
    # truncate if necessary
    if top < 1 or top > len(top_indices):
        return top_indices
    return top_indices[0:top]
Example #20
Source File: NMF.py From mltk-algo-contrib with Apache License 2.0 | 5 votes |
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(
        options.get('params', {}),
        floats=['beta_loss', 'tol', 'alpha', 'l1_ratio'],
        strs=['init', 'solver'],
        ints=['k', 'max_iter', 'random_state'],
        bools=['verbose', 'shuffle'],
        aliases={'k': 'n_components'}
    )

    self.estimator = _NMF(**out_params)
Example #21
Source File: NMFRecommender.py From RecSys2019_DeepLearning_Evaluation with GNU Affero General Public License v3.0 | 5 votes |
def fit(self, num_factors=100,
        l1_ratio=0.5,
        solver="multiplicative_update",
        init_type="random",
        beta_loss="frobenius",
        verbose=False,
        random_seed=None):

    assert l1_ratio >= 0 and l1_ratio <= 1, \
        "{}: l1_ratio must be between 0 and 1, provided value was {}".format(
            self.RECOMMENDER_NAME, l1_ratio)

    if solver not in self.SOLVER_VALUES:
        raise ValueError("Value for 'solver' not recognized. Acceptable values are {}, "
                         "provided was '{}'".format(self.SOLVER_VALUES.keys(), solver))

    if init_type not in self.INIT_VALUES:
        raise ValueError("Value for 'init_type' not recognized. Acceptable values are {}, "
                         "provided was '{}'".format(self.INIT_VALUES, init_type))

    if beta_loss not in self.BETA_LOSS_VALUES:
        raise ValueError("Value for 'beta_loss' not recognized. Acceptable values are {}, "
                         "provided was '{}'".format(self.BETA_LOSS_VALUES, beta_loss))

    self._print("Computing NMF decomposition...")

    nmf_solver = NMF(n_components=num_factors,
                     init=init_type,
                     solver=self.SOLVER_VALUES[solver],
                     beta_loss=beta_loss,
                     random_state=random_seed,
                     l1_ratio=l1_ratio,
                     shuffle=True,
                     verbose=verbose,
                     max_iter=500)

    nmf_solver.fit(self.URM_train)

    self.ITEM_factors = nmf_solver.components_.copy().T
    self.USER_factors = nmf_solver.transform(self.URM_train)

    self._print("Computing NMF decomposition... Done!")
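Stripped of the recommender-class plumbing, the underlying factorization reduces to the sketch below; the user-rating matrix is random stand-in data and the parameter values mirror the defaults above:

import numpy as np
from sklearn.decomposition import NMF

URM = np.abs(np.random.RandomState(0).rand(50, 30))  # stand-in user-item matrix

nmf_solver = NMF(n_components=10, init="random", solver="mu",
                 beta_loss="frobenius", l1_ratio=0.5,
                 shuffle=True, max_iter=500)
nmf_solver.fit(URM)
item_factors = nmf_solver.components_.T   # (30, 10)
user_factors = nmf_solver.transform(URM)  # (50, 10)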
Example #22
Source File: test_estimator_checks.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_check_estimator_clones():
    # check that check_estimator doesn't modify the estimator it receives
    from sklearn.datasets import load_iris
    iris = load_iris()

    for Estimator in [GaussianMixture, LinearRegression,
                      RandomForestClassifier, NMF, SGDClassifier,
                      MiniBatchKMeans]:
        with ignore_warnings(category=FutureWarning):
            # when 'est = SGDClassifier()'
            est = Estimator()
        set_checking_parameters(est)
        set_random_state(est)
        # without fitting
        old_hash = joblib.hash(est)
        check_estimator(est)
        assert_equal(old_hash, joblib.hash(est))

        with ignore_warnings(category=FutureWarning):
            # when 'est = SGDClassifier()'
            est = Estimator()
        set_checking_parameters(est)
        set_random_state(est)
        # with fitting
        est.fit(iris.data + 10, iris.target)
        old_hash = joblib.hash(est)
        check_estimator(est)
        assert_equal(old_hash, joblib.hash(est))
Example #23
Source File: nmf.py From topic-stability with Apache License 2.0 | 5 votes |
def apply( self, X, k = 2 ):
    """
    Apply NMF to the specified document-term matrix X.
    """
    from sklearn import decomposition
    self.W = None
    self.H = None
    model = decomposition.NMF(init=self.init_strategy, n_components=k, max_iter=self.max_iters)
    self.W = model.fit_transform(X)
    self.H = model.components_
Example #24
Source File: nmf.py From topic-stability with Apache License 2.0 | 5 votes |
def rank_terms( self, topic_index, top = -1 ):
    """
    Return the top ranked terms for the specified topic, generated during the last NMF run.
    """
    if self.H is None:
        raise ValueError("No results for previous run available")
    # NB: reverse
    top_indices = np.argsort( self.H[topic_index,:] )[::-1]
    # truncate if necessary
    if top < 1 or top > len(top_indices):
        return top_indices
    return top_indices[0:top]
Example #25
Source File: nmf.py From topic-stability with Apache License 2.0 | 5 votes |
def rank_terms( self, topic_index, top = -1 ):
    """
    Return the top ranked terms for the specified topic, generated during the last NMF run.
    """
    if self.H is None:
        raise ValueError("No results for previous run available")
    h = np.array( self.H[topic_index,:] ).flatten()
    # NB: reverse ordering
    top_indices = np.argsort(h)[::-1]
    # truncate
    if top < 1 or top > len(top_indices):
        return top_indices
    return top_indices[0:top]
Example #26
Source File: danmf.py From DANMF with GNU General Public License v3.0 | 5 votes |
def sklearn_pretrain(self, i):
    """
    Pretraining a single layer of the model with sklearn.
    :param i: Layer index.
    """
    nmf_model = NMF(n_components=self.args.layers[i],
                    init="random",
                    random_state=self.args.seed,
                    max_iter=self.args.pre_iterations)
    U = nmf_model.fit_transform(self.Z)
    V = nmf_model.components_
    return U, V
Example #27
Source File: SentencesForTopicModeling.py From scattertext with Apache License 2.0 | 5 votes |
def get_topics_from_model(
        self,
        pipe=Pipeline([
            ('tfidf', TfidfTransformer(sublinear_tf=True)),
            ('nmf', (NMF(n_components=30, alpha=.1, l1_ratio=.5, random_state=0)))]),
        num_terms_per_topic=10):
    '''
    Parameters
    ----------
    pipe : Pipeline
        For example, `Pipeline([
            ('tfidf', TfidfTransformer(sublinear_tf=True)),
            ('nmf', (NMF(n_components=30, alpha=.1, l1_ratio=.5, random_state=0)))])`
        The last transformer must populate a `components_` attribute when finished.
    num_terms_per_topic : int

    Returns
    -------
    dict: {term: [term1, ...], ...}
    '''
    pipe.fit_transform(self.sentX)
    topic_model = {}
    for topic_idx, topic in enumerate(pipe._final_estimator.components_):
        term_list = [self.termidxstore.getval(i)
                     for i in topic.argsort()[:-num_terms_per_topic - 1:-1]
                     if topic[i] > 0]
        if len(term_list) > 0:
            topic_model['%s. %s' % (topic_idx, term_list[0])] = term_list
        else:
            Warning("Topic %s has no terms with scores > 0. Omitting." % (topic_idx))
    return topic_model
Example #28
Source File: utils.py From MNIST-baselines with MIT License | 5 votes |
def skNMF(data, dim):
    model = NMF(n_components=dim)
    model.fit(data)
    return model.transform(data)

# Max-min norm
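A hypothetical call, with random pixels standing in for the flattened MNIST images the project feeds in:

import numpy as np

data = np.random.RandomState(0).rand(100, 784)  # stand-in for flattened 28x28 images
reduced = skNMF(data, dim=10)
print(reduced.shape)  # (100, 10)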
Example #29
Source File: danmf.py From karateclub with GNU General Public License v3.0 | 5 votes |
def _sklearn_pretrain(self, i):
    """
    Pre-training a single layer of the model with sklearn.

    Arg types:
        * **i** *(int)* - The layer index.
    """
    nmf_model = NMF(n_components=self.layers[i],
                    init="random",
                    random_state=self.seed,
                    max_iter=self.pre_iterations)
    U = nmf_model.fit_transform(self._Z)
    V = nmf_model.components_
    return U, V
Example #30
Source File: danmf.py From karateclub with GNU General Public License v3.0 | 5 votes |
def _pre_training(self):
    """
    Pre-training each NMF layer.
    """
    self._U_s = []
    self._V_s = []
    for i in range(self._p):
        self._setup_z(i)
        U, V = self._sklearn_pretrain(i)
        self._U_s.append(U)
        self._V_s.append(V)