Python sklearn.decomposition.TruncatedSVD() Examples
The following are 30 code examples of sklearn.decomposition.TruncatedSVD(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.decomposition, or try the search function.
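Before the project-specific examples, here is a minimal, self-contained sketch of the typical TruncatedSVD workflow: fit the estimator on a sparse matrix (no centering or densification required) and inspect the explained variance. The toy data and parameter values below are illustrative assumptions, not taken from any of the projects that follow.

from scipy.sparse import random as sparse_random
from sklearn.decomposition import TruncatedSVD

# Toy sparse matrix standing in for a TF-IDF or co-occurrence matrix.
X = sparse_random(100, 50, density=0.05, random_state=42)

# Reduce to 5 latent dimensions; TruncatedSVD works directly on sparse input.
svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)
X_reduced = svd.fit_transform(X)

print(X_reduced.shape)                      # (100, 5)
print(svd.explained_variance_ratio_.sum())  # fraction of variance retained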
Example #1
Source File: recommender.py From atap with Apache License 2.0 | 7 votes |
def fit_transform(self, documents):
    # Vectorizer will be False if pipeline hasn't been fit yet;
    # trigger fit_transform and save the vectorizer and lexicon.
    if self.vectorizer == False:
        self.lexicon = self.pipeline.fit_transform(documents)
        self.vect = self.pipeline.named_steps['tfidf']
        self.knn = self.pipeline.named_steps['knn']
        self.save()
    # If there's a stored vectorizer and prefitted lexicon,
    # use them instead.
    else:
        self.vect = self.vectorizer
        self.knn = Pipeline([
            ('svd', TruncatedSVD(n_components=100)),
            ('knn', KNNTransformer(k=self.k, algorithm='ball_tree'))
        ])
        self.knn.fit_transform(self.lexicon)
Example #2
Source File: test_decompose.py From skutil with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_selective_tsvd():
    original = X
    cols = [original.columns[0], original.columns[1]]  # Only perform on first two columns...

    # should be the same as the transformed cols
    compare_cols = np.array(
        original[['petal length (cm)', 'petal width (cm)']].as_matrix())

    transformer = SelectiveTruncatedSVD(cols=cols, n_components=1).fit(original)
    transformed = transformer.transform(original)

    untouched_cols = np.array(transformed[['petal length (cm)', 'petal width (cm)']].as_matrix())
    assert_array_almost_equal(compare_cols, untouched_cols)

    assert 'Concept1' in transformed.columns
    assert transformed.shape[1] == 3
    assert isinstance(transformer.get_decomposition(), TruncatedSVD)
    assert SelectiveTruncatedSVD().get_decomposition() is None  # default None

    # test the selective mixin
    assert isinstance(transformer.cols, list)
Example #3
Source File: feature_vector_space.py From kaggle-HomeDepot with MIT License | 6 votes |
def transform(self):
    ## get common vocabulary
    tfidf = self._init_word_ngram_tfidf(self.ngram)
    tfidf.fit(list(self.obs_corpus) + list(self.target_corpus))
    vocabulary = tfidf.vocabulary_
    ## obs tfidf
    tfidf = self._init_word_ngram_tfidf(self.ngram, vocabulary)
    X_obs = tfidf.fit_transform(self.obs_corpus)
    ## target tfidf
    tfidf = self._init_word_ngram_tfidf(self.ngram, vocabulary)
    X_target = tfidf.fit_transform(self.target_corpus)
    ## svd
    svd = TruncatedSVD(n_components=self.svd_dim,
                       n_iter=self.svd_n_iter,
                       random_state=config.RANDOM_SEED)
    svd.fit(scipy.sparse.vstack((X_obs, X_target)))
    X_obs = svd.transform(X_obs)
    X_target = svd.transform(X_target)
    ## cosine similarity
    sim = list(map(dist_utils._cosine_sim, X_obs, X_target))
    sim = np.asarray(sim).squeeze()
    return sim
Example #4
Source File: test_forest.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_random_hasher():
    # test random forest hashing on circles dataset
    # make sure that it is linearly separable.
    # even after projected to two SVD dimensions
    # Note: Not all random_states produce perfect results.
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=1)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed = hasher.fit_transform(X)

    # test fit and transform:
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=1)
    assert_array_equal(hasher.fit(X).transform(X).toarray(),
                       X_transformed.toarray())

    # one leaf active per data point per forest
    assert_equal(X_transformed.shape[0], X.shape[0])
    assert_array_equal(X_transformed.sum(axis=1), hasher.n_estimators)
    svd = TruncatedSVD(n_components=2)
    X_reduced = svd.fit_transform(X_transformed)
    linear_clf = LinearSVC()
    linear_clf.fit(X_reduced, y)
    assert_equal(linear_clf.score(X_reduced, y), 1.)
Example #5
Source File: feature_vector_space.py From kaggle-HomeDepot with MIT License | 6 votes |
def transform(self):
    # ngrams
    obs_ngrams = list(map(lambda x: ngram_utils._ngrams(x.split(" "), self.obs_ngram, "_"), self.obs_corpus))
    target_ngrams = list(map(lambda x: ngram_utils._ngrams(x.split(" "), self.target_ngram, "_"), self.target_corpus))
    # cooccurrence ngrams
    cooc_terms = list(map(lambda lst1, lst2: self._get_cooc_terms(lst1, lst2, "X"), obs_ngrams, target_ngrams))
    ## tfidf
    tfidf = self._init_word_ngram_tfidf(ngram=1)
    X = tfidf.fit_transform(cooc_terms)
    ## svd
    svd = TruncatedSVD(n_components=self.svd_dim,
                       n_iter=self.svd_n_iter,
                       random_state=config.RANDOM_SEED)
    return svd.fit_transform(X)

# 2nd in CrowdFlower (preprocessing_mikhail.py)
Example #6
Source File: feathernode.py From karateclub with GNU General Public License v3.0 | 6 votes |
def _reduce_dimensions(self, X):
    """
    Using Truncated SVD.

    Arg types:
        * **X** *(Scipy COO or Numpy array)* - The wide feature matrix.

    Return types:
        * **X** *(Numpy array)* - The reduced feature matrix of nodes.
    """
    svd = TruncatedSVD(n_components=self.reduction_dimensions,
                       n_iter=self.svd_iterations,
                       random_state=self.seed)
    svd.fit(X)
    X = svd.transform(X)
    return X
Example #7
Source File: recommender.py From atap with Apache License 2.0 | 6 votes |
def __init__(self, k=3, **kwargs):
    self.k = k
    self.pipeline = Pipeline([
        ('norm', TextNormalizer(minimum=10, maximum=100)),
        ('tfidf', TfidfVectorizer()),
        ('knn', Pipeline([
            ('svd', TruncatedSVD(n_components=100)),
            ('model', KNNTransformer(k=self.k, algorithm='ball_tree'))
        ]))
    ])

    self.lex_path = "lexicon.pkl"
    self.vect_path = "vect.pkl"
    self.vectorizer = False
    self.lexicon = None
    self.load()
Example #8
Source File: AE_ts_model.py From AE_ts with MIT License | 6 votes |
def plot_z_run(z_run, label):
    f1, ax1 = plt.subplots(2, 1)

    # First fit a PCA
    PCA_model = TruncatedSVD(n_components=3).fit(z_run)
    z_run_reduced = PCA_model.transform(z_run)
    ax1[0].scatter(z_run_reduced[:, 0], z_run_reduced[:, 1], c=label, marker='*', linewidths=0)
    ax1[0].set_title('PCA on z_run')

    # Then fit a tSNE
    tSNE_model = TSNE(verbose=2, perplexity=80, min_grad_norm=1E-12, n_iter=3000)
    z_run_tsne = tSNE_model.fit_transform(z_run)
    ax1[1].scatter(z_run_tsne[:, 0], z_run_tsne[:, 1], c=label, marker='*', linewidths=0)
    ax1[1].set_title('tSNE on z_run')

    plt.show()
    return
Example #9
Source File: topics.py From atap with Apache License 2.0 | 6 votes |
def __init__(self, n_topics=50, estimator='LDA'):
    """
    n_topics is the desired number of topics.
    To use Latent Semantic Analysis, set estimator to 'LSA';
    to use Non-Negative Matrix Factorization, set estimator to 'NMF';
    otherwise, defaults to Latent Dirichlet Allocation ('LDA').
    """
    self.n_topics = n_topics

    if estimator == 'LSA':
        self.estimator = TruncatedSVD(n_components=self.n_topics)
    elif estimator == 'NMF':
        self.estimator = NMF(n_components=self.n_topics)
    else:
        self.estimator = LatentDirichletAllocation(n_topics=self.n_topics)

    self.model = Pipeline([
        ('norm', TextNormalizer()),
        ('tfidf', CountVectorizer(tokenizer=identity,
                                  preprocessor=None, lowercase=False)),
        ('model', self.estimator)
    ])
Example #10
Source File: build.py From atap with Apache License 2.0 | 6 votes |
def create_pipeline(estimator, reduction=False):

    steps = [
        ('normalize', TextNormalizer()),
        ('vectorize', TfidfVectorizer(
            tokenizer=identity, preprocessor=None, lowercase=False
        ))
    ]

    if reduction:
        steps.append((
            'reduction', TruncatedSVD(n_components=10000)
        ))

    # Add the estimator
    steps.append(('classifier', estimator))
    return Pipeline(steps)
Example #11
Source File: run.py From themarketingtechnologist with Apache License 2.0 | 6 votes |
def reduce_dimensionality(X, n_features):
    """
    Apply PCA or SVD to reduce dimension to n_features.
    :param X:
    :param n_features:
    :return:
    """
    # Initialize reduction method: PCA or SVD
    # reducer = PCA(n_components=n_features)
    reducer = TruncatedSVD(n_components=n_features)
    # Fit and transform data to n_features-dimensional space
    reducer.fit(X)
    X = reducer.transform(X)
    logging.debug("Reduced number of features to {0}".format(n_features))
    logging.debug("Percentage explained: %s\n" % reducer.explained_variance_ratio_.sum())
    return X
Example #12
Source File: tadw.py From karateclub with GNU General Public License v3.0 | 6 votes |
def _create_reduced_features(self, X):
    """
    Creating a dense reduced node feature matrix.

    Arg types:
        * **X** *(Scipy COO or Numpy array)* - The wide feature matrix.

    Return types:
        * **T** *(Numpy array)* - The reduced feature matrix of nodes.
    """
    svd = TruncatedSVD(n_components=self.reduction_dimensions,
                       n_iter=self.svd_iterations,
                       random_state=self.seed)
    svd.fit(X)
    T = svd.transform(X)
    return T.T
Example #13
Source File: reduce_kNN.py From practicalDataAnalysisCookbook with GNU General Public License v2.0 | 6 votes |
def fit_truncatedSVD(data):
    '''
        Fit the model with truncated SVD
        principal components
    '''
    # keyword parameters for the PCA
    kwrd_params = {
        'algorithm': 'randomized',
        'n_components': 5,
        'n_iter': 5,
        'random_state': 42,
        'tol': 0.0
    }

    # reduce the data
    reduced = reduceDimensions(cd.TruncatedSVD, data, **kwrd_params)

    # prepare the data for the classifier
    data_l = prepare_data(data, reduced, kwrd_params['n_components'])

    # fit the model
    class_fit_predict_print(data_l)

# the file name of the dataset
Example #14
Source File: test_embeddingsResolver.py From scattertext with Apache License 2.0 | 6 votes |
def test_resolve_embeddings(self):
    tdm = self.corpus.get_unigram_corpus().select(ClassPercentageCompactor(term_count=1))
    embeddings_resolver = EmbeddingsResolver(tdm)
    # embeddings = TruncatedSVD(n_components=20).fit_transform(tdm.get_term_doc_mat().T).T
    # embeddings_resolver.set_embeddings(embeddings)
    embeddings_resolver = embeddings_resolver.set_embeddings(tdm.get_term_doc_mat())
    if self.assertRaisesRegex:
        with self.assertRaisesRegex(Exception,
                                    "You have already set embeddings by running set_embeddings or set_embeddings_model."):
            embeddings_resolver.set_embeddings_model(None)
    embeddings_resolver = EmbeddingsResolver(tdm)
    embeddings_resolver = embeddings_resolver.set_embeddings_model(MockWord2Vec(tdm.get_terms()))
    if self.assertRaisesRegex:
        with self.assertRaisesRegex(Exception,
                                    "You have already set embeddings by running set_embeddings or set_embeddings_model."):
            embeddings_resolver.set_embeddings(tdm.get_term_doc_mat())
    c, axes = embeddings_resolver.project_embeddings(projection_model=TruncatedSVD(3))
    self.assertIsInstance(c, ParsedCorpus)
    self.assertEqual(axes.to_dict(),
                     pd.DataFrame(index=['speak'], data={'x': [0., ], 'y': [0., ]}).to_dict())
Example #15
Source File: test_truncated_svd.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_truncated_svd_eq_pca():
    # TruncatedSVD should be equal to PCA on centered data
    X_c = X - X.mean(axis=0)

    params = dict(n_components=10, random_state=42)

    svd = TruncatedSVD(algorithm='arpack', **params)
    pca = PCA(svd_solver='arpack', **params)

    Xt_svd = svd.fit_transform(X_c)
    Xt_pca = pca.fit_transform(X_c)

    assert_allclose(Xt_svd, Xt_pca, rtol=1e-9)
    assert_allclose(pca.mean_, 0, atol=1e-9)
    assert_allclose(svd.components_, pca.components_)
Example #16
Source File: grarep.py From GraRep with GNU General Public License v3.0 | 6 votes |
def optimize(self):
    """
    Learning an embedding.
    """
    print("\nOptimization started.\n")
    self.embeddings = []
    for step in tqdm(range(self.args.order)):
        target_matrix = self._create_target_matrix()
        svd = TruncatedSVD(n_components=self.args.dimensions,
                           n_iter=self.args.iterations,
                           random_state=self.args.seed)
        svd.fit(target_matrix)
        embedding = svd.transform(target_matrix)
        self.embeddings.append(embedding)
Example #17
Source File: processing.py From CAIL2019 with MIT License | 6 votes |
def do_tfidf_feature(df, tfidf):
    n_components = 30
    svd = TruncatedSVD(
        n_components=n_components, algorithm="arpack", random_state=2019
    )
    col_tfidf = tfidf.transform(df["col"])
    feature_names = tfidf.get_feature_names()
    ret_df = pd.DataFrame(col_tfidf.toarray(), columns=feature_names)
    return ret_df
    # NOTE: the early return above leaves the SVD branch below unreachable;
    # it is kept here as written in the original source.
    col_svd = svd.fit_transform(col_tfidf)
    best_features = [
        feature_names[i] + "i" for i in svd.components_[0].argsort()[::-1]
    ]
    ret_df = pd.DataFrame(col_svd, columns=best_features[:n_components])
    return ret_df
Example #18
Source File: ml_tune.py From ml-parameter-optimization with MIT License | 6 votes |
def dim_reduction_method(self):
    """
    select dimensionality reduction method
    """
    if self.dim_reduction == 'pca':
        return PCA()
    elif self.dim_reduction == 'factor-analysis':
        return FactorAnalysis()
    elif self.dim_reduction == 'fast-ica':
        return FastICA()
    elif self.dim_reduction == 'kernel-pca':
        return KernelPCA()
    elif self.dim_reduction == 'sparse-pca':
        return SparsePCA()
    elif self.dim_reduction == 'truncated-svd':
        return TruncatedSVD()
    elif self.dim_reduction is not None:
        raise ValueError('%s is not a supported dimensionality reduction method. Valid inputs are: '
                         '"pca", "factor-analysis", "fast-ica", "kernel-pca", "sparse-pca", "truncated-svd".'
                         % self.dim_reduction)
Example #19
Source File: topic.py From Python-DevOps with MIT License | 5 votes |
def train_lsa(corpus, n_topics, max_df=0.95, min_df=2, cleaning=clearstring, stop_words='english'):
    if cleaning is not None:
        for i in range(len(corpus)):
            corpus[i] = cleaning(corpus[i])
    tfidf_vectorizer = TfidfVectorizer(max_df=max_df, min_df=min_df, stop_words=stop_words)
    tfidf = tfidf_vectorizer.fit_transform(corpus)
    tfidf_features = tfidf_vectorizer.get_feature_names()
    tfidf = Normalizer().fit_transform(tfidf)
    lsa = TruncatedSVD(n_topics).fit(tfidf)
    return TOPIC(tfidf_features, lsa)
Example #20
Source File: word_utils.py From embedding with MIT License | 5 votes |
def latent_semantic_analysis(corpus_fname, output_fname):
    make_save_path(output_fname)
    corpus = [sent.replace('\n', '').strip() for sent in open(corpus_fname, 'r').readlines()]
    # construct co-occurrence matrix (=word_context)
    # dynamic weight if True. co-occurrence weight = [1, (w-1)/w, (w-2)/w, ..., 1/w]
    input_matrix, idx2vocab = sent_to_word_contexts_matrix(
        corpus, windows=3, min_tf=10,
        dynamic_weight=True, verbose=True)
    # compute truncated SVD on the co-occurrence matrix
    cooc_svd = TruncatedSVD(n_components=100)
    cooc_vecs = cooc_svd.fit_transform(input_matrix)
    with open(output_fname + "-cooc.vecs", 'w') as f1:
        for word, vec in zip(idx2vocab, cooc_vecs):
            str_vec = [str(el) for el in vec]
            f1.writelines(word + ' ' + ' '.join(str_vec) + "\n")
    # Shifted PPMI at k=0 (equal to PPMI)
    # pmi(word, contexts)
    # px: probability of rows (items)
    # py: probability of columns (features)
    pmi_matrix, _, _ = pmi(input_matrix, min_pmi=math.log(5))
    # compute truncated SVD
    # NOTE: as in the original source, fit_transform is called on input_matrix,
    # so the pmi_matrix computed above is not actually used here.
    pmi_svd = TruncatedSVD(n_components=100)
    pmi_vecs = pmi_svd.fit_transform(input_matrix)
    with open(output_fname + "-pmi.vecs", 'w') as f2:
        for word, vec in zip(idx2vocab, pmi_vecs):
            str_vec = [str(el) for el in vec]
            f2.writelines(word + ' ' + ' '.join(str_vec) + "\n")
Example #21
Source File: TruncatedSVD.py From mltk-algo-contrib with Apache License 2.0 | 5 votes |
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(
        options.get('params', {}),
        floats=['tol'],
        strs=['algorithm'],
        ints=['k', 'n_iter', 'random_state'],
        aliases={'k': 'n_components'}
    )

    self.estimator = _TruncatedSVD(**out_params)
Example #22
Source File: firmware_clustering.py From Firmware_Slap with GNU General Public License v3.0 | 5 votes |
def single_cluster(all_functions, centroid_count=2):
    vect, func_sparse = funcs_to_sparse(all_functions)

    transformer = Normalizer().fit(func_sparse)
    func_sparse = transformer.transform(func_sparse)

    # svd = TruncatedSVD(random_state=2)
    # svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)
    # func_sparse = svd.fit_transform(func_sparse)

    labels = []
    result = KMeans(n_clusters=centroid_count, random_state=2).fit(func_sparse)

    score = silhouette_score(func_sparse,
                             result.labels_,
                             metric="cosine",
                             random_state=2,
                             sample_size=5000)

    labels.append(result.labels_)
    print("Clusters {:<3} | Silhouette Score : {}".format(centroid_count, score))

    return result.labels_
Example #23
Source File: sent_utils.py From embedding with MIT License | 5 votes |
def latent_semantic_analysis(corpus_fname, output_fname, tokenizer_name="mecab"):
    make_save_path(output_fname)
    tokenizer = get_tokenizer(tokenizer_name)
    titles, raw_corpus, noun_corpus = [], [], []
    with open(corpus_fname, 'r', encoding='utf-8') as f:
        for line in f:
            try:
                title, document = line.strip().split("\u241E")
                titles.append(title)
                raw_corpus.append(document)
                nouns = tokenizer.nouns(document)
                noun_corpus.append(' '.join(nouns))
            except:
                continue
    # construct tf-idf matrix
    vectorizer = TfidfVectorizer(
        min_df=1,
        ngram_range=(1, 1),
        lowercase=True,
        tokenizer=lambda x: x.split())
    input_matrix = vectorizer.fit_transform(noun_corpus)
    # compute truncated SVD
    svd = TruncatedSVD(n_components=100)
    vecs = svd.fit_transform(input_matrix)
    with open(output_fname, 'w') as f:
        for doc_idx, vec in enumerate(vecs):
            str_vec = [str(el) for el in vec]
            f.writelines(titles[doc_idx] + "\u241E" + raw_corpus[doc_idx] + '\u241E' + ' '.join(str_vec) + "\n")
Example #24
Source File: cluster.py From text-classifier with Apache License 2.0 | 5 votes |
def show_plt(feature_matrix, labels):
    from sklearn.decomposition import TruncatedSVD
    import matplotlib.pyplot as plt
    svd = TruncatedSVD()
    plot_columns = svd.fit_transform(feature_matrix)
    plt.scatter(x=plot_columns[:, 0], y=plot_columns[:, 1], c=labels)
    plt.show()
Example #25
Source File: learn.py From partisan-discourse with Apache License 2.0 | 5 votes |
def construct_pipeline(classifier):
    """
    This function creates a feature extraction pipeline that accepts data
    from a CorpusLoader and appends the classification model to the end of
    the pipeline, returning a newly constructed Pipeline object that is
    ready to be fit and trained!
    """
    return Pipeline([
        # Create a Feature Union of Text Stats and Bag of Words
        ('union', FeatureUnion(
            transformer_list=[

                # Pipeline for pulling document structure features
                ('stats', Pipeline([
                    ('stats', TextStats()),
                    ('vect', DictVectorizer()),
                ])),

                # Pipeline for creating a bag of words TF-IDF vector
                ('bow', Pipeline([
                    ('tokens', TextNormalizer()),
                    ('tfidf', TfidfVectorizer(
                        tokenizer=identity, preprocessor=None, lowercase=False
                    )),
                    ('best', TruncatedSVD(n_components=1000)),
                ])),

            ],

            # weight components in feature union
            transformer_weights={
                'stats': 0.15,
                'bow': 0.85,
            },
        )),

        # Append the estimator to the end of the pipeline
        ('classifier', classifier),
    ])
Example #26
Source File: category_vector.py From talkingdata-adtracking-fraud-detection with MIT License | 5 votes |
def create_features_from_dataframe(self, df_train: pd.DataFrame, df_test: pd.DataFrame):
    train_length = len(df_train)
    n_components = 30
    df_data: pd.DataFrame = pd.concat([df_train, df_test])
    pipeline = make_pipeline(
        OneHotEncoder(),
        TfidfTransformer(),
        TruncatedSVD(n_components=30, random_state=71)
    )
    features = pipeline.fit_transform(
        df_data[['ip', 'app', 'os', 'device', 'channel']].values).astype(np.float32)
    feature_columns = []
    for i in range(n_components):
        feature_columns.append(self.name + '_{}'.format(i))
    return pd.DataFrame(data=features[:train_length], columns=feature_columns), \
           pd.DataFrame(data=features[train_length:], columns=feature_columns)
Example #27
Source File: category_vector.py From talkingdata-adtracking-fraud-detection with MIT License | 5 votes |
def create_features_from_dataframe(self, df_train: pd.DataFrame, df_test: pd.DataFrame):
    train_length = len(df_train)
    n_components = 30
    df_data: pd.DataFrame = pd.concat([df_train, df_test])
    pipeline = make_pipeline(
        OneHotEncoder(),
        TruncatedSVD(n_components=n_components, random_state=71)
    )
    features = pipeline.fit_transform(
        df_data[['ip', 'app', 'os', 'device', 'channel']].values).astype(np.float32)
    feature_columns = []
    for i in range(n_components):
        feature_columns.append(self.name + '_{}'.format(i))
    return pd.DataFrame(data=features[:train_length], columns=feature_columns), \
           pd.DataFrame(data=features[train_length:], columns=feature_columns)
Example #28
Source File: category_vector.py From talkingdata-adtracking-fraud-detection with MIT License | 5 votes |
def transformer_factory(self) -> TransformerMixin:
    return TruncatedSVD(n_components=self.width, random_state=71)
Example #29
Source File: firmware_clustering.py From Firmware_Slap with GNU General Public License v3.0 | 5 votes |
def get_single_cluster(all_functions, centroid_count=2):
    return_dict = {}
    vect, func_sparse = funcs_to_sparse(all_functions)

    transformer = Normalizer().fit(func_sparse)
    func_sparse = transformer.transform(func_sparse)

    # svd = TruncatedSVD(random_state=2)
    # svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)
    # func_sparse = svd.fit_transform(func_sparse)

    labels = []
    result = KMeans(n_clusters=centroid_count, random_state=2).fit(func_sparse)

    score = silhouette_score(func_sparse,
                             result.labels_,
                             metric="cosine",
                             random_state=2,
                             sample_size=5000)

    labels.append(result.labels_)
    # print("Clusters {:<3} | Silhouette Score : {}".format(centroid_count, score))

    return_dict['count'] = centroid_count
    return_dict['score'] = score
    return_dict['labels'] = result.labels_

    return return_dict
Example #30
Source File: text_char_tfidf_count_transformers.py From driverlessai-recipes with Apache License 2.0 | 5 votes |
def fit_transform(self, X: dt.Frame, y: np.array = None):
    X = X.to_pandas().astype(str).iloc[:, 0].fillna("NA")

    # Count Vectorizer
    self.cnt_vec = CountVectorizer(analyzer="char", ngram_range=(1, self.max_ngram))
    X = self.cnt_vec.fit_transform(X)

    # Truncated SVD
    if len(self.cnt_vec.vocabulary_) <= self.n_svd_comp:
        self.n_svd_comp = len(self.cnt_vec.vocabulary_) - 1
    self.truncated_svd = TruncatedSVD(n_components=self.n_svd_comp, random_state=2019)
    X = self.truncated_svd.fit_transform(X)

    return X