Python Examples of sklearn.decomposition.IncrementalPCA

Source File: test_incremental_pca.py From Mastering-Elasticsearch-7.0 with MIT License

8 votes

def test_incremental_pca_partial_fit():
    # A single fit() call and a sequence of partial_fit() calls over the
    # same batches must produce (almost) the same components.
    rng = np.random.RandomState(1999)
    n_rows, n_cols = 50, 3
    data = rng.randn(n_rows, n_cols)  # isotropic Gaussian samples
    data[:, 1] *= .00001  # squash the middle column to near-zero variance
    data += [5, 4, 3]  # shift the data far away from the origin

    chunk = 10
    fitted_at_once = IncrementalPCA(n_components=2, batch_size=chunk).fit(data)
    fitted_in_steps = IncrementalPCA(n_components=2, batch_size=chunk)
    # The stop value is n_rows + 1 so that n_rows itself is one of the edges.
    edges = np.arange(0, n_rows + 1, chunk)
    for start, stop in zip(edges[:-1], edges[1:]):
        fitted_in_steps.partial_fit(data[start:stop, :])
    assert_almost_equal(fitted_at_once.components_,
                        fitted_in_steps.components_, decimal=3)

Source File: test_decomposition.py From pandas-ml with BSD 3-Clause "New" or "Revised" License

7 votes

def test_objectmapper(self):
        # Every attribute listed below, looked up through the ModelFrame's
        # ``decomposition`` accessor, must be the very same object as the
        # attribute of that name on the ``decomposition`` module.
        df = pdml.ModelFrame([])
        estimator_names = (
            'PCA',
            'IncrementalPCA',
            'KernelPCA',
            'FactorAnalysis',
            'FastICA',
            'TruncatedSVD',
            'NMF',
            'SparsePCA',
            'MiniBatchSparsePCA',
            'SparseCoder',
            'DictionaryLearning',
            'MiniBatchDictionaryLearning',
            'LatentDirichletAllocation',
        )
        for name in estimator_names:
            self.assertIs(getattr(df.decomposition, name),
                          getattr(decomposition, name))

Source File: test_incremental_pca.py From dask-ml with BSD 3-Clause "New" or "Revised" License

6 votes

def test_incremental_pca_validation():
    # Invalid n_components values must be rejected with a ValueError whose
    # message matches the expected pattern.
    data = da.from_array(np.array([[0, 1, 0], [1, 0, 0]]), chunks=[4, -1])
    n_samples, n_features = data.shape

    # n_components has to satisfy 1 <= n_components <= n_features.
    feature_msg = (
        "n_components={} invalid"
        " for n_features={}, need more rows than"
        " columns for IncrementalPCA"
        " processing"
    )
    for bad_value in [-1, 0, 0.99, 4]:
        expected = feature_msg.format(bad_value, n_features)
        with pytest.raises(ValueError, match=expected):
            IncrementalPCA(bad_value, batch_size=10).fit(data)

    # n_components also has to be <= the number of samples in the batch.
    too_many = 3
    sample_msg = (
        "n_components={} must be"
        " less or equal to the batch number of"
        " samples {}"
    ).format(too_many, n_samples)
    with pytest.raises(ValueError, match=sample_msg):
        IncrementalPCA(n_components=too_many).partial_fit(data)

Source File: test_incremental_pca.py From dask-ml with BSD 3-Clause "New" or "Revised" License

6 votes

def test_incremental_pca_check_projection():
    # Check that a held-out point is projected onto the expected direction.
    rng = np.random.RandomState(1999)
    n_rows, n_cols = 100, 3
    offset = np.array([3, 4, 5])
    train = rng.randn(n_rows, n_cols) * 0.1
    train[:10] += offset
    probe = 0.1 * rng.randn(1, n_cols) + offset
    train = da.from_array(train, chunks=(3, -1))
    probe = da.from_array(probe, chunks=(4, 3))

    # The probe point is built from the same offset as the first ten training
    # rows, so its projection should line up with what the model learned.
    projected = IncrementalPCA(n_components=2).fit(train).transform(probe)
    assert isinstance(projected, da.Array)

    # Scale the projection to unit Euclidean norm.
    norm = np.sqrt((projected ** 2).sum())
    projected /= norm

    # After normalisation the leading coordinate must be ~1 in magnitude.
    leading = projected[0][0]
    assert_almost_equal(np.abs(leading), 1.0, 1)

Source File: test_incremental_pca.py From twitter-stock-recommendation with MIT License

6 votes

def test_incremental_pca():
    # Fit IncrementalPCA on the dense iris data and check the shape of the
    # projection, the summed explained variance ratio against PCA, and that
    # the precision matrix inverts the covariance matrix.
    data = iris.data
    n_rows, n_cols = data.shape[0], data.shape[1]
    chunk = n_rows // 3

    reference = PCA(n_components=2)
    reference.fit_transform(data)
    incremental = IncrementalPCA(n_components=2, batch_size=chunk)
    projected = incremental.fit_transform(data)

    np.testing.assert_equal(projected.shape, (n_rows, 2))
    assert_almost_equal(incremental.explained_variance_ratio_.sum(),
                        reference.explained_variance_ratio_.sum(), 1)

    identity = np.eye(n_cols)
    for k in (1, 2, n_cols):
        model = IncrementalPCA(k, batch_size=chunk)
        model.fit(data)
        product = np.dot(model.get_covariance(), model.get_precision())
        assert_array_almost_equal(product, identity)

Source File: test_incremental_pca.py From twitter-stock-recommendation with MIT License

6 votes

def test_incremental_pca_check_projection():
    # Check that a held-out point is projected onto the expected direction.
    rng = np.random.RandomState(1999)
    n_rows, n_cols = 100, 3
    offset = np.array([3, 4, 5])
    train = rng.randn(n_rows, n_cols) * .1
    train[:10] += offset
    probe = 0.1 * rng.randn(1, n_cols) + offset

    # The probe point is built from the same offset as the first ten training
    # rows, so its projection should line up with what the model learned.
    projected = IncrementalPCA(n_components=2).fit(train).transform(probe)

    # Scale the projection to unit Euclidean norm.
    norm = np.sqrt((projected ** 2).sum())
    projected /= norm

    # After normalisation the leading coordinate must be ~1 in magnitude.
    leading = projected[0][0]
    assert_almost_equal(np.abs(leading), 1., 1)

Source File: luminolFunc.py From pyodds with MIT License

6 votes

def fit(self,X):
        """Fit detector.

        The first column of ``X`` is cast to ``np.datetime64`` and used as
        the timestamps; the remaining columns are reduced to a single
        component with ``IncrementalPCA``. The resulting series, re-indexed
        through ``to_epoch``, is handed to ``AnomalyDetector``.

        Parameters
        ----------
        X : dataframe of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        self
            The detector, with ``ts``, ``ts_value`` and ``detector`` set.
        """
        values = X.to_numpy()
        timestamp = np.asarray(values[:, 0].astype(np.datetime64))

        reducer = IncrementalPCA(n_components=1)
        value = np.reshape(reducer.fit_transform(values[:, 1:]), -1)

        series = pd.Series(value, index=timestamp)
        series.index = series.index.map(lambda d: to_epoch(str(d)))
        lts = TimeSeries(series.to_dict())

        self.ts = timestamp
        self.ts_value = value
        self.detector = anomaly_detector.AnomalyDetector(lts)
        return self

Source File: test_incremental_pca.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_incremental_pca_partial_fit_float_division():
    # Non-regression test for issue #9489: the singular values must not
    # depend on whether n_samples_seen_ is stored as an int or as a float.
    rng = np.random.RandomState(0)
    first_batch = rng.randn(5, 3) + 2
    second_batch = rng.randn(7, 3) + 5

    float_counter = IncrementalPCA(n_components=2)
    float_counter.partial_fit(first_batch)
    # Force the running sample count to a float before the second batch.
    float_counter.n_samples_seen_ = float(float_counter.n_samples_seen_)
    float_counter.partial_fit(second_batch)

    int_counter = IncrementalPCA(n_components=2)
    for batch in (first_batch, second_batch):
        int_counter.partial_fit(batch)

    np.testing.assert_allclose(float_counter.singular_values_,
                               int_counter.singular_values_)

Source File: test_incremental_pca.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_whitening():
    # With whitening enabled, PCA and IncrementalPCA must give the same
    # transform up to sign, and both must invert back to the input.
    data = datasets.make_low_rank_matrix(1000, 10, tail_strength=0.,
                                         effective_rank=2, random_state=1999)
    digits = 3
    n_samples, n_features = data.shape
    for k in (None, 9):
        batch_model = PCA(whiten=True, n_components=k).fit(data)
        incr_model = IncrementalPCA(whiten=True, n_components=k,
                                    batch_size=250).fit(data)

        batch_proj = batch_model.transform(data)
        incr_proj = incr_model.transform(data)
        assert_almost_equal(np.abs(batch_proj), np.abs(incr_proj),
                            decimal=digits)

        incr_back = incr_model.inverse_transform(incr_proj)
        batch_back = batch_model.inverse_transform(batch_proj)
        assert_almost_equal(data, incr_back, decimal=digits)
        assert_almost_equal(data, batch_back, decimal=digits)
        assert_almost_equal(batch_back, incr_back, decimal=digits)

Source File: test_incremental_pca.py From dask-ml with BSD 3-Clause "New" or "Revised" License

6 votes

def test_incremental_pca_set_params():
    # After a fit, changing n_components through set_params() must make
    # partial_fit() raise; restoring the original value must work again.
    rng = np.random.RandomState(1999)
    n_samples = 100
    n_features = 20
    # Three independent draws, wrapped as dask arrays in draw order.
    X, X2, X3 = (
        da.from_array(rng.randn(n_samples, n_features), chunks=[4, -1])
        for _ in range(3)
    )

    ipca = IncrementalPCA(n_components=20)
    ipca.fit(X)

    # Fewer components than the fitted model has.
    ipca.set_params(n_components=10)
    with pytest.raises(ValueError):
        ipca.partial_fit(X2)

    # More than the previous setting, but still not the fitted value.
    ipca.set_params(n_components=15)
    with pytest.raises(ValueError):
        ipca.partial_fit(X3)

    # Back to the value used for the initial fit.
    ipca.set_params(n_components=20)
    ipca.partial_fit(X)

Source File: pca.py From tedana with GNU Lesser General Public License v2.1

6 votes

def low_mem_pca(data):
    """
    Decompose ``data`` with scikit-learn's IncrementalPCA.

    The model is fitted with one component fewer than the size of the last
    axis of ``data``.

    Parameters
    ----------
    data : (S [*E] x T) array_like
        Optimally combined (S x T) or full multi-echo (S*E x T) data.

    Returns
    -------
    u : array_like
        ``data`` projected onto ``v``, with every column scaled by the
        reciprocal of the matching entry of ``s``.
    s : array_like
        ``explained_variance_`` of the fitted model, one value per component.
    v : array_like
        Transpose of the fitted model's ``components_``.
    """
    from sklearn.decomposition import IncrementalPCA

    n_keep = data.shape[-1] - 1
    model = IncrementalPCA(n_components=n_keep)
    model.fit(data)

    v = model.components_.T
    s = model.explained_variance_
    projected = np.dot(data, v)
    # Right-multiplying by diag(1 / s) rescales each projected column.
    u = np.dot(projected, np.diag(1. / s))
    return u, s, v

Source File: test_incremental_pca.py From twitter-stock-recommendation with MIT License

6 votes

def test_incremental_pca_set_params():
    # After a fit, changing n_components through set_params() must make
    # partial_fit() raise; restoring the original value must work again.
    rng = np.random.RandomState(1999)
    n_samples = 100
    n_features = 20
    # Three independent draws, taken in this order.
    X, X2, X3 = (rng.randn(n_samples, n_features) for _ in range(3))

    ipca = IncrementalPCA(n_components=20)
    ipca.fit(X)

    # First a smaller value, then a larger one that still differs from the
    # fitted model; both must be rejected.
    for changed_value, new_batch in ((10, X2), (15, X3)):
        ipca.set_params(n_components=changed_value)
        assert_raises(ValueError, ipca.partial_fit, new_batch)

    # Back to the value used for the initial fit.
    ipca.set_params(n_components=20)
    ipca.partial_fit(X)

Source File: test_incremental_pca.py From dask-ml with BSD 3-Clause "New" or "Revised" License

6 votes

def test_whitening(svd_solver):
    # With whitening enabled, PCA and IncrementalPCA must give the same
    # transform up to sign, and both must invert back to the input.
    dense = datasets.make_low_rank_matrix(
        1000, 10, tail_strength=0.0, effective_rank=2, random_state=1999
    )
    data = da.from_array(dense, chunks=[200, -1])
    digits = 3
    n_samples, n_features = data.shape
    for k in (None, 9):
        batch_model = PCA(
            whiten=True, n_components=k, svd_solver=svd_solver
        ).fit(data)
        incr_model = IncrementalPCA(
            whiten=True, n_components=k, batch_size=250, svd_solver=svd_solver
        ).fit(data)

        batch_proj = batch_model.transform(data)
        incr_proj = incr_model.transform(data)
        assert_almost_equal(np.abs(batch_proj), np.abs(incr_proj), decimal=digits)

        incr_back = incr_model.inverse_transform(incr_proj)
        batch_back = batch_model.inverse_transform(batch_proj)
        # The input is materialised before comparing with the round trips.
        assert_almost_equal(data.compute(), incr_back, decimal=digits)
        assert_almost_equal(data.compute(), batch_back, decimal=digits)
        assert_almost_equal(batch_back, incr_back, decimal=digits)

Source File: test_incremental_pca.py From twitter-stock-recommendation with MIT License

6 votes

def test_incremental_pca_partial_fit():
    # A single fit() call and a sequence of partial_fit() calls over the
    # same batches must produce (almost) the same components.
    rng = np.random.RandomState(1999)
    n_rows, n_cols = 50, 3
    data = rng.randn(n_rows, n_cols)  # isotropic Gaussian samples
    data[:, 1] *= .00001  # squash the middle column to near-zero variance
    data += [5, 4, 3]  # shift the data far away from the origin

    chunk = 10
    whole = IncrementalPCA(n_components=2, batch_size=chunk).fit(data)
    stepwise = IncrementalPCA(n_components=2, batch_size=chunk)
    # The stop value is n_rows + 1 so that n_rows itself is one of the edges.
    boundaries = np.arange(0, n_rows + 1, chunk)
    windows = zip(boundaries[:-1], boundaries[1:])
    for lo, hi in windows:
        stepwise.partial_fit(data[lo:hi, :])
    assert_almost_equal(whole.components_, stepwise.components_, decimal=3)

Source File: test_incremental_pca.py From dask-ml with BSD 3-Clause "New" or "Revised" License

6 votes

def test_explained_variances(svd_solver):
    # PCA and IncrementalPCA must agree on the explained variance, its ratio
    # and the noise variance.
    dense = datasets.make_low_rank_matrix(
        1000, 100, tail_strength=0.0, effective_rank=10, random_state=1999
    )
    data = da.from_array(dense, chunks=[400, -1])
    digits = 3
    n_samples, n_features = data.shape
    compared = (
        "explained_variance_",
        "explained_variance_ratio_",
        "noise_variance_",
    )
    for k in (None, 99):
        batch_model = PCA(n_components=k, svd_solver=svd_solver).fit(data)
        incr_model = IncrementalPCA(
            n_components=k, batch_size=100, svd_solver=svd_solver
        ).fit(data)
        for attr in compared:
            assert_almost_equal(
                getattr(batch_model, attr), getattr(incr_model, attr),
                decimal=digits,
            )

Source File: test_incremental_pca.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_n_components_none():
    # n_components=None must be resolved to a concrete n_components_ on
    # every partial_fit call, for both tall and wide inputs.
    rng = np.random.RandomState(1999)
    for shape in [(50, 10), (10, 50)]:
        data = rng.rand(*shape)
        model = IncrementalPCA(n_components=None)

        # On the first call the value comes from the smaller input dimension.
        model.partial_fit(data)
        assert model.n_components_ == min(data.shape)

        # On the second call it must agree with the components_ already
        # stored by the first call.
        model.partial_fit(data)
        assert model.n_components_ == model.components_.shape[0]

Source File: test_incremental_pca.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_incremental_pca_validation():
    # Invalid n_components values must be rejected with a ValueError whose
    # message matches the expected pattern.
    data = np.array([[0, 1, 0], [1, 0, 0]])
    n_samples, n_features = data.shape

    # n_components has to satisfy 1 <= n_components <= n_features.
    feature_msg = ("n_components={} invalid for n_features={}, need"
                   " more rows than columns for IncrementalPCA "
                   "processing")
    for bad_value in [-1, 0, .99, 4]:
        estimator = IncrementalPCA(bad_value, batch_size=10)
        assert_raises_regex(ValueError,
                            feature_msg.format(bad_value, n_features),
                            estimator.fit, data)

    # n_components also has to be <= the number of samples in the batch.
    too_many = 3
    sample_msg = ("n_components={} must be less or equal to "
                  "the batch number of samples {}").format(too_many, n_samples)
    estimator = IncrementalPCA(n_components=too_many)
    assert_raises_regex(ValueError, sample_msg, estimator.partial_fit, data)

Source File: test_incremental_pca.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_incremental_pca_check_projection():
    # Check that a held-out point is projected onto the expected direction.
    rng = np.random.RandomState(1999)
    n_rows, n_cols = 100, 3
    shift = np.array([3, 4, 5])
    samples = rng.randn(n_rows, n_cols) * .1
    samples[:10] += shift
    held_out = 0.1 * rng.randn(1, n_cols) + shift

    # The held-out point shares its shift with the first ten samples, so its
    # projection should line up with what the model learned.
    model = IncrementalPCA(n_components=2).fit(samples)
    scores = model.transform(held_out)

    # Scale the projection to unit Euclidean norm.
    scores /= np.sqrt((scores ** 2).sum())

    # After normalisation the leading coordinate must be ~1 in magnitude.
    assert_almost_equal(np.abs(scores[0][0]), 1., 1)

Source File: test_incremental_pca.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_incremental_pca():
    # Fit IncrementalPCA on the dense iris data and check the shape of the
    # projection, the summed explained variance ratio against PCA, and that
    # the precision matrix inverts the covariance matrix.
    samples = iris.data
    n_obs = samples.shape[0]
    n_dims = samples.shape[1]
    per_batch = n_obs // 3

    baseline = PCA(n_components=2)
    baseline.fit_transform(samples)
    streaming = IncrementalPCA(n_components=2, batch_size=per_batch)
    scores = streaming.fit_transform(samples)

    np.testing.assert_equal(scores.shape, (n_obs, 2))
    assert_almost_equal(streaming.explained_variance_ratio_.sum(),
                        baseline.explained_variance_ratio_.sum(), 1)

    for k in [1, 2, n_dims]:
        fitted = IncrementalPCA(k, batch_size=per_batch)
        fitted.fit(samples)
        covariance = fitted.get_covariance()
        inverse = fitted.get_precision()
        assert_array_almost_equal(np.dot(covariance, inverse),
                                  np.eye(n_dims))

Source File: test_incremental_pca.py From twitter-stock-recommendation with MIT License

6 votes

def test_whitening():
    # With whitening enabled, PCA and IncrementalPCA must give the same
    # transform up to sign, and both must invert back to the input.
    samples = datasets.make_low_rank_matrix(1000, 10, tail_strength=0.,
                                            effective_rank=2,
                                            random_state=1999)
    places = 3
    n_samples, n_features = samples.shape
    for k in (None, 9):
        full = PCA(whiten=True, n_components=k).fit(samples)
        streaming = IncrementalPCA(whiten=True, n_components=k,
                                   batch_size=250).fit(samples)

        full_scores = full.transform(samples)
        streaming_scores = streaming.transform(samples)
        assert_almost_equal(np.abs(full_scores), np.abs(streaming_scores),
                            decimal=places)

        streaming_round_trip = streaming.inverse_transform(streaming_scores)
        full_round_trip = full.inverse_transform(full_scores)
        assert_almost_equal(samples, streaming_round_trip, decimal=places)
        assert_almost_equal(samples, full_round_trip, decimal=places)
        assert_almost_equal(full_round_trip, streaming_round_trip,
                            decimal=places)

Source File: test_sklearn_pca_converter.py From sklearn-onnx with MIT License

5 votes

def test_incrementalpca_default(self):
        # Convert a default-constructed IncrementalPCA and dump the model
        # together with its test data.
        model, X_test = _fit_model_pca(IncrementalPCA())
        # The declared input is a float tensor with a free first dimension
        # and as many columns as the test data.
        input_spec = ("input", FloatTensorType([None, X_test.shape[1]]))
        model_onnx = convert_sklearn(model, initial_types=[input_spec])
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X_test,
            model,
            model_onnx,
            basename="SklearnIncrementalPCADefault",
        )

Source File: test_incremental_pca.py From dask-ml with BSD 3-Clause "New" or "Revised" License

5 votes

def test_incremental_pca_num_features_change():
    # A partial_fit() on data whose number of features differs from the
    # fitted data (50 instead of 20) must raise a ValueError.
    rng = np.random.RandomState(1999)
    n_samples = 100
    fitted_data = rng.randn(n_samples, 20)
    wider_data = rng.randn(n_samples, 50)
    fitted_data = da.from_array(fitted_data, chunks=[4, -1])
    wider_data = da.from_array(wider_data, chunks=[4, -1])

    model = IncrementalPCA(n_components=None)
    model.fit(fitted_data)
    with pytest.raises(ValueError):
        model.partial_fit(wider_data)

Source File: test_incremental_pca.py From dask-ml with BSD 3-Clause "New" or "Revised" License

5 votes

def test_incremental_pca_against_pca_random_data(svd_solver):
    # On random data, the IncrementalPCA projection must match the PCA
    # projection to one decimal place once signs are ignored.
    rng = np.random.RandomState(1999)
    n_samples = 100
    n_features = 3
    noise = rng.randn(n_samples, n_features)
    dense = noise + 5 * rng.rand(1, n_features)
    data = da.from_array(dense, chunks=[40, -1])

    batch_model = PCA(n_components=3, svd_solver=svd_solver)
    batch_scores = batch_model.fit_transform(data)
    incr_model = IncrementalPCA(
        n_components=3, batch_size=25, svd_solver=svd_solver
    )
    incr_scores = incr_model.fit_transform(data)

    assert_almost_equal(np.abs(batch_scores), np.abs(incr_scores), 1)

Source File: test_incremental_pca.py From twitter-stock-recommendation with MIT License

5 votes

def test_incremental_pca_inverse():
    # transform() followed by inverse_transform() must recover the input.
    rng = np.random.RandomState(1999)
    n_rows, n_cols = 50, 3
    data = rng.randn(n_rows, n_cols)  # isotropic Gaussian samples
    data[:, 1] *= .00001  # squash the middle column to near-zero variance
    data += [5, 4, 3]  # shift the data far away from the origin

    # One column carries almost no variance, so two components are enough
    # to rebuild the data to three decimals.
    model = IncrementalPCA(n_components=2, batch_size=10).fit(data)
    recovered = model.inverse_transform(model.transform(data))
    assert_almost_equal(data, recovered, decimal=3)

Source File: test_incremental_pca.py From twitter-stock-recommendation with MIT License

5 votes

def test_incremental_pca_validation():
    # fit() must raise ValueError for every n_components value tried below;
    # the input has two features.
    data = [[0, 1], [1, 0]]
    for bad_value in (-1, 0, .99, 3):
        estimator = IncrementalPCA(bad_value, batch_size=10)
        assert_raises(ValueError, estimator.fit, data)

Source File: test_incremental_pca.py From twitter-stock-recommendation with MIT License

5 votes

def test_incremental_pca_batch_signs():
    # The signs of components_ must not change between consecutive batch
    # sizes.
    rng = np.random.RandomState(1999)
    n_samples = 100
    n_features = 3
    data = rng.randn(n_samples, n_features)

    fitted_components = [
        IncrementalPCA(n_components=None, batch_size=size).fit(data).components_
        for size in np.arange(10, 20)
    ]

    # Compare each result with the one from the next batch size.
    for current, following in zip(fitted_components[:-1],
                                  fitted_components[1:]):
        assert_almost_equal(np.sign(current), np.sign(following), decimal=6)

Source File: test_incremental_pca.py From twitter-stock-recommendation with MIT License

5 votes

def test_incremental_pca_batch_values():
    # The values of components_ must stay close (one decimal) between
    # consecutive batch sizes.
    rng = np.random.RandomState(1999)
    n_samples = 100
    n_features = 3
    data = rng.randn(n_samples, n_features)

    fitted_components = [
        IncrementalPCA(n_components=None, batch_size=size).fit(data).components_
        for size in np.arange(20, 40, 3)
    ]

    # Compare each result with the one from the next batch size.
    for current, following in zip(fitted_components[:-1],
                                  fitted_components[1:]):
        assert_almost_equal(current, following, decimal=1)

Source File: test_incremental_pca.py From twitter-stock-recommendation with MIT License

5 votes

def test_incremental_pca_against_pca_iris():
    # On the iris data, the IncrementalPCA projection must match the PCA
    # projection to one decimal place once signs are ignored.
    data = iris.data

    batch_scores = PCA(n_components=2).fit_transform(data)
    incr_model = IncrementalPCA(n_components=2, batch_size=25)
    incr_scores = incr_model.fit_transform(data)

    assert_almost_equal(np.abs(batch_scores), np.abs(incr_scores), 1)

Source File: test_incremental_pca.py From twitter-stock-recommendation with MIT License

5 votes

def test_incremental_pca_against_pca_random_data():
    # On random data, the IncrementalPCA projection must match the PCA
    # projection to one decimal place once signs are ignored.
    rng = np.random.RandomState(1999)
    n_samples = 100
    n_features = 3
    noise = rng.randn(n_samples, n_features)
    data = noise + 5 * rng.rand(1, n_features)

    batch_scores = PCA(n_components=3).fit_transform(data)
    incr_model = IncrementalPCA(n_components=3, batch_size=25)
    incr_scores = incr_model.fit_transform(data)

    assert_almost_equal(np.abs(batch_scores), np.abs(incr_scores), 1)

Source File: test_incremental_pca.py From dask-ml with BSD 3-Clause "New" or "Revised" License

5 votes

def test_incremental_pca_batch_rank():
    # Every batch size tried here is at least n_components, and the fitted
    # components must be the same from one batch size to the next.
    rng = np.random.RandomState(1999)
    n_samples = 100
    n_features = 20
    data = da.from_array(rng.randn(n_samples, n_features), chunks=[40, -1])

    fitted_components = [
        IncrementalPCA(n_components=20, batch_size=size).fit(data).components_
        for size in np.arange(20, 90, 3)
    ]

    # Compare each result with the one from the next batch size.
    for current, following in zip(fitted_components[:-1], fitted_components[1:]):
        assert_allclose_dense_sparse(current, following)

Python sklearn.decomposition.IncrementalPCA() Examples