Python Examples of pandas.SparseDataFrame

Source File: test_frame.py From recruit with Apache License 2.0

6 votes

def test_comparison_op_scalar(self):
        """Scalar comparisons on a sparse frame match the dense frame (GH 13001)."""
        columns = {
            'A': [nan, nan, 0, 1],
            'B': [0, 1, 2, nan],
            'C': [1., 2., 3., 4.],
            'D': [nan, nan, nan, nan],
        }
        dense = pd.DataFrame(columns)
        sparse = dense.to_sparse()

        # The comparison changes the internal representation, so each result
        # is densified before being checked against the dense answer.
        greater_than_one = sparse > 1
        assert isinstance(greater_than_one, pd.SparseDataFrame)
        tm.assert_frame_equal(greater_than_one.to_dense(), dense > 1)

        not_zero = sparse != 0
        assert isinstance(not_zero, pd.SparseDataFrame)
        tm.assert_frame_equal(not_zero.to_dense(), dense != 0)

Source File: test_frame.py From recruit with Apache License 2.0

6 votes

def test_notna(self):
        """Check ``SparseDataFrame.notna`` with NaN and zero fill values (GH 8276)."""
        nan_filled = pd.SparseDataFrame({'A': [np.nan, np.nan, 1, 2, np.nan],
                                         'B': [0, np.nan, np.nan, 2, np.nan]})
        result = nan_filled.notna()

        expected_sparse = pd.SparseDataFrame(
            {'A': [False, False, True, True, False],
             'B': [True, False, False, True, False]},
            default_fill_value=False)
        # The private fill value is overwritten with NaN after construction,
        # before the sparse-aware comparison runs.
        expected_sparse._default_fill_value = np.nan
        tm.assert_sp_frame_equal(result, expected_sparse)

        # if fill_value is not nan, True can be included in sp_values
        zero_filled = pd.SparseDataFrame({'A': [0, 0, 1, 2, np.nan],
                                          'B': [0, np.nan, 0, 2, np.nan]},
                                         default_fill_value=0.)
        result = zero_filled.notna()
        assert isinstance(result, pd.SparseDataFrame)

        expected_dense = pd.DataFrame({'A': [True, True, True, True, False],
                                       'B': [True, False, True, True, False]})
        tm.assert_frame_equal(result.to_dense(), expected_dense)

Source File: test_frame.py From recruit with Apache License 2.0

6 votes

def test_isna(self):
        """Check ``SparseDataFrame.isna`` with NaN and zero fill values (GH 8276)."""
        nan_filled = pd.SparseDataFrame({'A': [np.nan, np.nan, 1, 2, np.nan],
                                         'B': [0, np.nan, np.nan, 2, np.nan]})
        result = nan_filled.isna()

        expected_sparse = pd.SparseDataFrame(
            {'A': [True, True, False, False, True],
             'B': [False, True, True, False, True]},
            default_fill_value=True)
        # The private fill value is overwritten with NaN after construction,
        # before the sparse-aware comparison runs.
        expected_sparse._default_fill_value = np.nan
        tm.assert_sp_frame_equal(result, expected_sparse)

        # if fill_value is not nan, True can be included in sp_values
        zero_filled = pd.SparseDataFrame({'A': [0, 0, 1, 2, np.nan],
                                          'B': [0, np.nan, 0, 2, np.nan]},
                                         default_fill_value=0.)
        result = zero_filled.isna()
        assert isinstance(result, pd.SparseDataFrame)

        expected_dense = pd.DataFrame({'A': [False, False, False, False, True],
                                       'B': [False, True, False, False, True]})
        tm.assert_frame_equal(result.to_dense(), expected_dense)

Source File: test_series.py From vnpy_crypto with MIT License

6 votes

def test_to_frame(self):
        """``SparseSeries.to_frame`` yields the matching sparse frame (GH 9850)."""
        # Every constructor below receives its own fresh list of these values.
        values = (1, 2, 0, nan, 4, nan, 0)

        # Default fill value, with and without an explicit column name.
        series = pd.SparseSeries(list(values), name='x')
        expected = pd.SparseDataFrame({'x': list(values)})
        tm.assert_sp_frame_equal(series.to_frame(), expected)

        expected = pd.SparseDataFrame({'y': list(values)})
        tm.assert_sp_frame_equal(series.to_frame(name='y'), expected)

        # Fill value of 0; the renamed frame is compared after densifying.
        series = pd.SparseSeries(list(values), name='x', fill_value=0)
        expected = pd.SparseDataFrame({'x': list(values)},
                                      default_fill_value=0)
        tm.assert_sp_frame_equal(series.to_frame(), expected)

        expected = pd.DataFrame({'y': list(values)})
        tm.assert_frame_equal(series.to_frame(name='y').to_dense(), expected)

Source File: test_combine_concat.py From vnpy_crypto with MIT License

6 votes

def test_concat_sparse_dense_rows(self, fill_value, sparse_idx, dense_idx):
        """Row-wise concat of one sparse and one dense frame, in both orders."""
        candidates = [self.dense1, self.dense2]
        kept_dense = candidates[dense_idx]
        made_sparse = candidates[sparse_idx].to_sparse(fill_value=fill_value)

        mixed = [kept_dense, made_sparse]
        all_dense = [kept_dense, candidates[sparse_idx]]

        # Exercise both directions: dense + sparse, then sparse + dense.
        orderings = ((mixed, all_dense), (mixed[::-1], all_dense[::-1]))
        for mixed_frames, dense_frames in orderings:
            result = pd.concat(mixed_frames)
            expected = pd.concat(dense_frames)

            assert isinstance(result, pd.SparseDataFrame)
            tm.assert_frame_equal(result.to_dense(), expected)

Source File: test_frame.py From recruit with Apache License 2.0

6 votes

def test_constructor_ndarray(self, float_frame):
        """Construct SparseDataFrames from ndarrays and check length errors."""
        # 2-D array with neither index nor columns: construction is the check
        SparseDataFrame(float_frame.values)

        # 1-D array with an explicit index and a single column label
        single_column = SparseDataFrame(float_frame['A'].values,
                                        index=float_frame.index,
                                        columns=['A'])
        tm.assert_sp_frame_equal(single_column,
                                 float_frame.reindex(columns=['A']))

        # reindex must raise when given a level argument
        with pytest.raises(TypeError):
            float_frame.reindex(columns=['A'], level=1)

        # index that is one element too short for the data
        with pytest.raises(ValueError, match="^Index length"):
            SparseDataFrame(float_frame.values,
                            index=float_frame.index[:-1])

        # columns that are one element too short for the data
        with pytest.raises(ValueError, match="^Column length"):
            SparseDataFrame(float_frame.values,
                            columns=float_frame.columns[:-1])

    # GH 9272

Source File: test_combine_concat.py From recruit with Apache License 2.0

6 votes

def test_concat_sparse_dense_rows(self, fill_value, sparse_idx, dense_idx):
        """Concatenate a dense and a sparsified frame by rows, both ways round."""
        sources = [self.dense1, self.dense2]
        dense_part = sources[dense_idx]
        sparse_part = sources[sparse_idx].to_sparse(fill_value=fill_value)

        with_sparse = [dense_part, sparse_part]
        dense_only = [dense_part, sources[sparse_idx]]

        # First pass is dense + sparse, second pass is sparse + dense.
        for flip in (False, True):
            step = -1 if flip else 1
            result = pd.concat(with_sparse[::step])
            expected = pd.concat(dense_only[::step])

            assert isinstance(result, pd.SparseDataFrame)
            tm.assert_frame_equal(result.to_dense(), expected)

Source File: test_series.py From recruit with Apache License 2.0

6 votes

def test_to_frame(self):
        """Convert a SparseSeries to a frame and compare with expectations (GH 9850)."""
        # Each constructor call gets a fresh list built from this tuple.
        raw = (1, 2, 0, nan, 4, nan, 0)

        sparse_series = pd.SparseSeries(list(raw), name='x')

        # Column keeps the series name when no name is passed.
        same_name = pd.SparseDataFrame({'x': list(raw)})
        tm.assert_sp_frame_equal(sparse_series.to_frame(), same_name)

        # An explicit name replaces the series name.
        renamed = pd.SparseDataFrame({'y': list(raw)})
        tm.assert_sp_frame_equal(sparse_series.to_frame(name='y'), renamed)

        # Repeat with fill_value=0 on the series.
        sparse_series = pd.SparseSeries(list(raw), name='x', fill_value=0)
        same_name = pd.SparseDataFrame({'x': list(raw)},
                                       default_fill_value=0)
        tm.assert_sp_frame_equal(sparse_series.to_frame(), same_name)

        renamed_dense = pd.DataFrame({'y': list(raw)})
        tm.assert_frame_equal(sparse_series.to_frame(name='y').to_dense(),
                              renamed_dense)

Source File: test_frame.py From recruit with Apache License 2.0

6 votes

def test_constructor_from_series(self):
        """Build a SparseDataFrame from a single SparseSeries (GH 2873).

        Only the single-series construction is asserted. The remaining
        statements prepare inputs for list-of-series constructions that are
        left commented out below, so ``x_sparse`` and ``y_sparse`` are
        intentionally unused.
        """

        # GH 2873
        x = Series(np.random.randn(10000), name='a')
        x = x.to_sparse(fill_value=0)
        assert isinstance(x, SparseSeries)
        df = SparseDataFrame(x)
        assert isinstance(df, SparseDataFrame)

        x = Series(np.random.randn(10000), name='a')
        y = Series(np.random.randn(10000), name='b')
        x2 = x.astype(float)
        # label-based slice assignment; presumably blanks all but the last
        # element so the series is almost entirely NaN
        x2.loc[:9998] = np.NaN
        # TODO: x_sparse is unused...fix
        x_sparse = x2.to_sparse(fill_value=np.NaN)  # noqa

        # Currently fails too with weird ufunc error
        # df1 = SparseDataFrame([x_sparse, y])

        # same slice assignment with zeros, matching the fill_value used below
        y.loc[:9998] = 0
        # TODO: y_sparse is unused...fix
        y_sparse = y.to_sparse(fill_value=0)  # noqa
        # without sparse value raises error
        # df2 = SparseDataFrame([x2_sparse, y])

Source File: test_to_from_scipy.py From recruit with Apache License 2.0

6 votes

def test_from_scipy_correct_ordering(spmatrix):
    """A frame built from a scipy matrix equals one built from the array (GH 16179)."""
    dense = np.arange(1, 5).reshape(2, 2)
    try:
        matrix = spmatrix(dense)
        assert matrix.dtype == dense.dtype
    except (TypeError, AssertionError):
        # Conversion to this spmatrix type failed (or changed the dtype) for
        # dense.dtype, so the combination is treated as unsupported and the
        # test ends here without further checks.
        return

    from_matrix = SparseDataFrame(matrix)
    from_array = SparseDataFrame(dense)
    tm.assert_sp_frame_equal(from_matrix, from_array)
    tm.assert_frame_equal(from_matrix.to_dense(), from_array.to_dense())

Source File: test_frame.py From recruit with Apache License 2.0

6 votes

def test_astype_bool(self):
        """Cast an int64 sparse frame to a boolean ``SparseDtype``.

        The frame starts with two int64 columns whose fill value is 0; after
        ``astype(SparseDtype(bool, False))`` every non-zero entry is expected
        to become ``True`` and both columns to report a boolean SparseDtype.
        """
        sparse = pd.SparseDataFrame({'A': SparseArray([0, 2, 0, 4],
                                                      fill_value=0,
                                                      dtype=np.int64),
                                     'B': SparseArray([0, 5, 0, 7],
                                                      fill_value=0,
                                                      dtype=np.int64)},
                                    default_fill_value=0)
        assert sparse['A'].dtype == SparseDtype(np.int64)
        assert sparse['B'].dtype == SparseDtype(np.int64)

        res = sparse.astype(SparseDtype(bool, False))
        # The builtin ``bool`` is used throughout instead of the ``np.bool``
        # alias: the alias was deprecated in NumPy 1.20 and later removed,
        # and the builtin is the very object the alias pointed to.
        exp = pd.SparseDataFrame({'A': SparseArray([False, True, False, True],
                                                   dtype=bool,
                                                   fill_value=False,
                                                   kind='integer'),
                                  'B': SparseArray([False, True, False, True],
                                                   dtype=bool,
                                                   fill_value=False,
                                                   kind='integer')},
                                 default_fill_value=False)
        tm.assert_sp_frame_equal(res, exp)
        assert res['A'].dtype == SparseDtype(bool)
        assert res['B'].dtype == SparseDtype(bool)

Source File: test_frame.py From recruit with Apache License 2.0

6 votes

def test_dense_to_sparse(self):
        """``DataFrame.to_sparse`` honours the ``kind`` and ``fill_value`` options."""
        dense = DataFrame({'A': [nan, nan, nan, 1, 2],
                           'B': [1, 2, nan, nan, nan]})

        # Default conversion: NaN fill value and a BlockIndex per column.
        block_sparse = dense.to_sparse()
        assert isinstance(block_sparse, SparseDataFrame)
        assert np.isnan(block_sparse.default_fill_value)
        assert isinstance(block_sparse['A'].sp_index, BlockIndex)
        tm.assert_frame_equal(block_sparse.to_dense(), dense)

        # kind='integer' switches the column index type to IntIndex.
        int_sparse = dense.to_sparse(kind='integer')
        assert isinstance(int_sparse['A'].sp_index, IntIndex)

        # Explicit fill value of 0 on an all-float frame.
        zero_padded = {'A': [0, 0, 0, 1, 2],
                       'B': [1, 2, 0, 0, 0]}
        dense = DataFrame(zero_padded, dtype=float)
        zero_sparse = dense.to_sparse(fill_value=0)
        assert zero_sparse.default_fill_value == 0
        tm.assert_frame_equal(zero_sparse.to_dense(), dense)

Source File: InferenceLightGBM.py From KDDCup2019_admin with MIT License

6 votes

def get_node_id_feature_sparse(self, X):
        """Compute per-row features in a thread pool and return them sparse.

        ``self.get_feaure`` is mapped over the elements of
        ``np.array(X.values)`` (one call per row, assuming ``X`` is a 2-D
        DataFrame) using 40 worker threads. The collected results are turned
        into a DataFrame, passed through ``pd.get_dummies``, wrapped in a
        ``pd.SparseDataFrame`` and cast to float.

        Parameters
        ----------
        X : object exposing ``.values``
            Input samples; presumably a pandas DataFrame.

        Returns
        -------
        pd.SparseDataFrame
            The encoded features as floats.
        """
        pool = ThreadPool(40)
        try:
            results = pool.map(self.get_feaure, np.array(X.values))
        finally:
            # Release the worker threads even when a worker raises; the pool
            # was previously never closed, leaking 40 threads per call.
            # NOTE(review): assumes ThreadPool is multiprocessing's pool
            # (close/join API) -- confirm against the file's imports.
            pool.close()
            pool.join()

        results = pd.DataFrame(list(results))

        print(results.columns)
        print("-------------")
        results = pd.SparseDataFrame(pd.get_dummies(results)).astype("float")

        print(results)
        print(results.columns)
        return results

Source File: conftest.py From recruit with Apache License 2.0

5 votes

def float_frame():
    """
    Fixture returning a sparse DataFrame of floats with a DatetimeIndex

    Columns are ['A', 'B', 'C', 'D']; some entries are missing
    """
    # 'block' is already the default kind; it is passed explicitly here
    sparse_frame = SparseDataFrame(data, default_kind='block', index=dates)
    return sparse_frame

Source File: test_series.py From vnpy_crypto with MIT License

5 votes

def test_concat_axis1(self):
        """Column-wise concat of two SparseSeries matches the dense concat."""
        left = np.array([1, 2, np.nan, np.nan, 0, np.nan])
        right = np.array([3, np.nan, 4, 0, 0])

        sparse_parts = [pd.SparseSeries(left, name='x'),
                        pd.SparseSeries(right, name='y')]
        result = pd.concat(sparse_parts, axis=1)

        # Expected value: concatenate the dense series, then sparsify.
        dense_parts = [pd.Series(left, name='x'),
                       pd.Series(right, name='y')]
        expected = pd.SparseDataFrame(pd.concat(dense_parts, axis=1))
        tm.assert_sp_frame_equal(result, expected)

Source File: conftest.py From recruit with Apache License 2.0

5 votes

def float_frame_int_kind():
    """
    Fixture returning a sparse DataFrame of floats with a DatetimeIndex

    Built with default_kind='integer'; columns are ['A', 'B', 'C', 'D'].
    Some entries are missing.
    """
    sparse_frame = SparseDataFrame(data, default_kind='integer', index=dates)
    return sparse_frame

Source File: conftest.py From recruit with Apache License 2.0

5 votes

def float_string_frame():
    """
    Fixture returning a sparse DataFrame of floats and strings with a
    DatetimeIndex

    Columns are ['A', 'B', 'C', 'D', 'foo']; some entries are missing
    """
    mixed_frame = SparseDataFrame(data, index=dates)
    # one 'bar' entry per date fills the extra string column
    row_count = len(dates)
    mixed_frame['foo'] = SparseArray(['bar'] * row_count)
    return mixed_frame

Source File: test_reshape.py From vnpy_crypto with MIT License

5 votes

def sparse_df():
    """Return a 3x3 SparseDataFrame with ones on the diagonal (eye)."""
    diagonal = {i: {i: 1} for i in range(3)}
    return pd.SparseDataFrame(diagonal)

Source File: test_combine_concat.py From vnpy_crypto with MIT License

5 votes

def test_concat_axis1(self):
        """Concatenating SparseSeries along axis 1 equals the sparsified dense result."""
        first_values = np.array([1, 2, np.nan, np.nan, 0, np.nan])
        second_values = np.array([3, np.nan, 4, 0, 0])

        sparse_x = pd.SparseSeries(first_values, name='x')
        sparse_y = pd.SparseSeries(second_values, name='y')
        result = pd.concat([sparse_x, sparse_y], axis=1)

        dense_x = pd.Series(first_values, name='x')
        dense_y = pd.Series(second_values, name='y')
        dense_concat = pd.concat([dense_x, dense_y], axis=1)
        expected = pd.SparseDataFrame(dense_concat)

        tm.assert_sp_frame_equal(result, expected)

Source File: test_format.py From vnpy_crypto with MIT License

5 votes

def test_sparse_repr_after_set(self):
        """Regression test: repr of a sparse frame after chained assignment.

        A copy of the frame is taken first, a value is then written through
        chained indexing, ``repr`` is called, and the frame is finally
        compared with the earlier copy.
        """
        # GH 15488
        sdf = pd.SparseDataFrame([[np.nan, 1], [2, np.nan]])
        # snapshot taken before the assignment below
        res = sdf.copy()

        # Ignore the warning
        with pd.option_context('mode.chained_assignment', None):
            # the written value (2) equals what the input data already holds
            # at column 0, row 1
            sdf[0][1] = 2  # This line triggers the bug

        # only has to complete without raising; the return value is discarded
        repr(sdf)
        tm.assert_sp_frame_equal(sdf, res)

Source File: conftest.py From recruit with Apache License 2.0

5 votes

def empty_frame():
    """
    Fixture returning a SparseDataFrame constructed with no arguments
    """
    blank = SparseDataFrame()
    return blank

Source File: test_apply.py From recruit with Apache License 2.0

5 votes

def test_apply_keep_sparse_dtype():
    """``apply`` agrees between a sparse frame and the DataFrame built from it (GH 23744)."""
    matrix = np.array([[0, 1, 0], [0, 0, 0], [0, 0, 1]])
    sparse = SparseDataFrame(matrix, columns=['b', 'a', 'c'],
                             default_fill_value=1)
    wrapped = DataFrame(sparse)

    from_sparse = sparse.apply(np.exp)
    from_wrapped = wrapped.apply(np.exp)
    tm.assert_frame_equal(from_sparse, from_wrapped)

Source File: test_apply.py From recruit with Apache License 2.0

5 votes

def test_applymap(frame):
    """Smoke test: ``applymap`` on the fixture returns a SparseDataFrame."""
    doubled = frame.applymap(lambda value: value * 2)
    assert isinstance(doubled, SparseDataFrame)

Source File: test_apply.py From recruit with Apache License 2.0

5 votes

def fill_frame(frame):
    """Return a SparseDataFrame copy of ``frame`` with NaNs replaced by 2.

    The new frame reuses ``frame``'s index, is labelled with columns
    'A'-'D' and uses 2 as its default fill value.
    """
    filled = frame.values.copy()
    missing = np.isnan(filled)
    filled[missing] = 2

    return SparseDataFrame(filled,
                           index=frame.index,
                           columns=['A', 'B', 'C', 'D'],
                           default_fill_value=2)

Source File: test_apply.py From recruit with Apache License 2.0

5 votes

def frame(dates):
    """Build a SparseDataFrame with columns 'A'-'D' (ten rows) indexed by ``dates``."""
    columns = {
        'A': [np.nan, np.nan, np.nan, 0, 1, 2, 3, 4, 5, 6],
        'B': [0, 1, 2, np.nan, np.nan, np.nan, 3, 4, 5, 6],
        'C': np.arange(10, dtype=np.float64),
        'D': [0, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan],
    }
    sparse_frame = SparseDataFrame(columns, index=dates)
    return sparse_frame

Source File: test_apply.py From recruit with Apache License 2.0

5 votes

def empty():
    """Return a SparseDataFrame constructed with no arguments."""
    blank = SparseDataFrame()
    return blank

Source File: test_frame.py From recruit with Apache License 2.0

5 votes

def test_dropna(self, inplace, how):
        """Dropping the all-NaN column leaves only 'F2' (regression #21172)."""
        expected = pd.SparseDataFrame({"F2": [0, 1]})

        missing = float('nan')
        input_df = pd.SparseDataFrame({"F1": [missing, float('nan')],
                                       "F2": [0, 1]})

        returned = input_df.dropna(axis=1, inplace=inplace, how=how)
        # With inplace set, the mutated input is the frame to inspect.
        result_df = input_df if inplace else returned
        tm.assert_sp_frame_equal(expected, result_df)

Source File: test_frame.py From recruit with Apache License 2.0

5 votes

def test_quantile_multi(self):
        """Multi-quantile on a sparse frame agrees with the dense result (GH 17386)."""
        rows = [[1, 1], [2, 10], [3, 100], [nan, nan]]
        quantiles = [0.1, 0.5]

        result = SparseDataFrame(rows).quantile(quantiles)

        # Expected values come from the dense computation, in both a dense
        # and a sparsified form.
        expected_dense = DataFrame(rows).quantile(quantiles)
        expected_sparse = SparseDataFrame(expected_dense)

        tm.assert_frame_equal(result, expected_dense)
        tm.assert_sp_frame_equal(result, expected_sparse)

Source File: test_frame.py From recruit with Apache License 2.0

5 votes

def test_numpy_cumsum(self, float_frame):
        """``np.cumsum`` matches the dense cumsum and rejects ``dtype``/``out``."""
        result = np.cumsum(float_frame)
        dense_cumsum = float_frame.to_dense().cumsum()
        tm.assert_sp_frame_equal(result, SparseDataFrame(dense_cumsum))

        # Passing dtype is expected to raise.
        with pytest.raises(ValueError,
                           match="the 'dtype' parameter is not supported"):
            np.cumsum(float_frame, dtype=np.int64)

        # Passing out is expected to raise as well.
        with pytest.raises(ValueError,
                           match="the 'out' parameter is not supported"):
            np.cumsum(float_frame, out=result)

Source File: test_frame.py From recruit with Apache License 2.0

5 votes

def test_cumsum(self, float_frame):
        """``cumsum`` with no axis, ``axis=None`` and ``axis=0`` matches dense."""
        expected = SparseDataFrame(float_frame.to_dense().cumsum())

        # All three call forms are checked against the same expected frame.
        for axis_kwargs in ({}, {'axis': None}, {'axis': 0}):
            result = float_frame.cumsum(**axis_kwargs)
            tm.assert_sp_frame_equal(result, expected)

Python pandas.SparseDataFrame() Examples