Python pandas.SparseDataFrame() Examples
The following are 30 code examples of pandas.SparseDataFrame().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: test_frame.py From recruit with Apache License 2.0 | 6 votes |
def test_comparison_op_scalar(self):
    """Scalar comparisons on a sparse frame stay sparse and match the dense result."""
    # GH 13001
    dense = pd.DataFrame({
        'A': [nan, nan, 0, 1],
        'B': [0, 1, 2, nan],
        'C': [1., 2., 3., 4.],
        'D': [nan, nan, nan, nan],
    })
    sparse_frame = dense.to_sparse()

    # comparison changes internal repr, compare with dense
    greater_than_one = sparse_frame > 1
    assert isinstance(greater_than_one, pd.SparseDataFrame)
    tm.assert_frame_equal(greater_than_one.to_dense(), dense > 1)

    not_zero = sparse_frame != 0
    assert isinstance(not_zero, pd.SparseDataFrame)
    tm.assert_frame_equal(not_zero.to_dense(), dense != 0)
Example #2
Source File: test_frame.py From recruit with Apache License 2.0 | 6 votes |
def test_notna(self):
    """Check ``notna`` on sparse frames built with the default and with a 0 fill value."""
    # GH 8276
    nan_filled = pd.SparseDataFrame({
        'A': [np.nan, np.nan, 1, 2, np.nan],
        'B': [0, np.nan, np.nan, 2, np.nan],
    })
    mask = nan_filled.notna()

    expected_sparse = pd.SparseDataFrame(
        {
            'A': [False, False, True, True, False],
            'B': [True, False, False, True, False],
        },
        default_fill_value=False,
    )
    # The frame-level fill value is overwritten after construction so that
    # it compares equal to the frame-level fill value of the result.
    expected_sparse._default_fill_value = np.nan
    tm.assert_sp_frame_equal(mask, expected_sparse)

    # if fill_value is not nan, True can be included in sp_values
    zero_filled = pd.SparseDataFrame(
        {
            'A': [0, 0, 1, 2, np.nan],
            'B': [0, np.nan, 0, 2, np.nan],
        },
        default_fill_value=0.,
    )
    mask = zero_filled.notna()
    assert isinstance(mask, pd.SparseDataFrame)

    expected_dense = pd.DataFrame({
        'A': [True, True, True, True, False],
        'B': [True, False, True, True, False],
    })
    tm.assert_frame_equal(mask.to_dense(), expected_dense)
Example #3
Source File: test_frame.py From recruit with Apache License 2.0 | 6 votes |
def test_isna(self):
    """Check ``isna`` on sparse frames built with the default and with a 0 fill value."""
    # GH 8276
    nan_filled = pd.SparseDataFrame({
        'A': [np.nan, np.nan, 1, 2, np.nan],
        'B': [0, np.nan, np.nan, 2, np.nan],
    })
    mask = nan_filled.isna()

    expected_sparse = pd.SparseDataFrame(
        {
            'A': [True, True, False, False, True],
            'B': [False, True, True, False, True],
        },
        default_fill_value=True,
    )
    # The frame-level fill value is overwritten after construction so that
    # it compares equal to the frame-level fill value of the result.
    expected_sparse._default_fill_value = np.nan
    tm.assert_sp_frame_equal(mask, expected_sparse)

    # if fill_value is not nan, True can be included in sp_values
    zero_filled = pd.SparseDataFrame(
        {
            'A': [0, 0, 1, 2, np.nan],
            'B': [0, np.nan, 0, 2, np.nan],
        },
        default_fill_value=0.,
    )
    mask = zero_filled.isna()
    assert isinstance(mask, pd.SparseDataFrame)

    expected_dense = pd.DataFrame({
        'A': [False, False, False, False, True],
        'B': [False, True, False, False, True],
    })
    tm.assert_frame_equal(mask.to_dense(), expected_dense)
Example #4
Source File: test_series.py From vnpy_crypto with MIT License | 6 votes |
def test_to_frame(self):
    """``SparseSeries.to_frame`` yields a sparse frame, with or without renaming."""
    # GH 9850
    values = [1, 2, 0, nan, 4, nan, 0]

    # Default fill value: compare sparse-to-sparse.
    series = pd.SparseSeries(values, name='x')
    tm.assert_sp_frame_equal(series.to_frame(),
                             pd.SparseDataFrame({'x': values}))
    tm.assert_sp_frame_equal(series.to_frame(name='y'),
                             pd.SparseDataFrame({'y': values}))

    # Explicit fill_value=0 on the series.
    series = pd.SparseSeries(values, name='x', fill_value=0)
    expected_sparse = pd.SparseDataFrame({'x': values}, default_fill_value=0)
    tm.assert_sp_frame_equal(series.to_frame(), expected_sparse)

    # The renamed frame is compared after densifying.
    expected_dense = pd.DataFrame({'y': values})
    tm.assert_frame_equal(series.to_frame(name='y').to_dense(),
                          expected_dense)
Example #5
Source File: test_combine_concat.py From vnpy_crypto with MIT License | 6 votes |
def test_concat_sparse_dense_rows(self, fill_value, sparse_idx, dense_idx):
    """Row-wise concat of one sparse and one dense frame gives a sparse frame.

    ``sparse_idx`` / ``dense_idx`` select which of ``self.dense1`` and
    ``self.dense2`` is sparsified (using ``fill_value``) and which is left
    dense.
    """
    candidates = [self.dense1, self.dense2]
    kept_dense = candidates[dense_idx]
    to_sparsify = candidates[sparse_idx]

    mixed = [kept_dense, to_sparsify.to_sparse(fill_value=fill_value)]
    all_dense = [kept_dense, to_sparsify]

    # This will try both directions sparse + dense and dense + sparse
    orderings = ((mixed, all_dense), (mixed[::-1], all_dense[::-1]))
    for mixed_parts, dense_parts in orderings:
        res = pd.concat(mixed_parts)
        exp = pd.concat(dense_parts)

        assert isinstance(res, pd.SparseDataFrame)
        tm.assert_frame_equal(res.to_dense(), exp)
Example #6
Source File: test_frame.py From recruit with Apache License 2.0 | 6 votes |
def test_constructor_ndarray(self, float_frame):
    """Construct SparseDataFrames from ndarrays and check length validation."""
    raw = float_frame.values

    # no index or columns
    SparseDataFrame(raw)

    # 1d
    column_a = SparseDataFrame(float_frame['A'].values,
                               index=float_frame.index,
                               columns=['A'])
    tm.assert_sp_frame_equal(column_a, float_frame.reindex(columns=['A']))

    # raise on level argument
    with pytest.raises(TypeError):
        float_frame.reindex(columns=['A'], level=1)

    # wrong length index / columns
    short_index = float_frame.index[:-1]
    with pytest.raises(ValueError, match="^Index length"):
        SparseDataFrame(raw, index=short_index)

    short_columns = float_frame.columns[:-1]
    with pytest.raises(ValueError, match="^Column length"):
        SparseDataFrame(raw, columns=short_columns)

    # GH 9272
    # NOTE(review): no code follows this issue reference in the visible
    # snippet -- it may belong to whatever came next in the original file.
Example #7
Source File: test_combine_concat.py From recruit with Apache License 2.0 | 6 votes |
def test_concat_sparse_dense_rows(self, fill_value, sparse_idx, dense_idx):
    """Row-wise concat of one sparse and one dense frame gives a sparse frame.

    ``sparse_idx`` / ``dense_idx`` select which of ``self.dense1`` and
    ``self.dense2`` is sparsified (using ``fill_value``) and which is left
    dense.
    """
    candidates = [self.dense1, self.dense2]
    kept_dense = candidates[dense_idx]
    to_sparsify = candidates[sparse_idx]

    mixed = [kept_dense, to_sparsify.to_sparse(fill_value=fill_value)]
    all_dense = [kept_dense, to_sparsify]

    # This will try both directions sparse + dense and dense + sparse
    orderings = ((mixed, all_dense), (mixed[::-1], all_dense[::-1]))
    for mixed_parts, dense_parts in orderings:
        res = pd.concat(mixed_parts)
        exp = pd.concat(dense_parts)

        assert isinstance(res, pd.SparseDataFrame)
        tm.assert_frame_equal(res.to_dense(), exp)
Example #8
Source File: test_series.py From recruit with Apache License 2.0 | 6 votes |
def test_to_frame(self):
    """``SparseSeries.to_frame`` yields a sparse frame, with or without renaming."""
    # GH 9850
    values = [1, 2, 0, nan, 4, nan, 0]

    # Default fill value: compare sparse-to-sparse.
    series = pd.SparseSeries(values, name='x')
    tm.assert_sp_frame_equal(series.to_frame(),
                             pd.SparseDataFrame({'x': values}))
    tm.assert_sp_frame_equal(series.to_frame(name='y'),
                             pd.SparseDataFrame({'y': values}))

    # Explicit fill_value=0 on the series.
    series = pd.SparseSeries(values, name='x', fill_value=0)
    expected_sparse = pd.SparseDataFrame({'x': values}, default_fill_value=0)
    tm.assert_sp_frame_equal(series.to_frame(), expected_sparse)

    # The renamed frame is compared after densifying.
    expected_dense = pd.DataFrame({'y': values})
    tm.assert_frame_equal(series.to_frame(name='y').to_dense(),
                          expected_dense)
Example #9
Source File: test_frame.py From recruit with Apache License 2.0 | 6 votes |
def test_constructor_from_series(self):
    """A SparseDataFrame can be constructed from a single SparseSeries.

    The second half only prepares inputs for two list-of-series
    constructions that are still commented out (see the TODOs below).
    """
    # GH 2873
    x = Series(np.random.randn(10000), name='a')
    x = x.to_sparse(fill_value=0)
    assert isinstance(x, SparseSeries)
    df = SparseDataFrame(x)
    assert isinstance(df, SparseDataFrame)

    x = Series(np.random.randn(10000), name='a')
    y = Series(np.random.randn(10000), name='b')
    x2 = x.astype(float)
    # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was
    # removed in NumPy 2.0.
    x2.loc[:9998] = np.nan
    # TODO: x_sparse is unused...fix
    x_sparse = x2.to_sparse(fill_value=np.nan)  # noqa

    # Currently fails too with weird ufunc error
    # df1 = SparseDataFrame([x_sparse, y])

    y.loc[:9998] = 0
    # TODO: y_sparse is unused...fix
    y_sparse = y.to_sparse(fill_value=0)  # noqa
    # without sparse value raises error
    # df2 = SparseDataFrame([x2_sparse, y])
Example #10
Source File: test_to_from_scipy.py From recruit with Apache License 2.0 | 6 votes |
def test_from_scipy_correct_ordering(spmatrix):
    """A frame built from a scipy sparse matrix equals one built from the ndarray."""
    # GH 16179
    dense_values = np.arange(1, 5).reshape(2, 2)

    try:
        sparse_matrix = spmatrix(dense_values)
        assert sparse_matrix.dtype == dense_values.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and arr.dtype,
        # then the combination is not currently supported in NumPy, so we
        # can just skip testing it thoroughly
        return

    from_scipy = SparseDataFrame(sparse_matrix)
    from_ndarray = SparseDataFrame(dense_values)

    tm.assert_sp_frame_equal(from_scipy, from_ndarray)
    tm.assert_frame_equal(from_scipy.to_dense(), from_ndarray.to_dense())
Example #11
Source File: test_frame.py From recruit with Apache License 2.0 | 6 votes |
def test_astype_bool(self):
    """Casting an int64 sparse frame to a boolean SparseDtype keeps it sparse."""
    sparse = pd.SparseDataFrame({'A': SparseArray([0, 2, 0, 4],
                                                  fill_value=0,
                                                  dtype=np.int64),
                                 'B': SparseArray([0, 5, 0, 7],
                                                  fill_value=0,
                                                  dtype=np.int64)},
                                default_fill_value=0)
    assert sparse['A'].dtype == SparseDtype(np.int64)
    assert sparse['B'].dtype == SparseDtype(np.int64)

    res = sparse.astype(SparseDtype(bool, False))

    # Builtin ``bool`` is used throughout: ``np.bool`` was only an alias for
    # it, deprecated in NumPy 1.20 and removed in 1.24.
    exp = pd.SparseDataFrame({'A': SparseArray([False, True, False, True],
                                               dtype=bool,
                                               fill_value=False,
                                               kind='integer'),
                              'B': SparseArray([False, True, False, True],
                                               dtype=bool,
                                               fill_value=False,
                                               kind='integer')},
                             default_fill_value=False)
    tm.assert_sp_frame_equal(res, exp)
    assert res['A'].dtype == SparseDtype(bool)
    assert res['B'].dtype == SparseDtype(bool)
Example #12
Source File: test_frame.py From recruit with Apache License 2.0 | 6 votes |
def test_dense_to_sparse(self):
    """``DataFrame.to_sparse`` honours ``kind`` and ``fill_value`` and round-trips."""
    with_nans = DataFrame({
        'A': [nan, nan, nan, 1, 2],
        'B': [1, 2, nan, nan, nan],
    })

    # Defaults: NaN fill value, block index.
    block_sparse = with_nans.to_sparse()
    assert isinstance(block_sparse, SparseDataFrame)
    assert np.isnan(block_sparse.default_fill_value)
    assert isinstance(block_sparse['A'].sp_index, BlockIndex)
    tm.assert_frame_equal(block_sparse.to_dense(), with_nans)

    # kind='integer' switches the index type.
    int_sparse = with_nans.to_sparse(kind='integer')
    assert isinstance(int_sparse['A'].sp_index, IntIndex)

    # Explicit fill value of 0 on a float frame.
    with_zeros = DataFrame(
        {
            'A': [0, 0, 0, 1, 2],
            'B': [1, 2, 0, 0, 0],
        },
        dtype=float,
    )
    zero_sparse = with_zeros.to_sparse(fill_value=0)
    assert zero_sparse.default_fill_value == 0
    tm.assert_frame_equal(zero_sparse.to_dense(), with_zeros)
Example #13
Source File: InferenceLightGBM.py From KDDCup2019_admin with MIT License | 6 votes |
def get_node_id_feature_sparse(self, X):
    """Map every row of ``X`` through ``self.get_feaure`` and one-hot encode it.

    The rows of ``X.values`` are processed on a pool of 40 threads. The
    per-row results are collected into a DataFrame, expanded with
    ``pd.get_dummies``, wrapped in a ``pd.SparseDataFrame`` and cast to
    ``"float"``. Intermediate columns and the final frame are printed.

    Parameters
    ----------
    X : object exposing ``.values``
        Presumably a pandas DataFrame of input rows -- confirm with caller.

    Returns
    -------
    The float-cast sparse frame of dummy-encoded results.
    """
    pool = ThreadPool(40)
    try:
        # ``get_feaure`` (sic) is defined elsewhere on the class.
        results = pool.map(self.get_feaure, np.array(X.values))
    finally:
        # Always release the worker threads, even when a row raises;
        # otherwise every call leaves a 40-thread pool behind.
        pool.close()
        pool.join()

    results = pd.DataFrame(list(results))
    print(results.columns)
    print("-------------")
    results = pd.SparseDataFrame(pd.get_dummies(results)).astype("float")
    print(results)
    print(results.columns)
    return results
Example #14
Source File: conftest.py From recruit with Apache License 2.0 | 5 votes |
def float_frame():
    """
    Sparse float DataFrame fixture indexed by the module-level ``dates``.

    Per the original description: DatetimeIndex, columns
    ['A', 'B', 'C', 'D'], with some entries missing.
    """
    # default_kind='block' is the default
    sparse_frame = SparseDataFrame(data, index=dates, default_kind='block')
    return sparse_frame
Example #15
Source File: test_series.py From vnpy_crypto with MIT License | 5 votes |
def test_concat_axis1(self):
    """Column-wise concat of two SparseSeries matches the sparsified dense concat."""
    x_values = np.array([1, 2, np.nan, np.nan, 0, np.nan])
    y_values = np.array([3, np.nan, 4, 0, 0])

    sparse_parts = [
        pd.SparseSeries(x_values, name='x'),
        pd.SparseSeries(y_values, name='y'),
    ]
    res = pd.concat(sparse_parts, axis=1)

    dense_parts = [
        pd.Series(x_values, name='x'),
        pd.Series(y_values, name='y'),
    ]
    exp = pd.SparseDataFrame(pd.concat(dense_parts, axis=1))

    tm.assert_sp_frame_equal(res, exp)
Example #16
Source File: conftest.py From recruit with Apache License 2.0 | 5 votes |
def float_frame_int_kind():
    """
    Sparse float DataFrame fixture built with ``default_kind='integer'``.

    Per the original description: DatetimeIndex, columns
    ['A', 'B', 'C', 'D'], with some entries missing.
    """
    sparse_frame = SparseDataFrame(data, index=dates, default_kind='integer')
    return sparse_frame
Example #17
Source File: conftest.py From recruit with Apache License 2.0 | 5 votes |
def float_string_frame():
    """
    Sparse DataFrame fixture of floats plus one string column ``'foo'``.

    Per the original description: DatetimeIndex, columns
    ['A', 'B', 'C', 'D', 'foo'], with some entries missing.
    """
    frame = SparseDataFrame(data, index=dates)
    # One 'bar' per entry in ``dates``.
    foo_values = ['bar'] * len(dates)
    frame['foo'] = SparseArray(foo_values)
    return frame
Example #18
Source File: test_reshape.py From vnpy_crypto with MIT License | 5 votes |
def sparse_df():
    """Return a SparseDataFrame with a 1 at (i, i) for i in 0..2 (an 'eye' pattern)."""
    diagonal = {i: {i: 1} for i in range(3)}
    return pd.SparseDataFrame(diagonal)  # eye
Example #19
Source File: test_combine_concat.py From vnpy_crypto with MIT License | 5 votes |
def test_concat_axis1(self):
    """Column-wise concat of two SparseSeries matches the sparsified dense concat."""
    x_values = np.array([1, 2, np.nan, np.nan, 0, np.nan])
    y_values = np.array([3, np.nan, 4, 0, 0])

    sparse_parts = [
        pd.SparseSeries(x_values, name='x'),
        pd.SparseSeries(y_values, name='y'),
    ]
    res = pd.concat(sparse_parts, axis=1)

    dense_parts = [
        pd.Series(x_values, name='x'),
        pd.Series(y_values, name='y'),
    ]
    exp = pd.SparseDataFrame(pd.concat(dense_parts, axis=1))

    tm.assert_sp_frame_equal(res, exp)
Example #20
Source File: test_format.py From vnpy_crypto with MIT License | 5 votes |
def test_sparse_repr_after_set(self):
    """Regression test: ``repr`` of a sparse frame after a chained assignment.

    The frame is copied first, a chained assignment is performed on the
    original, ``repr`` is called, and the original is then compared against
    the earlier copy.
    """
    # GH 15488
    sdf = pd.SparseDataFrame([[np.nan, 1], [2, np.nan]])
    # Snapshot taken before the assignment; used as the expected value below.
    res = sdf.copy()

    # Ignore the warning
    with pd.option_context('mode.chained_assignment', None):
        sdf[0][1] = 2  # This line triggers the bug

    # The return value is discarded; the call only has to complete.
    repr(sdf)
    tm.assert_sp_frame_equal(sdf, res)
Example #21
Source File: conftest.py From recruit with Apache License 2.0 | 5 votes |
def empty_frame():
    """
    Fixture returning a SparseDataFrame constructed with no arguments.
    """
    blank = SparseDataFrame()
    return blank
Example #22
Source File: test_apply.py From recruit with Apache License 2.0 | 5 votes |
def test_apply_keep_sparse_dtype():
    """``apply(np.exp)`` gives equal frames for a sparse frame and its DataFrame wrapper."""
    # GH 23744
    cells = np.array([[0, 1, 0],
                      [0, 0, 0],
                      [0, 0, 1]])
    sparse_frame = SparseDataFrame(cells,
                                   columns=['b', 'a', 'c'],
                                   default_fill_value=1)
    wrapped = DataFrame(sparse_frame)

    expected = sparse_frame.apply(np.exp)
    result = wrapped.apply(np.exp)

    tm.assert_frame_equal(expected, result)
Example #23
Source File: test_apply.py From recruit with Apache License 2.0 | 5 votes |
def test_applymap(frame):
    """Smoke test: ``applymap`` on the ``frame`` fixture returns a SparseDataFrame."""
    # just test that it works
    def double(value):
        return value * 2

    doubled = frame.applymap(double)
    assert isinstance(doubled, SparseDataFrame)
Example #24
Source File: test_apply.py From recruit with Apache License 2.0 | 5 votes |
def fill_frame(frame):
    """Return a SparseDataFrame built from ``frame`` with NaNs replaced by 2.

    The values are copied before modification, the result uses columns
    ['A', 'B', 'C', 'D'], ``frame``'s index and a default fill value of 2.
    """
    filled = frame.values.copy()
    nan_mask = np.isnan(filled)
    filled[nan_mask] = 2

    return SparseDataFrame(
        filled,
        columns=['A', 'B', 'C', 'D'],
        default_fill_value=2,
        index=frame.index,
    )
Example #25
Source File: test_apply.py From recruit with Apache License 2.0 | 5 votes |
def frame(dates):
    """Return a four-column, ten-row SparseDataFrame with NaN gaps, indexed by ``dates``."""
    missing = np.nan
    columns = dict(
        A=[missing, missing, missing, 0, 1, 2, 3, 4, 5, 6],
        B=[0, 1, 2, missing, missing, missing, 3, 4, 5, 6],
        C=np.arange(10, dtype=np.float64),
        D=[0, 1, 2, 3, 4, 5, missing, missing, missing, missing],
    )
    return SparseDataFrame(columns, index=dates)
Example #26
Source File: test_apply.py From recruit with Apache License 2.0 | 5 votes |
def empty():
    """Return a SparseDataFrame constructed with no arguments."""
    blank = SparseDataFrame()
    return blank
Example #27
Source File: test_frame.py From recruit with Apache License 2.0 | 5 votes |
def test_dropna(self, inplace, how):
    """``dropna(axis=1)`` removes the all-NaN column, in place or by return value."""
    # Tests regression #21172.
    expected = pd.SparseDataFrame({"F2": [0, 1]})

    all_nan = [float('nan'), float('nan')]
    input_df = pd.SparseDataFrame({"F1": all_nan, "F2": [0, 1]})

    returned = input_df.dropna(axis=1, inplace=inplace, how=how)
    # With inplace=True the mutated input is the object to check.
    result_df = input_df if inplace else returned

    tm.assert_sp_frame_equal(expected, result_df)
Example #28
Source File: test_frame.py From recruit with Apache License 2.0 | 5 votes |
def test_quantile_multi(self):
    """Multi-quantile on a sparse frame matches the dense result, dense and sparsified."""
    # GH 17386
    rows = [[1, 1], [2, 10], [3, 100], [nan, nan]]
    quantiles = [0.1, 0.5]

    result = SparseDataFrame(rows).quantile(quantiles)

    dense_expected = DataFrame(rows).quantile(quantiles)
    sparse_expected = SparseDataFrame(dense_expected)

    tm.assert_frame_equal(result, dense_expected)
    tm.assert_sp_frame_equal(result, sparse_expected)
Example #29
Source File: test_frame.py From recruit with Apache License 2.0 | 5 votes |
def test_numpy_cumsum(self, float_frame):
    """``np.cumsum`` works on a sparse frame and rejects ``dtype`` / ``out``."""
    expected = SparseDataFrame(float_frame.to_dense().cumsum())
    result = np.cumsum(float_frame)
    tm.assert_sp_frame_equal(result, expected)

    with pytest.raises(ValueError,
                       match="the 'dtype' parameter is not supported"):
        np.cumsum(float_frame, dtype=np.int64)

    with pytest.raises(ValueError,
                       match="the 'out' parameter is not supported"):
        np.cumsum(float_frame, out=result)
Example #30
Source File: test_frame.py From recruit with Apache License 2.0 | 5 votes |
def test_cumsum(self, float_frame):
    """``cumsum`` with no axis, ``axis=None`` and ``axis=0`` all match the dense cumsum."""
    expected = SparseDataFrame(float_frame.to_dense().cumsum())

    # Same expectation for each way of spelling the axis argument.
    for axis_kwargs in ({}, {'axis': None}, {'axis': 0}):
        result = float_frame.cumsum(**axis_kwargs)
        tm.assert_sp_frame_equal(result, expected)