Python pandas.SparseArray() Examples
The following are 30 code examples of pandas.SparseArray().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: test_array.py From recruit with Apache License 2.0 | 6 votes |
def test_constructor_float32(self):
    """Build a SparseArray from float32 data and check dtype, values, index.

    Regression test for GH 10648.
    """
    raw = np.array([1., np.nan, 3], dtype=np.float32)
    sparse = SparseArray(raw, dtype=np.float32)

    assert sparse.dtype == SparseDtype(np.float32)

    # The stored values are expected to be just 1 and 3, as float32.
    stored_values = np.array([1, 3], dtype=np.float32)
    tm.assert_numpy_array_equal(sparse.sp_values, stored_values)

    # Behavior change: np.asarray densifies, so sp_values is no longer
    # compared against np.asarray(sparse) here.
    stored_positions = np.array([0, 2], dtype=np.int32)
    tm.assert_numpy_array_equal(sparse.sp_index.indices, stored_positions)

    # Both dense accessors must give back the float32 input unchanged.
    dense_views = [sparse.to_dense(), sparse.values]
    for densified in dense_views:
        assert densified.dtype == np.float32
        tm.assert_numpy_array_equal(densified, raw)
Example #2
Source File: test_sparse.py From recruit with Apache License 2.0 | 6 votes |
def test_isna(self, data_missing):
    """Check ``pd.isna`` / ``Series.isna`` on the ``data_missing`` fixture.

    The expected mask is a boolean SparseArray whose fill value is
    ``pd.isna`` of the input's own fill value.
    """
    fill_is_na = pd.isna(data_missing.dtype.fill_value)
    mask_dtype = SparseDtype(bool, fill_is_na)
    mask = SparseArray([True, False], dtype=mask_dtype)

    # Array level.
    array_result = pd.isna(data_missing)
    self.assert_equal(array_result, mask)

    # Series level.
    series_result = pd.Series(data_missing).isna()
    self.assert_series_equal(series_result, pd.Series(mask))

    # GH 21189: dropping every row must leave an empty Series that still
    # carries the sparse boolean dtype.
    emptied = pd.Series(data_missing).drop([0, 1]).isna()
    empty_expected = pd.Series([], dtype=mask_dtype)
    self.assert_series_equal(emptied, empty_expected)
Example #3
Source File: test_sparse.py From recruit with Apache License 2.0 | 6 votes |
def test_combine_le(self, data_repeated):
    """Check ``Series.combine`` with ``<=`` against a Series and a scalar.

    The expected results wrap a SparseArray built with ``fill_value=False``,
    i.e. Series[Sparse[bool]] rather than Series[bool].
    """
    def less_equal(x1, x2):
        return x1 <= x2

    left_data, right_data = data_repeated(2)
    left = pd.Series(left_data)
    right = pd.Series(right_data)

    # Series combined with another Series.
    result = left.combine(right, less_equal)
    pairwise = [a <= b for a, b in zip(list(left_data), list(right_data))]
    expected = pd.Series(pd.SparseArray(pairwise, fill_value=False))
    self.assert_series_equal(result, expected)

    # Series combined with a scalar (its own first element).
    scalar = left.iloc[0]
    result = left.combine(scalar, less_equal)
    against_scalar = [a <= scalar for a in list(left_data)]
    expected = pd.Series(pd.SparseArray(against_scalar, fill_value=False))
    self.assert_series_equal(result, expected)
Example #4
Source File: test_array.py From recruit with Apache License 2.0 | 6 votes |
def test_numpy_sum(self):
    """Check ``np.sum`` on a SparseArray and its unsupported keyword args."""
    values = np.arange(10).astype(float)
    total = np.sum(SparseArray(values))
    assert total == 45.0

    # With index 5 replaced by NaN the expected sum is 40 for either
    # fill value.
    values[5] = np.nan
    for fill in (2, np.nan):
        total = np.sum(SparseArray(values, fill_value=fill))
        assert total == 40.0

    # numpy-compat keywords are rejected with explicit messages.
    msg = "the 'dtype' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        np.sum(SparseArray(values), dtype=np.int64)

    msg = "the 'out' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        np.sum(SparseArray(values), out=total)
Example #5
Source File: test_sparse.py From recruit with Apache License 2.0 | 6 votes |
def _compare_other(self, s, data, op_name, other):
    """Check a comparison op on ``data``, as a raw array and as a Series.

    The incoming ``s`` argument is not read; the name is rebound to
    ``pd.Series(data)`` for the Series-level check.
    """
    compare = self.get_op_from_name(op_name)

    # --- array operand ---
    array_result = pd.Series(compare(data, other))

    # The fill value is hard to test here since the expected value is not
    # known in general; tests in `tests/sparse` are relied on for that.
    result_dtype = array_result.dtype
    assert isinstance(result_dtype, SparseDtype)
    assert result_dtype.subtype == np.dtype('bool')

    with np.errstate(all='ignore'):
        dense_outcome = compare(np.asarray(data), np.asarray(other))
        expected = pd.Series(
            pd.SparseArray(dense_outcome,
                           fill_value=array_result.values.fill_value)
        )
    tm.assert_series_equal(array_result, expected)

    # --- Series operand ---
    s = pd.Series(data)
    series_result = compare(s, other)
    tm.assert_series_equal(series_result, expected)
Example #6
Source File: test_array.py From recruit with Apache License 2.0 | 6 votes |
def test_cumsum(self, data, expected, numpy):
    """Check cumulative sum via ``np.cumsum`` or the ``cumsum`` method.

    ``numpy`` selects which entry point is exercised, and therefore which
    error path is checked at the end.
    """
    if numpy:
        cumsum = np.cumsum
    else:
        def cumsum(s):
            return s.cumsum()

    # The same expected result is asserted for every fill value.
    for ctor_kwargs in ({}, {'fill_value': np.nan}, {'fill_value': 2}):
        out = cumsum(SparseArray(data, **ctor_kwargs))
        tm.assert_sp_array_equal(out, expected)

    if not numpy:
        axis = 1  # SparseArray currently 1-D, so only axis = 0 is valid.
        msg = "axis\\(={axis}\\) out of bounds".format(axis=axis)
        with pytest.raises(ValueError, match=msg):
            SparseArray(data).cumsum(axis=axis)
        return

    # numpy compatibility checks.
    msg = "the 'dtype' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        np.cumsum(SparseArray(data), dtype=np.int64)

    msg = "the 'out' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        np.cumsum(SparseArray(data), out=out)
Example #7
Source File: test_indexing.py From recruit with Apache License 2.0 | 6 votes |
def test_getitem(self):
    """Check ``[]`` indexing on ``self.sparse`` against ``self.orig``."""
    dense = self.orig
    sp = self.sparse

    # Scalar lookups.
    assert sp[0] == 1
    assert np.isnan(sp[1])
    assert sp[3] == 3

    # List of positions.
    picked = sp[[1, 3, 4]]
    tm.assert_sp_series_equal(picked, dense[[1, 3, 4]].to_sparse())

    # Boolean mask given as a dense array.
    picked = sp[dense % 2 == 1]
    odd_expected = dense[dense % 2 == 1].to_sparse()
    tm.assert_sp_series_equal(picked, odd_expected)

    # Boolean mask given as a sparse array (actually it coerces to a
    # normal Series).
    picked = sp[sp % 2 == 1]
    odd_expected = dense[dense % 2 == 1].to_sparse()
    tm.assert_sp_series_equal(picked, odd_expected)

    # Boolean mask given as an explicit SparseArray.
    sparse_mask = pd.SparseArray(sp % 2 == 1, dtype=bool)
    tm.assert_sp_series_equal(sp[sparse_mask], odd_expected)
Example #8
Source File: test_indexing.py From recruit with Apache License 2.0 | 6 votes |
def test_loc_index(self):
    """Check ``.loc`` on a sparse Series with a string index."""
    dense = pd.Series([1, np.nan, np.nan, 3, np.nan], index=list('ABCDE'))
    sp = dense.to_sparse()

    # Scalar label lookups.
    assert sp.loc['A'] == 1
    assert np.isnan(sp.loc['B'])

    # List of labels.
    labels = ['A', 'C', 'D']
    picked = sp.loc[labels]
    tm.assert_sp_series_equal(picked, dense.loc[labels].to_sparse())

    # Boolean mask given as a dense array.
    picked = sp.loc[dense % 2 == 1]
    odd_expected = dense.loc[dense % 2 == 1].to_sparse()
    tm.assert_sp_series_equal(picked, odd_expected)

    # Boolean mask given as a sparse array (actually it coerces to a
    # normal Series).
    picked = sp.loc[sp % 2 == 1]
    odd_expected = dense.loc[dense % 2 == 1].to_sparse()
    tm.assert_sp_series_equal(picked, odd_expected)

    # Boolean mask given as an explicit SparseArray.
    # NOTE(review): this lookup uses plain ``[]`` rather than ``.loc``;
    # confirm whether that is intentional.
    sparse_mask = pd.SparseArray(sp % 2 == 1, dtype=bool)
    tm.assert_sp_series_equal(sp[sparse_mask], odd_expected)
Example #9
Source File: test_array.py From recruit with Apache License 2.0 | 6 votes |
def test_numpy_mean(self):
    """Check ``np.mean`` on a SparseArray and its unsupported keyword args."""
    values = np.arange(10).astype(float)
    average = np.mean(SparseArray(values))
    assert average == 4.5

    # With index 5 replaced by NaN the expected mean is 40 / 9.
    values[5] = np.nan
    average = np.mean(SparseArray(values))
    assert average == 40.0 / 9

    # numpy-compat keywords are rejected with explicit messages.
    msg = "the 'dtype' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        np.mean(SparseArray(values), dtype=np.int64)

    msg = "the 'out' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        np.mean(SparseArray(values), out=average)
Example #10
Source File: test_array.py From recruit with Apache License 2.0 | 6 votes |
def test_numpy_all(self, data, pos, neg):
    """Check ``np.all`` on a SparseArray before and after writing ``neg``.

    Regression test for GH 17570. ``data`` is modified in place
    (``data[1] = neg``).
    """
    # Before the write, np.all is expected to be truthy.
    assert np.all(SparseArray(data))
    assert np.all(SparseArray(data, fill_value=pos))

    # After writing `neg` at position 1 it is expected to be falsy.
    data[1] = neg
    assert not np.all(SparseArray(data))
    assert not np.all(SparseArray(data, fill_value=pos))

    # raises with a different message on py2.
    msg = "the \'out\' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        np.all(SparseArray(data), out=np.array([]))
Example #11
Source File: test_array.py From recruit with Apache License 2.0 | 6 votes |
def test_nonzero(self):
    """Check ``SparseArray.nonzero`` for two inputs (regression #21172).

    The inputs differ only in their first two entries (NaN vs 0) and are
    expected to report the same positions.
    """
    expected = np.array([2, 5, 9], dtype=np.int32)

    with_nan_prefix = pd.SparseArray([
        float('nan'), float('nan'), 1, 0, 0, 2, 0, 0, 0, 3, 0, 0
    ])
    (positions,) = with_nan_prefix.nonzero()
    tm.assert_numpy_array_equal(expected, positions)

    with_zero_prefix = pd.SparseArray([0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0])
    (positions,) = with_zero_prefix.nonzero()
    tm.assert_numpy_array_equal(expected, positions)
Example #12
Source File: test_array.py From recruit with Apache License 2.0 | 6 votes |
def test_getslice_tuple(self):
    """Check one-element tuple slicing, and that a 2-D index raises."""
    dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0])

    # Same check with the default fill value and with fill_value=0.
    for ctor_kwargs in ({}, {'fill_value': 0}):
        sparse = SparseArray(dense, **ctor_kwargs)
        sliced = sparse[4:, ]
        wanted = SparseArray(dense[4:, ], **ctor_kwargs)
        tm.assert_sp_array_equal(sliced, wanted)

    # A two-element tuple index must raise on the sparse array ...
    with pytest.raises(IndexError):
        sparse[4:, :]

    # ... and on the dense array as well (numpy compat check).
    with pytest.raises(IndexError):
        dense[4:, :]
Example #13
Source File: test_array.py From recruit with Apache License 2.0 | 6 votes |
def test_getslice(self):
    """Check slicing of ``self.arr`` against slices of its dense values."""
    sparse = self.arr

    without_tail = sparse[:-3]
    tm.assert_sp_array_equal(without_tail, SparseArray(sparse.values[:-3]))

    last_four = sparse[-4:]
    tm.assert_sp_array_equal(last_four, SparseArray(sparse.values[-4:]))

    # two corner cases from Series
    from_minus_twelve = sparse[-12:]
    tm.assert_sp_array_equal(from_minus_twelve, SparseArray(sparse))

    up_to_minus_twelve = sparse[:-12]
    tm.assert_sp_array_equal(up_to_minus_twelve,
                             SparseArray(sparse.values[:0]))
Example #14
Source File: test_series.py From recruit with Apache License 2.0 | 6 votes |
def test_dropna(self):
    """Check ``dropna`` on a zero-filled SparseSeries and on ``self.bseries``."""
    series = SparseSeries([0, 0, 0, nan, nan, 5, 6], fill_value=0)
    dropped = series.dropna()

    # Reference: drop NaN from the dense form, then drop the zeros too.
    reference = series.to_dense().dropna()
    reference = reference[reference != 0]
    reference_arr = pd.SparseArray(reference.values, fill_value=0,
                                   kind='block')

    tm.assert_sp_array_equal(dropped.values, reference_arr)
    tm.assert_index_equal(dropped.index, reference.index)
    assert len(dropped.sp_values) == 2

    # For self.bseries the result must not be a SparseSeries.
    b_dropped = self.bseries.dropna()
    b_reference = self.bseries.to_dense().dropna()
    assert not isinstance(b_dropped, SparseSeries)
    tm.assert_series_equal(b_dropped, b_reference)
Example #15
Source File: test_array.py From recruit with Apache License 2.0 | 6 votes |
def test_take_filling_all_nan(self):
    """Check ``take`` on an all-NaN SparseArray, with and without fill_value."""
    all_nan = SparseArray([np.nan, np.nan, np.nan, np.nan, np.nan])

    # XXX: did the default kind from take change?
    # The same block-kind result is expected with or without fill_value.
    for take_kwargs in ({}, {'fill_value': True}):
        taken = all_nan.take(np.array([1, 0, -1]), **take_kwargs)
        wanted = SparseArray([np.nan, np.nan, np.nan], kind='block')
        tm.assert_sp_array_equal(taken, wanted)

    # Out-of-bounds positions raise IndexError.
    failing_calls = [
        ([1, -6], {}),
        ([1, 5], {}),
        ([1, 5], {'fill_value': True}),
    ]
    for positions, take_kwargs in failing_calls:
        with pytest.raises(IndexError):
            all_nan.take(np.array(positions), **take_kwargs)
Example #16
Source File: test_frame.py From recruit with Apache License 2.0 | 6 votes |
def test_constructor_preserve_attr(self):
    """Check dtype and fill_value survive the sparse containers (GH 13866)."""
    def check_attrs(obj):
        # Every container must report the int64 sparse dtype and fill 0.
        assert obj.dtype == SparseDtype(np.int64)
        assert obj.fill_value == 0

    arr = pd.SparseArray([1, 0, 3, 0], dtype=np.int64, fill_value=0)
    check_attrs(arr)

    # SparseDataFrame built from a dict holding the array.
    frame = pd.SparseDataFrame({'x': arr})
    check_attrs(frame['x'])

    # SparseSeries built from the array.
    series = pd.SparseSeries(arr, name='x')
    check_attrs(series)

    # SparseDataFrame built from the series directly.
    frame = pd.SparseDataFrame(series)
    check_attrs(frame['x'])

    # SparseDataFrame built from a dict holding the series.
    frame = pd.SparseDataFrame({'x': series})
    check_attrs(frame['x'])
Example #17
Source File: test_frame.py From recruit with Apache License 2.0 | 6 votes |
def test_astype_bool(self):
    """Cast an int64 SparseDataFrame to ``SparseDtype(bool, False)``.

    Uses the builtin ``bool`` instead of the ``np.bool`` alias, which was
    deprecated in NumPy 1.20 and removed in NumPy 1.24. In the NumPy
    versions that had the old alias it was the builtin ``bool`` itself.
    """
    sparse = pd.SparseDataFrame(
        {'A': SparseArray([0, 2, 0, 4], fill_value=0, dtype=np.int64),
         'B': SparseArray([0, 5, 0, 7], fill_value=0, dtype=np.int64)},
        default_fill_value=0)
    assert sparse['A'].dtype == SparseDtype(np.int64)
    assert sparse['B'].dtype == SparseDtype(np.int64)

    res = sparse.astype(SparseDtype(bool, False))

    # Expected frame: the non-zero entries become True.
    exp = pd.SparseDataFrame(
        {'A': SparseArray([False, True, False, True],
                          dtype=bool, fill_value=False, kind='integer'),
         'B': SparseArray([False, True, False, True],
                          dtype=bool, fill_value=False, kind='integer')},
        default_fill_value=False)
    tm.assert_sp_frame_equal(res, exp)
    assert res['A'].dtype == SparseDtype(bool)
    assert res['B'].dtype == SparseDtype(bool)
Example #18
Source File: test_array.py From recruit with Apache License 2.0 | 6 votes |
def test_astype(self):
    """Check ``SparseArray.astype`` for several targets and one failure."""
    source = SparseArray([None, None, 0, 2])

    # float -> float, target given as a string alias.
    converted = source.astype("Sparse[float32]")
    wanted = SparseArray([None, None, 0, 2], dtype=np.dtype('float32'))
    tm.assert_sp_array_equal(converted, wanted)

    # float -> float64 with fill_value=0.
    target = SparseDtype("float64", fill_value=0)
    converted = source.astype(target)
    stored = np.array([0., 2.], dtype=target.subtype)
    wanted = SparseArray._simple_new(stored, IntIndex(4, [2, 3]), target)
    tm.assert_sp_array_equal(converted, wanted)

    # float -> int64 with fill_value=0.
    target = SparseDtype("int64", 0)
    converted = source.astype(target)
    stored = np.array([0, 2], dtype=np.int64)
    wanted = SparseArray._simple_new(stored, IntIndex(4, [2, 3]), target)
    tm.assert_sp_array_equal(converted, wanted)

    # An array holding NaN must refuse the integer cast.
    holds_nan = SparseArray([0, np.nan, 0, 1], fill_value=0)
    with pytest.raises(ValueError, match='NA'):
        holds_nan.astype('Sparse[i8]')
Example #19
Source File: test_array.py From recruit with Apache License 2.0 | 5 votes |
def test_take_filling_fill_value(self):
    """Check ``take`` on a zero-filled SparseArray (same tests as GH 12631)."""
    sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0)
    positions = [1, 0, -1]

    # Default call.
    taken = sparse.take(np.array(positions))
    wanted = SparseArray([0, np.nan, 4], fill_value=0)
    tm.assert_sp_array_equal(taken, wanted)

    # allow_fill=True
    # XXX: behavior change.
    # the old way of filling self.fill_value doesn't follow EA rules.
    # It's supposed to be self.dtype.na_value (nan in this case)
    taken = sparse.take(np.array(positions), allow_fill=True)
    wanted = SparseArray([0, np.nan, np.nan], fill_value=0)
    tm.assert_sp_array_equal(taken, wanted)

    # allow_fill=False
    taken = sparse.take(np.array(positions),
                        allow_fill=False, fill_value=True)
    wanted = SparseArray([0, np.nan, 4], fill_value=0)
    tm.assert_sp_array_equal(taken, wanted)

    # With allow_fill=True these index arrays raise ValueError.
    msg = ("Invalid value in 'indices'.")
    for bad_positions in ([1, 0, -2], [1, 0, -5]):
        with pytest.raises(ValueError, match=msg):
            sparse.take(np.array(bad_positions), allow_fill=True)

    # Out-of-bounds positions raise IndexError.
    out_of_bounds_calls = [
        ([1, -6], {}),
        ([1, 5], {}),
        ([1, 5], {'fill_value': True}),
    ]
    for bad_positions, take_kwargs in out_of_bounds_calls:
        with pytest.raises(IndexError):
            sparse.take(np.array(bad_positions), **take_kwargs)
Example #20
Source File: test_array.py From recruit with Apache License 2.0 | 5 votes |
def test_sparse_series_round_trip(self, kind, fill):
    """Round-trip SparseArray -> SparseSeries -> SparseArray (gh-13999)."""
    # Float data with NaN.
    original = SparseArray([np.nan, 1, np.nan, 2, 3],
                           kind=kind, fill_value=fill)
    restored = SparseArray(SparseSeries(original))
    tm.assert_sp_array_equal(original, restored)

    # int64 data, rebuilt with an explicit dtype ...
    original = SparseArray([0, 0, 0, 1, 1, 2], dtype=np.int64,
                           kind=kind, fill_value=fill)
    restored = SparseArray(SparseSeries(original), dtype=np.int64)
    tm.assert_sp_array_equal(original, restored)

    # ... and with no dtype passed.
    restored = SparseArray(SparseSeries(original))
    tm.assert_sp_array_equal(original, restored)
Example #21
Source File: test_array.py From recruit with Apache License 2.0 | 5 votes |
def test_take_fill_value(self):
    """Check ``take`` on a zero-filled SparseArray against ``np.take``."""
    dense = np.array([1, np.nan, 0, 3, 0])
    sparse = SparseArray(dense, fill_value=0)

    for positions in ([0], [1, 3, 4]):
        wanted = SparseArray(np.take(dense, positions), fill_value=0)
        tm.assert_sp_array_equal(sparse.take(positions), wanted)
Example #22
Source File: test_array.py From recruit with Apache License 2.0 | 5 votes |
def test_take(self):
    """Check ``self.arr.take`` against ``np.take`` on ``self.arr_data``."""
    for positions in ([2, 3], [0, 1, 2]):
        wanted = SparseArray(np.take(self.arr_data, positions))
        tm.assert_sp_array_equal(self.arr.take(positions), wanted)
Example #23
Source File: test_array.py From recruit with Apache License 2.0 | 5 votes |
def test_constructor_from_too_large_array(self):
    """A 2-D ndarray passed to SparseArray must raise TypeError."""
    two_dimensional = np.arange(10).reshape((2, 5))
    with pytest.raises(TypeError, match="expected dimension <= 1 data"):
        SparseArray(two_dimensional)
Example #24
Source File: test_array.py From recruit with Apache License 2.0 | 5 votes |
def test_sparse_series_round_trip2(self, kind, fill):
    """Round-trip a boolean SparseArray through SparseSeries (gh-13999).

    Uses the builtin ``bool`` instead of the ``np.bool`` alias, which was
    deprecated in NumPy 1.20 and removed in NumPy 1.24.
    """
    arr = SparseArray([True, False, True, True], dtype=bool,
                      kind=kind, fill_value=fill)
    res = SparseArray(SparseSeries(arr))
    tm.assert_sp_array_equal(arr, res)

    # NOTE(review): this repeats the round trip above unchanged; confirm
    # whether a variant (e.g. with an explicit dtype) was intended.
    res = SparseArray(SparseSeries(arr))
    tm.assert_sp_array_equal(arr, res)
Example #25
Source File: test_array.py From recruit with Apache License 2.0 | 5 votes |
def test_take_negative(self):
    """Check ``self.arr.take`` with negative positions against ``np.take``."""
    for positions in ([-1], [-4, -3, -2]):
        wanted = SparseArray(np.take(self.arr_data, positions))
        tm.assert_sp_array_equal(self.arr.take(positions), wanted)
Example #26
Source File: test_array.py From recruit with Apache License 2.0 | 5 votes |
def test_constructor_from_sparse(self):
    """Rebuilding from ``self.zarr`` must give fill value 0 and equal sp_values."""
    source = self.zarr
    rebuilt = SparseArray(source)
    assert rebuilt.fill_value == 0
    assert_almost_equal(rebuilt.sp_values, source.sp_values)
Example #27
Source File: test_array.py From recruit with Apache License 2.0 | 5 votes |
def test_constructor_bool(self):
    """Build a boolean SparseArray and check dtype, values, index (GH 10648)."""
    raw = np.array([False, False, True, True, False, False])
    sparse = SparseArray(raw, fill_value=False, dtype=bool)

    assert sparse.dtype == SparseDtype(bool)

    # The stored values are expected to be the two True entries.
    tm.assert_numpy_array_equal(sparse.sp_values, np.array([True, True]))

    # Behavior change: np.asarray densifies, so sp_values is no longer
    # compared against np.asarray(sparse) here.
    stored_positions = np.array([2, 3], np.int32)
    tm.assert_numpy_array_equal(sparse.sp_index.indices, stored_positions)

    # Both dense accessors must give back the boolean input unchanged.
    dense_views = [sparse.to_dense(), sparse.values]
    for densified in dense_views:
        assert densified.dtype == bool
        tm.assert_numpy_array_equal(densified, raw)
Example #28
Source File: test_array.py From recruit with Apache License 2.0 | 5 votes |
def test_constructor_bool_fill_value(self):
    """Check dtype and fill value of SparseArrays built from boolean data.

    Uses the builtin ``bool`` instead of the ``np.bool`` alias, which was
    deprecated in NumPy 1.20 and removed in NumPy 1.24.
    """
    # dtype=None: a boolean sparse dtype with a falsy fill value is expected.
    arr = SparseArray([True, False, True], dtype=None)
    assert arr.dtype == SparseDtype(bool)
    assert not arr.fill_value

    # Explicit bool dtype: same expectations.
    arr = SparseArray([True, False, True], dtype=bool)
    assert arr.dtype == SparseDtype(bool)
    assert not arr.fill_value

    # An explicit fill_value=True must show up in the dtype and the array.
    arr = SparseArray([True, False, True], dtype=bool, fill_value=True)
    assert arr.dtype == SparseDtype(bool, True)
    assert arr.fill_value
Example #29
Source File: test_array.py From recruit with Apache License 2.0 | 5 votes |
def test_constructor_na_dtype(self, dtype):
    """Data containing NaN must raise ValueError for the given ``dtype``."""
    data_with_nan = [0, 1, np.nan]
    with pytest.raises(ValueError, match="Cannot convert"):
        SparseArray(data_with_nan, dtype=dtype)
Example #30
Source File: test_array.py From recruit with Apache License 2.0 | 5 votes |
def test_constructor_inferred_fill_value(self, data, fill_value):
    """The fill value of ``SparseArray(data)`` must match ``fill_value``.

    NA expectations are checked with ``pd.isna``; everything else is
    compared with ``==``.
    """
    inferred = SparseArray(data).fill_value

    if not pd.isna(fill_value):
        assert inferred == fill_value
    else:
        assert pd.isna(inferred)