Python Examples of pandas.SparseArray

Source File: test_array.py From recruit with Apache License 2.0

6 votes

def test_constructor_float32(self):
        """Check that float32 input keeps its dtype through SparseArray.

        Builds a SparseArray from ``[1., nan, 3]`` with ``dtype=np.float32``
        and asserts on its dtype, its stored values, its index positions and
        the result of both densifying accessors.
        """
        # GH 10648
        dense_input = np.array([1., np.nan, 3], dtype=np.float32)
        sparse = SparseArray(dense_input, dtype=np.float32)

        assert sparse.dtype == SparseDtype(np.float32)

        stored_values = np.array([1, 3], dtype=np.float32)
        tm.assert_numpy_array_equal(sparse.sp_values, stored_values)
        # Behavior change: np.asarray densifies, so sp_values is not compared
        # against np.asarray(sparse) here.
        stored_positions = np.array([0, 2], dtype=np.int32)
        tm.assert_numpy_array_equal(sparse.sp_index.indices,
                                    stored_positions)

        # Both accessors must return the original float32 data.
        for densified in (sparse.to_dense(), sparse.values):
            assert densified.dtype == np.float32
            tm.assert_numpy_array_equal(densified, dense_input)

Source File: test_sparse.py From recruit with Apache License 2.0

6 votes

def test_isna(self, data_missing):
        """Check ``isna`` on a sparse array and on Series wrapping it.

        The expected dtype is a boolean SparseDtype whose fill value is
        ``pd.isna`` of the input's own fill value.
        """
        fill_is_na = pd.isna(data_missing.dtype.fill_value)
        bool_sparse_dtype = SparseDtype(bool, fill_is_na)
        expected_array = SparseArray([True, False], dtype=bool_sparse_dtype)

        # Array-level isna.
        array_result = pd.isna(data_missing)
        self.assert_equal(array_result, expected_array)

        # Series-level isna.
        series_result = pd.Series(data_missing).isna()
        self.assert_series_equal(series_result, pd.Series(expected_array))

        # GH 21189
        dropped_result = pd.Series(data_missing).drop([0, 1]).isna()
        dropped_expected = pd.Series([], dtype=bool_sparse_dtype)
        self.assert_series_equal(dropped_result, dropped_expected)

Source File: test_sparse.py From recruit with Apache License 2.0

6 votes

def test_combine_le(self, data_repeated):
        """Check ``Series.combine`` with ``<=`` on sparse-backed Series.

        Expected values are built with an element-wise Python comparison and
        wrapped in ``pd.SparseArray(..., fill_value=False)``.
        """
        # Series[SparseArray].__le__ returns a Series[Sparse[bool]] rather
        # than a Series[bool], hence the SparseArray-backed expectations.
        left_data, right_data = data_repeated(2)
        left = pd.Series(left_data)
        right = pd.Series(right_data)

        def less_equal(x1, x2):
            return x1 <= x2

        # Series combined with another Series.
        result = left.combine(right, less_equal)
        pairwise = [a <= b
                    for a, b in zip(list(left_data), list(right_data))]
        expected = pd.Series(pd.SparseArray(pairwise, fill_value=False))
        self.assert_series_equal(result, expected)

        # Series combined with a scalar taken from its own first element.
        scalar = left.iloc[0]
        result = left.combine(scalar, less_equal)
        against_scalar = [a <= scalar for a in list(left_data)]
        expected = pd.Series(pd.SparseArray(against_scalar,
                                            fill_value=False))
        self.assert_series_equal(result, expected)

Source File: test_array.py From recruit with Apache License 2.0

6 votes

def test_numpy_sum(self):
        """Check ``np.sum`` on SparseArray and its rejected keyword args."""
        values = np.arange(10).astype(float)
        assert np.sum(SparseArray(values)) == 45.0

        # With position 5 replaced by NaN the asserted total is 40 for both
        # fill values.
        values[5] = np.nan
        for fill in (2, np.nan):
            out = np.sum(SparseArray(values, fill_value=fill))
            assert out == 40.0

        # numpy compatibility: dtype= and out= are expected to raise.
        with pytest.raises(ValueError,
                           match="the 'dtype' parameter is not supported"):
            np.sum(SparseArray(values), dtype=np.int64)

        with pytest.raises(ValueError,
                           match="the 'out' parameter is not supported"):
            np.sum(SparseArray(values), out=out)

Source File: test_sparse.py From recruit with Apache License 2.0

6 votes

def _compare_other(self, s, data, op_name, other):
        """Check a comparison op on a sparse array and on a Series of it.

        ``op_name`` is resolved through ``self.get_op_from_name``. The ``s``
        argument is never read; the Series under test is built from ``data``.
        """
        compare = self.get_op_from_name(op_name)

        # array
        array_result = pd.Series(compare(data, other))
        # The fill value is hard to test because the expected value is not
        # known in general; rely on tests in `tests/sparse` to validate that.
        result_dtype = array_result.dtype
        assert isinstance(result_dtype, SparseDtype)
        assert result_dtype.subtype == np.dtype('bool')

        with np.errstate(all='ignore'):
            dense_outcome = compare(np.asarray(data), np.asarray(other))
            expected = pd.Series(
                pd.SparseArray(dense_outcome,
                               fill_value=array_result.values.fill_value)
            )

        tm.assert_series_equal(array_result, expected)

        # series
        series_result = compare(pd.Series(data), other)
        tm.assert_series_equal(series_result, expected)

Source File: test_array.py From recruit with Apache License 2.0

6 votes

def test_cumsum(self, data, expected, numpy):
        """Check cumulative sums of SparseArray against ``expected``.

        ``numpy`` selects between ``np.cumsum`` and the ``cumsum`` method,
        and also selects which error checks run at the end.
        """
        if numpy:
            cumsum = np.cumsum
        else:
            def cumsum(s):
                return s.cumsum()

        # Default fill value, then NaN, then 2: all compared to ``expected``.
        for kwargs in ({}, {'fill_value': np.nan}, {'fill_value': 2}):
            out = cumsum(SparseArray(data, **kwargs))
            tm.assert_sp_array_equal(out, expected)

        if not numpy:
            axis = 1  # SparseArray currently 1-D, so only axis = 0 is valid.
            msg = "axis\\(={axis}\\) out of bounds".format(axis=axis)
            with pytest.raises(ValueError, match=msg):
                SparseArray(data).cumsum(axis=axis)
            return

        # numpy compatibility checks.
        with pytest.raises(ValueError,
                           match="the 'dtype' parameter is not supported"):
            np.cumsum(SparseArray(data), dtype=np.int64)

        with pytest.raises(ValueError,
                           match="the 'out' parameter is not supported"):
            np.cumsum(SparseArray(data), out=out)

Source File: test_indexing.py From recruit with Apache License 2.0

6 votes

def test_getitem(self):
        """Check scalar, list and boolean-mask ``[]`` indexing.

        Operates on ``self.sparse``; expected values come from applying the
        same indexer to ``self.orig`` and converting with ``to_sparse()``.
        """
        dense = self.orig
        sparse = self.sparse

        assert sparse[0] == 1
        assert np.isnan(sparse[1])
        assert sparse[3] == 3

        positions = [1, 3, 4]
        result = sparse[positions]
        expected = dense[positions].to_sparse()
        tm.assert_sp_series_equal(result, expected)

        # dense array
        odd_dense = dense % 2 == 1
        result = sparse[odd_dense]
        expected = dense[odd_dense].to_sparse()
        tm.assert_sp_series_equal(result, expected)

        # sparse array (actually it coerces to normal Series)
        result = sparse[sparse % 2 == 1]
        expected = dense[dense % 2 == 1].to_sparse()
        tm.assert_sp_series_equal(result, expected)

        # sparse array
        sparse_mask = pd.SparseArray(sparse % 2 == 1, dtype=bool)
        result = sparse[sparse_mask]
        tm.assert_sp_series_equal(result, expected)

Source File: test_indexing.py From recruit with Apache License 2.0

6 votes

def test_loc_index(self):
        """Check ``.loc`` on a sparse Series with a string label index.

        Covers scalar labels, a list of labels and three kinds of boolean
        mask (dense Series, sparse Series, ``pd.SparseArray``). Expected
        values come from applying the same indexer to the dense original and
        converting with ``to_sparse()``.
        """
        orig = pd.Series([1, np.nan, np.nan, 3, np.nan], index=list('ABCDE'))
        sparse = orig.to_sparse()

        assert sparse.loc['A'] == 1
        assert np.isnan(sparse.loc['B'])

        result = sparse.loc[['A', 'C', 'D']]
        exp = orig.loc[['A', 'C', 'D']].to_sparse()
        tm.assert_sp_series_equal(result, exp)

        # dense array
        result = sparse.loc[orig % 2 == 1]
        exp = orig.loc[orig % 2 == 1].to_sparse()
        tm.assert_sp_series_equal(result, exp)

        # sparse array (actually it coerces to normal Series)
        result = sparse.loc[sparse % 2 == 1]
        exp = orig.loc[orig % 2 == 1].to_sparse()
        tm.assert_sp_series_equal(result, exp)

        # sparse array: index through .loc like every other check in this
        # test; plain ``sparse[...]`` would exercise __getitem__ instead.
        result = sparse.loc[pd.SparseArray(sparse % 2 == 1, dtype=bool)]
        tm.assert_sp_series_equal(result, exp)

Source File: test_array.py From recruit with Apache License 2.0

6 votes

def test_numpy_mean(self):
        """Check ``np.mean`` on SparseArray and its rejected keyword args."""
        values = np.arange(10).astype(float)
        assert np.mean(SparseArray(values)) == 4.5

        # With position 5 replaced by NaN the asserted mean is 40 / 9.
        values[5] = np.nan
        out = np.mean(SparseArray(values))
        assert out == 40.0 / 9

        # numpy compatibility: dtype= and out= are expected to raise.
        with pytest.raises(ValueError,
                           match="the 'dtype' parameter is not supported"):
            np.mean(SparseArray(values), dtype=np.int64)

        with pytest.raises(ValueError,
                           match="the 'out' parameter is not supported"):
            np.mean(SparseArray(values), out=out)

Source File: test_array.py From recruit with Apache License 2.0

6 votes

def test_numpy_all(self, data, pos, neg):
        """Check ``np.all`` on SparseArray before and after inserting ``neg``.

        ``data`` is modified in place: its element at position 1 is replaced
        by ``neg`` part-way through the test.
        """
        # GH 17570
        variants = ({}, {'fill_value': pos})

        for kwargs in variants:
            assert np.all(SparseArray(data, **kwargs))

        data[1] = neg
        for kwargs in variants:
            assert not np.all(SparseArray(data, **kwargs))

        # raises with a different message on py2.
        msg = "the \'out\' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.all(SparseArray(data), out=np.array([]))

Source File: test_array.py From recruit with Apache License 2.0

6 votes

def test_nonzero(self):
        """Check ``SparseArray.nonzero`` positions with and without NaNs.

        Both inputs are expected to report positions 2, 5 and 9 as int32.
        """
        # Tests regression #21172.
        expected = np.array([2, 5, 9], dtype=np.int32)

        with_nans = pd.SparseArray(
            [float('nan'), float('nan'), 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]
        )
        (positions,) = with_nans.nonzero()
        tm.assert_numpy_array_equal(expected, positions)

        without_nans = pd.SparseArray([0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0])
        (positions,) = without_nans.nonzero()
        tm.assert_numpy_array_equal(expected, positions)

Source File: test_array.py From recruit with Apache License 2.0

6 votes

def test_getslice_tuple(self):
        """Check slicing a SparseArray with a tuple key.

        A one-element tuple must slice like the dense array; a two-element
        tuple must raise IndexError, as it does for the dense ndarray.
        """
        dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0])

        # Default fill value first, then an explicit fill value of 0.
        for kwargs in ({}, {'fill_value': 0}):
            sparse = SparseArray(dense, **kwargs)
            sliced = sparse[4:, ]
            expected = SparseArray(dense[4:, ], **kwargs)
            tm.assert_sp_array_equal(sliced, expected)

        # ``sparse`` is still the fill_value=0 array from the last iteration.
        with pytest.raises(IndexError):
            sparse[4:, :]

        with pytest.raises(IndexError):
            # check numpy compat
            dense[4:, :]

Source File: test_array.py From recruit with Apache License 2.0

6 votes

def test_getslice(self):
        """Check plain slices of ``self.arr`` against slices of its values."""
        arr = self.arr

        for window in (slice(None, -3), slice(-4, None)):
            sliced = arr[window]
            expected = SparseArray(arr.values[window])
            tm.assert_sp_array_equal(sliced, expected)

        # two corner cases from Series
        # NOTE(review): presumably -12 reaches at or past the start of
        # self.arr -- confirm against the fixture.
        sliced = arr[-12:]
        expected = SparseArray(arr)
        tm.assert_sp_array_equal(sliced, expected)

        sliced = arr[:-12]
        expected = SparseArray(arr.values[:0])
        tm.assert_sp_array_equal(sliced, expected)

Source File: test_series.py From recruit with Apache License 2.0

6 votes

def test_dropna(self):
        """Check ``dropna`` on a zero-filled SparseSeries and ``self.bseries``."""
        sparse = SparseSeries([0, 0, 0, nan, nan, 5, 6], fill_value=0)

        valid = sparse.dropna()

        # Dense reference: drop the NaNs, then drop the zeros as well.
        dense_valid = sparse.to_dense().dropna()
        dense_valid = dense_valid[dense_valid != 0]
        expected_values = pd.SparseArray(dense_valid.values, fill_value=0,
                                         kind='block')
        tm.assert_sp_array_equal(valid.values, expected_values)
        tm.assert_index_equal(valid.index, dense_valid.index)
        assert len(valid.sp_values) == 2

        # For self.bseries the result is asserted not to be a SparseSeries.
        dropped = self.bseries.dropna()
        dense_dropped = self.bseries.to_dense().dropna()
        assert not isinstance(dropped, SparseSeries)
        tm.assert_series_equal(dropped, dense_dropped)

Source File: test_array.py From recruit with Apache License 2.0

6 votes

def test_take_filling_all_nan(self):
        """Check ``take`` on an all-NaN SparseArray, including bad indices."""
        sparse = SparseArray([np.nan, np.nan, np.nan, np.nan, np.nan])

        # XXX: did the default kind from take change?
        # The same all-NaN block array is expected with and without
        # fill_value=True.
        for kwargs in ({}, {'fill_value': True}):
            taken = sparse.take(np.array([1, 0, -1]), **kwargs)
            expected = SparseArray([np.nan, np.nan, np.nan], kind='block')
            tm.assert_sp_array_equal(taken, expected)

        # Out-of-bounds positions must raise IndexError.
        failing_calls = (
            ([1, -6], {}),
            ([1, 5], {}),
            ([1, 5], {'fill_value': True}),
        )
        for positions, kwargs in failing_calls:
            with pytest.raises(IndexError):
                sparse.take(np.array(positions), **kwargs)

Source File: test_frame.py From recruit with Apache License 2.0

6 votes

def test_constructor_preserve_attr(self):
        """Check that dtype and fill value survive sparse containers.

        An int64 SparseArray with ``fill_value=0`` is wrapped in a
        SparseDataFrame and a SparseSeries in several ways; each result must
        report ``SparseDtype(np.int64)`` and a fill value of 0.
        """
        def assert_preserved(obj):
            assert obj.dtype == SparseDtype(np.int64)
            assert obj.fill_value == 0

        # GH 13866
        arr = pd.SparseArray([1, 0, 3, 0], dtype=np.int64, fill_value=0)
        assert_preserved(arr)

        frame = pd.SparseDataFrame({'x': arr})
        assert_preserved(frame['x'])

        series = pd.SparseSeries(arr, name='x')
        assert_preserved(series)

        frame = pd.SparseDataFrame(series)
        assert_preserved(frame['x'])

        frame = pd.SparseDataFrame({'x': series})
        assert_preserved(frame['x'])

Source File: test_frame.py From recruit with Apache License 2.0

6 votes

def test_astype_bool(self):
        """Check ``SparseDataFrame.astype`` from int64 to a boolean SparseDtype.

        Uses the builtin ``bool`` rather than the ``np.bool`` alias, which
        NumPy deprecated in 1.20 and removed in 1.24.
        """
        sparse = pd.SparseDataFrame({'A': SparseArray([0, 2, 0, 4],
                                                      fill_value=0,
                                                      dtype=np.int64),
                                     'B': SparseArray([0, 5, 0, 7],
                                                      fill_value=0,
                                                      dtype=np.int64)},
                                    default_fill_value=0)
        assert sparse['A'].dtype == SparseDtype(np.int64)
        assert sparse['B'].dtype == SparseDtype(np.int64)

        res = sparse.astype(SparseDtype(bool, False))
        # Non-zero integers map to True; the fill value becomes False.
        exp = pd.SparseDataFrame({'A': SparseArray([False, True, False, True],
                                                   dtype=bool,
                                                   fill_value=False,
                                                   kind='integer'),
                                  'B': SparseArray([False, True, False, True],
                                                   dtype=bool,
                                                   fill_value=False,
                                                   kind='integer')},
                                 default_fill_value=False)
        tm.assert_sp_frame_equal(res, exp)
        assert res['A'].dtype == SparseDtype(bool)
        assert res['B'].dtype == SparseDtype(bool)

Source File: test_array.py From recruit with Apache License 2.0

6 votes

def test_astype(self):
        """Check ``SparseArray.astype`` for several sparse target dtypes.

        Covers ``Sparse[float32]``, float64 and int64 with a fill value of 0,
        and the ValueError raised when casting NaN data to ``Sparse[i8]``.
        """
        source = [None, None, 0, 2]
        arr = SparseArray(source)

        # float -> float
        result = arr.astype("Sparse[float32]")
        expected = SparseArray(source, dtype=np.dtype('float32'))
        tm.assert_sp_array_equal(result, expected)

        # float64 with an explicit fill value of 0
        float_dtype = SparseDtype("float64", fill_value=0)
        result = arr.astype(float_dtype)
        expected = SparseArray._simple_new(
            np.array([0., 2.], dtype=float_dtype.subtype),
            IntIndex(4, [2, 3]),
            float_dtype
        )
        tm.assert_sp_array_equal(result, expected)

        # int64 with an explicit fill value of 0
        int_dtype = SparseDtype("int64", 0)
        result = arr.astype(int_dtype)
        expected = SparseArray._simple_new(
            np.array([0, 2], dtype=np.int64),
            IntIndex(4, [2, 3]),
            int_dtype
        )
        tm.assert_sp_array_equal(result, expected)

        # A stored NaN cannot be cast to an integer subtype.
        with_nan = SparseArray([0, np.nan, 0, 1], fill_value=0)
        with pytest.raises(ValueError, match='NA'):
            with_nan.astype('Sparse[i8]')

Source File: test_array.py From recruit with Apache License 2.0

5 votes

def test_take_filling_fill_value(self):
        """Check ``take`` with ``allow_fill`` on a zero-filled SparseArray."""
        # same tests as GH 12631
        sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0)

        # Without allow_fill, -1 is asserted to select the last element (4).
        taken = sparse.take(np.array([1, 0, -1]))
        tm.assert_sp_array_equal(taken,
                                 SparseArray([0, np.nan, 4], fill_value=0))

        # fill_value
        taken = sparse.take(np.array([1, 0, -1]), allow_fill=True)
        # XXX: behavior change.
        # the old way of filling self.fill_value doesn't follow EA rules.
        # It's supposed to be self.dtype.na_value (nan in this case)
        tm.assert_sp_array_equal(
            taken, SparseArray([0, np.nan, np.nan], fill_value=0))

        # allow_fill=False
        taken = sparse.take(np.array([1, 0, -1]),
                            allow_fill=False, fill_value=True)
        tm.assert_sp_array_equal(taken,
                                 SparseArray([0, np.nan, 4], fill_value=0))

        # With allow_fill=True, negatives other than -1 are rejected.
        msg = ("Invalid value in 'indices'.")
        for positions in ([1, 0, -2], [1, 0, -5]):
            with pytest.raises(ValueError, match=msg):
                sparse.take(np.array(positions), allow_fill=True)

        # Out-of-bounds positions must raise IndexError.
        failing_calls = (
            ([1, -6], {}),
            ([1, 5], {}),
            ([1, 5], {'fill_value': True}),
        )
        for positions, kwargs in failing_calls:
            with pytest.raises(IndexError):
                sparse.take(np.array(positions), **kwargs)

Source File: test_array.py From recruit with Apache License 2.0

5 votes

def test_sparse_series_round_trip(self, kind, fill):
        """Check SparseArray -> SparseSeries -> SparseArray round trips.

        Runs once on float data containing NaN and then on int64 data, the
        latter both with and without an explicit ``dtype`` on the way back.
        """
        # see gh-13999
        float_arr = SparseArray([np.nan, 1, np.nan, 2, 3],
                                kind=kind, fill_value=fill)
        rebuilt = SparseArray(SparseSeries(float_arr))
        tm.assert_sp_array_equal(float_arr, rebuilt)

        int_arr = SparseArray([0, 0, 0, 1, 1, 2], dtype=np.int64,
                              kind=kind, fill_value=fill)
        for kwargs in ({'dtype': np.int64}, {}):
            rebuilt = SparseArray(SparseSeries(int_arr), **kwargs)
            tm.assert_sp_array_equal(int_arr, rebuilt)

Source File: test_array.py From recruit with Apache License 2.0

5 votes

def test_take_fill_value(self):
        """Check ``take`` on a zero-filled SparseArray against ``np.take``."""
        dense = np.array([1, np.nan, 0, 3, 0])
        sparse = SparseArray(dense, fill_value=0)

        for positions in ([0], [1, 3, 4]):
            expected = SparseArray(np.take(dense, positions), fill_value=0)
            tm.assert_sp_array_equal(sparse.take(positions), expected)

Source File: test_array.py From recruit with Apache License 2.0

5 votes

def test_take(self):
        """Check ``self.arr.take`` against ``np.take`` on ``self.arr_data``."""
        for positions in ([2, 3], [0, 1, 2]):
            expected = SparseArray(np.take(self.arr_data, positions))
            tm.assert_sp_array_equal(self.arr.take(positions), expected)

Source File: test_array.py From recruit with Apache License 2.0

5 votes

def test_constructor_from_too_large_array(self):
        """Check that a 2-D ndarray is rejected with a TypeError."""
        two_dim = np.arange(10).reshape((2, 5))
        expected_message = "expected dimension <= 1 data"
        with pytest.raises(TypeError, match=expected_message):
            SparseArray(two_dim)

Source File: test_array.py From recruit with Apache License 2.0

5 votes

def test_sparse_series_round_trip2(self, kind, fill):
        """Check a boolean SparseArray survives a SparseSeries round trip.

        Uses the builtin ``bool`` rather than the ``np.bool`` alias, which
        NumPy deprecated in 1.20 and removed in 1.24.
        """
        # see gh-13999
        arr = SparseArray([True, False, True, True], dtype=bool,
                          kind=kind, fill_value=fill)
        res = SparseArray(SparseSeries(arr))
        tm.assert_sp_array_equal(arr, res)

Source File: test_array.py From recruit with Apache License 2.0

5 votes

def test_take_negative(self):
        """Check ``self.arr.take`` with negative positions against ``np.take``."""
        for positions in ([-1], [-4, -3, -2]):
            expected = SparseArray(np.take(self.arr_data, positions))
            tm.assert_sp_array_equal(self.arr.take(positions), expected)

Source File: test_array.py From recruit with Apache License 2.0

5 votes

def test_constructor_from_sparse(self):
        """Check that a SparseArray built from ``self.zarr`` matches it.

        The copy must report a fill value of 0 and the same stored values.
        """
        source = self.zarr
        rebuilt = SparseArray(source)
        assert rebuilt.fill_value == 0
        assert_almost_equal(rebuilt.sp_values, source.sp_values)

Source File: test_array.py From recruit with Apache License 2.0

5 votes

def test_constructor_bool(self):
        """Check a boolean SparseArray built with ``fill_value=False``.

        Asserts on the dtype, the stored values, their index positions and
        the result of both densifying accessors.
        """
        # GH 10648
        dense_input = np.array([False, False, True, True, False, False])
        sparse = SparseArray(dense_input, fill_value=False, dtype=bool)

        assert sparse.dtype == SparseDtype(bool)

        stored_values = np.array([True, True])
        tm.assert_numpy_array_equal(sparse.sp_values, stored_values)
        # Behavior change: np.asarray densifies, so sp_values is not compared
        # against np.asarray(sparse) here.
        stored_positions = np.array([2, 3], np.int32)
        tm.assert_numpy_array_equal(sparse.sp_index.indices,
                                    stored_positions)

        # Both accessors must return the original boolean data.
        for densified in (sparse.to_dense(), sparse.values):
            assert densified.dtype == bool
            tm.assert_numpy_array_equal(densified, dense_input)

Source File: test_array.py From recruit with Apache License 2.0

5 votes

def test_constructor_bool_fill_value(self):
        """Check the fill value of boolean SparseArrays.

        With no explicit ``fill_value`` the fill value must be falsy, whether
        the dtype is inferred or given; an explicit ``fill_value=True`` must
        be kept. Uses the builtin ``bool`` rather than the ``np.bool`` alias,
        which NumPy deprecated in 1.20 and removed in 1.24.
        """
        # dtype inferred from the data
        arr = SparseArray([True, False, True], dtype=None)
        assert arr.dtype == SparseDtype(bool)
        assert not arr.fill_value

        # explicit bool dtype, default fill value
        arr = SparseArray([True, False, True], dtype=bool)
        assert arr.dtype == SparseDtype(bool)
        assert not arr.fill_value

        # explicit bool dtype and explicit fill value
        arr = SparseArray([True, False, True], dtype=bool, fill_value=True)
        assert arr.dtype == SparseDtype(bool, True)
        assert arr.fill_value

Source File: test_array.py From recruit with Apache License 2.0

5 votes

def test_constructor_na_dtype(self, dtype):
        """Check that NaN data with the given ``dtype`` raises ValueError."""
        values_with_nan = [0, 1, np.nan]
        with pytest.raises(ValueError, match="Cannot convert"):
            SparseArray(values_with_nan, dtype=dtype)

Source File: test_array.py From recruit with Apache License 2.0

5 votes

def test_constructor_inferred_fill_value(self, data, fill_value):
        """Check the fill value SparseArray infers when none is given.

        An NA ``fill_value`` is compared with ``pd.isna`` and any other value
        with ``==``.
        """
        inferred = SparseArray(data).fill_value

        if not pd.isna(fill_value):
            assert inferred == fill_value
            return

        assert pd.isna(inferred)

Python pandas.SparseArray() Examples