Python Examples of pandas.SparseSeries

Source File: test_series.py From recruit with Apache License 2.0

6 votes

def test_concat_different_fill(self):
        """Concatenate two sparse series whose fill values differ.

        Each concat is wrapped in ``tm.assert_produces_warning`` for
        ``PerformanceWarning``; the expected series is built with the fill
        value of whichever operand is listed first.
        """
        left_vals = np.array([1, 2, np.nan, np.nan, 0, np.nan])
        right_vals = np.array([3, np.nan, 4, 0, 0])

        for kind in ['integer', 'block']:
            # left operand: no explicit fill_value; right operand: zero fill
            left_sp = pd.SparseSeries(left_vals, name='x', kind=kind)
            right_sp = pd.SparseSeries(right_vals, name='y', kind=kind,
                                       fill_value=0)

            # order: left, right
            with tm.assert_produces_warning(PerformanceWarning):
                combined = pd.concat([left_sp, right_sp])
            dense = pd.concat([pd.Series(left_vals), pd.Series(right_vals)])
            expected = pd.SparseSeries(dense, kind=kind)
            tm.assert_sp_series_equal(combined, expected)

            # order: right, left
            with tm.assert_produces_warning(PerformanceWarning):
                combined = pd.concat([right_sp, left_sp])
            dense = pd.concat([pd.Series(right_vals), pd.Series(left_vals)])
            expected = pd.SparseSeries(dense, kind=kind, fill_value=0)
            tm.assert_sp_series_equal(combined, expected)

Source File: test_combine_concat.py From recruit with Apache License 2.0

6 votes

def test_concat_different_kind(self):
        """Concatenate an 'integer'-kind and a 'block'-kind sparse series.

        The expected series is built with the kind of the first operand in
        each ordering.
        """
        int_vals = np.array([1, 2, np.nan, np.nan, 0, np.nan])
        block_vals = np.array([3, np.nan, 4, 0, 0])

        int_sp = pd.SparseSeries(int_vals, name='x', kind='integer')
        block_sp = pd.SparseSeries(block_vals, name='y', kind='block')

        # integer-kind operand first
        combined = pd.concat([int_sp, block_sp])
        dense = pd.concat([pd.Series(int_vals), pd.Series(block_vals)])
        expected = pd.SparseSeries(dense, kind=int_sp.kind)
        tm.assert_sp_series_equal(combined, expected)

        # block-kind operand first
        combined = pd.concat([block_sp, int_sp])
        dense = pd.concat([pd.Series(block_vals), pd.Series(int_vals)])
        expected = pd.SparseSeries(dense, kind=block_sp.kind)
        tm.assert_sp_series_equal(combined, expected,
                                  consolidate_block_indices=True)

Source File: test_indexing.py From recruit with Apache License 2.0

6 votes

def test_get(self):
        """``get`` yields the stored value, NaN for a NaN slot, None if absent."""
        # default integer index
        by_position = pd.SparseSeries([1, np.nan, np.nan, 3, np.nan])
        assert by_position.get(0) == 1
        assert np.isnan(by_position.get(1))
        assert by_position.get(5) is None

        # label index: first without, then with, an explicit zero fill value
        for extra_kwargs in ({}, {'fill_value': 0}):
            by_label = pd.SparseSeries([1, np.nan, 0, 3, 0],
                                       index=list('ABCDE'), **extra_kwargs)
            assert by_label.get('A') == 1
            assert np.isnan(by_label.get('B'))
            assert by_label.get('C') == 0
            assert by_label.get('XX') is None

Source File: test_indexing.py From vnpy_crypto with MIT License

6 votes

def test_get(self):
        """Check ``SparseSeries.get`` for present, NaN and missing keys."""
        positional = pd.SparseSeries([1, np.nan, np.nan, 3, np.nan])
        assert positional.get(0) == 1
        assert np.isnan(positional.get(1))
        # position 5 is past the end of the five-element series
        assert positional.get(5) is None

        def _check_labelled(series):
            # shared expectations for the two label-indexed variants below
            assert series.get('A') == 1
            assert np.isnan(series.get('B'))
            assert series.get('C') == 0
            assert series.get('XX') is None

        _check_labelled(
            pd.SparseSeries([1, np.nan, 0, 3, 0], index=list('ABCDE')))
        _check_labelled(
            pd.SparseSeries([1, np.nan, 0, 3, 0], index=list('ABCDE'),
                            fill_value=0))

Source File: test_reshape.py From recruit with Apache License 2.0

6 votes

def test_dataframe_dummies_prefix_dict(self, sparse):
        """``get_dummies`` with a per-column prefix mapping.

        ``sparse`` is forwarded to ``get_dummies``; when truthy the expected
        dummy columns are converted to ``SparseSeries`` before comparison.
        """
        frame = DataFrame({'C': [1, 2, 3],
                           'A': ['a', 'b', 'a'],
                           'B': ['b', 'b', 'c']})
        prefix_map = {'A': 'from_A', 'B': 'from_B'}
        result = get_dummies(frame, prefix=prefix_map, sparse=sparse)

        dummy_cols = ['from_A_a', 'from_A_b', 'from_B_b', 'from_B_c']
        expected = DataFrame({'C': [1, 2, 3],
                              'from_A_a': [1, 0, 1],
                              'from_A_b': [0, 1, 0],
                              'from_B_b': [1, 1, 0],
                              'from_B_c': [0, 0, 1]})
        expected[dummy_cols] = expected[dummy_cols].astype(np.uint8)

        if sparse:
            def _as_sparse(column):
                return pd.SparseSeries(column)

            expected[dummy_cols] = expected[dummy_cols].apply(_as_sparse)

        assert_frame_equal(result, expected)

Source File: test_combine_concat.py From recruit with Apache License 2.0

6 votes

def test_concat_different_fill(self):
        """Concat of sparse series with mismatched fill values, both orders.

        Every concat runs under ``tm.assert_produces_warning`` for
        ``PerformanceWarning``. The expectation uses the fill value of the
        operand that is listed first.
        """
        nan_side = np.array([1, 2, np.nan, np.nan, 0, np.nan])
        zero_side = np.array([3, np.nan, 4, 0, 0])

        for kind in ['integer', 'block']:
            sp_nan = pd.SparseSeries(nan_side, name='x', kind=kind)
            sp_zero = pd.SparseSeries(zero_side, name='y', kind=kind,
                                      fill_value=0)

            with tm.assert_produces_warning(PerformanceWarning):
                actual = pd.concat([sp_nan, sp_zero])

            dense_concat = pd.concat([pd.Series(nan_side),
                                      pd.Series(zero_side)])
            tm.assert_sp_series_equal(
                actual, pd.SparseSeries(dense_concat, kind=kind))

            with tm.assert_produces_warning(PerformanceWarning):
                actual = pd.concat([sp_zero, sp_nan])

            dense_concat = pd.concat([pd.Series(zero_side),
                                      pd.Series(nan_side)])
            tm.assert_sp_series_equal(
                actual,
                pd.SparseSeries(dense_concat, kind=kind, fill_value=0))

Source File: test_reshape.py From recruit with Apache License 2.0

6 votes

def test_dataframe_dummies_drop_first_with_na(self, df, sparse):
        """``get_dummies(drop_first=True)`` on a frame containing an all-NaN row.

        Checked first with ``dummy_na=True`` (columns sorted), then with
        ``dummy_na=False`` against the 'C', 'A_b', 'B_c' subset.
        """
        # add a fourth row that is missing in every column
        df.loc[3, :] = [np.nan, np.nan, np.nan]

        with_na = get_dummies(df, dummy_na=True, drop_first=True,
                              sparse=sparse)
        with_na = with_na.sort_index(axis=1)

        dummy_cols = ['A_b', 'A_nan', 'B_c', 'B_nan']
        expected = DataFrame({'C': [1, 2, 3, np.nan],
                              'A_b': [0, 1, 0, 0],
                              'A_nan': [0, 0, 0, 1],
                              'B_c': [0, 0, 1, 0],
                              'B_nan': [0, 0, 0, 1]})
        expected[dummy_cols] = expected[dummy_cols].astype(np.uint8)
        expected = expected.sort_index(axis=1)
        if sparse:
            for name in dummy_cols:
                expected[name] = pd.SparseSeries(expected[name])

        assert_frame_equal(with_na, expected)

        without_na = get_dummies(df, dummy_na=False, drop_first=True,
                                 sparse=sparse)
        assert_frame_equal(without_na, expected[['C', 'A_b', 'B_c']])

Source File: test_combine_concat.py From recruit with Apache License 2.0

6 votes

def test_concat(self, kind):
        """Concat two sparse series of the same ``kind`` and fill value.

        Runs once without an explicit fill value and once with
        ``fill_value=0``; the result must equal the sparse version of the
        dense concat.
        """
        head = np.array([1, 2, np.nan, np.nan, 0, np.nan])
        tail = np.array([3, np.nan, 4, 0, 0])

        for fill_kwargs in ({}, {'fill_value': 0}):
            sp_head = pd.SparseSeries(head, name='x', kind=kind,
                                      **fill_kwargs)
            sp_tail = pd.SparseSeries(tail, name='y', kind=kind,
                                      **fill_kwargs)

            actual = pd.concat([sp_head, sp_tail])
            dense = pd.concat([pd.Series(head), pd.Series(tail)])
            wanted = pd.SparseSeries(dense, kind=kind, **fill_kwargs)
            tm.assert_sp_series_equal(actual, wanted,
                                      consolidate_block_indices=True)

Source File: test_format.py From recruit with Apache License 2.0

6 votes

def test_sparse_bool(self):
        """Check ``repr`` of a boolean sparse series, full and truncated."""
        # GH 13110
        series = pd.SparseSeries([True, False, False, True, False, False],
                                 fill_value=False)
        # suffix inserted into the index arrays' repr, chosen by the
        # ``use_32bit_repr`` flag
        dtype = '' if use_32bit_repr else ', dtype=int32'
        # last two lines are identical in both expected outputs
        block_info = ("Block locations: array([0, 3]{0})\n"
                      "Block lengths: array([1, 1]{0})".format(dtype))

        full_repr = ("0     True\n1    False\n2    False\n"
                     "3     True\n4    False\n5    False\n"
                     "dtype: Sparse[bool, False]\nBlockIndex\n" + block_info)
        assert repr(series) == full_repr

        with option_context("display.max_rows", 3):
            truncated_repr = (
                "0     True\n     ...  \n5    False\n"
                "Length: 6, dtype: Sparse[bool, False]\nBlockIndex\n"
                + block_info)
            assert repr(series) == truncated_repr

Source File: concat.py From recruit with Apache License 2.0

6 votes

def _get_series_result_type(result, objs=None):
    """
    Choose the class that should wrap the outcome of a Series concat.

    ``result`` is either a dict (concat along axis 1) or an object exposing
    ``_block`` (the original comment calls this a SingleBlockManager,
    axis 0).

    * dict: ``SparseDataFrame`` when every value is a ``SparseSeries`` or
      ``SparseDataFrame``, otherwise ``DataFrame``.
    * otherwise: ``SparseSeries`` when ``result._block.is_sparse`` is truthy,
      else the ``_constructor`` of the first element of ``objs``. In that
      last case ``objs`` must be supplied, since the default ``None`` cannot
      be indexed.
    """
    from pandas import SparseSeries, SparseDataFrame, DataFrame

    if isinstance(result, dict):
        sparse_classes = (SparseSeries, SparseDataFrame)
        every_value_sparse = all(isinstance(value, sparse_classes)
                                 for value in compat.itervalues(result))
        return SparseDataFrame if every_value_sparse else DataFrame

    if result._block.is_sparse:
        return SparseSeries
    return objs[0]._constructor

Source File: test_series.py From recruit with Apache License 2.0

6 votes

def test_constructor_dtype(self):
        """Check dtype and fill_value for several constructor combinations."""
        # float input, no explicit fill value
        float_default = SparseSeries([np.nan, 1, 2, np.nan])
        assert float_default.dtype == SparseDtype(np.float64)
        assert np.isnan(float_default.fill_value)

        # float input, zero fill
        float_zero = SparseSeries([np.nan, 1, 2, np.nan], fill_value=0)
        assert float_zero.dtype == SparseDtype(np.float64, 0)
        assert float_zero.fill_value == 0

        # int64 dtype requested together with a NaN fill
        int_nan = SparseSeries([0, 1, 2, 4], dtype=np.int64,
                               fill_value=np.nan)
        assert int_nan.dtype == SparseDtype(np.int64, np.nan)
        assert np.isnan(int_nan.fill_value)

        # int64 dtype, no explicit fill value
        int_default = SparseSeries([0, 1, 2, 4], dtype=np.int64)
        assert int_default.dtype == SparseDtype(np.int64, 0)
        assert int_default.fill_value == 0

        # int64 dtype, explicit zero fill
        int_zero = SparseSeries([0, 1, 2, 4], fill_value=0, dtype=np.int64)
        assert int_zero.dtype == SparseDtype(np.int64, 0)
        assert int_zero.fill_value == 0

Source File: test_frame.py From recruit with Apache License 2.0

6 votes

def test_constructor_preserve_attr(self):
        """dtype and fill_value carry over from SparseArray to series/frame."""
        # GH 13866
        def _assert_int64_zero_fill(obj):
            # every object below is expected to report the same dtype/fill
            assert obj.dtype == SparseDtype(np.int64)
            assert obj.fill_value == 0

        sparse_arr = pd.SparseArray([1, 0, 3, 0], dtype=np.int64,
                                    fill_value=0)
        _assert_int64_zero_fill(sparse_arr)

        frame = pd.SparseDataFrame({'x': sparse_arr})
        _assert_int64_zero_fill(frame['x'])

        series = pd.SparseSeries(sparse_arr, name='x')
        _assert_int64_zero_fill(series)

        # frame built directly from the named series
        frame = pd.SparseDataFrame(series)
        _assert_int64_zero_fill(frame['x'])

        # frame built from a dict holding the series
        frame = pd.SparseDataFrame({'x': series})
        _assert_int64_zero_fill(frame['x'])

Source File: test_series.py From recruit with Apache License 2.0

6 votes

def test_getitem_slice(self):
        """Slicing ``self.bseries`` equals reindexing on the sliced index."""
        idx = self.bseries.index

        # (slice to apply, whether the result type is asserted as well)
        cases = [
            (slice(None, None, 2), True),   # [::2]
            (slice(None, 5), True),         # [:5]
            (slice(5, None), False),        # [5:]
            (slice(None, -3), False),       # [:-3], negative indices
        ]
        for window, check_type in cases:
            sliced = self.bseries[window]
            if check_type:
                assert isinstance(sliced, SparseSeries)
            expected = self.bseries.reindex(idx[window])
            tm.assert_sp_series_equal(sliced, expected)

Source File: test_series.py From recruit with Apache License 2.0

6 votes

def test_numpy_cumsum(self):
        """``np.cumsum`` on sparse series; ``dtype``/``out`` must be rejected."""
        cumulative = np.cumsum(self.bseries)
        dense_cumsum = self.bseries.to_dense().cumsum()
        tm.assert_sp_series_equal(cumulative, SparseSeries(dense_cumsum))

        cumulative = np.cumsum(self.zbseries)
        dense_cumsum = self.zbseries.to_dense().cumsum()
        tm.assert_series_equal(cumulative, dense_cumsum.to_sparse())

        # numpy-only keyword arguments are expected to raise ValueError
        with pytest.raises(ValueError,
                           match="the 'dtype' parameter is not supported"):
            np.cumsum(self.bseries, dtype=np.int64)

        with pytest.raises(ValueError,
                           match="the 'out' parameter is not supported"):
            np.cumsum(self.zbseries, out=cumulative)

Source File: test_series.py From recruit with Apache License 2.0

6 votes

def test_unary_operators(self, values, op, fill_value):
        """A unary ``op`` on a sparse series also transforms its fill value.

        The expected fill value is ``not fill_value`` for ``operator.invert``
        and ``op(fill_value)`` for any other operator.
        """
        # https://github.com/pandas-dev/pandas/issues/22835
        values = np.asarray(values)
        expected_fill = (not fill_value if op is operator.invert
                         else op(fill_value))

        series = SparseSeries(values, fill_value=fill_value,
                              index=['a', 'b', 'c', 'd'], name='name')
        result = op(series)

        expected = SparseSeries(op(values), fill_value=expected_fill,
                                index=['a', 'b', 'c', 'd'], name='name')
        tm.assert_sp_series_equal(result, expected)

Source File: test_series.py From recruit with Apache License 2.0

6 votes

def test_dropna(self):
        """``dropna`` on a zero-filled sparse series and on ``self.bseries``."""
        zero_filled = SparseSeries([0, 0, 0, nan, nan, 5, 6], fill_value=0)
        dropped = zero_filled.dropna()

        # expectation: dense values with NaNs dropped and zeros filtered out
        dense_kept = zero_filled.to_dense().dropna()
        dense_kept = dense_kept[dense_kept != 0]
        expected_values = pd.SparseArray(dense_kept.values, fill_value=0,
                                         kind='block')
        tm.assert_sp_array_equal(dropped.values, expected_values)
        tm.assert_index_equal(dropped.index, dense_kept.index)
        assert len(dropped.sp_values) == 2

        # for self.bseries the result is asserted not to be a SparseSeries
        dropped = self.bseries.dropna()
        dense_kept = self.bseries.to_dense().dropna()
        assert not isinstance(dropped, SparseSeries)
        tm.assert_series_equal(dropped, dense_kept)

Source File: test_series.py From recruit with Apache License 2.0

6 votes

def test_notna(self):
        """``notna`` on sparse series with no explicit fill and with zero fill."""
        # GH 8276
        nan_filled = pd.SparseSeries([np.nan, np.nan, 1, 2, np.nan],
                                     name='xxx')
        mask = nan_filled.notna()
        expected_sparse = pd.SparseSeries(
            [False, False, True, True, False], name='xxx', fill_value=False)
        tm.assert_sp_series_equal(mask, expected_sparse)

        # if fill_value is not nan, True can be included in sp_values
        zero_filled = pd.SparseSeries([np.nan, 0., 1., 2., 0.], name='xxx',
                                      fill_value=0.)
        mask = zero_filled.notna()
        assert isinstance(mask, pd.SparseSeries)
        expected_dense = pd.Series([False, True, True, True, True],
                                   name='xxx')
        tm.assert_series_equal(mask.to_dense(), expected_dense)

Source File: test_series.py From recruit with Apache License 2.0

6 votes

def test_isna(self):
        """``isna`` on sparse series with no explicit fill and with zero fill."""
        # GH 8276
        nan_filled = pd.SparseSeries([np.nan, np.nan, 1, 2, np.nan],
                                     name='xxx')
        mask = nan_filled.isna()
        expected_sparse = pd.SparseSeries(
            [True, True, False, False, True], name='xxx', fill_value=True)
        tm.assert_sp_series_equal(mask, expected_sparse)

        # if fill_value is not nan, True can be included in sp_values
        zero_filled = pd.SparseSeries([np.nan, 0., 1., 2., 0.], name='xxx',
                                      fill_value=0.)
        mask = zero_filled.isna()
        assert isinstance(mask, pd.SparseSeries)
        expected_dense = pd.Series([True, False, False, False, False],
                                   name='xxx')
        tm.assert_series_equal(mask.to_dense(), expected_dense)

Source File: test_series.py From recruit with Apache License 2.0

6 votes

def test_shift(self):
        """``shift`` on a sparse series is compared with the dense result."""
        int_indexed = SparseSeries([nan, 1., 2., 3., nan, nan],
                                   index=np.arange(6))

        # a zero shift must compare equal to the input
        unshifted = int_indexed.shift(0)
        # assert shifted is not series
        tm.assert_sp_series_equal(unshifted, int_indexed)

        # forward and backward positional shifts
        _dense_series_compare(int_indexed, lambda s: s.shift(1))
        _dense_series_compare(int_indexed, lambda s: s.shift(-2))

        # frequency shifts on a business-day index: string and offset object
        date_indexed = SparseSeries([nan, 1., 2., 3., nan, nan],
                                    index=bdate_range('1/1/2000', periods=6))
        _dense_series_compare(date_indexed, lambda s: s.shift(2, freq='B'))
        _dense_series_compare(date_indexed,
                              lambda s: s.shift(2, freq=BDay()))

Source File: test_series.py From recruit with Apache License 2.0

6 votes

def test_value_counts_int(self):
        """``value_counts`` of integer sparse data matches the dense series."""
        vals = [1, 2, 0, 1, 2, 1, 2, 0, 1, 1]
        dense = pd.Series(vals, name='xx')

        def _assert_matches_dense(sparse):
            tm.assert_series_equal(sparse.value_counts(),
                                   dense.value_counts())
            tm.assert_series_equal(sparse.value_counts(dropna=False),
                                   dense.value_counts(dropna=False))

        # fill_value is np.nan, but should not be included in the result
        _assert_matches_dense(pd.SparseSeries(vals, name='xx'))

        # same data with an explicit zero fill value
        _assert_matches_dense(pd.SparseSeries(vals, name='xx', fill_value=0))

Source File: test_series.py From recruit with Apache License 2.0

6 votes

def test_to_frame(self):
        """``to_frame`` keeps or overrides the column name, for both fills."""
        # GH 9850
        series = pd.SparseSeries([1, 2, 0, nan, 4, nan, 0], name='x')

        same_name = pd.SparseDataFrame({'x': [1, 2, 0, nan, 4, nan, 0]})
        tm.assert_sp_frame_equal(series.to_frame(), same_name)

        renamed = pd.SparseDataFrame({'y': [1, 2, 0, nan, 4, nan, 0]})
        tm.assert_sp_frame_equal(series.to_frame(name='y'), renamed)

        # repeat with a zero fill value
        series = pd.SparseSeries([1, 2, 0, nan, 4, nan, 0], name='x',
                                 fill_value=0)
        same_name = pd.SparseDataFrame({'x': [1, 2, 0, nan, 4, nan, 0]},
                                       default_fill_value=0)
        tm.assert_sp_frame_equal(series.to_frame(), same_name)

        # the renamed variant is compared after densifying
        renamed_dense = pd.DataFrame({'y': [1, 2, 0, nan, 4, nan, 0]})
        tm.assert_frame_equal(series.to_frame(name='y').to_dense(),
                              renamed_dense)

Source File: test_series.py From recruit with Apache License 2.0

6 votes

def test_concat_different_kind(self):
        """Concat sparse series that differ in both kind and fill value.

        Each concat runs under ``tm.assert_produces_warning`` for
        ``PerformanceWarning``; the expectation uses the kind and fill value
        of the operand listed first.
        """
        int_vals = np.array([1, 2, np.nan, np.nan, 0, np.nan])
        block_vals = np.array([3, np.nan, 4, 0, 0])

        int_sp = pd.SparseSeries(int_vals, name='x', kind='integer')
        block_sp = pd.SparseSeries(block_vals, name='y', kind='block',
                                   fill_value=0)

        # integer-kind operand first
        with tm.assert_produces_warning(PerformanceWarning):
            combined = pd.concat([int_sp, block_sp])
        dense = pd.concat([pd.Series(int_vals), pd.Series(block_vals)])
        tm.assert_sp_series_equal(combined,
                                  pd.SparseSeries(dense, kind='integer'))

        # block-kind, zero-filled operand first
        with tm.assert_produces_warning(PerformanceWarning):
            combined = pd.concat([block_sp, int_sp])
        dense = pd.concat([pd.Series(block_vals), pd.Series(int_vals)])
        tm.assert_sp_series_equal(
            combined, pd.SparseSeries(dense, kind='block', fill_value=0))

Source File: test_series.py From recruit with Apache License 2.0

6 votes

def test_concat(self):
        """Concat same-kind sparse series, for both kinds and both fills."""
        head = np.array([1, 2, np.nan, np.nan, 0, np.nan])
        tail = np.array([3, np.nan, 4, 0, 0])

        for kind in ['integer', 'block']:
            # no explicit fill value
            sp_head = pd.SparseSeries(head, name='x', kind=kind)
            sp_tail = pd.SparseSeries(tail, name='y', kind=kind)

            actual = pd.concat([sp_head, sp_tail])
            dense = pd.concat([pd.Series(head), pd.Series(tail)])
            tm.assert_sp_series_equal(actual,
                                      pd.SparseSeries(dense, kind=kind))

            # zero fill value; only this comparison consolidates block
            # indices
            sp_head = pd.SparseSeries(head, fill_value=0, name='x',
                                      kind=kind)
            sp_tail = pd.SparseSeries(tail, fill_value=0, name='y',
                                      kind=kind)

            actual = pd.concat([sp_head, sp_tail])
            dense = pd.concat([pd.Series(head), pd.Series(tail)])
            wanted = pd.SparseSeries(dense, fill_value=0, kind=kind)
            tm.assert_sp_series_equal(actual, wanted,
                                      consolidate_block_indices=True)

Source File: test_series.py From recruit with Apache License 2.0

6 votes

def test_to_dense_fill_value(self):
        """Sparse -> dense round trip returns the original dense series."""
        # NaN gaps only
        dense = pd.Series([1, np.nan, np.nan, 3, np.nan])
        tm.assert_series_equal(SparseSeries(dense).to_dense(), dense)
        tm.assert_series_equal(
            SparseSeries(dense, fill_value=0).to_dense(), dense)

        # NaN and zeros mixed
        dense = pd.Series([1, np.nan, 0, 3, 0])
        tm.assert_series_equal(
            SparseSeries(dense, fill_value=0).to_dense(), dense)
        # NOTE(review): this repeats the previous check verbatim; kept so the
        # set of assertions stays unchanged.
        tm.assert_series_equal(
            SparseSeries(dense, fill_value=0).to_dense(), dense)

        # all-NaN input, no explicit fill value
        dense = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan])
        tm.assert_series_equal(SparseSeries(dense).to_dense(), dense)

        # all-NaN input, zero fill value
        dense = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan])
        tm.assert_series_equal(
            SparseSeries(dense, fill_value=0).to_dense(), dense)

Source File: test_series.py From recruit with Apache License 2.0

5 votes

def test_value_counts(self):
        """``value_counts`` of NaN-containing sparse data matches dense."""
        vals = [1, 2, nan, 0, nan, 1, 2, nan, nan, 1, 2, 0, 1, 1]
        dense = pd.Series(vals, name='xx')

        # first without an explicit fill value, then with a zero fill
        for fill_kwargs in ({}, {'fill_value': 0}):
            sparse = pd.SparseSeries(vals, name='xx', **fill_kwargs)
            tm.assert_series_equal(sparse.value_counts(),
                                   dense.value_counts())
            tm.assert_series_equal(sparse.value_counts(dropna=False),
                                   dense.value_counts(dropna=False))

Source File: test_series.py From recruit with Apache License 2.0

5 votes

def test_constructor_dict_datetime64_index(datetime_type):
    """Build a SparseSeries from a dict whose keys are datetime-like.

    ``datetime_type`` converts each date string into a key; the result must
    equal a series indexed by the same dates as ``pd.Timestamp``.
    """
    # GH 9456
    dates = ['1984-02-19', '1988-11-06', '1989-12-03', '1990-03-15']
    values = [42544017.198965244, 1234565, 40512335.181958228, -1]

    keyed_values = dict(zip(map(datetime_type, dates), values))
    from_dict = SparseSeries(keyed_values)
    from_index = SparseSeries(values, map(pd.Timestamp, dates))

    tm.assert_sp_series_equal(from_dict, from_index)

Source File: test_indexing.py From vnpy_crypto with MIT License

5 votes

def test_getitem_ellipsis(self):
        """Indexing with ``...`` gives a series equal to the original."""
        # GH 9467
        # first without an explicit fill value, then with a zero fill
        for fill_kwargs in ({}, {'fill_value': 0}):
            series = pd.SparseSeries([1, np.nan, 2, 0, np.nan],
                                     **fill_kwargs)
            tm.assert_sp_series_equal(series[...], series)

Source File: test_series.py From recruit with Apache License 2.0

5 votes

def test_concat_axis1_different_fill(self):
        """Column-wise concat of sparse series with differing fill values.

        The result must be a ``SparseDataFrame`` whose dense form equals the
        dense column-wise concat.
        """
        x_vals = np.array([1, 2, np.nan, np.nan, 0, np.nan])
        y_vals = np.array([3, np.nan, 4, 0, 0])

        x_sparse = pd.SparseSeries(x_vals, name='x')
        y_sparse = pd.SparseSeries(y_vals, name='y', fill_value=0)

        combined = pd.concat([x_sparse, y_sparse], axis=1)
        dense_columns = [pd.Series(x_vals, name='x'),
                         pd.Series(y_vals, name='y')]
        expected = pd.concat(dense_columns, axis=1)

        assert isinstance(combined, pd.SparseDataFrame)
        tm.assert_frame_equal(combined.to_dense(), expected)

Source File: test_series.py From recruit with Apache License 2.0

5 votes

def _dense_series_compare(s, f):
    """Apply ``f`` to ``s`` and to its dense form and compare the outcomes.

    The sparse-path result must be a ``SparseSeries``; after densifying it
    must equal what ``f`` returns for ``s.to_dense()``.
    """
    sparse_outcome = f(s)
    assert isinstance(sparse_outcome, SparseSeries)

    dense_outcome = f(s.to_dense())
    tm.assert_series_equal(sparse_outcome.to_dense(), dense_outcome)

Source File: test_common.py From recruit with Apache License 2.0

5 votes

def test_is_sparse(check_scipy):
    """``com.is_sparse`` accepts pandas sparse containers only.

    When ``check_scipy`` is truthy, a scipy sparse matrix is also checked and
    must be reported as not sparse.
    """
    # pandas sparse containers are recognised
    for sparse_obj in (pd.SparseArray([1, 2, 3]),
                       pd.SparseSeries([1, 2, 3])):
        assert com.is_sparse(sparse_obj)

    # a plain ndarray is not
    assert not com.is_sparse(np.array([1, 2, 3]))

    if check_scipy:
        import scipy.sparse
        matrix = scipy.sparse.bsr_matrix([1, 2, 3])
        assert not com.is_sparse(matrix)

Python pandas.SparseSeries() Examples