Python pandas.util.testing.assert_series_equal() Examples
The following are 30 code examples of pandas.util.testing.assert_series_equal(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.util.testing, or try the search function.
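Before the project examples, here is a minimal sketch of the call pattern they all exercise. The sample Series, the name 'x', and the use of check_names below are illustrative choices, assuming a pandas version in which pandas.util.testing is still importable (newer releases expose the same helper as pandas.testing.assert_series_equal).

import numpy as np
import pandas as pd
import pandas.util.testing as tm

# Two Series that match in values, index, dtype and name:
# the assertion returns None and raises nothing.
left = pd.Series([1.0, 2.0, np.nan], name='x')
right = pd.Series([1.0, 2.0, np.nan], name='x')
tm.assert_series_equal(left, right)

# A mismatch raises AssertionError; keyword arguments such as
# check_names=False relax individual parts of the comparison,
# here allowing the Series names to differ.
tm.assert_series_equal(left, right.rename('y'), check_names=False)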
Example #1
Source File: test_window.py From recruit with Apache License 2.0 | 6 votes |
def test_rolling_skew_edge_cases(self):

    all_nan = Series([np.NaN] * 5)

    # yields all NaN (0 variance)
    d = Series([1] * 5)
    x = d.rolling(window=5).skew()
    tm.assert_series_equal(all_nan, x)

    # yields all NaN (window too small)
    d = Series(np.random.randn(5))
    x = d.rolling(window=2).skew()
    tm.assert_series_equal(all_nan, x)

    # yields [NaN, NaN, NaN, 0.177994, 1.548824]
    d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401])
    expected = Series([np.NaN, np.NaN, np.NaN, 0.177994, 1.548824])
    x = d.rolling(window=4).skew()
    tm.assert_series_equal(expected, x)
Example #2
Source File: test_window.py From recruit with Apache License 2.0 | 6 votes |
def test_rolling_cov_offset(self):
    # GH16058
    idx = pd.date_range('2017-01-01', periods=24, freq='1h')
    ss = Series(np.arange(len(idx)), index=idx)

    result = ss.rolling('2h').cov()
    expected = Series([np.nan] + [0.5] * (len(idx) - 1), index=idx)
    tm.assert_series_equal(result, expected)

    expected2 = ss.rolling(2, min_periods=1).cov()
    tm.assert_series_equal(result, expected2)

    result = ss.rolling('3h').cov()
    expected = Series([np.nan, 0.5] + [1.0] * (len(idx) - 2), index=idx)
    tm.assert_series_equal(result, expected)

    expected2 = ss.rolling(3, min_periods=1).cov()
    tm.assert_series_equal(result, expected2)
Example #3
Source File: test_window.py From recruit with Apache License 2.0 | 6 votes |
def test_cmov_window_special_linear_range(self, win_types_special):
    # GH 8238
    kwds = {
        'kaiser': {'beta': 1.},
        'gaussian': {'std': 1.},
        'general_gaussian': {'power': 2., 'width': 2.},
        'slepian': {'width': 0.5}}

    vals = np.array(range(10), dtype=np.float)
    xp = vals.copy()
    xp[:2] = np.nan
    xp[-2:] = np.nan
    xp = Series(xp)

    rs = Series(vals).rolling(
        5, win_type=win_types_special, center=True).mean(
        **kwds[win_types_special])
    tm.assert_series_equal(xp, rs)
Example #4
Source File: test_window.py From recruit with Apache License 2.0 | 6 votes |
def test_groupby_monotonic(self):

    # GH 15130
    # we don't need to validate monotonicity when grouping

    data = [
        ['David', '1/1/2015', 100], ['David', '1/5/2015', 500],
        ['David', '5/30/2015', 50], ['David', '7/25/2015', 50],
        ['Ryan', '1/4/2014', 100], ['Ryan', '1/19/2015', 500],
        ['Ryan', '3/31/2016', 50], ['Joe', '7/1/2015', 100],
        ['Joe', '9/9/2015', 500], ['Joe', '10/15/2015', 50]]

    df = DataFrame(data=data, columns=['name', 'date', 'amount'])
    df['date'] = pd.to_datetime(df['date'])

    expected = df.set_index('date').groupby('name').apply(
        lambda x: x.rolling('180D')['amount'].sum())
    result = df.groupby('name').rolling('180D', on='date')['amount'].sum()
    tm.assert_series_equal(result, expected)
Example #5
Source File: test_window.py From recruit with Apache License 2.0 | 6 votes |
def test_expanding_corr_cov(self):
    g = self.frame.groupby('A')
    r = g.expanding()

    for f in ['corr', 'cov']:
        result = getattr(r, f)(self.frame)

        def func(x):
            return getattr(x.expanding(), f)(self.frame)

        expected = g.apply(func)
        tm.assert_frame_equal(result, expected)

        result = getattr(r.B, f)(pairwise=True)

        def func(x):
            return getattr(x.B.expanding(), f)(pairwise=True)

        expected = g.apply(func)
        tm.assert_series_equal(result, expected)
Example #6
Source File: test_series.py From recruit with Apache License 2.0 | 6 votes |
def test_to_dense_fill_value(self):
    s = pd.Series([1, np.nan, np.nan, 3, np.nan])
    res = SparseSeries(s).to_dense()
    tm.assert_series_equal(res, s)

    res = SparseSeries(s, fill_value=0).to_dense()
    tm.assert_series_equal(res, s)

    s = pd.Series([1, np.nan, 0, 3, 0])
    res = SparseSeries(s, fill_value=0).to_dense()
    tm.assert_series_equal(res, s)

    res = SparseSeries(s, fill_value=0).to_dense()
    tm.assert_series_equal(res, s)

    s = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan])
    res = SparseSeries(s).to_dense()
    tm.assert_series_equal(res, s)

    s = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan])
    res = SparseSeries(s, fill_value=0).to_dense()
    tm.assert_series_equal(res, s)
Example #7
Source File: test_window.py From recruit with Apache License 2.0 | 6 votes |
def test_rolling_corr_cov(self):
    g = self.frame.groupby('A')
    r = g.rolling(window=4)

    for f in ['corr', 'cov']:
        result = getattr(r, f)(self.frame)

        def func(x):
            return getattr(x.rolling(4), f)(self.frame)

        expected = g.apply(func)
        tm.assert_frame_equal(result, expected)

        result = getattr(r.B, f)(pairwise=True)

        def func(x):
            return getattr(x.B.rolling(4), f)(pairwise=True)

        expected = g.apply(func)
        tm.assert_series_equal(result, expected)
Example #8
Source File: test_window.py From recruit with Apache License 2.0 | 6 votes |
def test_getitem(self):
    g = self.frame.groupby('A')
    g_mutated = self.frame.groupby('A', mutated=True)

    expected = g_mutated.B.apply(lambda x: x.rolling(2).mean())

    result = g.rolling(2).mean().B
    tm.assert_series_equal(result, expected)

    result = g.rolling(2).B.mean()
    tm.assert_series_equal(result, expected)

    result = g.B.rolling(2).mean()
    tm.assert_series_equal(result, expected)

    result = self.frame.B.groupby(self.frame.A).rolling(2).mean()
    tm.assert_series_equal(result, expected)
Example #9
Source File: test_window.py From recruit with Apache License 2.0 | 6 votes |
def test_rolling_median_resample(self):

    indices = [datetime(1975, 1, i) for i in range(1, 6)]
    # So that we can have 3 datapoints on last day (4, 10, and 20)
    indices.append(datetime(1975, 1, 5, 1))
    indices.append(datetime(1975, 1, 5, 2))
    series = Series(list(range(0, 5)) + [10, 20], index=indices)
    # Use floats instead of ints as values
    series = series.map(lambda x: float(x))
    # Sort chronologically
    series = series.sort_index()

    # Default how should be median
    expected = Series([0.0, 1.0, 2.0, 3.0, 10],
                      index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
    x = series.resample('D').median().rolling(window=1).median()
    tm.assert_series_equal(expected, x)
Example #10
Source File: test_window.py From recruit with Apache License 2.0 | 6 votes |
def test_rolling_min_resample(self):

    indices = [datetime(1975, 1, i) for i in range(1, 6)]
    # So that we can have 3 datapoints on last day (4, 10, and 20)
    indices.append(datetime(1975, 1, 5, 1))
    indices.append(datetime(1975, 1, 5, 2))
    series = Series(list(range(0, 5)) + [10, 20], index=indices)
    # Use floats instead of ints as values
    series = series.map(lambda x: float(x))
    # Sort chronologically
    series = series.sort_index()

    # Default how should be min
    expected = Series([0.0, 1.0, 2.0, 3.0, 4.0],
                      index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
    r = series.resample('D').min().rolling(window=1)
    tm.assert_series_equal(expected, r.min())
Example #11
Source File: test_series.py From recruit with Apache License 2.0 | 6 votes |
def test_dropna(self):
    sp = SparseSeries([0, 0, 0, nan, nan, 5, 6], fill_value=0)

    sp_valid = sp.dropna()

    expected = sp.to_dense().dropna()
    expected = expected[expected != 0]
    exp_arr = pd.SparseArray(expected.values, fill_value=0, kind='block')
    tm.assert_sp_array_equal(sp_valid.values, exp_arr)
    tm.assert_index_equal(sp_valid.index, expected.index)
    assert len(sp_valid.sp_values) == 2

    result = self.bseries.dropna()
    expected = self.bseries.to_dense().dropna()
    assert not isinstance(result, SparseSeries)
    tm.assert_series_equal(result, expected)
Example #12
Source File: test_window.py From recruit with Apache License 2.0 | 6 votes |
def test_expanding_cov_diff_index(self):
    # GH 7512
    s1 = Series([1, 2, 3], index=[0, 1, 2])
    s2 = Series([1, 3], index=[0, 2])
    result = s1.expanding().cov(s2)
    expected = Series([None, None, 2.0])
    tm.assert_series_equal(result, expected)

    s2a = Series([1, None, 3], index=[0, 1, 2])
    result = s1.expanding().cov(s2a)
    tm.assert_series_equal(result, expected)

    s1 = Series([7, 8, 10], index=[0, 1, 3])
    s2 = Series([7, 9, 10], index=[0, 2, 3])
    result = s1.expanding().cov(s2)
    expected = Series([None, None, None, 4.5])
    tm.assert_series_equal(result, expected)
Example #13
Source File: test_window.py From recruit with Apache License 2.0 | 6 votes |
def test_rolling_max_gh6297(self):
    """Replicate result expected in GH #6297"""
    indices = [datetime(1975, 1, i) for i in range(1, 6)]
    # So that we can have 2 datapoints on one of the days
    indices.append(datetime(1975, 1, 3, 6, 0))
    series = Series(range(1, 7), index=indices)
    # Use floats instead of ints as values
    series = series.map(lambda x: float(x))
    # Sort chronologically
    series = series.sort_index()

    expected = Series([1.0, 2.0, 6.0, 4.0, 5.0],
                      index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
    x = series.resample('D').max().rolling(window=1).max()
    tm.assert_series_equal(expected, x)
Example #14
Source File: test_series.py From recruit with Apache License 2.0 | 6 votes |
def test_value_counts_dup(self):
    vals = [1, 2, nan, 0, nan, 1, 2, nan, nan, 1, 2, 0, 1, 1]

    # numeric op may cause sp_values to include the same value as
    # fill_value
    dense = pd.Series(vals, name='xx') / 0.
    sparse = pd.SparseSeries(vals, name='xx') / 0.
    tm.assert_series_equal(sparse.value_counts(),
                           dense.value_counts())
    tm.assert_series_equal(sparse.value_counts(dropna=False),
                           dense.value_counts(dropna=False))

    vals = [1, 2, 0, 0, 0, 1, 2, 0, 0, 1, 2, 0, 1, 1]

    dense = pd.Series(vals, name='xx') * 0.
    sparse = pd.SparseSeries(vals, name='xx') * 0.
    tm.assert_series_equal(sparse.value_counts(),
                           dense.value_counts())
    tm.assert_series_equal(sparse.value_counts(dropna=False),
                           dense.value_counts(dropna=False))
Example #15
Source File: test_decimal.py From recruit with Apache License 2.0 | 6 votes |
def assert_frame_equal(self, left, right, *args, **kwargs):
    # TODO(EA): select_dtypes
    tm.assert_index_equal(
        left.columns, right.columns,
        exact=kwargs.get('check_column_type', 'equiv'),
        check_names=kwargs.get('check_names', True),
        check_exact=kwargs.get('check_exact', False),
        check_categorical=kwargs.get('check_categorical', True),
        obj='{obj}.columns'.format(obj=kwargs.get('obj', 'DataFrame')))

    decimals = (left.dtypes == 'decimal').index

    for col in decimals:
        self.assert_series_equal(left[col], right[col], *args, **kwargs)

    left = left.drop(columns=decimals)
    right = right.drop(columns=decimals)
    tm.assert_frame_equal(left, right, *args, **kwargs)
Example #16
Source File: test_decimal.py From recruit with Apache License 2.0 | 6 votes |
def assert_series_equal(self, left, right, *args, **kwargs):
    def convert(x):
        # need to convert array([Decimal(NaN)], dtype='object') to np.NaN
        # because Series[object].isnan doesn't recognize decimal(NaN) as
        # NA.
        try:
            return math.isnan(x)
        except TypeError:
            return False

    if left.dtype == 'object':
        left_na = left.apply(convert)
    else:
        left_na = left.isna()
    if right.dtype == 'object':
        right_na = right.apply(convert)
    else:
        right_na = right.isna()

    tm.assert_series_equal(left_na, right_na)
    return tm.assert_series_equal(left[~left_na],
                                  right[~right_na],
                                  *args, **kwargs)
Example #17
Source File: test_json.py From recruit with Apache License 2.0 | 6 votes |
def test_custom_asserts(self):
    # This would always trigger the KeyError from trying to put
    # an array of equal-length UserDicts inside an ndarray.
    data = JSONArray([collections.UserDict({'a': 1}),
                      collections.UserDict({'b': 2}),
                      collections.UserDict({'c': 3})])
    a = pd.Series(data)
    self.assert_series_equal(a, a)
    self.assert_frame_equal(a.to_frame(), a.to_frame())

    b = pd.Series(data.take([0, 0, 1]))
    with pytest.raises(AssertionError):
        self.assert_series_equal(a, b)

    with pytest.raises(AssertionError):
        self.assert_frame_equal(a.to_frame(), b.to_frame())
Example #18
Source File: test_series.py From recruit with Apache License 2.0 | 6 votes |
def test_value_counts_int(self):
    vals = [1, 2, 0, 1, 2, 1, 2, 0, 1, 1]
    dense = pd.Series(vals, name='xx')

    # fill_value is np.nan, but should not be included in the result
    sparse = pd.SparseSeries(vals, name='xx')
    tm.assert_series_equal(sparse.value_counts(),
                           dense.value_counts())
    tm.assert_series_equal(sparse.value_counts(dropna=False),
                           dense.value_counts(dropna=False))

    sparse = pd.SparseSeries(vals, name='xx', fill_value=0)
    tm.assert_series_equal(sparse.value_counts(),
                           dense.value_counts())
    tm.assert_series_equal(sparse.value_counts(dropna=False),
                           dense.value_counts(dropna=False))
Example #19
Source File: test_sparse.py From recruit with Apache License 2.0 | 6 votes |
def _compare_other(self, s, data, op_name, other):
    op = self.get_op_from_name(op_name)

    # array
    result = pd.Series(op(data, other))
    # hard to test the fill value, since we don't know what expected
    # is in general.
    # Rely on tests in `tests/sparse` to validate that.
    assert isinstance(result.dtype, SparseDtype)
    assert result.dtype.subtype == np.dtype('bool')

    with np.errstate(all='ignore'):
        expected = pd.Series(
            pd.SparseArray(op(np.asarray(data), np.asarray(other)),
                           fill_value=result.values.fill_value)
        )

    tm.assert_series_equal(result, expected)

    # series
    s = pd.Series(data)
    result = op(s, other)
    tm.assert_series_equal(result, expected)
Example #20
Source File: test_sparse.py From recruit with Apache License 2.0 | 6 votes |
def test_where_series(self, data, na_value):
    assert data[0] != data[1]
    cls = type(data)
    a, b = data[:2]

    ser = pd.Series(cls._from_sequence([a, a, b, b], dtype=data.dtype))

    cond = np.array([True, True, False, False])
    result = ser.where(cond)

    new_dtype = SparseDtype('float', 0.0)
    expected = pd.Series(cls._from_sequence([a, a, na_value, na_value],
                                            dtype=new_dtype))
    self.assert_series_equal(result, expected)

    other = cls._from_sequence([a, b, a, b], dtype=data.dtype)
    cond = np.array([True, False, True, True])
    result = ser.where(cond, other)
    expected = pd.Series(cls._from_sequence([a, b, b, b],
                                            dtype=data.dtype))
    self.assert_series_equal(result, expected)
Example #21
Source File: test_sparse.py From recruit with Apache License 2.0 | 6 votes |
def test_combine_le(self, data_repeated):
    # We return a Series[SparseArray].__le__ returns a
    # Series[Sparse[bool]]
    # rather than Series[bool]
    orig_data1, orig_data2 = data_repeated(2)
    s1 = pd.Series(orig_data1)
    s2 = pd.Series(orig_data2)
    result = s1.combine(s2, lambda x1, x2: x1 <= x2)
    expected = pd.Series(pd.SparseArray([
        a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))
    ], fill_value=False))
    self.assert_series_equal(result, expected)

    val = s1.iloc[0]
    result = s1.combine(val, lambda x1, x2: x1 <= x2)
    expected = pd.Series(pd.SparseArray([
        a <= val for a in list(orig_data1)
    ], fill_value=False))
    self.assert_series_equal(result, expected)
Example #22
Source File: test_sparse.py From recruit with Apache License 2.0 | 6 votes |
def test_isna(self, data_missing):
    expected_dtype = SparseDtype(bool,
                                 pd.isna(data_missing.dtype.fill_value))
    expected = SparseArray([True, False], dtype=expected_dtype)

    result = pd.isna(data_missing)
    self.assert_equal(result, expected)

    result = pd.Series(data_missing).isna()
    expected = pd.Series(expected)
    self.assert_series_equal(result, expected)

    # GH 21189
    result = pd.Series(data_missing).drop([0, 1]).isna()
    expected = pd.Series([], dtype=expected_dtype)
    self.assert_series_equal(result, expected)
Example #23
Source File: test_series.py From recruit with Apache License 2.0 | 6 votes |
def test_isna(self):
    # GH 8276
    s = pd.SparseSeries([np.nan, np.nan, 1, 2, np.nan], name='xxx')

    res = s.isna()
    exp = pd.SparseSeries([True, True, False, False, True], name='xxx',
                          fill_value=True)
    tm.assert_sp_series_equal(res, exp)

    # if fill_value is not nan, True can be included in sp_values
    s = pd.SparseSeries([np.nan, 0., 1., 2., 0.], name='xxx',
                        fill_value=0.)
    res = s.isna()
    assert isinstance(res, pd.SparseSeries)
    exp = pd.Series([True, False, False, False, False], name='xxx')
    tm.assert_series_equal(res.to_dense(), exp)
Example #24
Source File: test_window.py From recruit with Apache License 2.0 | 6 votes |
def test_rolling_kurt_edge_cases(self):

    all_nan = Series([np.NaN] * 5)

    # yields all NaN (0 variance)
    d = Series([1] * 5)
    x = d.rolling(window=5).kurt()
    tm.assert_series_equal(all_nan, x)

    # yields all NaN (window too small)
    d = Series(np.random.randn(5))
    x = d.rolling(window=3).kurt()
    tm.assert_series_equal(all_nan, x)

    # yields [NaN, NaN, NaN, 1.224307, 2.671499]
    d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401])
    expected = Series([np.NaN, np.NaN, np.NaN, 1.224307, 2.671499])
    x = d.rolling(window=4).kurt()
    tm.assert_series_equal(expected, x)
Example #25
Source File: test_window.py From recruit with Apache License 2.0 | 6 votes |
def test_rolling_functions_window_non_shrinkage(self, f):
    # GH 7764
    s = Series(range(4))
    s_expected = Series(np.nan, index=s.index)
    df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], columns=['A', 'B'])
    df_expected = DataFrame(np.nan, index=df.index, columns=df.columns)

    try:
        s_result = f(s)
        tm.assert_series_equal(s_result, s_expected)

        df_result = f(df)
        tm.assert_frame_equal(df_result, df_expected)
    except (ImportError):
        # scipy needed for rolling_window
        pytest.skip("scipy not available")
Example #26
Source File: test_series.py From recruit with Apache License 2.0 | 6 votes |
def test_numpy_cumsum(self):
    result = np.cumsum(self.bseries)
    expected = SparseSeries(self.bseries.to_dense().cumsum())
    tm.assert_sp_series_equal(result, expected)

    result = np.cumsum(self.zbseries)
    expected = self.zbseries.to_dense().cumsum().to_sparse()
    tm.assert_series_equal(result, expected)

    msg = "the 'dtype' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        np.cumsum(self.bseries, dtype=np.int64)

    msg = "the 'out' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        np.cumsum(self.zbseries, out=result)
Example #27
Source File: test_window.py From recruit with Apache License 2.0 | 5 votes |
def test_rolling_max_resample(self):

    indices = [datetime(1975, 1, i) for i in range(1, 6)]
    # So that we can have 3 datapoints on last day (4, 10, and 20)
    indices.append(datetime(1975, 1, 5, 1))
    indices.append(datetime(1975, 1, 5, 2))
    series = Series(list(range(0, 5)) + [10, 20], index=indices)
    # Use floats instead of ints as values
    series = series.map(lambda x: float(x))
    # Sort chronologically
    series = series.sort_index()

    # Default how should be max
    expected = Series([0.0, 1.0, 2.0, 3.0, 20.0],
                      index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
    x = series.resample('D').max().rolling(window=1).max()
    tm.assert_series_equal(expected, x)

    # Now specify median (10.0)
    expected = Series([0.0, 1.0, 2.0, 3.0, 10.0],
                      index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
    x = series.resample('D').median().rolling(window=1).max()
    tm.assert_series_equal(expected, x)

    # Now specify mean (4+10+20)/3
    v = (4.0 + 10.0 + 20.0) / 3.0
    expected = Series([0.0, 1.0, 2.0, 3.0, v],
                      index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
    x = series.resample('D').mean().rolling(window=1).max()
    tm.assert_series_equal(expected, x)
Example #28
Source File: test_window.py From recruit with Apache License 2.0 | 5 votes |
def test_frame_on2(self):

    # using multiple aggregation columns
    df = DataFrame({'A': [0, 1, 2, 3, 4],
                    'B': [0, 1, 2, np.nan, 4],
                    'C': Index([Timestamp('20130101 09:00:00'),
                                Timestamp('20130101 09:00:02'),
                                Timestamp('20130101 09:00:03'),
                                Timestamp('20130101 09:00:05'),
                                Timestamp('20130101 09:00:06')])},
                   columns=['A', 'C', 'B'])

    expected1 = DataFrame({'A': [0., 1, 3, 3, 7],
                           'B': [0, 1, 3, np.nan, 4],
                           'C': df['C']},
                          columns=['A', 'C', 'B'])

    result = df.rolling('2s', on='C').sum()
    expected = expected1
    tm.assert_frame_equal(result, expected)

    expected = Series([0, 1, 3, np.nan, 4], name='B')
    result = df.rolling('2s', on='C').B.sum()
    tm.assert_series_equal(result, expected)

    expected = expected1[['A', 'B', 'C']]
    result = df.rolling('2s', on='C')[['A', 'B', 'C']].sum()
    tm.assert_frame_equal(result, expected)
Example #29
Source File: test_series.py From recruit with Apache License 2.0 | 5 votes |
def test_astype_all(self):
    orig = pd.Series(np.array([1, 2, 3]))
    s = SparseSeries(orig)

    types = [np.float64, np.float32, np.int64,
             np.int32, np.int16, np.int8]
    for typ in types:
        dtype = SparseDtype(typ)
        res = s.astype(dtype)
        assert res.dtype == dtype
        tm.assert_series_equal(res.to_dense(), orig.astype(typ))
Example #30
Source File: test_series.py From recruit with Apache License 2.0 | 5 votes |
def test_reindex(self):
    def _compare_with_series(sps, new_index):
        spsre = sps.reindex(new_index)

        series = sps.to_dense()
        seriesre = series.reindex(new_index)
        seriesre = seriesre.to_sparse(fill_value=sps.fill_value)

        tm.assert_sp_series_equal(spsre, seriesre)
        tm.assert_series_equal(spsre.to_dense(), seriesre.to_dense())

    _compare_with_series(self.bseries, self.bseries.index[::2])
    _compare_with_series(self.bseries, list(self.bseries.index[::2]))
    _compare_with_series(self.bseries, self.bseries.index[:10])
    _compare_with_series(self.bseries, self.bseries.index[5:])

    _compare_with_series(self.zbseries, self.zbseries.index[::2])
    _compare_with_series(self.zbseries, self.zbseries.index[:10])
    _compare_with_series(self.zbseries, self.zbseries.index[5:])

    # special cases
    same_index = self.bseries.reindex(self.bseries.index)
    tm.assert_sp_series_equal(self.bseries, same_index)
    assert same_index is not self.bseries

    # corner cases
    sp = SparseSeries([], index=[])
    # TODO: sp_zero is not used anywhere...remove?
    sp_zero = SparseSeries([], index=[], fill_value=0)  # noqa
    _compare_with_series(sp, np.arange(10))

    # with copy=False
    reindexed = self.bseries.reindex(self.bseries.index, copy=True)
    reindexed.sp_values[:] = 1.
    assert (self.bseries.sp_values != 1.).all()

    reindexed = self.bseries.reindex(self.bseries.index, copy=False)
    reindexed.sp_values[:] = 1.
    tm.assert_numpy_array_equal(self.bseries.sp_values,
                                np.repeat(1., 10))