Python pandas.testing.assert_series_equal() Examples
The following are 30 code examples of pandas.testing.assert_series_equal(), collected from open source projects.
You can go to the original project or source file by following the link above each example.
You may also want to check out all available functions and classes of the pandas.testing module.
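Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of how assert_series_equal behaves: it passes silently when two Series match in values, index, dtype, and name, raises an AssertionError describing the first difference otherwise, and accepts keyword arguments such as check_dtype and check_names to relax individual checks.

import pandas as pd
import pandas.testing as pdt

# Identical values, index, dtype, and name: the assertion passes silently.
left = pd.Series([1.0, 2.0, 3.0], name="x")
right = pd.Series([1.0, 2.0, 3.0], name="x")
pdt.assert_series_equal(left, right)

# A mismatch (here: float64 vs. int64 dtype) raises an AssertionError.
try:
    pdt.assert_series_equal(left, right.astype(int))
except AssertionError as err:
    print(err)

# Keyword arguments relax individual checks, e.g. ignore differing names.
pdt.assert_series_equal(left.rename(None), right, check_names=False)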
Example #1
Source File: test_base.py From fletcher with MIT License

def test_pandas_from_arrow():
    arr = pa.array(["a", "b", "c"], pa.string())

    expected_series_woutname = pd.Series(fr.FletcherChunkedArray(arr))
    pdt.assert_series_equal(expected_series_woutname, fr.pandas_from_arrow(arr))
    expected_series_woutname = pd.Series(fr.FletcherContinuousArray(arr))
    pdt.assert_series_equal(
        expected_series_woutname, fr.pandas_from_arrow(arr, continuous=True)
    )

    rb = pa.RecordBatch.from_arrays([arr], ["column"])
    expected_df = pd.DataFrame({"column": fr.FletcherChunkedArray(arr)})
    table = pa.Table.from_arrays([arr], ["column"])
    pdt.assert_frame_equal(expected_df, fr.pandas_from_arrow(rb))
    pdt.assert_frame_equal(expected_df, fr.pandas_from_arrow(table))

    expected_df = pd.DataFrame({"column": fr.FletcherContinuousArray(arr)})
    table = pa.Table.from_arrays([arr], ["column"])
    pdt.assert_frame_equal(expected_df, fr.pandas_from_arrow(rb, continuous=True))
    pdt.assert_frame_equal(expected_df, fr.pandas_from_arrow(table, continuous=True))
Example #2
Source File: test_indicator_trend.py From pandas-ta with MIT License

def test_psar(self):
    result = pandas_ta.psar(self.high, self.low)
    self.assertIsInstance(result, DataFrame)
    self.assertEqual(result.name, 'PSAR_0.02_0.2')

    # Combine Long and Short SAR's into one SAR value
    psar = result[result.columns[:2]].fillna(0)
    psar = psar[psar.columns[0]] + psar[psar.columns[1]]
    psar.name = result.name

    try:
        expected = tal.SAR(self.high, self.low)
        pdt.assert_series_equal(psar, expected)
    except AssertionError as ae:
        try:
            psar_corr = pandas_ta.utils.df_error_analysis(psar, expected, col=CORRELATION)
            self.assertGreater(psar_corr, CORRELATION_THRESHOLD)
        except Exception as ex:
            error_analysis(psar, CORRELATION, ex)
Example #3
Source File: test_preprocessing.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

def test_clean_parameters(self):

    values = pd.Series([
        u'Mary-ann', u'Bob :)', u'Angel', u'Bob (alias Billy)', u'Mary ann',
        u'John', np.nan
    ])

    expected = pd.Series([
        u'mary ann', u'bob', u'angel', u'bob', u'mary ann', u'john', np.nan
    ])

    clean_series = clean(
        values,
        lowercase=True,
        replace_by_none=r'[^ \-\_A-Za-z0-9]+',
        replace_by_whitespace=r'[\-\_]',
        remove_brackets=True)

    # Check if series are identical.
    pdt.assert_series_equal(clean_series, expected)
Example #4
Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

def test_numeric(self):

    A = DataFrame({'col': [1, 1, 1, nan, 0]})
    B = DataFrame({'col': [1, 2, 3, nan, nan]})
    ix = MultiIndex.from_arrays([A.index.values, B.index.values])

    comp = recordlinkage.Compare()
    comp.numeric('col', 'col', 'step', offset=2)
    comp.numeric('col', 'col', method='step', offset=2)
    comp.numeric('col', 'col', 'step', 2)
    result = comp.compute(ix, A, B)

    # Basics
    expected = Series([1.0, 1.0, 1.0, 0.0, 0.0], index=ix, name=0)
    pdt.assert_series_equal(result[0], expected)

    # Basics
    expected = Series([1.0, 1.0, 1.0, 0.0, 0.0], index=ix, name=1)
    pdt.assert_series_equal(result[1], expected)

    # Basics
    expected = Series([1.0, 1.0, 1.0, 0.0, 0.0], index=ix, name=2)
    pdt.assert_series_equal(result[2], expected)
Example #5
Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

def test_dates(self):

    A = DataFrame({
        'col': to_datetime(
            ['2005/11/23', nan, '2004/11/23', '2010/01/10', '2010/10/30'])
    })
    B = DataFrame({
        'col': to_datetime([
            '2005/11/23', '2010/12/31', '2005/11/23', '2010/10/01',
            '2010/9/30'
        ])
    })
    ix = MultiIndex.from_arrays([A.index.values, B.index.values])

    comp = recordlinkage.Compare()
    comp.date('col', 'col')
    result = comp.compute(ix, A, B)[0]

    expected = Series([1, 0, 0, 0.5, 0.5], index=ix, name=0)

    pdt.assert_series_equal(result, expected)
Example #6
Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

def test_geo(self):

    # Utrecht, Amsterdam, Rotterdam (Cities in The Netherlands)
    A = DataFrame({
        'lat': [52.0842455, 52.3747388, 51.9280573],
        'lng': [5.0124516, 4.7585305, 4.4203581]
    })
    B = DataFrame({
        'lat': [52.3747388, 51.9280573, 52.0842455],
        'lng': [4.7585305, 4.4203581, 5.0124516]
    })
    ix = MultiIndex.from_arrays([A.index.values, B.index.values])

    comp = recordlinkage.Compare()
    comp.geo(
        'lat', 'lng', 'lat', 'lng', method='step', offset=50)  # 50 km range
    result = comp.compute(ix, A, B)

    # Missing values as default [36.639460, 54.765854, 44.092472]
    expected = Series([1.0, 0.0, 1.0], index=ix, name=0)
    pdt.assert_series_equal(result[0], expected)
Example #7
Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

def test_defaults(self):
    # default algorithm is levenshtein algorithm
    # test default values are identical to levenshtein

    A = DataFrame({
        'col': [u'str_abc', u'str_abc', u'str_abc', nan, u'hsdkf']
    })
    B = DataFrame({'col': [u'str_abc', u'str_abd', u'jaskdfsd', nan, nan]})
    ix = MultiIndex.from_arrays([A.index.values, B.index.values])

    comp = recordlinkage.Compare()
    comp.string('col', 'col', label='default')
    comp.string('col', 'col', method='levenshtein', label='with_args')
    result = comp.compute(ix, A, B)

    pdt.assert_series_equal(
        result['default'].rename(None),
        result['with_args'].rename(None)
    )
Example #8
Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

def test_variable_nan(self, missing_value):

    # data
    arrayA = np.random.random((100,))
    arrayA[90:] = np.nan
    arrayB = np.random.random((100,))

    # convert to pandas data
    A = DataFrame({'col': arrayA})
    B = DataFrame({'col': arrayB})
    ix = MultiIndex.from_arrays([A.index.values, B.index.values])

    # the part to test
    from recordlinkage.compare import Variable

    comp = recordlinkage.Compare()
    comp.add(Variable(left_on='col', missing_value=missing_value))
    features = comp.compute(ix, A, B)

    result = features[0].rename(None)
    expected = Series(arrayA, index=ix)
    expected.iloc[90:] = missing_value
    pdt.assert_series_equal(result, expected)
Example #9
Source File: test_comparison_plot_data_preparation.py From estimagic with BSD 3-Clause "New" or "Revised" License

def test_consolidate_parameter_attribute_custom_wildcards():
    tuples = [("a", 0), ("a", 1), ("b", 1), ("b", 2)]
    ind = pd.MultiIndex.from_tuples(tuples, names=["ind1", "ind2"])
    df = pd.DataFrame(index=ind[:3])
    df["attr"] = ["g1", None, "g3"]
    df["other"] = [1, 2, 3]
    df2 = pd.DataFrame(index=ind)
    df2["attr"] = ["g1", "g2", "g3", 0]
    df2["other2"] = [11, 22, 33, 44]
    info = {}
    compatible_input = [OPT_RES(df, info), OPT_RES(df2, info)]
    attribute = "attr"
    res = test_module._consolidate_parameter_attribute(
        results=compatible_input, attribute=attribute, wildcards=[0, None]
    )
    expected = pd.Series(["g1", "g2", "g3", np.nan], index=ind, name="attr")
    pdt.assert_series_equal(res, expected)
Example #10
Source File: test_comparison_plot_data_preparation.py From estimagic with BSD 3-Clause "New" or "Revised" License

def test_calculate_x_bounds_without_nan():
    params_data = pd.DataFrame()
    params_data["group"] = ["a", "a", "a"] + ["b", "b", "b"]
    params_data["value"] = [0, 1, 2] + [3, 4, 5]
    params_data["conf_int_lower"] = [-1, 0, -2] + [2, -5, 4]
    params_data["conf_int_upper"] = [1, 2, 3] + [3, 5, 10]
    padding = 0.0

    res_x_min, res_x_max = test_module._calculate_x_bounds(params_data, padding)

    ind = pd.Index(["a", "b"], name="group")
    expected_x_min = pd.Series([-2.0, -5.0], index=ind, name="x_min")
    expected_x_max = pd.Series([3.0, 10.0], index=ind, name="x_max")

    pdt.assert_series_equal(expected_x_min, res_x_min)
    pdt.assert_series_equal(expected_x_max, res_x_max)
Example #11
Source File: test_comparison_plot_data_preparation.py From estimagic with BSD 3-Clause "New" or "Revised" License

def test_calculate_x_bounds_with_nan():
    params_data = pd.DataFrame()
    params_data["group"] = ["a", "a", "a"] + ["b", "b", "b"]
    params_data["value"] = [0, 1, np.nan] + [3, np.nan, 5]
    params_data["conf_int_lower"] = np.nan
    params_data["conf_int_upper"] = np.nan
    padding = 0.0

    res_x_min, res_x_max = test_module._calculate_x_bounds(params_data, padding)

    ind = pd.Index(["a", "b"], name="group")
    expected_x_min = pd.Series([0.0, 3.0], index=ind, name="x_min")
    expected_x_max = pd.Series([1.0, 5.0], index=ind, name="x_max")

    pdt.assert_series_equal(expected_x_min, res_x_min)
    pdt.assert_series_equal(expected_x_max, res_x_max)
Example #12
Source File: test_comparison_plot_data_preparation.py From estimagic with BSD 3-Clause "New" or "Revised" License

def test_calculate_x_bounds_with_padding():
    params_data = pd.DataFrame()
    params_data["group"] = ["a", "a", "a"] + ["b", "b", "b"]
    params_data["value"] = [0, 1, np.nan] + [3, np.nan, 5]
    params_data["conf_int_lower"] = np.nan
    params_data["conf_int_upper"] = np.nan
    padding = 0.1

    res_x_min, res_x_max = test_module._calculate_x_bounds(params_data, padding)

    ind = pd.Index(["a", "b"], name="group")
    expected_x_min = pd.Series([-0.1, 2.8], index=ind, name="x_min")
    expected_x_max = pd.Series([1.1, 5.2], index=ind, name="x_max")

    pdt.assert_series_equal(expected_x_min, res_x_min)
    pdt.assert_series_equal(expected_x_max, res_x_max)


# replace_by_midpoint
# ====================
Example #13
Source File: test_comparison_plot_data_preparation.py From estimagic with BSD 3-Clause "New" or "Revised" License

def test_consolidate_parameter_attribute_standard_wildcards():
    tuples = [("a", 0), ("a", 1), ("b", 1), ("b", 2)]
    ind = pd.MultiIndex.from_tuples(tuples, names=["ind1", "ind2"])
    df = pd.DataFrame(index=ind[:3])
    df["attr"] = ["g1", "g2", "g3"]
    df["other"] = [1, 2, 3]
    df2 = pd.DataFrame(index=ind)
    df2["attr"] = ["g1", "g2", "g3", "g2"]
    df2["other2"] = [11, 22, 33, 44]
    info = {}
    compatible_input = [OPT_RES(df, info), OPT_RES(df2, info)]
    attribute = "attr"
    res = test_module._consolidate_parameter_attribute(
        results=compatible_input, attribute=attribute
    )
    expected = pd.Series(["g1", "g2", "g3", "g2"], index=ind, name="attr")
    pdt.assert_series_equal(res, expected)
Example #14
Source File: test_protocols.py From bionic with Apache License 2.0

def test_dataframe_with_categoricals_ignored(builder):
    df_value = pd.DataFrame()
    df_value["cat"] = pd.Categorical(
        ["red", "blue", "red"], categories=["blue", "red"], ordered=True
    )

    @builder
    @bn.protocol.frame(check_dtypes=False)
    def df():
        return df_value

    pdt.assert_series_equal(
        # Whether or not the deserialized column has the Categorical Dtype can
        # depend on the version of pyarrow being used, so we'll just convert
        # both columns to the same type here.
        builder.build().get("df")["cat"].astype(object),
        df_value["cat"].astype(object),
    )
Example #15
Source File: test_text.py From fletcher with MIT License

def test_text_zfill(data, fletcher_variant):
    if any("\x00" in x for x in data if x):
        # pytest.skip("pandas cannot handle \\x00 characters in tests")
        # Skip is not working properly with hypothesis
        return
    ser_pd = pd.Series(data, dtype=str)
    max_str_len = ser_pd.map(_optional_len).max()
    if pd.isna(max_str_len):
        max_str_len = 0
    arrow_data = pa.array(data, type=pa.string())
    if fletcher_variant == "chunked":
        fr_array = fr.FletcherChunkedArray(arrow_data)
    else:
        fr_array = fr.FletcherContinuousArray(arrow_data)
    ser_fr = pd.Series(fr_array)

    result_pd = ser_pd.str.zfill(max_str_len + 1)
    result_fr = ser_fr.fr_text.zfill(max_str_len + 1)
    result_fr = result_fr.astype(object)
    # Pandas returns np.nan for NA values in cat, keep this in line
    result_fr[result_fr.isna()] = np.nan
    tm.assert_series_equal(result_fr, result_pd)
Example #16
Source File: test_preprocessing.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

def test_clean(self):

    values = pd.Series([
        'Mary-ann', 'Bob :)', 'Angel', 'Bob (alias Billy)', 'Mary ann',
        'John', np.nan
    ])
    expected = pd.Series(
        ['mary ann', 'bob', 'angel', 'bob', 'mary ann', 'john', np.nan])

    clean_series = clean(values)

    # Check if series are identical.
    pdt.assert_series_equal(clean_series, expected)

    clean_series_nothing = clean(
        values,
        lowercase=False,
        replace_by_none=False,
        replace_by_whitespace=False,
        strip_accents=False,
        remove_brackets=False)

    # Check if nothing happened.
    pdt.assert_series_equal(clean_series_nothing, values)
Example #17
Source File: test_indicator_volume.py From pandas-ta with MIT License

def test_mfi(self):
    result = pandas_ta.mfi(self.high, self.low, self.close, self.volume_)
    self.assertIsInstance(result, Series)
    self.assertEqual(result.name, 'MFI_14')

    try:
        expected = tal.MFI(self.high, self.low, self.close, self.volume_)
        pdt.assert_series_equal(result, expected, check_names=False)
    except AssertionError as ae:
        try:
            corr = pandas_ta.utils.df_error_analysis(result, expected, col=CORRELATION)
            self.assertGreater(corr, CORRELATION_THRESHOLD)
        except Exception as ex:
            error_analysis(result, CORRELATION, ex)
Example #18
Source File: test_indicator_momentum.py From pandas-ta with MIT License

def test_uo(self):
    result = pandas_ta.uo(self.high, self.low, self.close)
    self.assertIsInstance(result, Series)
    self.assertEqual(result.name, 'UO_7_14_28')

    try:
        expected = tal.ULTOSC(self.high, self.low, self.close)
        pdt.assert_series_equal(result, expected, check_names=False)
    except AssertionError as ae:
        try:
            corr = pandas_ta.utils.df_error_analysis(result, expected, col=CORRELATION)
            self.assertGreater(corr, CORRELATION_THRESHOLD)
        except Exception as ex:
            error_analysis(result, CORRELATION, ex)
Example #19
Source File: test_indicator_momentum.py From pandas-ta with MIT License

def test_willr(self):
    result = pandas_ta.willr(self.high, self.low, self.close)
    self.assertIsInstance(result, Series)
    self.assertEqual(result.name, 'WILLR_14')

    try:
        expected = tal.WILLR(self.high, self.low, self.close)
        pdt.assert_series_equal(result, expected, check_names=False)
    except AssertionError as ae:
        try:
            corr = pandas_ta.utils.df_error_analysis(result, expected, col=CORRELATION)
            self.assertGreater(corr, CORRELATION_THRESHOLD)
        except Exception as ex:
            error_analysis(result, CORRELATION, ex)
Example #20
Source File: test_pandas_integration.py From fletcher with MIT License

def test_fillna_chunked(test_array_chunked):
    ser = pd.Series(fr.FletcherChunkedArray(test_array_chunked))
    ser = ser.fillna("filled")
    expected_list = TEST_LIST[:2] + ["filled"]
    chunks = []
    for _ in range(10):
        chunks.append(pa.array(expected_list))
    chunked_exp = pa.chunked_array(chunks)
    expected = pd.Series(fr.FletcherChunkedArray(chunked_exp))
    tm.assert_series_equal(ser, expected)
Example #21
Source File: test_pandas_integration.py From fletcher with MIT License

def test_argsort(array_chunked_nulls, kind):
    s = pd.Series(fr.FletcherChunkedArray(TEST_ARRAY))
    result = s.argsort(kind=kind)
    expected = s.astype(object).argsort(kind=kind)
    tm.assert_series_equal(result, expected)
Example #22
Source File: test_pandas_integration.py From fletcher with MIT License

def test_astype_object():
    s = pd.Series(fr.FletcherChunkedArray(TEST_ARRAY))
    expected = pd.Series(TEST_LIST)
    tm.assert_series_equal(s.astype(object), expected)
Example #23
Source File: test_text.py From fletcher with MIT License

def test_text_cat(data, fletcher_variant, fletcher_variant_2):
    if any("\x00" in x for x in data if x):
        # pytest.skip("pandas cannot handle \\x00 characters in tests")
        # Skip is not working properly with hypothesis
        return
    ser_pd = pd.Series(data, dtype=str)
    ser_fr = _fr_series_from_data(data, fletcher_variant)
    ser_fr_other = _fr_series_from_data(data, fletcher_variant_2)

    result_pd = ser_pd.str.cat(ser_pd)
    result_fr = ser_fr.fr_text.cat(ser_fr_other)
    result_fr = result_fr.astype(object)
    # Pandas returns np.nan for NA values in cat, keep this in line
    result_fr[result_fr.isna()] = np.nan
    tm.assert_series_equal(result_fr, result_pd)
Example #24
Source File: test_datetime_features.py From timeserio with MIT License

def test_get_fractional_day_from_series():
    series = pd.Series(pd.date_range(start='2000-01-01', freq='6H', periods=5))
    fractional_day = get_fractional_day_from_series(series)
    expected = pd.Series([0, 0.25, 0.5, 0.75, 0])
    pdt.assert_series_equal(fractional_day, expected)
Example #25
Source File: test_datetime_features.py From timeserio with MIT License

def test_get_fractional_year_from_series():
    series = pd.Series(
        pd.date_range(start='2000-01-01', freq='31D', periods=5)
    )
    fractional_year = get_fractional_year_from_series(series)
    expected = pd.Series([0, 1, 2, 3, 4]) * 31 / 365.
    pdt.assert_series_equal(fractional_year, expected)
Example #26
Source File: test_datetime_features.py From timeserio with MIT License

def test_get_is_holiday_from_series():
    series = pd.Series(pd.date_range(start='2000-01-01', freq='D', periods=5))
    is_holiday = get_is_holiday_from_series(series)
    expected = pd.Series([1, 1, 1, 1, 0])
    pdt.assert_series_equal(is_holiday, expected)
Example #27
Source File: test_datetime_features.py From timeserio with MIT License

def test_get_is_holiday_from_series_with_country(country, expected):
    dates = ["2020-01-01", "2020-01-02", "2020-08-03", "2020-08-31"]
    series = pd.to_datetime(pd.Series(dates))
    is_holiday = get_is_holiday_from_series(series, country=country)
    pdt.assert_series_equal(is_holiday, pd.Series(expected))
Example #28
Source File: test_datetime_features.py From timeserio with MIT License

def test_truncate_series(series_data, truncation_period, expected_data):
    out = truncate_series(pd.Series(series_data), truncation_period)
    expected = pd.Series(expected_data)
    pdt.assert_series_equal(out, expected)
Example #29
Source File: test_indicator_volume.py From pandas-ta with MIT License

def test_ad(self):
    result = pandas_ta.ad(self.high, self.low, self.close, self.volume_)
    self.assertIsInstance(result, Series)
    self.assertEqual(result.name, 'AD')

    try:
        expected = tal.AD(self.high, self.low, self.close, self.volume_)
        pdt.assert_series_equal(result, expected, check_names=False)
    except AssertionError as ae:
        try:
            corr = pandas_ta.utils.df_error_analysis(result, expected, col=CORRELATION)
            self.assertGreater(corr, CORRELATION_THRESHOLD)
        except Exception as ex:
            error_analysis(result, CORRELATION, ex)
Example #30
Source File: test_datetime_features.py From timeserio with MIT License

def test_get_fractional_hour_from_series():
    series = pd.Series(
        pd.date_range(start='2000-01-01', freq='0.5H', periods=48)
    )
    fractionalhour = get_fractional_hour_from_series(series)
    expected = pd.Series(np.linspace(0, 23.5, 48))
    pdt.assert_series_equal(fractionalhour, expected)