Python pandas.testing.assert_series_equal() Examples

The following are 30 code examples of pandas.testing.assert_series_equal(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.testing, or try the search function.
Example #1
Source File: test_base.py    From fletcher with MIT License 6 votes vote down vote up
def test_pandas_from_arrow():
    """Check ``fr.pandas_from_arrow`` for arrays, record batches and tables.

    Each input is converted once with the default settings and once with
    ``continuous=True``; the results are compared with pandas objects built
    directly from ``FletcherChunkedArray`` / ``FletcherContinuousArray``.
    """
    arr = pa.array(["a", "b", "c"], pa.string())

    # A bare arrow array is expected to come back as an unnamed Series.
    chunked_series = pd.Series(fr.FletcherChunkedArray(arr))
    pdt.assert_series_equal(chunked_series, fr.pandas_from_arrow(arr))

    continuous_series = pd.Series(fr.FletcherContinuousArray(arr))
    pdt.assert_series_equal(
        continuous_series, fr.pandas_from_arrow(arr, continuous=True)
    )

    # Record batches and tables are expected to come back as DataFrames.
    # The same two arrow containers are reused for both conversion modes
    # (the original rebuilt an identical table a second time).
    rb = pa.RecordBatch.from_arrays([arr], ["column"])
    table = pa.Table.from_arrays([arr], ["column"])

    chunked_df = pd.DataFrame({"column": fr.FletcherChunkedArray(arr)})
    pdt.assert_frame_equal(chunked_df, fr.pandas_from_arrow(rb))
    pdt.assert_frame_equal(chunked_df, fr.pandas_from_arrow(table))

    continuous_df = pd.DataFrame({"column": fr.FletcherContinuousArray(arr)})
    pdt.assert_frame_equal(continuous_df, fr.pandas_from_arrow(rb, continuous=True))
    pdt.assert_frame_equal(
        continuous_df, fr.pandas_from_arrow(table, continuous=True)
    )
Example #2
Source File: test_indicator_trend.py    From pandas-ta with MIT License 6 votes vote down vote up
def test_psar(self):
        # Compare pandas_ta.psar with TA-Lib's SAR. If the two series are not
        # equal, fall back to checking their correlation against a threshold.
        result = pandas_ta.psar(self.high, self.low)
        self.assertIsInstance(result, DataFrame)
        self.assertEqual(result.name, 'PSAR_0.02_0.2')

        # Combine the long and short SAR columns (the first two columns) into
        # one series; NaNs are replaced by 0 before the columns are added.
        sar_sides = result[result.columns[:2]].fillna(0)
        combined = sar_sides[sar_sides.columns[0]] + sar_sides[sar_sides.columns[1]]
        combined.name = result.name

        try:
            expected = tal.SAR(self.high, self.low)
            pdt.assert_series_equal(combined, expected)
        except AssertionError:
            try:
                correlation = pandas_ta.utils.df_error_analysis(
                    combined, expected, col=CORRELATION
                )
                self.assertGreater(correlation, CORRELATION_THRESHOLD)
            except Exception as ex:
                # Any failure of the fallback is passed on to error_analysis.
                error_analysis(combined, CORRELATION, ex)
Example #3
Source File: test_preprocessing.py    From recordlinkage with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_clean_parameters(self):
        """Check ``clean`` when every cleaning option is passed explicitly."""
        raw_names = pd.Series([
            u'Mary-ann', u'Bob :)', u'Angel', u'Bob (alias Billy)',
            u'Mary  ann', u'John', np.nan
        ])
        cleaned_names = pd.Series([
            u'mary ann', u'bob', u'angel', u'bob', u'mary ann', u'john', np.nan
        ])

        # Options are collected first so the call itself stays short.
        options = dict(
            lowercase=True,
            replace_by_none=r'[^ \-\_A-Za-z0-9]+',
            replace_by_whitespace=r'[\-\_]',
            remove_brackets=True,
        )
        result = clean(raw_names, **options)

        # The cleaned series must match the expectation exactly.
        pdt.assert_series_equal(result, cleaned_names)
Example #4
Source File: test_compare.py    From recordlinkage with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_numeric(self):
        """Check that three spellings of the same numeric 'step' comparison agree.

        The comparison is registered with positional/keyword mixes of the
        method and offset arguments; every resulting feature column is
        compared with the same expected values.
        """
        A = DataFrame({'col': [1, 1, 1, nan, 0]})
        B = DataFrame({'col': [1, 2, 3, nan, nan]})
        ix = MultiIndex.from_arrays([A.index.values, B.index.values])

        comp = recordlinkage.Compare()
        comp.numeric('col', 'col', 'step', offset=2)
        comp.numeric('col', 'col', method='step', offset=2)
        comp.numeric('col', 'col', 'step', 2)
        result = comp.compute(ix, A, B)

        # Features are labelled 0, 1, 2 in the order they were added; the
        # expected series only differs in its name.
        for feature in (0, 1, 2):
            expected = Series([1.0, 1.0, 1.0, 0.0, 0.0], index=ix, name=feature)
            pdt.assert_series_equal(result[feature], expected)
Example #5
Source File: test_compare.py    From recordlinkage with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_dates(self):
        """Check the date comparison of ``recordlinkage.Compare`` on five record pairs."""
        left_dates = to_datetime(
            ['2005/11/23', nan, '2004/11/23', '2010/01/10', '2010/10/30'])
        A = DataFrame({'col': left_dates})

        right_dates = to_datetime([
            '2005/11/23', '2010/12/31', '2005/11/23', '2010/10/01',
            '2010/9/30'
        ])
        B = DataFrame({'col': right_dates})

        # Pair record i of A with record i of B.
        ix = MultiIndex.from_arrays([A.index.values, B.index.values])

        comp = recordlinkage.Compare()
        comp.date('col', 'col')
        features = comp.compute(ix, A, B)

        pdt.assert_series_equal(
            features[0],
            Series([1, 0, 0, 0.5, 0.5], index=ix, name=0),
        )
Example #6
Source File: test_compare.py    From recordlinkage with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_geo(self):
        """Check the geographic 'step' comparison with a 50 km offset."""
        # Utrecht, Amsterdam, Rotterdam (cities in The Netherlands);
        # B lists the same coordinates in a different order.
        A = DataFrame({
            'lat': [52.0842455, 52.3747388, 51.9280573],
            'lng': [5.0124516, 4.7585305, 4.4203581]
        })
        B = DataFrame({
            'lat': [52.3747388, 51.9280573, 52.0842455],
            'lng': [4.7585305, 4.4203581, 5.0124516]
        })
        ix = MultiIndex.from_arrays([A.index.values, B.index.values])

        comp = recordlinkage.Compare()
        # 50 km range
        comp.geo('lat', 'lng', 'lat', 'lng', method='step', offset=50)
        features = comp.compute(ix, A, B)

        # Reference values noted in the original test:
        # [36.639460, 54.765854, 44.092472]
        # NOTE(review): presumably the pairwise distances in km -- confirm.
        expected = Series([1.0, 0.0, 1.0], index=ix, name=0)
        pdt.assert_series_equal(features[0], expected)
Example #7
Source File: test_compare.py    From recordlinkage with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_defaults(self):
        """Check that the default string comparison equals method='levenshtein'."""
        # The default algorithm is the Levenshtein algorithm, so a comparison
        # without arguments must be identical to one that names it explicitly.
        A = DataFrame({
            'col': [u'str_abc', u'str_abc', u'str_abc', nan, u'hsdkf']
        })
        B = DataFrame({'col': [u'str_abc', u'str_abd', u'jaskdfsd', nan, nan]})
        ix = MultiIndex.from_arrays([A.index.values, B.index.values])

        comp = recordlinkage.Compare()
        comp.string('col', 'col', label='default')
        comp.string('col', 'col', method='levenshtein', label='with_args')
        features = comp.compute(ix, A, B)

        # Drop the names: the two columns carry different labels.
        default_scores = features['default'].rename(None)
        explicit_scores = features['with_args'].rename(None)
        pdt.assert_series_equal(default_scores, explicit_scores)
Example #8
Source File: test_compare.py    From recordlinkage with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_variable_nan(self, missing_value):
        """Check that ``Variable`` replaces missing left-hand values by ``missing_value``."""
        # Random data; the last ten entries of the left-hand column are NaN.
        left_values = np.random.random((100,))
        left_values[90:] = np.nan
        right_values = np.random.random((100,))

        # Wrap the arrays in frames and pair record i with record i.
        A = DataFrame({'col': left_values})
        B = DataFrame({'col': right_values})
        ix = MultiIndex.from_arrays([A.index.values, B.index.values])

        # The part under test.
        from recordlinkage.compare import Variable

        comp = recordlinkage.Compare()
        comp.add(Variable(left_on='col', missing_value=missing_value))
        result = comp.compute(ix, A, B)[0].rename(None)

        # Expected: the left-hand values, with the NaN tail filled in.
        expected = Series(left_values, index=ix)
        expected.iloc[90:] = missing_value
        pdt.assert_series_equal(result, expected)
Example #9
Source File: test_comparison_plot_data_preparation.py    From estimagic with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_consolidate_parameter_attribute_custom_wildcards():
    """Check ``_consolidate_parameter_attribute`` with ``wildcards=[0, None]``.

    The ``attr`` columns of the two inputs contain ``None`` and ``0`` in
    places; the expected consolidated series has ``np.nan`` in the last row.
    """
    ind = pd.MultiIndex.from_tuples(
        [("a", 0), ("a", 1), ("b", 1), ("b", 2)], names=["ind1", "ind2"]
    )

    # First input: only the first three index entries, with a None in "attr".
    partial = pd.DataFrame(index=ind[:3])
    partial["attr"] = ["g1", None, "g3"]
    partial["other"] = [1, 2, 3]

    # Second input: the full index, with a 0 in "attr".
    complete = pd.DataFrame(index=ind)
    complete["attr"] = ["g1", "g2", "g3", 0]
    complete["other2"] = [11, 22, 33, 44]

    info = {}
    res = test_module._consolidate_parameter_attribute(
        results=[OPT_RES(partial, info), OPT_RES(complete, info)],
        attribute="attr",
        wildcards=[0, None],
    )

    expected = pd.Series(["g1", "g2", "g3", np.nan], index=ind, name="attr")
    pdt.assert_series_equal(res, expected)
Example #10
Source File: test_comparison_plot_data_preparation.py    From estimagic with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_calculate_x_bounds_without_nan():
    """Check ``_calculate_x_bounds`` per group with complete data and zero padding."""
    params_data = pd.DataFrame()
    params_data["group"] = ["a"] * 3 + ["b"] * 3
    params_data["value"] = [0, 1, 2, 3, 4, 5]
    params_data["conf_int_lower"] = [-1, 0, -2, 2, -5, 4]
    params_data["conf_int_upper"] = [1, 2, 3, 3, 5, 10]

    res_x_min, res_x_max = test_module._calculate_x_bounds(params_data, 0.0)

    groups = pd.Index(["a", "b"], name="group")
    pdt.assert_series_equal(
        pd.Series([-2.0, -5.0], index=groups, name="x_min"), res_x_min
    )
    pdt.assert_series_equal(
        pd.Series([3.0, 10.0], index=groups, name="x_max"), res_x_max
    )
Example #11
Source File: test_comparison_plot_data_preparation.py    From estimagic with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_calculate_x_bounds_with_nan():
    """Check ``_calculate_x_bounds`` when values contain NaN and no bounds are given."""
    params_data = pd.DataFrame()
    params_data["group"] = ["a"] * 3 + ["b"] * 3
    params_data["value"] = [0, 1, np.nan, 3, np.nan, 5]
    # Both confidence-interval columns are entirely NaN.
    params_data["conf_int_lower"] = np.nan
    params_data["conf_int_upper"] = np.nan

    res_x_min, res_x_max = test_module._calculate_x_bounds(params_data, 0.0)

    groups = pd.Index(["a", "b"], name="group")
    pdt.assert_series_equal(
        pd.Series([0.0, 3.0], index=groups, name="x_min"), res_x_min
    )
    pdt.assert_series_equal(
        pd.Series([1.0, 5.0], index=groups, name="x_max"), res_x_max
    )
Example #12
Source File: test_comparison_plot_data_preparation.py    From estimagic with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_calculate_x_bounds_with_padding():
    """Check ``_calculate_x_bounds`` with a padding of 0.1 on NaN-containing data."""
    params_data = pd.DataFrame()
    params_data["group"] = ["a"] * 3 + ["b"] * 3
    params_data["value"] = [0, 1, np.nan, 3, np.nan, 5]
    # Both confidence-interval columns are entirely NaN.
    params_data["conf_int_lower"] = np.nan
    params_data["conf_int_upper"] = np.nan

    res_x_min, res_x_max = test_module._calculate_x_bounds(params_data, 0.1)

    groups = pd.Index(["a", "b"], name="group")
    pdt.assert_series_equal(
        pd.Series([-0.1, 2.8], index=groups, name="x_min"), res_x_min
    )
    pdt.assert_series_equal(
        pd.Series([1.1, 5.2], index=groups, name="x_max"), res_x_max
    )


# replace_by_midpoint
# ==================== 
Example #13
Source File: test_comparison_plot_data_preparation.py    From estimagic with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_consolidate_parameter_attribute_standard_wildcards():
    """Check ``_consolidate_parameter_attribute`` without passing ``wildcards``."""
    ind = pd.MultiIndex.from_tuples(
        [("a", 0), ("a", 1), ("b", 1), ("b", 2)], names=["ind1", "ind2"]
    )

    # First input: only the first three index entries.
    partial = pd.DataFrame(index=ind[:3])
    partial["attr"] = ["g1", "g2", "g3"]
    partial["other"] = [1, 2, 3]

    # Second input: the full index.
    complete = pd.DataFrame(index=ind)
    complete["attr"] = ["g1", "g2", "g3", "g2"]
    complete["other2"] = [11, 22, 33, 44]

    info = {}
    res = test_module._consolidate_parameter_attribute(
        results=[OPT_RES(partial, info), OPT_RES(complete, info)],
        attribute="attr",
    )

    expected = pd.Series(["g1", "g2", "g3", "g2"], index=ind, name="attr")
    pdt.assert_series_equal(res, expected)
Example #14
Source File: test_protocols.py    From bionic with Apache License 2.0 6 votes vote down vote up
def test_dataframe_with_categoricals_ignored(builder):
    """Check that a categorical column survives a ``frame(check_dtypes=False)`` entity.

    Only the values are compared: both columns are cast to ``object`` first.
    """
    source_frame = pd.DataFrame()
    source_frame["cat"] = pd.Categorical(
        ["red", "blue", "red"], categories=["blue", "red"], ordered=True
    )

    @builder
    @bn.protocol.frame(check_dtypes=False)
    def df():
        return source_frame

    # Whether or not the deserialized column has the Categorical dtype can
    # depend on the version of pyarrow being used, so both columns are
    # converted to the same type before comparing.
    rebuilt_column = builder.build().get("df")["cat"].astype(object)
    original_column = source_frame["cat"].astype(object)
    pdt.assert_series_equal(rebuilt_column, original_column)
Example #15
Source File: test_text.py    From fletcher with MIT License 6 votes vote down vote up
def test_text_zfill(data, fletcher_variant):
    """Check that ``fr_text.zfill`` matches ``Series.str.zfill`` of pandas.

    The fill width is one more than the longest string in ``data``.
    """
    if any("\x00" in x for x in data if x):
        # pytest.skip("pandas cannot handle \\x00 characters in tests")
        # Skip is not working properly with hypothesis
        return

    ser_pd = pd.Series(data, dtype=str)
    longest = ser_pd.map(_optional_len).max()
    if pd.isna(longest):
        longest = 0

    arrow_data = pa.array(data, type=pa.string())
    array_cls = (
        fr.FletcherChunkedArray
        if fletcher_variant == "chunked"
        else fr.FletcherContinuousArray
    )
    ser_fr = pd.Series(array_cls(arrow_data))

    width = longest + 1
    result_pd = ser_pd.str.zfill(width)
    result_fr = ser_fr.fr_text.zfill(width).astype(object)
    # Use np.nan for missing values so both results mark them the same way.
    result_fr[result_fr.isna()] = np.nan
    tm.assert_series_equal(result_fr, result_pd)
Example #16
Source File: test_preprocessing.py    From recordlinkage with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_clean(self):
        """Check ``clean`` with its defaults and with every option switched off."""
        raw_names = pd.Series([
            'Mary-ann', 'Bob :)', 'Angel', 'Bob (alias Billy)', 'Mary ann',
            'John', np.nan
        ])
        cleaned_names = pd.Series(
            ['mary ann', 'bob', 'angel', 'bob', 'mary ann', 'john', np.nan])

        # Defaults: the result must be identical to the expected series.
        pdt.assert_series_equal(clean(raw_names), cleaned_names)

        # With every option disabled, nothing should have happened to the input.
        all_disabled = dict(
            lowercase=False,
            replace_by_none=False,
            replace_by_whitespace=False,
            strip_accents=False,
            remove_brackets=False,
        )
        untouched = clean(raw_names, **all_disabled)
        pdt.assert_series_equal(untouched, raw_names)
Example #17
Source File: test_indicator_volume.py    From pandas-ta with MIT License 5 votes vote down vote up
def test_mfi(self):
        # Compare pandas_ta.mfi with TA-Lib's MFI (names ignored). If the two
        # series are not equal, fall back to a correlation threshold check.
        result = pandas_ta.mfi(self.high, self.low, self.close, self.volume_)
        self.assertIsInstance(result, Series)
        self.assertEqual(result.name, 'MFI_14')

        try:
            expected = tal.MFI(self.high, self.low, self.close, self.volume_)
            pdt.assert_series_equal(result, expected, check_names=False)
        except AssertionError:
            try:
                correlation = pandas_ta.utils.df_error_analysis(
                    result, expected, col=CORRELATION
                )
                self.assertGreater(correlation, CORRELATION_THRESHOLD)
            except Exception as ex:
                # Any failure of the fallback is passed on to error_analysis.
                error_analysis(result, CORRELATION, ex)
Example #18
Source File: test_indicator_momentum.py    From pandas-ta with MIT License 5 votes vote down vote up
def test_uo(self):
        # Compare pandas_ta.uo with TA-Lib's ULTOSC (names ignored). If the two
        # series are not equal, fall back to a correlation threshold check.
        result = pandas_ta.uo(self.high, self.low, self.close)
        self.assertIsInstance(result, Series)
        self.assertEqual(result.name, 'UO_7_14_28')

        try:
            expected = tal.ULTOSC(self.high, self.low, self.close)
            pdt.assert_series_equal(result, expected, check_names=False)
        except AssertionError:
            try:
                correlation = pandas_ta.utils.df_error_analysis(
                    result, expected, col=CORRELATION
                )
                self.assertGreater(correlation, CORRELATION_THRESHOLD)
            except Exception as ex:
                # Any failure of the fallback is passed on to error_analysis.
                error_analysis(result, CORRELATION, ex)
Example #19
Source File: test_indicator_momentum.py    From pandas-ta with MIT License 5 votes vote down vote up
def test_willr(self):
        # Compare pandas_ta.willr with TA-Lib's WILLR (names ignored). If the
        # two series are not equal, fall back to a correlation threshold check.
        result = pandas_ta.willr(self.high, self.low, self.close)
        self.assertIsInstance(result, Series)
        self.assertEqual(result.name, 'WILLR_14')

        try:
            expected = tal.WILLR(self.high, self.low, self.close)
            pdt.assert_series_equal(result, expected, check_names=False)
        except AssertionError:
            try:
                correlation = pandas_ta.utils.df_error_analysis(
                    result, expected, col=CORRELATION
                )
                self.assertGreater(correlation, CORRELATION_THRESHOLD)
            except Exception as ex:
                # Any failure of the fallback is passed on to error_analysis.
                error_analysis(result, CORRELATION, ex)
Example #20
Source File: test_pandas_integration.py    From fletcher with MIT License 5 votes vote down vote up
def test_fillna_chunked(test_array_chunked):
    """Check ``Series.fillna`` on a Series backed by a ``FletcherChunkedArray``."""
    filled = pd.Series(fr.FletcherChunkedArray(test_array_chunked)).fillna("filled")

    # Expected data: ten identical chunks, each the first two TEST_LIST
    # entries followed by the fill value.
    chunk_values = TEST_LIST[:2] + ["filled"]
    expected_chunks = pa.chunked_array([pa.array(chunk_values) for _ in range(10)])
    expected = pd.Series(fr.FletcherChunkedArray(expected_chunks))

    tm.assert_series_equal(filled, expected)
Example #21
Source File: test_pandas_integration.py    From fletcher with MIT License 5 votes vote down vote up
def test_argsort(array_chunked_nulls, kind):
    """Check that ``argsort`` on a Fletcher-backed Series matches the object-dtype result.

    ``array_chunked_nulls`` is accepted but not used in the body; the Series
    is built from ``TEST_ARRAY``.
    """
    fletcher_series = pd.Series(fr.FletcherChunkedArray(TEST_ARRAY))
    tm.assert_series_equal(
        fletcher_series.argsort(kind=kind),
        fletcher_series.astype(object).argsort(kind=kind),
    )
Example #22
Source File: test_pandas_integration.py    From fletcher with MIT License 5 votes vote down vote up
def test_astype_object():
    """Check that casting a Fletcher-backed Series to ``object`` yields ``TEST_LIST``."""
    as_object = pd.Series(fr.FletcherChunkedArray(TEST_ARRAY)).astype(object)
    tm.assert_series_equal(as_object, pd.Series(TEST_LIST))
Example #23
Source File: test_text.py    From fletcher with MIT License 5 votes vote down vote up
def test_text_cat(data, fletcher_variant, fletcher_variant_2):
    """Check that ``fr_text.cat`` matches ``Series.str.cat`` of pandas.

    The series is concatenated with a second series built from the same
    ``data``, possibly using a different Fletcher variant.
    """
    if any("\x00" in x for x in data if x):
        # pytest.skip("pandas cannot handle \\x00 characters in tests")
        # Skip is not working properly with hypothesis
        return

    ser_pd = pd.Series(data, dtype=str)
    left = _fr_series_from_data(data, fletcher_variant)
    right = _fr_series_from_data(data, fletcher_variant_2)

    result_pd = ser_pd.str.cat(ser_pd)
    result_fr = left.fr_text.cat(right).astype(object)
    # Pandas returns np.nan for NA values in cat, keep this in line
    result_fr[result_fr.isna()] = np.nan
    tm.assert_series_equal(result_fr, result_pd)
Example #24
Source File: test_datetime_features.py    From timeserio with MIT License 5 votes vote down vote up
def test_get_fractional_day_from_series():
    """Check the fractional day for five timestamps spaced six hours apart."""
    timestamps = pd.Series(
        pd.date_range(start='2000-01-01', freq='6H', periods=5)
    )
    pdt.assert_series_equal(
        get_fractional_day_from_series(timestamps),
        pd.Series([0, 0.25, 0.5, 0.75, 0]),
    )
Example #25
Source File: test_datetime_features.py    From timeserio with MIT License 5 votes vote down vote up
def test_get_fractional_year_from_series():
    """Check the fractional year for five timestamps spaced 31 days apart."""
    timestamps = pd.Series(
        pd.date_range(start='2000-01-01', freq='31D', periods=5)
    )
    actual = get_fractional_year_from_series(timestamps)
    # Step k is expected to lie k * 31 / 365 of a year after the start.
    step_index = pd.Series([0, 1, 2, 3, 4])
    pdt.assert_series_equal(actual, step_index * 31 / 365.)
Example #26
Source File: test_datetime_features.py    From timeserio with MIT License 5 votes vote down vote up
def test_get_is_holiday_from_series():
    """Check the holiday flags for the first five days of January 2000."""
    days = pd.Series(pd.date_range(start='2000-01-01', freq='D', periods=5))
    pdt.assert_series_equal(
        get_is_holiday_from_series(days),
        pd.Series([1, 1, 1, 1, 0]),
    )
Example #27
Source File: test_datetime_features.py    From timeserio with MIT License 5 votes vote down vote up
def test_get_is_holiday_from_series_with_country(country, expected):
    """Check the holiday flags of four fixed 2020 dates for the given ``country``."""
    timestamps = pd.to_datetime(
        pd.Series(["2020-01-01", "2020-01-02", "2020-08-03", "2020-08-31"])
    )
    flags = get_is_holiday_from_series(timestamps, country=country)
    pdt.assert_series_equal(flags, pd.Series(expected))
Example #28
Source File: test_datetime_features.py    From timeserio with MIT License 5 votes vote down vote up
def test_truncate_series(series_data, truncation_period, expected_data):
    """Check ``truncate_series`` against the expected data for the given period."""
    truncated = truncate_series(pd.Series(series_data), truncation_period)
    pdt.assert_series_equal(truncated, pd.Series(expected_data))
Example #29
Source File: test_indicator_volume.py    From pandas-ta with MIT License 5 votes vote down vote up
def test_ad(self):
        # Compare pandas_ta.ad with TA-Lib's AD (names ignored). If the two
        # series are not equal, fall back to a correlation threshold check.
        result = pandas_ta.ad(self.high, self.low, self.close, self.volume_)
        self.assertIsInstance(result, Series)
        self.assertEqual(result.name, 'AD')

        try:
            expected = tal.AD(self.high, self.low, self.close, self.volume_)
            pdt.assert_series_equal(result, expected, check_names=False)
        except AssertionError:
            try:
                correlation = pandas_ta.utils.df_error_analysis(
                    result, expected, col=CORRELATION
                )
                self.assertGreater(correlation, CORRELATION_THRESHOLD)
            except Exception as ex:
                # Any failure of the fallback is passed on to error_analysis.
                error_analysis(result, CORRELATION, ex)
Example #30
Source File: test_datetime_features.py    From timeserio with MIT License 5 votes vote down vote up
def test_get_fractional_hour_from_series():
    """Check the fractional hour for 48 timestamps spaced half an hour apart."""
    timestamps = pd.Series(
        pd.date_range(start='2000-01-01', freq='0.5H', periods=48)
    )
    pdt.assert_series_equal(
        get_fractional_hour_from_series(timestamps),
        pd.Series(np.linspace(0, 23.5, 48)),
    )