Python Examples of pandas.testing.assert_series_equal

Source File: test_base.py From fletcher with MIT License

6 votes

def test_pandas_from_arrow():
    """Check ``fr.pandas_from_arrow`` for arrays, record batches and tables.

    Each conversion is compared against a pandas object built directly from
    the matching Fletcher array type, once without ``continuous`` and once
    with ``continuous=True``.
    """
    strings = pa.array(["a", "b", "c"], pa.string())

    # A bare Arrow array is expected to become an unnamed Series.
    chunked_series = pd.Series(fr.FletcherChunkedArray(strings))
    pdt.assert_series_equal(chunked_series, fr.pandas_from_arrow(strings))

    continuous_series = pd.Series(fr.FletcherContinuousArray(strings))
    pdt.assert_series_equal(
        continuous_series, fr.pandas_from_arrow(strings, continuous=True)
    )

    # Record batches and tables are expected to become a DataFrame.
    batch = pa.RecordBatch.from_arrays([strings], ["column"])
    chunked_frame = pd.DataFrame({"column": fr.FletcherChunkedArray(strings)})
    table = pa.Table.from_arrays([strings], ["column"])
    for container in (batch, table):
        pdt.assert_frame_equal(chunked_frame, fr.pandas_from_arrow(container))

    continuous_frame = pd.DataFrame({"column": fr.FletcherContinuousArray(strings)})
    table = pa.Table.from_arrays([strings], ["column"])
    for container in (batch, table):
        pdt.assert_frame_equal(
            continuous_frame, fr.pandas_from_arrow(container, continuous=True)
        )

Source File: test_indicator_trend.py From pandas-ta with MIT License

6 votes

def test_psar(self):
        """Check ``pandas_ta.psar`` against ``tal.SAR``.

        The first two result columns are merged into a single series before
        the comparison. If the exact comparison fails, the value returned by
        ``df_error_analysis`` must exceed ``CORRELATION_THRESHOLD``.
        """
        result = pandas_ta.psar(self.high, self.low)
        self.assertIsInstance(result, DataFrame)
        self.assertEqual(result.name, 'PSAR_0.02_0.2')

        # Combine Long and Short SAR's into one SAR value
        sar_parts = result[result.columns[:2]].fillna(0)
        first_col, second_col = sar_parts.columns[0], sar_parts.columns[1]
        combined = sar_parts[first_col] + sar_parts[second_col]
        combined.name = result.name

        try:
            expected = tal.SAR(self.high, self.low)
            pdt.assert_series_equal(combined, expected)
        except AssertionError:
            # Exact match failed: fall back to the df_error_analysis check.
            try:
                correlation = pandas_ta.utils.df_error_analysis(
                    combined, expected, col=CORRELATION
                )
                self.assertGreater(correlation, CORRELATION_THRESHOLD)
            except Exception as err:
                error_analysis(combined, CORRELATION, err)

Source File: test_preprocessing.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

6 votes

def test_clean_parameters(self):
        """Check ``clean`` when its options are passed explicitly."""
        raw_names = pd.Series([
            u'Mary-ann', u'Bob :)', u'Angel', u'Bob (alias Billy)',
            u'Mary  ann', u'John', np.nan
        ])

        cleaned_names = pd.Series([
            u'mary ann', u'bob', u'angel', u'bob', u'mary ann', u'john', np.nan
        ])

        options = {
            'lowercase': True,
            'replace_by_none': r'[^ \-\_A-Za-z0-9]+',
            'replace_by_whitespace': r'[\-\_]',
            'remove_brackets': True,
        }
        outcome = clean(raw_names, **options)

        # The cleaned series must match the expectation exactly.
        pdt.assert_series_equal(outcome, cleaned_names)

Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

6 votes

def test_numeric(self):
        """Check the numeric 'step' comparison for three equivalent call forms.

        The method and offset are passed positionally and by keyword; all
        three resulting feature columns must hold the same scores.
        """
        left = DataFrame({'col': [1, 1, 1, nan, 0]})
        right = DataFrame({'col': [1, 2, 3, nan, nan]})
        pairs = MultiIndex.from_arrays([left.index.values, right.index.values])

        comp = recordlinkage.Compare()
        comp.numeric('col', 'col', 'step', offset=2)
        comp.numeric('col', 'col', method='step', offset=2)
        comp.numeric('col', 'col', 'step', 2)
        features = comp.compute(pairs, left, right)

        # Every feature column (0, 1, 2) is expected to carry these scores.
        scores = [1.0, 1.0, 1.0, 0.0, 0.0]
        for position in range(3):
            expected = Series(scores, index=pairs, name=position)
            pdt.assert_series_equal(features[position], expected)

Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

6 votes

def test_dates(self):
        """Check the date comparison with its default settings."""
        left_dates = to_datetime(
            ['2005/11/23', nan, '2004/11/23', '2010/01/10', '2010/10/30'])
        right_dates = to_datetime([
            '2005/11/23', '2010/12/31', '2005/11/23', '2010/10/01',
            '2010/9/30'
        ])
        left = DataFrame({'col': left_dates})
        right = DataFrame({'col': right_dates})
        pairs = MultiIndex.from_arrays([left.index.values, right.index.values])

        comp = recordlinkage.Compare()
        comp.date('col', 'col')
        features = comp.compute(pairs, left, right)
        outcome = features[0]

        pdt.assert_series_equal(
            outcome, Series([1, 0, 0, 0.5, 0.5], index=pairs, name=0))

Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

6 votes

def test_geo(self):
        """Check the geographic 'step' comparison with an offset of 50."""
        # Utrecht, Amsterdam, Rotterdam (Cities in The Netherlands)
        left = DataFrame({
            'lat': [52.0842455, 52.3747388, 51.9280573],
            'lng': [5.0124516, 4.7585305, 4.4203581]
        })
        # The same three cities, rotated by one position.
        right = DataFrame({
            'lat': [52.3747388, 51.9280573, 52.0842455],
            'lng': [4.7585305, 4.4203581, 5.0124516]
        })
        pairs = MultiIndex.from_arrays([left.index.values, right.index.values])

        comp = recordlinkage.Compare()
        # 50 km range
        comp.geo('lat', 'lng', 'lat', 'lng', method='step', offset=50)
        features = comp.compute(pairs, left, right)

        # Reference values noted with the test: [36.639460, 54.765854, 44.092472]
        scores = Series([1.0, 0.0, 1.0], index=pairs, name=0)
        pdt.assert_series_equal(features[0], scores)

Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

6 votes

def test_defaults(self):
        """Check that the default string comparison equals 'levenshtein'.

        One feature is computed without a method and one with
        ``method='levenshtein'``; both columns must be identical.
        """
        left = DataFrame({
            'col': [u'str_abc', u'str_abc', u'str_abc', nan, u'hsdkf']
        })
        right = DataFrame({
            'col': [u'str_abc', u'str_abd', u'jaskdfsd', nan, nan]
        })
        pairs = MultiIndex.from_arrays([left.index.values, right.index.values])

        comp = recordlinkage.Compare()
        comp.string('col', 'col', label='default')
        comp.string('col', 'col', method='levenshtein', label='with_args')
        features = comp.compute(pairs, left, right)

        # Drop the names so that only index and values are compared.
        implicit = features['default'].rename(None)
        explicit = features['with_args'].rename(None)
        pdt.assert_series_equal(implicit, explicit)

Source File: test_compare.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

6 votes

def test_variable_nan(self, missing_value):
        """Check that ``Variable`` replaces NaN inputs with ``missing_value``.

        The last ten entries of the left-hand column are set to NaN. The
        computed feature is expected to equal that column with those entries
        replaced by ``missing_value``.
        """
        # Use a locally seeded generator so that a failing run can be
        # reproduced and the global NumPy random state is left untouched.
        rng = np.random.RandomState(0)

        # data
        arrayA = rng.random_sample((100,))
        arrayA[90:] = np.nan
        arrayB = rng.random_sample((100,))

        # convert to pandas data
        A = DataFrame({'col': arrayA})
        B = DataFrame({'col': arrayB})
        ix = MultiIndex.from_arrays([A.index.values, B.index.values])

        # the part to test
        from recordlinkage.compare import Variable

        comp = recordlinkage.Compare()
        comp.add(Variable(left_on='col', missing_value=missing_value))
        features = comp.compute(ix, A, B)
        # Drop the feature name so that only index and values are compared.
        result = features[0].rename(None)

        expected = Series(arrayA, index=ix)
        expected.iloc[90:] = missing_value
        pdt.assert_series_equal(result, expected)

Source File: test_comparison_plot_data_preparation.py From estimagic with BSD 3-Clause "New" or "Revised" License

6 votes

def test_consolidate_parameter_attribute_custom_wildcards():
    """Check ``_consolidate_parameter_attribute`` with ``wildcards=[0, None]``.

    The two inputs carry ``None`` and ``0`` in their "attr" columns; the
    expected result holds NaN where only the ``0`` entry is available.
    """
    index_tuples = [("a", 0), ("a", 1), ("b", 1), ("b", 2)]
    full_index = pd.MultiIndex.from_tuples(index_tuples, names=["ind1", "ind2"])

    # First result: only the first three index entries.
    partial = pd.DataFrame(index=full_index[:3])
    partial["attr"] = ["g1", None, "g3"]
    partial["other"] = [1, 2, 3]

    # Second result: all four index entries.
    complete = pd.DataFrame(index=full_index)
    complete["attr"] = ["g1", "g2", "g3", 0]
    complete["other2"] = [11, 22, 33, 44]

    info = {}
    results = [OPT_RES(partial, info), OPT_RES(complete, info)]
    consolidated = test_module._consolidate_parameter_attribute(
        results=results, attribute="attr", wildcards=[0, None]
    )

    expected = pd.Series(["g1", "g2", "g3", np.nan], index=full_index, name="attr")
    pdt.assert_series_equal(consolidated, expected)

Source File: test_comparison_plot_data_preparation.py From estimagic with BSD 3-Clause "New" or "Revised" License

6 votes

def test_calculate_x_bounds_without_nan():
    """Check ``_calculate_x_bounds`` on complete data with zero padding.

    The expected bounds per group are the smallest lower and the largest
    upper confidence bound.
    """
    params = pd.DataFrame()
    params["group"] = ["a"] * 3 + ["b"] * 3
    params["value"] = [0, 1, 2, 3, 4, 5]
    params["conf_int_lower"] = [-1, 0, -2, 2, -5, 4]
    params["conf_int_upper"] = [1, 2, 3, 3, 5, 10]

    x_min, x_max = test_module._calculate_x_bounds(params, 0.0)

    groups = pd.Index(["a", "b"], name="group")
    pdt.assert_series_equal(
        pd.Series([-2.0, -5.0], index=groups, name="x_min"), x_min
    )
    pdt.assert_series_equal(
        pd.Series([3.0, 10.0], index=groups, name="x_max"), x_max
    )

Source File: test_comparison_plot_data_preparation.py From estimagic with BSD 3-Clause "New" or "Revised" License

6 votes

def test_calculate_x_bounds_with_nan():
    """Check ``_calculate_x_bounds`` when values and intervals contain NaN.

    Both confidence-interval columns are entirely NaN, so the expected bounds
    are the per-group minimum and maximum of the non-missing values.
    """
    params = pd.DataFrame()
    params["group"] = ["a"] * 3 + ["b"] * 3
    params["value"] = [0, 1, np.nan, 3, np.nan, 5]
    params["conf_int_lower"] = np.nan
    params["conf_int_upper"] = np.nan

    x_min, x_max = test_module._calculate_x_bounds(params, 0.0)

    groups = pd.Index(["a", "b"], name="group")
    pdt.assert_series_equal(
        pd.Series([0.0, 3.0], index=groups, name="x_min"), x_min
    )
    pdt.assert_series_equal(
        pd.Series([1.0, 5.0], index=groups, name="x_max"), x_max
    )

Source File: test_comparison_plot_data_preparation.py From estimagic with BSD 3-Clause "New" or "Revised" License

6 votes

def test_calculate_x_bounds_with_padding():
    """Check ``_calculate_x_bounds`` with a padding of 0.1.

    The expected bounds equal each group's value range widened on both sides
    by 0.1 times the width of that range.
    """
    params = pd.DataFrame()
    params["group"] = ["a"] * 3 + ["b"] * 3
    params["value"] = [0, 1, np.nan, 3, np.nan, 5]
    params["conf_int_lower"] = np.nan
    params["conf_int_upper"] = np.nan

    x_min, x_max = test_module._calculate_x_bounds(params, 0.1)

    groups = pd.Index(["a", "b"], name="group")
    pdt.assert_series_equal(
        pd.Series([-0.1, 2.8], index=groups, name="x_min"), x_min
    )
    pdt.assert_series_equal(
        pd.Series([1.1, 5.2], index=groups, name="x_max"), x_max
    )


# replace_by_midpoint
# ====================

Source File: test_comparison_plot_data_preparation.py From estimagic with BSD 3-Clause "New" or "Revised" License

6 votes

def test_consolidate_parameter_attribute_standard_wildcards():
    """Check ``_consolidate_parameter_attribute`` without a ``wildcards`` argument.

    Both inputs agree on their shared index entries; the expected result is
    the "attr" column over the full index.
    """
    index_tuples = [("a", 0), ("a", 1), ("b", 1), ("b", 2)]
    full_index = pd.MultiIndex.from_tuples(index_tuples, names=["ind1", "ind2"])

    # First result: only the first three index entries.
    partial = pd.DataFrame(index=full_index[:3])
    partial["attr"] = ["g1", "g2", "g3"]
    partial["other"] = [1, 2, 3]

    # Second result: all four index entries.
    complete = pd.DataFrame(index=full_index)
    complete["attr"] = ["g1", "g2", "g3", "g2"]
    complete["other2"] = [11, 22, 33, 44]

    info = {}
    results = [OPT_RES(partial, info), OPT_RES(complete, info)]
    consolidated = test_module._consolidate_parameter_attribute(
        results=results, attribute="attr"
    )

    expected = pd.Series(["g1", "g2", "g3", "g2"], index=full_index, name="attr")
    pdt.assert_series_equal(consolidated, expected)

Source File: test_protocols.py From bionic with Apache License 2.0

6 votes

def test_dataframe_with_categoricals_ignored(builder):
    """Check that a categorical column is rebuilt with equal values.

    The frame protocol is applied with ``check_dtypes=False``; only the
    values of the "cat" column are compared, after casting to ``object``.
    """
    colors = pd.Categorical(
        ["red", "blue", "red"], categories=["blue", "red"], ordered=True
    )
    df_value = pd.DataFrame()
    df_value["cat"] = colors

    @builder
    @bn.protocol.frame(check_dtypes=False)
    def df():
        return df_value

    # Whether or not the deserialized column has the Categorical Dtype can
    # depend on the version of pyarrow being used, so we'll just convert
    # both columns to the same type here.
    rebuilt_column = builder.build().get("df")["cat"].astype(object)
    original_column = df_value["cat"].astype(object)
    pdt.assert_series_equal(rebuilt_column, original_column)

Source File: test_text.py From fletcher with MIT License

6 votes

def test_text_zfill(data, fletcher_variant):
    """Compare ``fr_text.zfill`` with ``Series.str.zfill`` on the same data.

    The fill width is one more than the longest string length reported by
    ``_optional_len`` (treated as 0 when that maximum is NA).
    """
    contains_nul = any("\x00" in x for x in data if x)
    if contains_nul:
        # pytest.skip("pandas cannot handle \\x00 characters in tests")
        # Skip is not working properly with hypothesis
        return

    pandas_series = pd.Series(data, dtype=str)
    longest = pandas_series.map(_optional_len).max()
    if pd.isna(longest):
        longest = 0

    arrow_data = pa.array(data, type=pa.string())
    if fletcher_variant == "chunked":
        fletcher_array = fr.FletcherChunkedArray(arrow_data)
    else:
        fletcher_array = fr.FletcherContinuousArray(arrow_data)
    fletcher_series = pd.Series(fletcher_array)

    expected = pandas_series.str.zfill(longest + 1)
    actual = fletcher_series.fr_text.zfill(longest + 1).astype(object)
    # Pandas returns np.nan for NA values, keep this in line
    actual[actual.isna()] = np.nan
    tm.assert_series_equal(actual, expected)

Source File: test_preprocessing.py From recordlinkage with BSD 3-Clause "New" or "Revised" License

6 votes

def test_clean(self):
        """Check ``clean`` with its defaults and with every option disabled."""
        raw_names = pd.Series([
            'Mary-ann', 'Bob :)', 'Angel', 'Bob (alias Billy)', 'Mary ann',
            'John', np.nan
        ])
        cleaned_names = pd.Series(
            ['mary ann', 'bob', 'angel', 'bob', 'mary ann', 'john', np.nan])

        # Default options: the result must match the cleaned names.
        pdt.assert_series_equal(clean(raw_names), cleaned_names)

        disabled = dict.fromkeys(
            ('lowercase', 'replace_by_none', 'replace_by_whitespace',
             'strip_accents', 'remove_brackets'),
            False)
        untouched = clean(raw_names, **disabled)

        # With every option disabled, nothing should have happened.
        pdt.assert_series_equal(untouched, raw_names)

Source File: test_indicator_volume.py From pandas-ta with MIT License

5 votes

def test_mfi(self):
        """Check ``pandas_ta.mfi`` against ``tal.MFI``.

        If the exact comparison (names ignored) fails, the value returned by
        ``df_error_analysis`` must exceed ``CORRELATION_THRESHOLD``.
        """
        ohlcv = (self.high, self.low, self.close, self.volume_)
        result = pandas_ta.mfi(*ohlcv)
        self.assertIsInstance(result, Series)
        self.assertEqual(result.name, 'MFI_14')

        try:
            expected = tal.MFI(*ohlcv)
            pdt.assert_series_equal(result, expected, check_names=False)
        except AssertionError:
            # Exact match failed: fall back to the df_error_analysis check.
            try:
                correlation = pandas_ta.utils.df_error_analysis(
                    result, expected, col=CORRELATION
                )
                self.assertGreater(correlation, CORRELATION_THRESHOLD)
            except Exception as err:
                error_analysis(result, CORRELATION, err)

Source File: test_indicator_momentum.py From pandas-ta with MIT License

5 votes

def test_uo(self):
        """Check ``pandas_ta.uo`` against ``tal.ULTOSC``.

        If the exact comparison (names ignored) fails, the value returned by
        ``df_error_analysis`` must exceed ``CORRELATION_THRESHOLD``.
        """
        hlc = (self.high, self.low, self.close)
        result = pandas_ta.uo(*hlc)
        self.assertIsInstance(result, Series)
        self.assertEqual(result.name, 'UO_7_14_28')

        try:
            expected = tal.ULTOSC(*hlc)
            pdt.assert_series_equal(result, expected, check_names=False)
        except AssertionError:
            # Exact match failed: fall back to the df_error_analysis check.
            try:
                correlation = pandas_ta.utils.df_error_analysis(
                    result, expected, col=CORRELATION
                )
                self.assertGreater(correlation, CORRELATION_THRESHOLD)
            except Exception as err:
                error_analysis(result, CORRELATION, err)

Source File: test_indicator_momentum.py From pandas-ta with MIT License

5 votes

def test_willr(self):
        """Check ``pandas_ta.willr`` against ``tal.WILLR``.

        If the exact comparison (names ignored) fails, the value returned by
        ``df_error_analysis`` must exceed ``CORRELATION_THRESHOLD``.
        """
        hlc = (self.high, self.low, self.close)
        result = pandas_ta.willr(*hlc)
        self.assertIsInstance(result, Series)
        self.assertEqual(result.name, 'WILLR_14')

        try:
            expected = tal.WILLR(*hlc)
            pdt.assert_series_equal(result, expected, check_names=False)
        except AssertionError:
            # Exact match failed: fall back to the df_error_analysis check.
            try:
                correlation = pandas_ta.utils.df_error_analysis(
                    result, expected, col=CORRELATION
                )
                self.assertGreater(correlation, CORRELATION_THRESHOLD)
            except Exception as err:
                error_analysis(result, CORRELATION, err)

Source File: test_pandas_integration.py From fletcher with MIT License

5 votes

def test_fillna_chunked(test_array_chunked):
    """Check ``fillna`` on a Series backed by a chunked Fletcher array.

    The expectation is built from ten chunks, each holding the first two
    entries of ``TEST_LIST`` followed by the fill value.
    """
    filled = pd.Series(fr.FletcherChunkedArray(test_array_chunked)).fillna("filled")

    chunk_values = TEST_LIST[:2] + ["filled"]
    expected_chunks = pa.chunked_array([pa.array(chunk_values) for _ in range(10)])
    expected = pd.Series(fr.FletcherChunkedArray(expected_chunks))

    tm.assert_series_equal(filled, expected)

Source File: test_pandas_integration.py From fletcher with MIT License

5 votes

def test_argsort(array_chunked_nulls, kind):
    """Check that ``argsort`` on a Fletcher Series matches the object-dtype result."""
    # NOTE(review): ``array_chunked_nulls`` is requested but not used in the
    # body; the Series is built from ``TEST_ARRAY`` instead.
    fletcher_series = pd.Series(fr.FletcherChunkedArray(TEST_ARRAY))
    actual_order = fletcher_series.argsort(kind=kind)
    reference_order = fletcher_series.astype(object).argsort(kind=kind)
    tm.assert_series_equal(actual_order, reference_order)

Source File: test_pandas_integration.py From fletcher with MIT License

5 votes

def test_astype_object():
    """Check that casting a Fletcher Series to ``object`` yields ``TEST_LIST``."""
    fletcher_series = pd.Series(fr.FletcherChunkedArray(TEST_ARRAY))
    as_object = fletcher_series.astype(object)
    tm.assert_series_equal(as_object, pd.Series(TEST_LIST))

Source File: test_text.py From fletcher with MIT License

5 votes

def test_text_cat(data, fletcher_variant, fletcher_variant_2):
    """Compare ``fr_text.cat`` with ``Series.str.cat`` on the same data.

    The data is concatenated with itself; the two Fletcher operands are built
    from the two requested variants.
    """
    contains_nul = any("\x00" in x for x in data if x)
    if contains_nul:
        # pytest.skip("pandas cannot handle \\x00 characters in tests")
        # Skip is not working properly with hypothesis
        return

    pandas_series = pd.Series(data, dtype=str)
    left = _fr_series_from_data(data, fletcher_variant)
    right = _fr_series_from_data(data, fletcher_variant_2)

    expected = pandas_series.str.cat(pandas_series)
    actual = left.fr_text.cat(right).astype(object)
    # Pandas returns np.nan for NA values in cat, keep this in line
    actual[actual.isna()] = np.nan
    tm.assert_series_equal(actual, expected)