Python Examples of pandas.core.strings.StringMethods

Source File: test_strings.py From predictive-maintenance-using-machine-learning with Apache License 2.0

6 votes

def test_api_for_categorical(self, any_string_method):
        # https://github.com/pandas-dev/pandas/issues/10661
        s = Series(list('aabb'))
        s = s + " " + s
        c = s.astype('category')
        assert isinstance(c.str, strings.StringMethods)

        method_name, args, kwargs = any_string_method

        result = getattr(c.str, method_name)(*args, **kwargs)
        expected = getattr(s.str, method_name)(*args, **kwargs)

        if isinstance(result, DataFrame):
            tm.assert_frame_equal(result, expected)
        elif isinstance(result, Series):
            tm.assert_series_equal(result, expected)
        else:
            # str.cat(others=None) returns string, for example
            assert result == expected

Source File: test_strings.py From recruit with Apache License 2.0

6 votes

def test_api_for_categorical(self, any_string_method):
        # https://github.com/pandas-dev/pandas/issues/10661
        s = Series(list('aabb'))
        s = s + " " + s
        c = s.astype('category')
        assert isinstance(c.str, strings.StringMethods)

        method_name, args, kwargs = any_string_method

        result = getattr(c.str, method_name)(*args, **kwargs)
        expected = getattr(s.str, method_name)(*args, **kwargs)

        if isinstance(result, DataFrame):
            tm.assert_frame_equal(result, expected)
        elif isinstance(result, Series):
            tm.assert_series_equal(result, expected)
        else:
            # str.cat(others=None) returns string, for example
            assert result == expected

Source File: test_strings.py From recruit with Apache License 2.0

5 votes

def any_allowed_skipna_inferred_dtype(request):
    """
    Fixture for the (inferred) dtypes accepted by StringMethods.__init__

    The parameter is read from ``request.param`` as a pair; its values are
    converted to an object-dtype array before being handed to the test.

    The covered (inferred) types are:
    * 'string'
    * 'unicode' (if PY2)
    * 'empty'
    * 'bytes' (if PY3)
    * 'mixed'
    * 'mixed-integer'

    Returns
    -------
    inferred_dtype : str
        Name of the inferred dtype, as given by _libs.lib.infer_dtype
    values : np.ndarray
        Object-dtype array that is expected to be inferred as
        `inferred_dtype`

    Examples
    --------
    >>> import pandas._libs.lib as lib
    >>>
    >>> def test_something(any_allowed_skipna_inferred_dtype):
    ...     inferred_dtype, values = any_allowed_skipna_inferred_dtype
    ...     # will pass
    ...     assert lib.infer_dtype(values, skipna=True) == inferred_dtype
    """
    dtype_name, raw_values = request.param

    # correctness of inference tested in tests/dtypes/test_inference.py;
    # object dtype is requested explicitly to avoid casting
    return dtype_name, np.array(raw_values, dtype=object)

Source File: test_strings.py From twitter-stock-recommendation with MIT License

5 votes

def test_api(self):
        """Check that ``.str`` is exposed for string data and refused otherwise."""
        # GH 6106, GH 9322
        assert Series.str is strings.StringMethods
        blank = Series([''])
        assert isinstance(blank.str, strings.StringMethods)

        # GH 9184
        numeric = Series([1])
        expected_msg = "only use .str accessor"
        with tm.assert_raises_regex(AttributeError, expected_msg):
            numeric.str
        # hasattr() must report the accessor as missing as well
        assert hasattr(numeric, 'str') is False

Source File: test_strings.py From elasticintel with GNU General Public License v3.0

5 votes

def test_api(self):
        """Check that ``.str`` is exposed for string data and refused otherwise."""
        # GH 6106, GH 9322
        assert Series.str is strings.StringMethods
        blank = Series([''])
        assert isinstance(blank.str, strings.StringMethods)

        # GH 9184
        numeric = Series([1])
        expected_msg = "only use .str accessor"
        with tm.assert_raises_regex(AttributeError, expected_msg):
            numeric.str
        # hasattr() must report the accessor as missing as well
        assert hasattr(numeric, 'str') is False

Source File: test_strings.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_api_per_dtype(self, box, dtype, any_skipna_inferred_dtype):
        # one instance of parametrized fixture
        inferred_dtype, values = any_skipna_inferred_dtype

        t = box(values, dtype=dtype)  # explicit dtype to avoid casting

        # TODO: get rid of these xfails
        if dtype == 'category' and inferred_dtype in ['period', 'interval']:
            pytest.xfail(reason='Conversion to numpy array fails because '
                         'the ._values-attribute is not a numpy array for '
                         'PeriodArray/IntervalArray; see GH 23553')
        if box == Index and inferred_dtype in ['empty', 'bytes']:
            pytest.xfail(reason='Raising too restrictively; '
                         'solved by GH 23167')
        if (box == Index and dtype == object
                and inferred_dtype in ['boolean', 'date', 'time']):
            pytest.xfail(reason='Inferring incorrectly because of NaNs; '
                         'solved by GH 23167')
        if (box == Series
                and (dtype == object and inferred_dtype not in [
                    'string', 'unicode', 'empty',
                    'bytes', 'mixed', 'mixed-integer'])
                or (dtype == 'category'
                    and inferred_dtype in ['decimal', 'boolean', 'time'])):
            pytest.xfail(reason='Not raising correctly; solved by GH 23167')

        types_passing_constructor = ['string', 'unicode', 'empty',
                                     'bytes', 'mixed', 'mixed-integer']
        if inferred_dtype in types_passing_constructor:
            # GH 6106
            assert isinstance(t.str, strings.StringMethods)
        else:
            # GH 9184, GH 23011, GH 23163
            with pytest.raises(AttributeError, match='Can only use .str '
                               'accessor with string values.*'):
                t.str
            assert not hasattr(t, 'str')

Source File: test_strings.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_api(self):

        # GH 6106, GH 9322
        assert Series.str is strings.StringMethods
        assert isinstance(Series(['']).str, strings.StringMethods)

Source File: test_strings.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def any_allowed_skipna_inferred_dtype(request):
    """
    Fixture for the (inferred) dtypes accepted by StringMethods.__init__

    The parameter is read from ``request.param`` as a pair; its values are
    converted to an object-dtype array before being handed to the test.

    The covered (inferred) types are:
    * 'string'
    * 'unicode' (if PY2)
    * 'empty'
    * 'bytes' (if PY3)
    * 'mixed'
    * 'mixed-integer'

    Returns
    -------
    inferred_dtype : str
        Name of the inferred dtype, as given by _libs.lib.infer_dtype
    values : np.ndarray
        Object-dtype array that is expected to be inferred as
        `inferred_dtype`

    Examples
    --------
    >>> import pandas._libs.lib as lib
    >>>
    >>> def test_something(any_allowed_skipna_inferred_dtype):
    ...     inferred_dtype, values = any_allowed_skipna_inferred_dtype
    ...     # will pass
    ...     assert lib.infer_dtype(values, skipna=True) == inferred_dtype
    """
    dtype_name, raw_values = request.param

    # correctness of inference tested in tests/dtypes/test_inference.py;
    # object dtype is requested explicitly to avoid casting
    return dtype_name, np.array(raw_values, dtype=object)

Source File: test_strings.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def any_string_method(request):
    """
    Fixture for all public methods of `StringMethods`

    The parameter attached to ``request`` is passed through unchanged; it
    holds a method name together with sample arguments for calling it.

    Returns
    -------
    method_name : str
        Name of the method in `StringMethods`
    args : tuple
        Sample positional arguments for the method
    kwargs : dict
        Sample keyword arguments for the method

    Examples
    --------
    >>> def test_something(any_string_method):
    ...     s = pd.Series(['a', 'b', np.nan, 'd'])
    ...
    ...     method_name, args, kwargs = any_string_method
    ...     method = getattr(s.str, method_name)
    ...     # will not raise
    ...     method(*args, **kwargs)
    """
    method_spec = request.param
    return method_spec


# subset of the full set from pandas/conftest.py

Source File: series.py From Computable with MIT License

5 votes

def str(self):
        """Return a StringMethods accessor constructed from this object."""
        # the import is done at call time, inside the method
        import pandas.core.strings as _strings
        return _strings.StringMethods(self)

Source File: test_strings.py From vnpy_crypto with MIT License

5 votes

def test_api(self):
        """Check that ``.str`` is exposed for string data and refused otherwise."""
        # GH 6106, GH 9322
        assert Series.str is strings.StringMethods
        blank = Series([''])
        assert isinstance(blank.str, strings.StringMethods)

        # GH 9184
        numeric = Series([1])
        expected_msg = "only use .str accessor"
        with tm.assert_raises_regex(AttributeError, expected_msg):
            numeric.str
        # hasattr() must report the accessor as missing as well
        assert hasattr(numeric, 'str') is False

Source File: test_strings.py From recruit with Apache License 2.0

5 votes

def test_api_per_dtype(self, box, dtype, any_skipna_inferred_dtype):
        # one instance of parametrized fixture
        inferred_dtype, values = any_skipna_inferred_dtype

        t = box(values, dtype=dtype)  # explicit dtype to avoid casting

        # TODO: get rid of these xfails
        if dtype == 'category' and inferred_dtype in ['period', 'interval']:
            pytest.xfail(reason='Conversion to numpy array fails because '
                         'the ._values-attribute is not a numpy array for '
                         'PeriodArray/IntervalArray; see GH 23553')
        if box == Index and inferred_dtype in ['empty', 'bytes']:
            pytest.xfail(reason='Raising too restrictively; '
                         'solved by GH 23167')
        if (box == Index and dtype == object
                and inferred_dtype in ['boolean', 'date', 'time']):
            pytest.xfail(reason='Inferring incorrectly because of NaNs; '
                         'solved by GH 23167')
        if (box == Series
                and (dtype == object and inferred_dtype not in [
                    'string', 'unicode', 'empty',
                    'bytes', 'mixed', 'mixed-integer'])
                or (dtype == 'category'
                    and inferred_dtype in ['decimal', 'boolean', 'time'])):
            pytest.xfail(reason='Not raising correctly; solved by GH 23167')

        types_passing_constructor = ['string', 'unicode', 'empty',
                                     'bytes', 'mixed', 'mixed-integer']
        if inferred_dtype in types_passing_constructor:
            # GH 6106
            assert isinstance(t.str, strings.StringMethods)
        else:
            # GH 9184, GH 23011, GH 23163
            with pytest.raises(AttributeError, match='Can only use .str '
                               'accessor with string values.*'):
                t.str
            assert not hasattr(t, 'str')

Source File: test_strings.py From recruit with Apache License 2.0

5 votes

def test_api(self):

        # GH 6106, GH 9322
        assert Series.str is strings.StringMethods
        assert isinstance(Series(['']).str, strings.StringMethods)

Source File: test_strings.py From recruit with Apache License 2.0

5 votes

def any_string_method(request):
    """
    Fixture for all public methods of `StringMethods`

    The parameter attached to ``request`` is passed through unchanged; it
    holds a method name together with sample arguments for calling it.

    Returns
    -------
    method_name : str
        Name of the method in `StringMethods`
    args : tuple
        Sample positional arguments for the method
    kwargs : dict
        Sample keyword arguments for the method

    Examples
    --------
    >>> def test_something(any_string_method):
    ...     s = pd.Series(['a', 'b', np.nan, 'd'])
    ...
    ...     method_name, args, kwargs = any_string_method
    ...     method = getattr(s.str, method_name)
    ...     # will not raise
    ...     method(*args, **kwargs)
    """
    method_spec = request.param
    return method_spec


# subset of the full set from pandas/conftest.py

Source File: test_strings.py From vnpy_crypto with MIT License

4 votes

def test_index_str_accessor_visibility(self):
        """
        Check which Index / Series contents expose the ``.str`` accessor.

        String-like and mixed object values must yield a StringMethods
        accessor on both Series and Index; float, datetime and timedelta
        values must raise AttributeError, and so must a MultiIndex. The
        ``inferred_type`` of each Index is checked alongside.
        """
        from pandas.core.strings import StringMethods

        # The expected inferred_type differs between Python 2 and 3 only for
        # the entries that contain u(...) values.
        if not compat.PY3:
            cases = [(['a', 'b'], 'string'), (['a', u('b')], 'mixed'),
                     ([u('a'), u('b')], 'unicode'),
                     (['a', 'b', 1], 'mixed-integer'),
                     (['a', 'b', 1.3], 'mixed'),
                     (['a', 'b', 1.3, 1], 'mixed-integer'),
                     (['aa', datetime(2011, 1, 1)], 'mixed')]
        else:
            cases = [(['a', 'b'], 'string'), (['a', u('b')], 'string'),
                     ([u('a'), u('b')], 'string'),
                     (['a', 'b', 1], 'mixed-integer'),
                     (['a', 'b', 1.3], 'mixed'),
                     (['a', 'b', 1.3, 1], 'mixed-integer'),
                     (['aa', datetime(2011, 1, 1)], 'mixed')]
        for values, tp in cases:
            idx = Index(values)
            assert isinstance(Series(values).str, StringMethods)
            assert isinstance(idx.str, StringMethods)
            assert idx.inferred_type == tp

        # values for which the accessor must not be available
        invalid_cases = [([1, np.nan], 'floating'),
                         ([datetime(2011, 1, 1)], 'datetime64'),
                         ([timedelta(1)], 'timedelta64')]
        message = 'Can only use .str accessor with string values'
        for values, tp in invalid_cases:
            idx = Index(values)
            with tm.assert_raises_regex(AttributeError, message):
                Series(values).str
            with tm.assert_raises_regex(AttributeError, message):
                idx.str
            assert idx.inferred_type == tp

        # MultiIndex has mixed dtype, but not allow to use accessor
        idx = MultiIndex.from_tuples([('a', 'b'), ('a', 'b')])
        assert idx.inferred_type == 'mixed'
        message = 'Can only use .str accessor with Index, not MultiIndex'
        with tm.assert_raises_regex(AttributeError, message):
            idx.str

Source File: test_strings.py From elasticintel with GNU General Public License v3.0

4 votes

def test_index_str_accessor_visibility(self):
        """
        Check which Index / Series contents expose the ``.str`` accessor.

        String-like and mixed object values must yield a StringMethods
        accessor on both Series and Index; float, datetime and timedelta
        values must raise AttributeError, and so must a MultiIndex. The
        ``inferred_type`` of each Index is checked alongside.
        """
        from pandas.core.strings import StringMethods

        # The expected inferred_type differs between Python 2 and 3 only for
        # the entries that contain u(...) values.
        if not compat.PY3:
            cases = [(['a', 'b'], 'string'), (['a', u('b')], 'mixed'),
                     ([u('a'), u('b')], 'unicode'),
                     (['a', 'b', 1], 'mixed-integer'),
                     (['a', 'b', 1.3], 'mixed'),
                     (['a', 'b', 1.3, 1], 'mixed-integer'),
                     (['aa', datetime(2011, 1, 1)], 'mixed')]
        else:
            cases = [(['a', 'b'], 'string'), (['a', u('b')], 'string'),
                     ([u('a'), u('b')], 'string'),
                     (['a', 'b', 1], 'mixed-integer'),
                     (['a', 'b', 1.3], 'mixed'),
                     (['a', 'b', 1.3, 1], 'mixed-integer'),
                     (['aa', datetime(2011, 1, 1)], 'mixed')]
        for values, tp in cases:
            idx = Index(values)
            assert isinstance(Series(values).str, StringMethods)
            assert isinstance(idx.str, StringMethods)
            assert idx.inferred_type == tp

        # values for which the accessor must not be available
        invalid_cases = [([1, np.nan], 'floating'),
                         ([datetime(2011, 1, 1)], 'datetime64'),
                         ([timedelta(1)], 'timedelta64')]
        message = 'Can only use .str accessor with string values'
        for values, tp in invalid_cases:
            idx = Index(values)
            with tm.assert_raises_regex(AttributeError, message):
                Series(values).str
            with tm.assert_raises_regex(AttributeError, message):
                idx.str
            assert idx.inferred_type == tp

        # MultiIndex has mixed dtype, but not allow to use accessor
        idx = MultiIndex.from_tuples([('a', 'b'), ('a', 'b')])
        assert idx.inferred_type == 'mixed'
        message = 'Can only use .str accessor with Index, not MultiIndex'
        with tm.assert_raises_regex(AttributeError, message):
            idx.str

Source File: test_strings.py From twitter-stock-recommendation with MIT License

4 votes

def test_index_str_accessor_visibility(self):
        """
        Check which Index / Series contents expose the ``.str`` accessor.

        String-like and mixed object values must yield a StringMethods
        accessor on both Series and Index; float, datetime and timedelta
        values must raise AttributeError, and so must a MultiIndex. The
        ``inferred_type`` of each Index is checked alongside.
        """
        from pandas.core.strings import StringMethods

        # The expected inferred_type differs between Python 2 and 3 only for
        # the entries that contain u(...) values.
        if not compat.PY3:
            cases = [(['a', 'b'], 'string'), (['a', u('b')], 'mixed'),
                     ([u('a'), u('b')], 'unicode'),
                     (['a', 'b', 1], 'mixed-integer'),
                     (['a', 'b', 1.3], 'mixed'),
                     (['a', 'b', 1.3, 1], 'mixed-integer'),
                     (['aa', datetime(2011, 1, 1)], 'mixed')]
        else:
            cases = [(['a', 'b'], 'string'), (['a', u('b')], 'string'),
                     ([u('a'), u('b')], 'string'),
                     (['a', 'b', 1], 'mixed-integer'),
                     (['a', 'b', 1.3], 'mixed'),
                     (['a', 'b', 1.3, 1], 'mixed-integer'),
                     (['aa', datetime(2011, 1, 1)], 'mixed')]
        for values, tp in cases:
            idx = Index(values)
            assert isinstance(Series(values).str, StringMethods)
            assert isinstance(idx.str, StringMethods)
            assert idx.inferred_type == tp

        # values for which the accessor must not be available
        invalid_cases = [([1, np.nan], 'floating'),
                         ([datetime(2011, 1, 1)], 'datetime64'),
                         ([timedelta(1)], 'timedelta64')]
        message = 'Can only use .str accessor with string values'
        for values, tp in invalid_cases:
            idx = Index(values)
            with tm.assert_raises_regex(AttributeError, message):
                Series(values).str
            with tm.assert_raises_regex(AttributeError, message):
                idx.str
            assert idx.inferred_type == tp

        # MultiIndex has mixed dtype, but not allow to use accessor
        idx = MultiIndex.from_tuples([('a', 'b'), ('a', 'b')])
        assert idx.inferred_type == 'mixed'
        message = 'Can only use .str accessor with Index, not MultiIndex'
        with tm.assert_raises_regex(AttributeError, message):
            idx.str

Python pandas.core.strings.StringMethods() Examples