Python pandas.core.strings.StringMethods() Examples

The following are 17 code examples of pandas.core.strings.StringMethods(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.core.strings, or try the search function.
Example #1
Source File: test_strings.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_api_for_categorical(self, any_string_method):
        """
        Check that ``.str`` on a categorical Series mirrors the plain Series.

        Parameters
        ----------
        any_string_method : tuple
            ``(method_name, args, kwargs)`` describing the accessor method
            to call and the sample arguments to call it with.
        """
        # https://github.com/pandas-dev/pandas/issues/10661
        letters = Series(list('aabb'))
        plain = letters + " " + letters
        categorical = plain.astype('category')
        assert isinstance(categorical.str, strings.StringMethods)

        name, pos_args, kw_args = any_string_method

        result = getattr(categorical.str, name)(*pos_args, **kw_args)
        expected = getattr(plain.str, name)(*pos_args, **kw_args)

        # pick the comparison that fits the type of the result
        if isinstance(result, DataFrame):
            tm.assert_frame_equal(result, expected)
            return
        if isinstance(result, Series):
            tm.assert_series_equal(result, expected)
            return
        # str.cat(others=None) returns string, for example
        assert result == expected
Example #2
Source File: test_strings.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_api_for_categorical(self, any_string_method):
        """
        Check that ``.str`` on a categorical Series mirrors the plain Series.

        Parameters
        ----------
        any_string_method : tuple
            ``(method_name, args, kwargs)`` describing the accessor method
            to call and the sample arguments to call it with.
        """
        # https://github.com/pandas-dev/pandas/issues/10661
        letters = Series(list('aabb'))
        plain = letters + " " + letters
        categorical = plain.astype('category')
        assert isinstance(categorical.str, strings.StringMethods)

        name, pos_args, kw_args = any_string_method

        result = getattr(categorical.str, name)(*pos_args, **kw_args)
        expected = getattr(plain.str, name)(*pos_args, **kw_args)

        # pick the comparison that fits the type of the result
        if isinstance(result, DataFrame):
            tm.assert_frame_equal(result, expected)
            return
        if isinstance(result, Series):
            tm.assert_series_equal(result, expected)
            return
        # str.cat(others=None) returns string, for example
        assert result == expected
Example #3
Source File: test_strings.py    From recruit with Apache License 2.0 5 votes vote down vote up
def any_allowed_skipna_inferred_dtype(request):
    """
    Fixture for all (inferred) dtypes allowed in StringMethods.__init__

    The covered (inferred) types are:
    * 'string'
    * 'unicode' (if PY2)
    * 'empty'
    * 'bytes' (if PY3)
    * 'mixed'
    * 'mixed-integer'

    Parameters
    ----------
    request : object
        Object whose ``param`` attribute holds an
        ``(inferred_dtype, values)`` pair.

    Returns
    -------
    inferred_dtype : str
        The string for the inferred dtype from _libs.lib.infer_dtype
    values : np.ndarray
        The sample values converted to an array of object dtype.

    Examples
    --------
    >>> import pandas._libs.lib as lib
    >>>
    >>> def test_something(any_allowed_skipna_inferred_dtype):
    ...     inferred_dtype, values = any_allowed_skipna_inferred_dtype
    ...     # will pass
    ...     assert lib.infer_dtype(values, skipna=True) == inferred_dtype
    """
    expected_dtype, raw_values = request.param

    # object dtype to avoid casting
    as_objects = np.array(raw_values, dtype=object)

    # correctness of inference tested in tests/dtypes/test_inference.py
    return expected_dtype, as_objects
Example #4
Source File: test_strings.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_api(self):
        """
        Check how ``Series.str`` is wired to ``StringMethods``.

        ``.str`` on string data must give a ``StringMethods`` instance,
        while accessing it on integer data must raise ``AttributeError``.
        """
        # GH 6106, GH 9322
        accessor_cls = strings.StringMethods
        assert Series.str is accessor_cls
        assert isinstance(Series(['']).str, accessor_cls)

        # GH 9184
        invalid = Series([1])
        expected_msg = "only use .str accessor"
        with tm.assert_raises_regex(AttributeError, expected_msg):
            invalid.str
        assert not hasattr(invalid, 'str')
Example #5
Source File: test_strings.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_api(self):
        """
        Check how ``Series.str`` is wired to ``StringMethods``.

        ``.str`` on string data must give a ``StringMethods`` instance,
        while accessing it on integer data must raise ``AttributeError``.
        """
        # GH 6106, GH 9322
        accessor_cls = strings.StringMethods
        assert Series.str is accessor_cls
        assert isinstance(Series(['']).str, accessor_cls)

        # GH 9184
        invalid = Series([1])
        expected_msg = "only use .str accessor"
        with tm.assert_raises_regex(AttributeError, expected_msg):
            invalid.str
        assert not hasattr(invalid, 'str')
Example #6
Source File: test_strings.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_api_per_dtype(self, box, dtype, any_skipna_inferred_dtype):
        """
        Check availability of ``.str`` for one box/dtype/inferred-dtype combo.

        Builds ``box(values, dtype=dtype)`` and asserts that ``.str`` is a
        ``StringMethods`` instance when the inferred dtype is one of the
        types accepted by the constructor, and that accessing ``.str``
        raises ``AttributeError`` otherwise.  Several combinations are
        xfailed before the actual check.

        Parameters
        ----------
        box : callable
            Constructor for the object under test; compared against
            ``Index`` and ``Series`` below.
        dtype : object
            Passed as ``dtype=`` to ``box``; compared against ``'category'``
            and ``object`` below.
        any_skipna_inferred_dtype : tuple
            ``(inferred_dtype, values)`` pair.
        """
        # one instance of parametrized fixture
        inferred_dtype, values = any_skipna_inferred_dtype

        t = box(values, dtype=dtype)  # explicit dtype to avoid casting

        # TODO: get rid of these xfails
        if dtype == 'category' and inferred_dtype in ['period', 'interval']:
            pytest.xfail(reason='Conversion to numpy array fails because '
                         'the ._values-attribute is not a numpy array for '
                         'PeriodArray/IntervalArray; see GH 23553')
        if box == Index and inferred_dtype in ['empty', 'bytes']:
            pytest.xfail(reason='Raising too restrictively; '
                         'solved by GH 23167')
        if (box == Index and dtype == object
                and inferred_dtype in ['boolean', 'date', 'time']):
            pytest.xfail(reason='Inferring incorrectly because of NaNs; '
                         'solved by GH 23167')
        # NOTE(review): `and` binds tighter than `or`, so this condition reads
        # as (box == Series and <object clause>) or <category clause>; the
        # category clause is therefore evaluated for every `box` -- confirm
        # that this is intended.
        if (box == Series
                and (dtype == object and inferred_dtype not in [
                    'string', 'unicode', 'empty',
                    'bytes', 'mixed', 'mixed-integer'])
                or (dtype == 'category'
                    and inferred_dtype in ['decimal', 'boolean', 'time'])):
            pytest.xfail(reason='Not raising correctly; solved by GH 23167')

        # inferred dtypes for which accessing ``.str`` is expected to succeed
        types_passing_constructor = ['string', 'unicode', 'empty',
                                     'bytes', 'mixed', 'mixed-integer']
        if inferred_dtype in types_passing_constructor:
            # GH 6106
            assert isinstance(t.str, strings.StringMethods)
        else:
            # GH 9184, GH 23011, GH 23163
            with pytest.raises(AttributeError, match='Can only use .str '
                               'accessor with string values.*'):
                t.str
            # the failed access must also hide the attribute from hasattr
            assert not hasattr(t, 'str')
Example #7
Source File: test_strings.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_api(self):
        """
        Check that ``Series.str`` is ``StringMethods`` at class level and
        yields a ``StringMethods`` instance for a Series of strings.
        """
        # GH 6106, GH 9322
        accessor_cls = strings.StringMethods
        assert Series.str is accessor_cls

        string_series = Series([''])
        assert isinstance(string_series.str, accessor_cls)
Example #8
Source File: test_strings.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def any_allowed_skipna_inferred_dtype(request):
    """
    Fixture for all (inferred) dtypes allowed in StringMethods.__init__

    The covered (inferred) types are:
    * 'string'
    * 'unicode' (if PY2)
    * 'empty'
    * 'bytes' (if PY3)
    * 'mixed'
    * 'mixed-integer'

    Parameters
    ----------
    request : object
        Object whose ``param`` attribute holds an
        ``(inferred_dtype, values)`` pair.

    Returns
    -------
    inferred_dtype : str
        The string for the inferred dtype from _libs.lib.infer_dtype
    values : np.ndarray
        The sample values converted to an array of object dtype.

    Examples
    --------
    >>> import pandas._libs.lib as lib
    >>>
    >>> def test_something(any_allowed_skipna_inferred_dtype):
    ...     inferred_dtype, values = any_allowed_skipna_inferred_dtype
    ...     # will pass
    ...     assert lib.infer_dtype(values, skipna=True) == inferred_dtype
    """
    expected_dtype, raw_values = request.param

    # object dtype to avoid casting
    as_objects = np.array(raw_values, dtype=object)

    # correctness of inference tested in tests/dtypes/test_inference.py
    return expected_dtype, as_objects
Example #9
Source File: test_strings.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def any_string_method(request):
    """
    Fixture for all public methods of `StringMethods`

    Hands back the current parameter unchanged: a tuple holding the
    method name together with sample arguments for calling that method.

    Parameters
    ----------
    request : object
        Object whose ``param`` attribute is returned.

    Returns
    -------
    method_name : str
        The name of the method in `StringMethods`
    args : tuple
        Sample values for the positional arguments
    kwargs : dict
        Sample values for the keyword arguments

    Examples
    --------
    >>> def test_something(any_string_method):
    ...     s = pd.Series(['a', 'b', np.nan, 'd'])
    ...
    ...     method_name, args, kwargs = any_string_method
    ...     method = getattr(s.str, method_name)
    ...     # will not raise
    ...     method(*args, **kwargs)
    """
    method_spec = request.param
    return method_spec


# subset of the full set from pandas/conftest.py 
Example #10
Source File: series.py    From Computable with MIT License 5 votes vote down vote up
def str(self):
        """Return a ``StringMethods`` object constructed from ``self``."""
        # imported inside the function, as in the original
        # (NOTE(review): presumably to avoid a circular import -- confirm)
        from pandas.core.strings import StringMethods

        accessor = StringMethods(self)
        return accessor
Example #11
Source File: test_strings.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_api(self):
        """
        Check how ``Series.str`` is wired to ``StringMethods``.

        ``.str`` on string data must give a ``StringMethods`` instance,
        while accessing it on integer data must raise ``AttributeError``.
        """
        # GH 6106, GH 9322
        accessor_cls = strings.StringMethods
        assert Series.str is accessor_cls
        assert isinstance(Series(['']).str, accessor_cls)

        # GH 9184
        invalid = Series([1])
        expected_msg = "only use .str accessor"
        with tm.assert_raises_regex(AttributeError, expected_msg):
            invalid.str
        assert not hasattr(invalid, 'str')
Example #12
Source File: test_strings.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_api_per_dtype(self, box, dtype, any_skipna_inferred_dtype):
        """
        Check availability of ``.str`` for one box/dtype/inferred-dtype combo.

        Builds ``box(values, dtype=dtype)`` and asserts that ``.str`` is a
        ``StringMethods`` instance when the inferred dtype is one of the
        types accepted by the constructor, and that accessing ``.str``
        raises ``AttributeError`` otherwise.  Several combinations are
        xfailed before the actual check.

        Parameters
        ----------
        box : callable
            Constructor for the object under test; compared against
            ``Index`` and ``Series`` below.
        dtype : object
            Passed as ``dtype=`` to ``box``; compared against ``'category'``
            and ``object`` below.
        any_skipna_inferred_dtype : tuple
            ``(inferred_dtype, values)`` pair.
        """
        # one instance of parametrized fixture
        inferred_dtype, values = any_skipna_inferred_dtype

        t = box(values, dtype=dtype)  # explicit dtype to avoid casting

        # TODO: get rid of these xfails
        if dtype == 'category' and inferred_dtype in ['period', 'interval']:
            pytest.xfail(reason='Conversion to numpy array fails because '
                         'the ._values-attribute is not a numpy array for '
                         'PeriodArray/IntervalArray; see GH 23553')
        if box == Index and inferred_dtype in ['empty', 'bytes']:
            pytest.xfail(reason='Raising too restrictively; '
                         'solved by GH 23167')
        if (box == Index and dtype == object
                and inferred_dtype in ['boolean', 'date', 'time']):
            pytest.xfail(reason='Inferring incorrectly because of NaNs; '
                         'solved by GH 23167')
        # NOTE(review): `and` binds tighter than `or`, so this condition reads
        # as (box == Series and <object clause>) or <category clause>; the
        # category clause is therefore evaluated for every `box` -- confirm
        # that this is intended.
        if (box == Series
                and (dtype == object and inferred_dtype not in [
                    'string', 'unicode', 'empty',
                    'bytes', 'mixed', 'mixed-integer'])
                or (dtype == 'category'
                    and inferred_dtype in ['decimal', 'boolean', 'time'])):
            pytest.xfail(reason='Not raising correctly; solved by GH 23167')

        # inferred dtypes for which accessing ``.str`` is expected to succeed
        types_passing_constructor = ['string', 'unicode', 'empty',
                                     'bytes', 'mixed', 'mixed-integer']
        if inferred_dtype in types_passing_constructor:
            # GH 6106
            assert isinstance(t.str, strings.StringMethods)
        else:
            # GH 9184, GH 23011, GH 23163
            with pytest.raises(AttributeError, match='Can only use .str '
                               'accessor with string values.*'):
                t.str
            # the failed access must also hide the attribute from hasattr
            assert not hasattr(t, 'str')
Example #13
Source File: test_strings.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_api(self):
        """
        Check that ``Series.str`` is ``StringMethods`` at class level and
        yields a ``StringMethods`` instance for a Series of strings.
        """
        # GH 6106, GH 9322
        accessor_cls = strings.StringMethods
        assert Series.str is accessor_cls

        string_series = Series([''])
        assert isinstance(string_series.str, accessor_cls)
Example #14
Source File: test_strings.py    From recruit with Apache License 2.0 5 votes vote down vote up
def any_string_method(request):
    """
    Fixture for all public methods of `StringMethods`

    Hands back the current parameter unchanged: a tuple holding the
    method name together with sample arguments for calling that method.

    Parameters
    ----------
    request : object
        Object whose ``param`` attribute is returned.

    Returns
    -------
    method_name : str
        The name of the method in `StringMethods`
    args : tuple
        Sample values for the positional arguments
    kwargs : dict
        Sample values for the keyword arguments

    Examples
    --------
    >>> def test_something(any_string_method):
    ...     s = pd.Series(['a', 'b', np.nan, 'd'])
    ...
    ...     method_name, args, kwargs = any_string_method
    ...     method = getattr(s.str, method_name)
    ...     # will not raise
    ...     method(*args, **kwargs)
    """
    method_spec = request.param
    return method_spec


# subset of the full set from pandas/conftest.py 
Example #15
Source File: test_strings.py    From vnpy_crypto with MIT License 4 votes vote down vote up
def test_index_str_accessor_visibility(self):
        """
        Check which Index/Series contents expose the ``.str`` accessor.

        String and mixed object values must give a ``StringMethods``
        accessor on both ``Series`` and ``Index``.  Float, datetime and
        timedelta values must raise ``AttributeError`` on both, and a
        ``MultiIndex`` must raise ``AttributeError`` with its own message.
        The expected ``Index.inferred_type`` is asserted for every case.
        """
        from pandas.core.strings import StringMethods

        # (values, expected Index.inferred_type); only the entries built
        # with ``u(...)`` differ between the two branches
        if not compat.PY3:
            cases = [(['a', 'b'], 'string'), (['a', u('b')], 'mixed'),
                     ([u('a'), u('b')], 'unicode'),
                     (['a', 'b', 1], 'mixed-integer'),
                     (['a', 'b', 1.3], 'mixed'),
                     (['a', 'b', 1.3, 1], 'mixed-integer'),
                     (['aa', datetime(2011, 1, 1)], 'mixed')]
        else:
            cases = [(['a', 'b'], 'string'), (['a', u('b')], 'string'),
                     ([u('a'), u('b')], 'string'),
                     (['a', 'b', 1], 'mixed-integer'),
                     (['a', 'b', 1.3], 'mixed'),
                     (['a', 'b', 1.3, 1], 'mixed-integer'),
                     (['aa', datetime(2011, 1, 1)], 'mixed')]

        # accessor must be available on both Series and Index
        for values, tp in cases:
            idx = Index(values)
            assert isinstance(Series(values).str, StringMethods)
            assert isinstance(idx.str, StringMethods)
            assert idx.inferred_type == tp

        # non-string values: accessing .str must raise on Series and Index
        cases = [([1, np.nan], 'floating'),
                 ([datetime(2011, 1, 1)], 'datetime64'),
                 ([timedelta(1)], 'timedelta64')]
        message = 'Can only use .str accessor with string values'
        for values, tp in cases:
            idx = Index(values)
            with tm.assert_raises_regex(AttributeError, message):
                Series(values).str
            with tm.assert_raises_regex(AttributeError, message):
                idx.str
            assert idx.inferred_type == tp

        # MultiIndex has mixed dtype, but not allow to use accessor
        idx = MultiIndex.from_tuples([('a', 'b'), ('a', 'b')])
        assert idx.inferred_type == 'mixed'
        message = 'Can only use .str accessor with Index, not MultiIndex'
        with tm.assert_raises_regex(AttributeError, message):
            idx.str
Example #16
Source File: test_strings.py    From elasticintel with GNU General Public License v3.0 4 votes vote down vote up
def test_index_str_accessor_visibility(self):
        """
        Check which Index/Series contents expose the ``.str`` accessor.

        String and mixed object values must give a ``StringMethods``
        accessor on both ``Series`` and ``Index``.  Float, datetime and
        timedelta values must raise ``AttributeError`` on both, and a
        ``MultiIndex`` must raise ``AttributeError`` with its own message.
        The expected ``Index.inferred_type`` is asserted for every case.
        """
        from pandas.core.strings import StringMethods

        # (values, expected Index.inferred_type); only the entries built
        # with ``u(...)`` differ between the two branches
        if not compat.PY3:
            cases = [(['a', 'b'], 'string'), (['a', u('b')], 'mixed'),
                     ([u('a'), u('b')], 'unicode'),
                     (['a', 'b', 1], 'mixed-integer'),
                     (['a', 'b', 1.3], 'mixed'),
                     (['a', 'b', 1.3, 1], 'mixed-integer'),
                     (['aa', datetime(2011, 1, 1)], 'mixed')]
        else:
            cases = [(['a', 'b'], 'string'), (['a', u('b')], 'string'),
                     ([u('a'), u('b')], 'string'),
                     (['a', 'b', 1], 'mixed-integer'),
                     (['a', 'b', 1.3], 'mixed'),
                     (['a', 'b', 1.3, 1], 'mixed-integer'),
                     (['aa', datetime(2011, 1, 1)], 'mixed')]

        # accessor must be available on both Series and Index
        for values, tp in cases:
            idx = Index(values)
            assert isinstance(Series(values).str, StringMethods)
            assert isinstance(idx.str, StringMethods)
            assert idx.inferred_type == tp

        # non-string values: accessing .str must raise on Series and Index
        cases = [([1, np.nan], 'floating'),
                 ([datetime(2011, 1, 1)], 'datetime64'),
                 ([timedelta(1)], 'timedelta64')]
        message = 'Can only use .str accessor with string values'
        for values, tp in cases:
            idx = Index(values)
            with tm.assert_raises_regex(AttributeError, message):
                Series(values).str
            with tm.assert_raises_regex(AttributeError, message):
                idx.str
            assert idx.inferred_type == tp

        # MultiIndex has mixed dtype, but not allow to use accessor
        idx = MultiIndex.from_tuples([('a', 'b'), ('a', 'b')])
        assert idx.inferred_type == 'mixed'
        message = 'Can only use .str accessor with Index, not MultiIndex'
        with tm.assert_raises_regex(AttributeError, message):
            idx.str
Example #17
Source File: test_strings.py    From twitter-stock-recommendation with MIT License 4 votes vote down vote up
def test_index_str_accessor_visibility(self):
        """
        Check which Index/Series contents expose the ``.str`` accessor.

        String and mixed object values must give a ``StringMethods``
        accessor on both ``Series`` and ``Index``.  Float, datetime and
        timedelta values must raise ``AttributeError`` on both, and a
        ``MultiIndex`` must raise ``AttributeError`` with its own message.
        The expected ``Index.inferred_type`` is asserted for every case.
        """
        from pandas.core.strings import StringMethods

        # (values, expected Index.inferred_type); only the entries built
        # with ``u(...)`` differ between the two branches
        if not compat.PY3:
            cases = [(['a', 'b'], 'string'), (['a', u('b')], 'mixed'),
                     ([u('a'), u('b')], 'unicode'),
                     (['a', 'b', 1], 'mixed-integer'),
                     (['a', 'b', 1.3], 'mixed'),
                     (['a', 'b', 1.3, 1], 'mixed-integer'),
                     (['aa', datetime(2011, 1, 1)], 'mixed')]
        else:
            cases = [(['a', 'b'], 'string'), (['a', u('b')], 'string'),
                     ([u('a'), u('b')], 'string'),
                     (['a', 'b', 1], 'mixed-integer'),
                     (['a', 'b', 1.3], 'mixed'),
                     (['a', 'b', 1.3, 1], 'mixed-integer'),
                     (['aa', datetime(2011, 1, 1)], 'mixed')]

        # accessor must be available on both Series and Index
        for values, tp in cases:
            idx = Index(values)
            assert isinstance(Series(values).str, StringMethods)
            assert isinstance(idx.str, StringMethods)
            assert idx.inferred_type == tp

        # non-string values: accessing .str must raise on Series and Index
        cases = [([1, np.nan], 'floating'),
                 ([datetime(2011, 1, 1)], 'datetime64'),
                 ([timedelta(1)], 'timedelta64')]
        message = 'Can only use .str accessor with string values'
        for values, tp in cases:
            idx = Index(values)
            with tm.assert_raises_regex(AttributeError, message):
                Series(values).str
            with tm.assert_raises_regex(AttributeError, message):
                idx.str
            assert idx.inferred_type == tp

        # MultiIndex has mixed dtype, but not allow to use accessor
        idx = MultiIndex.from_tuples([('a', 'b'), ('a', 'b')])
        assert idx.inferred_type == 'mixed'
        message = 'Can only use .str accessor with Index, not MultiIndex'
        with tm.assert_raises_regex(AttributeError, message):
            idx.str