Python Examples of pandas.core.algorithms.take_1d

Source File: categorical.py From predictive-maintenance-using-machine-learning with Apache License 2.0

6 votes

def __array__(self, dtype=None):
        """
        The numpy array interface.

        Parameters
        ----------
        dtype : optional
            Requested dtype. When it is falsy, or equal to
            ``self.categories.dtype``, no explicit conversion to it is made.

        Returns
        -------
        values : numpy array
            A numpy array of either the specified dtype or,
            if dtype==None (default), the same dtype as
            categorical.categories.dtype
        """
        taken = take_1d(self.categories.values, self._codes)
        if dtype and not is_dtype_equal(dtype, self.categories.dtype):
            return np.asarray(taken, dtype)
        if not is_extension_array_dtype(taken):
            return taken
        # For a Categorical[ExtensionArray] (e.g. Interval) the take result
        # is still an extension array; convert it explicitly so __array__
        # goes all the way to an ndarray.
        return np.asarray(taken)

Source File: groupby.py From elasticintel with GNU General Public License v3.0

6 votes

def _transform_fast(self, result, obj):
        """
        Fast transform path for aggregations.

        Every column of the aggregated ``result`` is reshaped to the size of
        the original frame with a take along the group ids, and the columns
        are reassembled into a DataFrame that carries ``result.columns`` and
        ``obj.index``.
        """
        # Casting back to the original dtype is attempted when at least one
        # group size is > 0 (groups with no observations: Categorical only?).
        should_cast = (self.size().fillna(0) > 0).any()

        group_ids, _, _ = self.grouper.group_info

        def _expand(pos):
            # Broadcast one aggregated column via take, then optionally cast
            # it against the matching column of the original object.
            taken = algorithms.take_1d(result.iloc[:, pos].values, group_ids)
            if should_cast:
                taken = self._try_cast(taken, obj.iloc[:, pos])
            return taken

        output = [_expand(pos) for pos, _ in enumerate(result.columns)]
        return DataFrame._from_arrays(output, columns=result.columns,
                                      index=obj.index)

Source File: categorical.py From recruit with Apache License 2.0

6 votes

def __array__(self, dtype=None):
        """
        The numpy array interface.

        The category values are taken along ``self._codes``; the result is
        converted with ``np.asarray`` either to the requested ``dtype`` or,
        when it is an extension array, to a plain ndarray.

        Returns
        -------
        values : numpy array
            A numpy array of either the specified dtype or,
            if dtype==None (default), the same dtype as
            categorical.categories.dtype
        """
        values = take_1d(self.categories.values, self._codes)
        if dtype and not is_dtype_equal(dtype, self.categories.dtype):
            # Caller asked for a dtype different from the categories' dtype.
            return np.asarray(values, dtype)
        if is_extension_array_dtype(values):
            # Categorical[ExtensionArray], like Interval: make sure
            # __array__ gets all the way to an ndarray.
            return np.asarray(values)
        return values

Source File: multi.py From elasticintel with GNU General Public License v3.0

6 votes

def values(self):
        """
        Return the zipped per-level values, computing and caching them in
        ``self._tuples`` on first use.
        """
        cached = self._tuples
        if cached is not None:
            return cached

        per_level = []
        for level, codes in zip(self.levels, self.labels):
            if not hasattr(level, '_box_values'):
                # Nothing to box (no _box_values on this level).
                taken = algos.take_1d(np.asarray(level._values), codes)
            elif len(level) > len(codes):
                # Fewer codes than level entries: take first, box after,
                # to minimize boxing.
                taken = level._box_values(algos.take_1d(level._values, codes))
            else:
                # Box the level once (timestamps, etc.), then take.
                taken = algos.take_1d(
                    level._box_values(level._values), codes,
                    fill_value=_get_na_value(level.dtype.type))
            per_level.append(taken)

        self._tuples = lib.fast_zip(per_level)
        return self._tuples

    # fml

Source File: multi.py From elasticintel with GNU General Public License v3.0

6 votes

def _get_level_values(self, level):
        """
        Return vector of label values for requested level,
        equal to the length of the index

        **this is an internal method**

        Parameters
        ----------
        level : int level

        Returns
        -------
        values : ndarray
        """
        level_index = self.levels[level]
        level_labels = self.labels[level]
        # Expand the level's values along the labels, filling with the
        # level's own NA value.
        filled = algos.take_1d(level_index._values, level_labels,
                               fill_value=level_index._na_value)
        return level_index._shallow_copy(filled)

Source File: managers.py From recruit with Apache License 2.0

6 votes

def combine(self, blocks, copy=True):
        """
        Return a new manager with the blocks.

        Parameters
        ----------
        blocks : sequence of blocks
            When empty, ``self.make_empty()`` is returned.
        copy : bool, default True
            Passed as ``deep`` to each block's ``copy``.
        """
        if len(blocks) == 0:
            return self.make_empty()

        # FIXME: optimization potential
        locs = [blk.mgr_locs.as_array for blk in blocks]
        indexer = np.sort(np.concatenate(locs))
        inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0])

        def _relocate(blk):
            # Copy the block and remap its locations through the reverse
            # indexer.
            blk = blk.copy(deep=copy)
            blk.mgr_locs = algos.take_1d(inv_indexer, blk.mgr_locs.as_array,
                                         axis=0, allow_fill=False)
            return blk

        new_blocks = [_relocate(blk) for blk in blocks]

        new_axes = list(self.axes)
        new_axes[0] = self.items.take(indexer)

        return self.__class__(new_blocks, new_axes, do_integrity_check=False)

Source File: internals.py From Splunking-Crime with GNU Affero General Public License v3.0

6 votes

def combine(self, blocks, copy=True):
        """
        Return a new manager with the blocks.

        An empty ``blocks`` yields ``self.make_empty()``. Otherwise each
        block is copied (``deep=copy``), its ``mgr_locs`` are remapped through
        a reverse indexer, and a new instance of ``self.__class__`` is built
        from the relocated blocks.
        """
        if len(blocks) == 0:
            return self.make_empty()

        # FIXME: optimization potential
        all_locs = np.concatenate([blk.mgr_locs.as_array for blk in blocks])
        indexer = np.sort(all_locs)
        inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0])

        new_blocks = []
        for original in blocks:
            relocated = original.copy(deep=copy)
            relocated.mgr_locs = algos.take_1d(
                inv_indexer, relocated.mgr_locs.as_array,
                axis=0, allow_fill=False)
            new_blocks.append(relocated)

        new_axes = list(self.axes)
        new_axes[0] = self.items.take(indexer)

        return self.__class__(new_blocks, new_axes, do_integrity_check=False)

Source File: generic.py From recruit with Apache License 2.0

6 votes

def _transform_fast(self, result, obj, func_nm):
        """
        Fast transform path for aggregations.

        Every column of the aggregated ``result`` is reshaped to the size of
        the original frame with a take along the group ids. Whether the taken
        values are cast against the original columns is decided by
        ``self._transform_should_cast(func_nm)``.
        """
        should_cast = self._transform_should_cast(func_nm)

        group_ids, _, _ = self.grouper.group_info

        def _expand(pos):
            # Broadcast one aggregated column via take, then optionally cast
            # it against the matching column of the original object.
            taken = algorithms.take_1d(result.iloc[:, pos].values, group_ids)
            if should_cast:
                taken = self._try_cast(taken, obj.iloc[:, pos])
            return taken

        output = [_expand(pos) for pos, _ in enumerate(result.columns)]
        return DataFrame._from_arrays(output, columns=result.columns,
                                      index=obj.index)

Source File: groupby.py From Splunking-Crime with GNU Affero General Public License v3.0

6 votes

def _transform_fast(self, result, obj):
        """
        Fast transform path for aggregations.

        Takes each column of ``result`` along the group ids, optionally casts
        it against the corresponding column of ``obj``, and builds a
        DataFrame with ``result.columns`` and ``obj.index``.
        """
        # Try casting to the original dtype only when some group size is > 0
        # (groups with no observations: Categorical only?).
        do_cast = (self.size().fillna(0) > 0).any()

        # For each column, reshape to the size of the original frame by a
        # take operation.
        ids, _, _ = self.grouper.group_info
        output = []
        for pos, _ in enumerate(result.columns):
            column = algorithms.take_1d(result.iloc[:, pos].values, ids)
            output.append(
                self._try_cast(column, obj.iloc[:, pos]) if do_cast
                else column)

        return DataFrame._from_arrays(output, columns=result.columns,
                                      index=obj.index)

Source File: multi.py From Splunking-Crime with GNU Affero General Public License v3.0

6 votes

def _get_level_values(self, level):
        """
        Return vector of label values for requested level,
        equal to the length of the index

        **this is an internal method**

        Parameters
        ----------
        level : int level

        Returns
        -------
        values : ndarray
        """
        lev = self.levels[level]
        # Take the level's values along this level's labels; the level's
        # own NA value is used as the fill value.
        taken = algos.take_1d(lev._values, self.labels[level],
                              fill_value=lev._na_value)
        return lev._shallow_copy(taken)

Source File: groupby.py From vnpy_crypto with MIT License

6 votes

def _transform_fast(self, result, obj, func_nm):
        """
        Fast transform path for aggregations.

        Takes each column of ``result`` along the group ids, casts it against
        the corresponding column of ``obj`` when
        ``self._transform_should_cast(func_nm)`` says so, and builds a
        DataFrame with ``result.columns`` and ``obj.index``.
        """
        do_cast = self._transform_should_cast(func_nm)

        # For each column, reshape to the size of the original frame by a
        # take operation.
        ids, _, _ = self.grouper.group_info
        output = []
        for pos, _ in enumerate(result.columns):
            column = algorithms.take_1d(result.iloc[:, pos].values, ids)
            output.append(
                self._try_cast(column, obj.iloc[:, pos]) if do_cast
                else column)

        return DataFrame._from_arrays(output, columns=result.columns,
                                      index=obj.index)

Source File: multi.py From Splunking-Crime with GNU Affero General Public License v3.0

6 votes

def values(self):
        """
        Return ``self._tuples``, building it from the levels and labels (and
        storing it on the instance) when it has not been computed yet.
        """
        if self._tuples is not None:
            return self._tuples

        def _materialize(level, codes):
            # Levels exposing _box_values need boxing (timestamps, etc.);
            # box whichever side is shorter to minimize the work.
            if not hasattr(level, '_box_values'):
                return algos.take_1d(np.asarray(level._values), codes)
            if len(level) > len(codes):
                return level._box_values(algos.take_1d(level._values, codes))
            return algos.take_1d(level._box_values(level._values), codes,
                                 fill_value=_get_na_value(level.dtype.type))

        taken = [_materialize(level, codes)
                 for level, codes in zip(self.levels, self.labels)]

        self._tuples = lib.fast_zip(taken)
        return self._tuples

    # fml

Source File: internals.py From vnpy_crypto with MIT License

6 votes

def combine(self, blocks, copy=True):
        """
        Return a new manager with the blocks.

        Parameters
        ----------
        blocks : sequence of blocks
            When empty, ``self.make_empty()`` is returned.
        copy : bool, default True
            Passed as ``deep`` to each block's ``copy``.
        """
        if len(blocks) == 0:
            return self.make_empty()

        # FIXME: optimization potential
        locs = [blk.mgr_locs.as_array for blk in blocks]
        indexer = np.sort(np.concatenate(locs))
        inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0])

        def _relocate(blk):
            # Copy the block and remap its locations through the reverse
            # indexer.
            blk = blk.copy(deep=copy)
            blk.mgr_locs = algos.take_1d(inv_indexer, blk.mgr_locs.as_array,
                                         axis=0, allow_fill=False)
            return blk

        new_blocks = [_relocate(blk) for blk in blocks]

        new_axes = list(self.axes)
        new_axes[0] = self.items.take(indexer)

        return self.__class__(new_blocks, new_axes, do_integrity_check=False)

Source File: generic.py From predictive-maintenance-using-machine-learning with Apache License 2.0

6 votes

def _transform_fast(self, result, obj, func_nm):
        """
        Fast transform path for aggregations.

        Every column of the aggregated ``result`` is reshaped to the size of
        the original frame with a take along the group ids. Whether the taken
        values are cast against the original columns is decided by
        ``self._transform_should_cast(func_nm)``.
        """
        should_cast = self._transform_should_cast(func_nm)

        group_ids, _, _ = self.grouper.group_info

        def _expand(pos):
            # Broadcast one aggregated column via take, then optionally cast
            # it against the matching column of the original object.
            taken = algorithms.take_1d(result.iloc[:, pos].values, group_ids)
            if should_cast:
                taken = self._try_cast(taken, obj.iloc[:, pos])
            return taken

        output = [_expand(pos) for pos, _ in enumerate(result.columns)]
        return DataFrame._from_arrays(output, columns=result.columns,
                                      index=obj.index)

Source File: managers.py From predictive-maintenance-using-machine-learning with Apache License 2.0

6 votes

def combine(self, blocks, copy=True):
        """
        Return a new manager with the blocks.

        An empty ``blocks`` yields ``self.make_empty()``. Otherwise each
        block is copied (``deep=copy``), its ``mgr_locs`` are remapped through
        a reverse indexer, and a new instance of ``self.__class__`` is built
        from the relocated blocks.
        """
        if len(blocks) == 0:
            return self.make_empty()

        # FIXME: optimization potential
        all_locs = np.concatenate([blk.mgr_locs.as_array for blk in blocks])
        indexer = np.sort(all_locs)
        inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0])

        new_blocks = []
        for original in blocks:
            relocated = original.copy(deep=copy)
            relocated.mgr_locs = algos.take_1d(
                inv_indexer, relocated.mgr_locs.as_array,
                axis=0, allow_fill=False)
            new_blocks.append(relocated)

        new_axes = list(self.axes)
        new_axes[0] = self.items.take(indexer)

        return self.__class__(new_blocks, new_axes, do_integrity_check=False)

Source File: test_take.py From elasticintel with GNU General Public License v3.0

5 votes

def test_1d_fill_nonna(self):
        """
        Check the values and result dtype of ``algos.take_1d`` when a non-NA
        ``fill_value`` is supplied, for a table of
        (input dtype, fill value, expected output dtype) cases.
        """
        def _check(dtype, fill_value, out_dtype):
            data = np.random.randint(0, 2, 4).astype(dtype)

            # With a -1 in the indexer the last slot must equal fill_value
            # and the result dtype must be out_dtype.
            with_missing = [2, 1, 0, -1]
            result = algos.take_1d(data, with_missing, fill_value=fill_value)
            assert ((result[[0, 1, 2]] == data[[2, 1, 0]]).all())
            assert (result[3] == fill_value)
            assert (result.dtype == out_dtype)

            # Without a -1 the input dtype must be preserved.
            no_missing = [2, 1, 0, 1]
            result = algos.take_1d(data, no_missing, fill_value=fill_value)
            assert ((result[[0, 1, 2, 3]] == data[no_missing]).all())
            assert (result.dtype == dtype)

        cases = [
            (np.int8, np.int16(127), np.int8),
            (np.int8, np.int16(128), np.int16),
            (np.int32, 1, np.int32),
            (np.int32, 2.0, np.float64),
            (np.int32, 3.0 + 4.0j, np.complex128),
            (np.int32, True, np.object_),
            (np.int32, '', np.object_),
            (np.float64, 1, np.float64),
            (np.float64, 2.0, np.float64),
            (np.float64, 3.0 + 4.0j, np.complex128),
            (np.float64, True, np.object_),
            (np.float64, '', np.object_),
            (np.complex128, 1, np.complex128),
            (np.complex128, 2.0, np.complex128),
            (np.complex128, 3.0 + 4.0j, np.complex128),
            (np.complex128, True, np.object_),
            (np.complex128, '', np.object_),
            (np.bool_, 1, np.object_),
            (np.bool_, 2.0, np.object_),
            (np.bool_, 3.0 + 4.0j, np.object_),
            (np.bool_, True, np.bool_),
            (np.bool_, '', np.object_),
        ]
        for dtype, fill_value, out_dtype in cases:
            _check(dtype, fill_value, out_dtype)

Source File: test_take.py From elasticintel with GNU General Public License v3.0

5 votes

def test_1d_with_out(self):
        """
        Check ``algos.take_1d`` when writing into a caller-supplied ``out``
        array, for writeable and read-only input data.

        For dtypes flagged ``can_hold_na`` a -1 in the indexer must produce
        NaN in ``out``; for the others the call must raise ``TypeError``
        matching ``self.fill_error``.
        """
        def _test_dtype(dtype, can_hold_na, writeable=True):
            data = np.random.randint(0, 2, 4).astype(dtype)
            data.flags.writeable = writeable

            # Indexer without missing entries: must match ndarray.take.
            indexer = [2, 1, 0, 1]
            out = np.empty(4, dtype=dtype)
            algos.take_1d(data, indexer, out=out)
            expected = data.take(indexer)
            tm.assert_almost_equal(out, expected)

            # Indexer with a missing entry (-1).
            indexer = [2, 1, 0, -1]
            out = np.empty(4, dtype=dtype)
            if can_hold_na:
                algos.take_1d(data, indexer, out=out)
                expected = data.take(indexer)
                expected[3] = np.nan
                tm.assert_almost_equal(out, expected)
            else:
                with tm.assert_raises_regex(TypeError, self.fill_error):
                    algos.take_1d(data, indexer, out=out)
                # no exception o/w
                data.take(indexer, out=out)

        for writeable in [True, False]:
            # Check that take_nd works both with writeable arrays (in which
            # case fast typed memoryviews implementation) and read-only
            # arrays alike.
            _test_dtype(np.float64, True, writeable=writeable)
            _test_dtype(np.float32, True, writeable=writeable)
            _test_dtype(np.uint64, False, writeable=writeable)
            _test_dtype(np.uint32, False, writeable=writeable)
            _test_dtype(np.uint16, False, writeable=writeable)
            _test_dtype(np.uint8, False, writeable=writeable)
            _test_dtype(np.int64, False, writeable=writeable)
            _test_dtype(np.int32, False, writeable=writeable)
            _test_dtype(np.int16, False, writeable=writeable)
            _test_dtype(np.int8, False, writeable=writeable)
            _test_dtype(np.object_, True, writeable=writeable)
            # np.bool_ rather than the deprecated np.bool alias, which is
            # missing from some NumPy releases.
            _test_dtype(np.bool_, False, writeable=writeable)

Source File: test_take.py From elasticintel with GNU General Public License v3.0

5 votes

def test_1d_other_dtypes(self):
        """
        Check ``algos.take_1d`` on float32 data: a -1 in the indexer is
        expected to come back as NaN.
        """
        indexer = [1, 2, 3, -1]
        arr = np.random.randn(10).astype(np.float32)

        # Reference result: a plain ndarray take with the last slot set
        # to NaN.
        expected = arr.take(indexer)
        expected[-1] = np.nan

        tm.assert_almost_equal(algos.take_1d(arr, indexer), expected)

Source File: category.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def get_indexer(self, target, method=None, limit=None, tolerance=None):
        """
        Translate ``target`` into codes of this index's categories and look
        those codes up in ``self._engine``.

        Parameters
        ----------
        target : index-like
            Coerced with ``ibase._ensure_index``.
        method : optional
            'pad', 'backfill' and 'nearest' (after cleaning) are not
            implemented.
        limit, tolerance : optional
            Not used by this implementation.

        Raises
        ------
        NotImplementedError
            If a fill ``method`` is requested.
        """
        method = missing.clean_reindex_fill_method(method)
        target = ibase._ensure_index(target)

        # Identical, unique index: every position maps to itself.
        if self.is_unique and self.equals(target):
            return np.arange(len(self), dtype='intp')

        if method in ('pad', 'backfill'):
            raise NotImplementedError("method='pad' and method='backfill' not "
                                      "implemented yet for CategoricalIndex")
        if method == 'nearest':
            raise NotImplementedError("method='nearest' not implemented yet "
                                      'for CategoricalIndex')

        target_is_categorical = isinstance(target, CategoricalIndex)
        if target_is_categorical and self.values.is_dtype_equal(target):
            # we have the same codes
            codes = target.codes
        elif target_is_categorical:
            # Map the target's categories onto ours, then recode its codes.
            code_indexer = self.categories.get_indexer(target.categories)
            codes = take_1d(code_indexer, target.codes, fill_value=-1)
        else:
            codes = self.categories.get_indexer(target)

        indexer, _ = self._engine.get_indexer_non_unique(codes)
        return _ensure_platform_int(indexer)

Source File: frame.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def _reindex_with_indexers(self, reindexers, method=None, fill_value=None,
                               limit=None, copy=False, allow_dups=False):
        """
        Build a new object by reindexing the rows of each requested column.

        Parameters
        ----------
        reindexers : dict
            Looked up under keys 0 and 1; each entry is a pair of
            (labels, indexer). Only the indexer stored under key 0 is applied
            to the data.
        method, limit : optional
            Must both be None.
        fill_value : optional
            Defaults to ``np.nan`` when None.
        copy, allow_dups : bool
            Not used by this implementation.

        Raises
        ------
        NotImplementedError
            If ``method`` or ``limit`` is given.
        """
        if not (method is None and limit is None):
            raise NotImplementedError("cannot reindex with a method or limit "
                                      "with sparse")

        if fill_value is None:
            fill_value = np.nan

        index, row_indexer = reindexers.get(0, (None, None))
        columns, col_indexer = reindexers.get(1, (None, None))

        if columns is None:
            columns = self.columns

        new_arrays = {}
        for col in columns:
            # Requested columns that do not exist are left out.
            if col in self:
                if row_indexer is None:
                    new_arrays[col] = self[col]
                else:
                    new_arrays[col] = algos.take_1d(self[col].get_values(),
                                                    row_indexer,
                                                    fill_value=fill_value)

        rebuilt = self._constructor(new_arrays, index=index, columns=columns)
        return rebuilt.__finalize__(self)

Source File: series.py From elasticintel with GNU General Public License v3.0

5 votes

def _reindex_indexer(self, new_index, indexer, copy):
        """
        Return the values taken along ``indexer`` as a new object indexed by
        ``new_index``; with no indexer, return ``self`` (or a copy of it when
        ``copy`` is truthy).
        """
        if indexer is not None:
            # be subclass-friendly
            taken = algorithms.take_1d(self.get_values(), indexer)
            return self._constructor(taken, index=new_index)

        return self.copy() if copy else self

Source File: resample.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def _take_new_index(obj, indexer, new_index, axis=0):
    """
    Take the values of ``obj`` along ``indexer`` and attach ``new_index``.

    A Series is rebuilt from the taken values (keeping its name); a
    DataFrame is rebuilt from its reindexed ``_data``.

    Raises
    ------
    NotImplementedError
        If ``obj`` is a DataFrame and ``axis`` is 1.
    ValueError
        If ``obj`` is neither a Series nor a DataFrame.
    """
    from pandas.core.api import Series, DataFrame

    if isinstance(obj, Series):
        taken = algos.take_1d(obj.values, indexer)
        return Series(taken, index=new_index, name=obj.name)

    if not isinstance(obj, DataFrame):
        raise ValueError("'obj' should be either a Series or a DataFrame")

    if axis == 1:
        raise NotImplementedError("axis 1 is not supported")

    reindexed = obj._data.reindex_indexer(
        new_axis=new_index, indexer=indexer, axis=1)
    return DataFrame(reindexed)

Source File: series.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def _reindex_indexer(self, new_index, indexer, copy):
        """
        Return the values taken along ``indexer`` (filling allowed, with a
        ``fill_value`` of None) as a new object indexed by ``new_index``;
        with no indexer, return ``self`` (or a copy of it when ``copy`` is
        truthy).
        """
        if indexer is not None:
            taken = algorithms.take_1d(self._values, indexer,
                                       allow_fill=True, fill_value=None)
            return self._constructor(taken, index=new_index)

        return self.copy() if copy else self

Source File: resample.py From elasticintel with GNU General Public License v3.0

5 votes

def _take_new_index(obj, indexer, new_index, axis=0):
    """
    Return ``obj`` with its values taken along ``indexer`` and labelled by
    ``new_index``.

    Only Series and DataFrame inputs are accepted (``ValueError``
    otherwise), and for a DataFrame ``axis=1`` raises
    ``NotImplementedError``.
    """
    from pandas.core.api import Series, DataFrame

    is_series = isinstance(obj, Series)
    if not is_series and not isinstance(obj, DataFrame):
        raise ValueError("'obj' should be either a Series or a DataFrame")

    if is_series:
        return Series(algos.take_1d(obj.values, indexer),
                      index=new_index, name=obj.name)

    if axis == 1:
        raise NotImplementedError("axis 1 is not supported")
    manager = obj._data.reindex_indexer(
        new_axis=new_index, indexer=indexer, axis=1)
    return DataFrame(manager)

Source File: accessors.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def _delegate_property_get(self, name):
        """
        Fetch attribute ``name`` from ``self._get_values()``.

        Results that are neither ndarrays nor list-like are returned as-is;
        anything else is wrapped in a Series flagged so that modifying it
        triggers a SettingWithCopy warning/error.
        """
        from pandas import Series

        attr = getattr(self._get_values(), name)

        is_ndarray = isinstance(attr, np.ndarray)
        if not is_ndarray and not is_list_like(attr):
            return attr
        if is_ndarray and is_integer_dtype(attr):
            # maybe need to upcast (ints)
            attr = attr.astype('int64')

        data = np.asarray(attr)

        if self.orig is None:
            index = self._parent.index
        else:
            # An original object is present: expand the per-category
            # result along its codes (blows up if we operate on categories).
            data = take_1d(data, self.orig.cat.codes)
            index = self.orig.index

        # return the result as a Series, which is by definition a copy
        series = Series(data, index=index, name=self.name)

        # setting this object will show a SettingWithCopyWarning/Error
        series._is_copy = ("modifications to a property of a datetimelike "
                           "object are not supported and are discarded. "
                           "Change values on the original.")

        return series

Source File: multi.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def _get_level_values(self, level, unique=False):
        """
        Return vector of label values for requested level,
        equal to the length of the index

        **this is an internal method**

        Parameters
        ----------
        level : int level
        unique : bool, default False
            if True, drop duplicated values

        Returns
        -------
        values : ndarray
        """
        level_index = self.levels[level]
        codes = self.codes[level]
        if unique:
            # De-duplicate the codes before taking.
            codes = algos.unique(codes)
        # Expand the level's values along the codes, filling with the
        # level's own NA value.
        filled = algos.take_1d(level_index._values, codes,
                               fill_value=level_index._na_value)
        return level_index._shallow_copy(filled)

Source File: categorical.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def _recode_for_categories(codes, old_categories, new_categories):
    """
    Convert a set of codes to a new set of categories

    Parameters
    ----------
    codes : array
    old_categories, new_categories : Index

    Returns
    -------
    new_codes : array

    Examples
    --------
    >>> old_cat = pd.Index(['b', 'a', 'c'])
    >>> new_cat = pd.Index(['a', 'b'])
    >>> codes = np.array([0, 1, 1, 2])
    >>> _recode_for_categories(codes, old_cat, new_cat)
    array([ 1,  0,  0, -1])
    """
    from pandas.core.algorithms import take_1d

    # Nothing to recode when there were no categories (all null anyway) or
    # the categories are unchanged: hand back a copy of the codes.
    if len(old_categories) == 0 or new_categories.equals(old_categories):
        return codes.copy()

    # Position of every old category within the new categories.
    old_to_new = coerce_indexer_dtype(
        new_categories.get_indexer(old_categories), new_categories)
    return take_1d(old_to_new, codes.copy(), fill_value=-1)

Source File: internals.py From elasticintel with GNU General Public License v3.0

5 votes

def get_dtypes(self):
        """
        Return the blocks' dtypes taken along ``self._blknos``.
        """
        block_dtypes = [blk.dtype for blk in self.blocks]
        return algos.take_1d(np.array(block_dtypes), self._blknos,
                             allow_fill=False)

Source File: internals.py From elasticintel with GNU General Public License v3.0

5 votes

def get_ftypes(self):
        """
        Return the blocks' ftypes taken along ``self._blknos``.
        """
        block_ftypes = [blk.ftype for blk in self.blocks]
        return algos.take_1d(np.array(block_ftypes), self._blknos,
                             allow_fill=False)

Source File: groupby.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def _transform_fast(self, func):
        """
        fast version of transform, only applicable to
        builtin/cythonizable functions

        ``func`` may be a callable or the name of a method on ``self``. Its
        result is taken along the group ids and returned as a Series that
        carries the index and name of ``self.obj``.
        """
        if isinstance(func, compat.string_types):
            # Resolve a method name to the bound method.
            func = getattr(self, func)

        group_ids, _, _ = self.grouper.group_info
        should_cast = (self.size().fillna(0) > 0).any()

        taken = algorithms.take_1d(func().values, group_ids)
        if should_cast:
            taken = self._try_cast(taken, self.obj)

        return Series(taken, index=self.obj.index, name=self.obj.name)

Python pandas.core.algorithms.take_1d() Examples