Python pandas.core.internals.BlockManager() Examples

The following are 30 code examples of pandas.core.internals.BlockManager(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.core.internals , or try the search function .
Example #1
Source File: groupby.py    From Computable with MIT License 7 votes vote down vote up
def _wrap_agged_blocks(self, blocks):
        obj = self._obj_with_exclusions

        new_axes = list(obj._data.axes)

        # more kludge
        if self.axis == 0:
            new_axes[0], new_axes[1] = new_axes[1], self.grouper.result_index
        else:
            new_axes[self.axis] = self.grouper.result_index

        mgr = BlockManager(blocks, new_axes)

        new_obj = type(obj)(mgr)

        return self._post_process_cython_aggregate(new_obj) 
Example #2
Source File: pytables.py    From Computable with MIT License 6 votes vote down vote up
def read(self, **kwargs):
        self.validate_read(kwargs)

        axes = []
        for i in range(self.ndim):
            ax = self.read_index('axis%d' % i)
            axes.append(ax)

        items = axes[0]
        blocks = []
        for i in range(self.nblocks):
            blk_items = self.read_index('block%d_items' % i)
            values = self.read_array('block%d_values' % i)
            blk = make_block(values, blk_items, items)
            blocks.append(blk)

        return self.obj_type(BlockManager(blocks, axes)) 
Example #3
Source File: test_internals.py    From coffeegrindsize with MIT License 6 votes vote down vote up
def test_equals_block_order_different_dtypes(self):
        # GH 9330

        mgr_strings = [
            "a:i8;b:f8",  # basic case
            "a:i8;b:f8;c:c8;d:b",  # many types
            "a:i8;e:dt;f:td;g:string",  # more types
            "a:i8;b:category;c:category2;d:category2",  # categories
            "c:sparse;d:sparse_na;b:f8",  # sparse
        ]

        for mgr_string in mgr_strings:
            bm = create_mgr(mgr_string)
            block_perms = itertools.permutations(bm.blocks)
            for bm_perm in block_perms:
                bm_this = BlockManager(bm_perm, bm.axes)
                assert bm.equals(bm_this)
                assert bm_this.equals(bm) 
Example #4
Source File: test_internals.py    From coffeegrindsize with MIT License 6 votes vote down vote up
def test_duplicate_ref_loc_failure(self):
        tmp_mgr = create_mgr('a:bool; a: f8')

        axes, blocks = tmp_mgr.axes, tmp_mgr.blocks

        blocks[0].mgr_locs = np.array([0])
        blocks[1].mgr_locs = np.array([0])

        # test trying to create block manager with overlapping ref locs
        with pytest.raises(AssertionError):
            BlockManager(blocks, axes)

        blocks[0].mgr_locs = np.array([0])
        blocks[1].mgr_locs = np.array([1])
        mgr = BlockManager(blocks, axes)
        mgr.iget(1) 
Example #5
Source File: blocks.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def _extend_blocks(result, blocks=None):
    """ return a new extended blocks, givin the result """
    from pandas.core.internals import BlockManager
    if blocks is None:
        blocks = []
    if isinstance(result, list):
        for r in result:
            if isinstance(r, list):
                blocks.extend(r)
            else:
                blocks.append(r)
    elif isinstance(result, BlockManager):
        blocks.extend(result.blocks)
    else:
        blocks.append(result)
    return blocks 
Example #6
Source File: test_internals.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_equals_block_order_different_dtypes(self):
        # GH 9330

        mgr_strings = [
            "a:i8;b:f8",  # basic case
            "a:i8;b:f8;c:c8;d:b",  # many types
            "a:i8;e:dt;f:td;g:string",  # more types
            "a:i8;b:category;c:category2;d:category2",  # categories
            "c:sparse;d:sparse_na;b:f8",  # sparse
        ]

        for mgr_string in mgr_strings:
            bm = create_mgr(mgr_string)
            block_perms = itertools.permutations(bm.blocks)
            for bm_perm in block_perms:
                bm_this = BlockManager(bm_perm, bm.axes)
                assert bm.equals(bm_this)
                assert bm_this.equals(bm) 
Example #7
Source File: test_internals.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_duplicate_ref_loc_failure(self):
        tmp_mgr = create_mgr('a:bool; a: f8')

        axes, blocks = tmp_mgr.axes, tmp_mgr.blocks

        blocks[0].mgr_locs = np.array([0])
        blocks[1].mgr_locs = np.array([0])

        # test trying to create block manager with overlapping ref locs
        with pytest.raises(AssertionError):
            BlockManager(blocks, axes)

        blocks[0].mgr_locs = np.array([0])
        blocks[1].mgr_locs = np.array([1])
        mgr = BlockManager(blocks, axes)
        mgr.iget(1) 
Example #8
Source File: generic.py    From Computable with MIT License 6 votes vote down vote up
def _init_mgr(self, mgr, axes=None, dtype=None, copy=False):
        """ passed a manager and a axes dict """
        for a, axe in axes.items():
            if axe is not None:
                mgr = mgr.reindex_axis(
                    axe, axis=self._get_block_manager_axis(a), copy=False)

        # do not copy BlockManager unless explicitly done
        if copy and dtype is None:
            mgr = mgr.copy()
        elif dtype is not None:
            # avoid copy if we can
            if len(mgr.blocks) > 1 or mgr.blocks[0].values.dtype != dtype:
                mgr = mgr.astype(dtype)
        return mgr

    #----------------------------------------------------------------------
    # Construction 
Example #9
Source File: blocks.py    From recruit with Apache License 2.0 6 votes vote down vote up
def _extend_blocks(result, blocks=None):
    """ return a new extended blocks, givin the result """
    from pandas.core.internals import BlockManager
    if blocks is None:
        blocks = []
    if isinstance(result, list):
        for r in result:
            if isinstance(r, list):
                blocks.extend(r)
            else:
                blocks.append(r)
    elif isinstance(result, BlockManager):
        blocks.extend(result.blocks)
    else:
        blocks.append(result)
    return blocks 
Example #10
Source File: test_internals.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_equals_block_order_different_dtypes(self):
        # GH 9330

        mgr_strings = [
            "a:i8;b:f8",  # basic case
            "a:i8;b:f8;c:c8;d:b",  # many types
            "a:i8;e:dt;f:td;g:string",  # more types
            "a:i8;b:category;c:category2;d:category2",  # categories
            "c:sparse;d:sparse_na;b:f8",  # sparse
        ]

        for mgr_string in mgr_strings:
            bm = create_mgr(mgr_string)
            block_perms = itertools.permutations(bm.blocks)
            for bm_perm in block_perms:
                bm_this = BlockManager(bm_perm, bm.axes)
                assert bm.equals(bm_this)
                assert bm_this.equals(bm) 
Example #11
Source File: test_internals.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_duplicate_ref_loc_failure(self):
        tmp_mgr = create_mgr('a:bool; a: f8')

        axes, blocks = tmp_mgr.axes, tmp_mgr.blocks

        blocks[0].mgr_locs = np.array([0])
        blocks[1].mgr_locs = np.array([0])

        # test trying to create block manager with overlapping ref locs
        with pytest.raises(AssertionError):
            BlockManager(blocks, axes)

        blocks[0].mgr_locs = np.array([0])
        blocks[1].mgr_locs = np.array([1])
        mgr = BlockManager(blocks, axes)
        mgr.iget(1) 
Example #12
Source File: test_internals.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_equals(self):
        # unique items
        bm1 = create_mgr('a,b,c: i8-1; d,e,f: i8-2')
        bm2 = BlockManager(bm1.blocks[::-1], bm1.axes)
        assert bm1.equals(bm2)

        bm1 = create_mgr('a,a,a: i8-1; b,b,b: i8-2')
        bm2 = BlockManager(bm1.blocks[::-1], bm1.axes)
        assert bm1.equals(bm2) 
Example #13
Source File: test_external_block.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_custom_repr():
    values = np.arange(3, dtype='int64')

    # series
    block = CustomBlock(values, placement=slice(0, 3))

    s = pd.Series(SingleBlockManager(block, pd.RangeIndex(3)))
    assert repr(s) == '0    Val: 0\n1    Val: 1\n2    Val: 2\ndtype: int64'

    # dataframe
    block = CustomBlock(values, placement=slice(0, 1))
    blk_mgr = BlockManager([block], [['col'], range(3)])
    df = pd.DataFrame(blk_mgr)
    assert repr(df) == '      col\n0  Val: 0\n1  Val: 1\n2  Val: 2' 
Example #14
Source File: test_internals.py    From coffeegrindsize with MIT License 5 votes vote down vote up
def test_equals(self):
        # unique items
        bm1 = create_mgr('a,b,c: i8-1; d,e,f: i8-2')
        bm2 = BlockManager(bm1.blocks[::-1], bm1.axes)
        assert bm1.equals(bm2)

        bm1 = create_mgr('a,a,a: i8-1; b,b,b: i8-2')
        bm2 = BlockManager(bm1.blocks[::-1], bm1.axes)
        assert bm1.equals(bm2) 
Example #15
Source File: test_internals.py    From coffeegrindsize with MIT License 5 votes vote down vote up
def test_get(self):
        cols = Index(list('abc'))
        values = np.random.rand(3, 3)
        block = make_block(values=values.copy(), placement=np.arange(3))
        mgr = BlockManager(blocks=[block], axes=[cols, np.arange(3)])

        assert_almost_equal(mgr.get('a', fastpath=False), values[0])
        assert_almost_equal(mgr.get('b', fastpath=False), values[1])
        assert_almost_equal(mgr.get('c', fastpath=False), values[2])
        assert_almost_equal(mgr.get('a').internal_values(), values[0])
        assert_almost_equal(mgr.get('b').internal_values(), values[1])
        assert_almost_equal(mgr.get('c').internal_values(), values[2]) 
Example #16
Source File: test_external_block.py    From coffeegrindsize with MIT License 5 votes vote down vote up
def test_custom_repr():
    values = np.arange(3, dtype='int64')

    # series
    block = CustomBlock(values, placement=slice(0, 3))

    s = pd.Series(SingleBlockManager(block, pd.RangeIndex(3)))
    assert repr(s) == '0    Val: 0\n1    Val: 1\n2    Val: 2\ndtype: int64'

    # dataframe
    block = CustomBlock(values, placement=slice(0, 1))
    blk_mgr = BlockManager([block], [['col'], range(3)])
    df = pd.DataFrame(blk_mgr)
    assert repr(df) == '      col\n0  Val: 0\n1  Val: 1\n2  Val: 2' 
Example #17
Source File: test_external_block.py    From coffeegrindsize with MIT License 5 votes vote down vote up
def df():
    df1 = pd.DataFrame({'a': [1, 2, 3]})
    blocks = df1._data.blocks
    values = np.arange(3, dtype='int64')
    custom_block = CustomBlock(values, placement=slice(1, 2))
    blocks = blocks + (custom_block,)
    block_manager = BlockManager(blocks, [pd.Index(['a', 'b']), df1.index])
    return pd.DataFrame(block_manager) 
Example #18
Source File: blocks.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def _get_unstack_items(self, unstacker, new_columns):
        """
        Get the placement, values, and mask for a Block unstack.

        This is shared between ObjectBlock and ExtensionBlock. They
        differ in that ObjectBlock passes the values, while ExtensionBlock
        passes the dummy ndarray of positions to be used by a take
        later.

        Parameters
        ----------
        unstacker : pandas.core.reshape.reshape._Unstacker
        new_columns : Index
            All columns of the unstacked BlockManager.

        Returns
        -------
        new_placement : ndarray[int]
            The placement of the new columns in `new_columns`.
        new_values : Union[ndarray, ExtensionArray]
            The first return value from _Unstacker.get_new_values.
        mask : ndarray[bool]
            The second return value from _Unstacker.get_new_values.
        """
        # shared with ExtensionBlock
        new_items = unstacker.get_new_columns()
        new_placement = new_columns.get_indexer(new_items)
        new_values, mask = unstacker.get_new_values()

        mask = mask.any(0)
        return new_placement, new_values, mask 
Example #19
Source File: blocks.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def _unstack(self, unstacker_func, new_columns, n_rows, fill_value):
        """Return a list of unstacked blocks of self

        Parameters
        ----------
        unstacker_func : callable
            Partially applied unstacker.
        new_columns : Index
            All columns of the unstacked BlockManager.
        n_rows : int
            Only used in ExtensionBlock.unstack
        fill_value : int
            Only used in ExtensionBlock.unstack

        Returns
        -------
        blocks : list of Block
            New blocks of unstacked values.
        mask : array_like of bool
            The mask of columns of `blocks` we should keep.
        """
        unstacker = unstacker_func(self.values.T)
        new_items = unstacker.get_new_columns()
        new_placement = new_columns.get_indexer(new_items)
        new_values, mask = unstacker.get_new_values()

        mask = mask.any(0)
        new_values = new_values.T[mask]
        new_placement = new_placement[mask]

        blocks = [make_block(new_values, placement=new_placement)]
        return blocks, mask 
Example #20
Source File: test_internals.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_get(self):
        cols = Index(list('abc'))
        values = np.random.rand(3, 3)
        block = make_block(values=values.copy(), placement=np.arange(3))
        mgr = BlockManager(blocks=[block], axes=[cols, np.arange(3)])

        assert_almost_equal(mgr.get('a', fastpath=False), values[0])
        assert_almost_equal(mgr.get('b', fastpath=False), values[1])
        assert_almost_equal(mgr.get('c', fastpath=False), values[2])
        assert_almost_equal(mgr.get('a').internal_values(), values[0])
        assert_almost_equal(mgr.get('b').internal_values(), values[1])
        assert_almost_equal(mgr.get('c').internal_values(), values[2]) 
Example #21
Source File: test_internals.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_equals(self):
        # unique items
        bm1 = create_mgr('a,b,c: i8-1; d,e,f: i8-2')
        bm2 = BlockManager(bm1.blocks[::-1], bm1.axes)
        assert bm1.equals(bm2)

        bm1 = create_mgr('a,a,a: i8-1; b,b,b: i8-2')
        bm2 = BlockManager(bm1.blocks[::-1], bm1.axes)
        assert bm1.equals(bm2) 
Example #22
Source File: test_internals.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_get(self):
        cols = Index(list('abc'))
        values = np.random.rand(3, 3)
        block = make_block(values=values.copy(), placement=np.arange(3))
        mgr = BlockManager(blocks=[block], axes=[cols, np.arange(3)])

        assert_almost_equal(mgr.get('a', fastpath=False), values[0])
        assert_almost_equal(mgr.get('b', fastpath=False), values[1])
        assert_almost_equal(mgr.get('c', fastpath=False), values[2])
        assert_almost_equal(mgr.get('a').internal_values(), values[0])
        assert_almost_equal(mgr.get('b').internal_values(), values[1])
        assert_almost_equal(mgr.get('c').internal_values(), values[2]) 
Example #23
Source File: blocks.py    From recruit with Apache License 2.0 5 votes vote down vote up
def _get_unstack_items(self, unstacker, new_columns):
        """
        Get the placement, values, and mask for a Block unstack.

        This is shared between ObjectBlock and ExtensionBlock. They
        differ in that ObjectBlock passes the values, while ExtensionBlock
        passes the dummy ndarray of positions to be used by a take
        later.

        Parameters
        ----------
        unstacker : pandas.core.reshape.reshape._Unstacker
        new_columns : Index
            All columns of the unstacked BlockManager.

        Returns
        -------
        new_placement : ndarray[int]
            The placement of the new columns in `new_columns`.
        new_values : Union[ndarray, ExtensionArray]
            The first return value from _Unstacker.get_new_values.
        mask : ndarray[bool]
            The second return value from _Unstacker.get_new_values.
        """
        # shared with ExtensionBlock
        new_items = unstacker.get_new_columns()
        new_placement = new_columns.get_indexer(new_items)
        new_values, mask = unstacker.get_new_values()

        mask = mask.any(0)
        return new_placement, new_values, mask 
Example #24
Source File: test_external_block.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_custom_repr():
    values = np.arange(3, dtype='int64')

    # series
    block = CustomBlock(values, placement=slice(0, 3))

    s = pd.Series(SingleBlockManager(block, pd.RangeIndex(3)))
    assert repr(s) == '0    Val: 0\n1    Val: 1\n2    Val: 2\ndtype: int64'

    # dataframe
    block = CustomBlock(values, placement=slice(0, 1))
    blk_mgr = BlockManager([block], [['col'], range(3)])
    df = pd.DataFrame(blk_mgr)
    assert repr(df) == '      col\n0  Val: 0\n1  Val: 1\n2  Val: 2' 
Example #25
Source File: test_external_block.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def df():
    df1 = pd.DataFrame({'a': [1, 2, 3]})
    blocks = df1._data.blocks
    values = np.arange(3, dtype='int64')
    custom_block = CustomBlock(values, placement=slice(1, 2))
    blocks = blocks + (custom_block,)
    block_manager = BlockManager(blocks, [pd.Index(['a', 'b']), df1.index])
    return pd.DataFrame(block_manager) 
Example #26
Source File: test_external_block.py    From recruit with Apache License 2.0 5 votes vote down vote up
def df():
    df1 = pd.DataFrame({'a': [1, 2, 3]})
    blocks = df1._data.blocks
    values = np.arange(3, dtype='int64')
    custom_block = CustomBlock(values, placement=slice(1, 2))
    blocks = blocks + (custom_block,)
    block_manager = BlockManager(blocks, [pd.Index(['a', 'b']), df1.index])
    return pd.DataFrame(block_manager) 
Example #27
Source File: groupby.py    From Computable with MIT License 5 votes vote down vote up
def _get_sorted_data(self):
        # this is the BlockManager
        data = self.data._data

        # this is sort of wasteful but...
        sorted_axis = data.axes[self.axis].take(self.sort_idx)
        sorted_data = data.reindex_axis(sorted_axis, axis=self.axis)

        return sorted_data 
Example #28
Source File: groupby.py    From Computable with MIT License 5 votes vote down vote up
def _wrap_agged_blocks(self, blocks):
        obj = self._obj_with_exclusions

        if self.axis == 0:
            agg_labels = obj.columns
        else:
            agg_labels = obj.index

        if sum(len(x.items) for x in blocks) == len(agg_labels):
            output_keys = agg_labels
        else:
            all_items = []
            for b in blocks:
                all_items.extend(b.items)
            output_keys = agg_labels[agg_labels.isin(all_items)]

            for blk in blocks:
                blk.set_ref_items(output_keys, maybe_rename=False)

        if not self.as_index:
            index = np.arange(blocks[0].values.shape[1])
            mgr = BlockManager(blocks, [output_keys, index])
            result = DataFrame(mgr)

            group_levels = self.grouper.get_group_levels()
            zipped = zip(self.grouper.names, group_levels)

            for i, (name, labels) in enumerate(zipped):
                result.insert(i, name, labels)
            result = result.consolidate()
        else:
            index = self.grouper.result_index
            mgr = BlockManager(blocks, [output_keys, index])
            result = DataFrame(mgr)

        if self.axis == 1:
            result = result.T

        return result.convert_objects() 
Example #29
Source File: generic.py    From Computable with MIT License 5 votes vote down vote up
def as_blocks(self, columns=None):
        """
        Convert the frame to a dict of dtype -> Constructor Types that each has
        a homogeneous dtype.

        are presented in sorted order unless a specific list of columns is
        provided.

        NOTE: the dtypes of the blocks WILL BE PRESERVED HERE (unlike in
              as_matrix)

        Parameters
        ----------
        columns : array-like
            Specific column order

        Returns
        -------
        values : a list of Object
        """
        self._consolidate_inplace()

        bd = dict()
        for b in self._data.blocks:
            b = b.reindex_items_from(columns or b.items)
            bd[str(b.dtype)] = self._constructor(
                BlockManager([b], [b.items, self.index])).__finalize__(self)
        return bd 
Example #30
Source File: generic.py    From Computable with MIT License 5 votes vote down vote up
def _from_axes(cls, data, axes):
        # for construction from BlockManager
        if isinstance(data, BlockManager):
            return cls(data)
        else:
            if cls._AXIS_REVERSED:
                axes = axes[::-1]
            d = cls._construct_axes_dict_from(cls, axes, copy=False)
            return cls(data, **d)