Python dask.array.concatenate() Examples

The following are 30 code examples of dask.array.concatenate(), extracted from open source projects; each example notes its original project and source file. You may also want to browse the other functions and classes available in the dask.array module.
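Before the project examples, here is a minimal, self-contained sketch of the call itself; the arrays and chunk sizes are illustrative and are not taken from any of the projects below.

import numpy as np
import dask.array as da

# Two chunked dask arrays whose shapes match on every axis except the
# concatenation axis (axis 0 here).
x = da.from_array(np.arange(12).reshape(4, 3), chunks=(2, 3))
y = da.from_array(np.arange(6).reshape(2, 3), chunks=(2, 3))

# da.concatenate only builds a lazy task graph; nothing is computed yet.
z = da.concatenate([x, y], axis=0)

print(z.shape)   # (6, 3)
print(z.chunks)  # ((2, 2, 2), (3,)) -- the input chunks are preserved
print(z.compute())

Many of the examples below pair this call with dask.delayed and da.from_delayed to stitch lazily computed blocks into a single array.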
Example #1
Source File: test_split.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def test_blockwise_shufflesplit():
    splitter = dask_ml.model_selection.ShuffleSplit(random_state=0)
    assert splitter.get_n_splits() == 10
    gen = splitter.split(dX)

    train_idx, test_idx = next(gen)
    assert isinstance(train_idx, da.Array)
    assert isinstance(test_idx, da.Array)

    assert train_idx.shape == (99,)  # 90% of 110
    assert test_idx.shape == (11,)

    assert train_idx.chunks == ((45, 45, 9),)
    assert test_idx.chunks == ((5, 5, 1),)

    counts = pd.value_counts(train_idx.compute())
    assert counts.max() == 1

    N = len(X)

    np.testing.assert_array_equal(
        np.unique(da.concatenate([train_idx, test_idx])), np.arange(N)
    ) 
Example #2
Source File: __init__.py    From pyresample with GNU Lesser General Public License v3.0
def _concatenate_chunks(chunks):
    """Concatenate chunks to full output array."""
    # Form the full array
    col, res = [], []
    prev_y = 0
    for y, x in sorted(chunks):
        if len(chunks[(y, x)]) > 1:
            chunk = da.nanmax(da.stack(chunks[(y, x)], axis=-1), axis=-1)
        else:
            chunk = chunks[(y, x)][0]
        if y == prev_y:
            col.append(chunk)
            continue
        res.append(da.concatenate(col, axis=1))
        col = [chunk]
        prev_y = y
    res.append(da.concatenate(col, axis=1))

    res = da.concatenate(res, axis=2).squeeze()

    return res 
Example #3
Source File: utils.py    From xmitgcm with MIT License
def find_concat_dim(da, possible_concat_dims):
    """ look for available dimensions in dataaray and pick the one
    from a list of candidates

    PARAMETERS
    ----------
    da : xarray.DataArray
        xmitgcm llc data array
    possible_concat_dims : list
        list of potential dims

    RETURNS
    -------
    out : str
        dimension on which to concatenate

    """
    out = None
    for d in possible_concat_dims:
        if d in da.dims:
            out = d
    return out 
Example #4
Source File: utilities.py    From minian with GNU General Public License v3.0
def handle_crash(varr, vpath, ssname, vlist, varr_list, frame_dict):
    seg1_list = list(filter(lambda v: re.search('seg1', v), vlist))
    seg2_list = list(filter(lambda v: re.search('seg2', v), vlist))
    if seg1_list and seg2_list:
        tframe = frame_dict[ssname]
        varr1 = darr.concatenate(
            list(compress(varr_list, seg1_list)),
            axis=0)
        varr2 = darr.concatenate(
            list(compress(varr_list, seg2_list)),
            axis=0)
        fm1, fm2 = varr1.shape[0], varr2.shape[0]
        fm_crds = varr.coords['frame']
        fm_crds1 = fm_crds.sel(frame=slice(None, fm1 - 1)).values
        fm_crds2 = fm_crds.sel(frame=slice(fm1, None)).values
        fm_crds2 = fm_crds2 + (tframe - fm_crds2.max())
        fm_crds_new = np.concatenate([fm_crds1, fm_crds2], axis=0)
        return varr.assign_coords(frame=fm_crds_new)
    else:
        return varr 
Example #5
Source File: test__order.py    From dask-image with BSD 3-Clause "New" or "Revised" License
def test_order_comprehensions(da_func, kwargs):
    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    l2s = [da_func(d[i], **kwargs) for i in range(len(d))]
    l2c = [da_func(d[i], **kwargs)[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c)) 
Example #6
Source File: test_split.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def test_kfold(shuffle):
    splitter = dask_ml.model_selection.KFold(
        n_splits=5, random_state=0, shuffle=shuffle
    )
    assert splitter.get_n_splits() == 5
    gen = splitter.split(dX)

    train_idx, test_idx = next(gen)
    assert isinstance(train_idx, da.Array)
    assert isinstance(test_idx, da.Array)

    assert train_idx.shape == (88,)  # 80% of 110
    assert test_idx.shape == (22,)

    assert train_idx.chunks == ((28, 50, 10),)
    assert test_idx.chunks == ((22,),)

    counts = pd.value_counts(train_idx.compute())
    assert counts.max() == 1

    N = len(X)

    np.testing.assert_array_equal(
        np.unique(da.concatenate([train_idx, test_idx])), np.arange(N)
    )

    expected_chunks = [
        (((22, 6, 50, 10),), ((22,),)),
        (((44, 34, 10),), ((6, 16),)),
        (((50, 16, 12, 10),), ((22,),)),
        (((50, 38),), ((12, 10),)),
    ]

    for (exp_train_idx, exp_test_idx), (train_idx, test_idx) in zip(
        expected_chunks, gen
    ):
        assert train_idx.chunks == exp_train_idx
        assert test_idx.chunks == exp_test_idx 
Example #7
Source File: spectral.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _slice_mostly_sorted(array, keep, rest, ind=None):
    """Slice dask array `array` that is almost entirely sorted already.

    We perform approximately `2 * len(keep)` slices on `array`.
    This is OK, since `keep` is small. Individually, each of these slices
    is entirely sorted.

    Parameters
    ----------
    array : dask.array.Array
    keep : ndarray[Int]
        This must be sorted.
    rest : ndarray[Bool]
    ind : ndarray[Int], optional

    Returns
    -------
    sliced : dask.array.Array
    """
    if ind is None:
        ind = np.arange(len(array))
    idx = np.argsort(np.concatenate([keep, ind[rest]]))

    slices = []
    if keep[0] > 0:  # avoid creating empty slices
        slices.append(slice(None, keep[0]))
    slices.append([keep[0]])
    windows = zip(keep[:-1], keep[1:])

    for l, r in windows:
        if r > l + 1:  # avoid creating empty slices
            slices.append(slice(l + 1, r))
        slices.append([r])

    if keep[-1] < len(array) - 1:  # avoid creating empty slices
        slices.append(slice(keep[-1] + 1, None))
    result = da.concatenate([array[idx[slice_]] for slice_ in slices])
    return result 
Example #8
Source File: _split.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _blockwise_slice(arr, idx):
    """Slice an array that is blockwise-aligned with idx.

    Parameters
    ----------
    arr : Dask array
    idx : Dask array
        Should have the following properties

        * Same blocks as `arr` along the first dimension
        * Contains only integers
        * Each block's values should be between ``[0, len(block))``

    Returns
    -------
    sliced : dask.Array
    """
    objs = []
    offsets = np.hstack([0, np.cumsum(arr.chunks[0])[:-1]])

    for i, (x, idx2) in enumerate(
        zip(arr.to_delayed().ravel(), idx.to_delayed().ravel())
    ):
        idx3 = idx2 - offsets[i]
        objs.append(x[idx3])

    shapes = idx.chunks[0]
    if arr.ndim == 2:
        P = arr.shape[1]
        shapes = [(x, P) for x in shapes]
    else:
        shapes = [(x,) for x in shapes]

    sliced = da.concatenate(
        [
            da.from_delayed(x, shape=shape, dtype=arr.dtype)
            for x, shape in zip(objs, shapes)
        ]
    )
    return sliced 
Example #9
Source File: _split.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _split_blockwise(self, X, seeds):
        chunks = X.chunks[0]

        train_pct, test_pct = _maybe_normalize_split_sizes(
            self.train_size, self.test_size
        )
        sizes = [_validate_shuffle_split(c, test_pct, train_pct) for c in chunks]

        objs = [
            dask.delayed(_generate_idx, nout=2)(chunksize, seed, n_train, n_test)
            for chunksize, seed, (n_train, n_test) in zip(chunks, seeds, sizes)
        ]

        train_objs, test_objs = zip(*objs)
        offsets = np.hstack([0, np.cumsum(chunks)])
        train_idx = da.concatenate(
            [
                da.from_delayed(x + offset, (train_size,), np.dtype("int"))
                for x, chunksize, (train_size, _), offset in zip(
                    train_objs, chunks, sizes, offsets
                )
            ]
        )
        test_idx = da.concatenate(
            [
                da.from_delayed(x + offset, (test_size,), np.dtype("int"))
                for x, chunksize, (_, test_size), offset in zip(
                    test_objs, chunks, sizes, offsets
                )
            ]
        )

        return train_idx, test_idx 
Example #10
Source File: text.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def transform(self, raw_X):
        msg = "'X' should be a 1-dimensional array with length 'num_samples'."

        if not dask.is_dask_collection(raw_X):
            return self._hasher(**self.get_params()).transform(raw_X)

        if isinstance(raw_X, db.Bag):
            bag2 = raw_X.map_partitions(self._transformer)
            objs = bag2.to_delayed()
            arrs = [
                da.from_delayed(obj, (np.nan, self.n_features), self.dtype)
                for obj in objs
            ]
            result = da.concatenate(arrs, axis=0)
        elif isinstance(raw_X, dd.Series):
            result = raw_X.map_partitions(self._transformer)
        elif isinstance(raw_X, da.Array):
            # dask.Array
            chunks = ((np.nan,) * raw_X.numblocks[0], (self.n_features,))
            if raw_X.ndim == 1:
                result = raw_X.map_blocks(
                    self._transformer, dtype="f8", chunks=chunks, new_axis=1
                )
            else:
                raise ValueError(msg)
        else:
            raise ValueError(msg)

        meta = scipy.sparse.eye(0, format="csr")
        result._meta = meta
        return result 
Example #11
Source File: pairwise.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def pairwise_distances_argmin_min(
    X: ArrayLike,
    Y: ArrayLike,
    axis: int = 1,
    metric: Union[str, Callable[[ArrayLike, ArrayLike], float]] = "euclidean",
    batch_size: Optional[int] = None,
    metric_kwargs: Optional[Dict[str, Any]] = None,
):
    if batch_size is not None:
        msg = "'batch_size' is deprecated. Use sklearn.config_context instead.'"
        warnings.warn(msg, FutureWarning)

    XD = X.to_delayed().flatten().tolist()
    func = delayed(metrics.pairwise_distances_argmin_min, pure=True, nout=2)
    blocks = [func(x, Y, metric=metric, metric_kwargs=metric_kwargs) for x in XD]
    argmins, mins = zip(*blocks)

    argmins = [
        da.from_delayed(block, (chunksize,), np.int64)
        for block, chunksize in zip(argmins, X.chunks[0])
    ]
    # Scikit-learn seems to always use float64
    mins = [
        da.from_delayed(block, (chunksize,), "f8")
        for block, chunksize in zip(mins, X.chunks[0])
    ]
    argmins = da.concatenate(argmins)
    mins = da.concatenate(mins)
    return argmins, mins 
Example #12
Source File: seviri_l2_bufr.py    From satpy with GNU General Public License v3.0
def get_array(self, key):
        """Get all data from file for the given BUFR key."""
        with open(self.filename, "rb") as fh:
            msgCount = 0
            while True:
                bufr = ec.codes_bufr_new_from_file(fh)
                if bufr is None:
                    break

                ec.codes_set(bufr, 'unpack', 1)

                # if this is the first message, initialise our final array
                if (msgCount == 0):
                    arr = da.from_array(ec.codes_get_array(
                        bufr, key, float), chunks=CHUNK_SIZE)
                else:
                    tmpArr = da.from_array(ec.codes_get_array(
                        bufr, key, float), chunks=CHUNK_SIZE)
                    arr = da.concatenate((arr, tmpArr))

                msgCount = msgCount+1
                ec.codes_release(bufr)

        if arr.size == 1:
            arr = arr[0]

        return arr 
Example #13
Source File: iasi_l2_so2_bufr.py    From satpy with GNU General Public License v3.0
def get_array(self, key):
        """Get all data from file for the given BUFR key."""
        with open(self.filename, "rb") as fh:
            msgCount = 0
            while True:

                bufr = ec.codes_bufr_new_from_file(fh)
                if bufr is None:
                    break

                ec.codes_set(bufr, 'unpack', 1)

                values = ec.codes_get_array(
                        bufr, key, float)

                if len(values) == 1:
                    values = np.repeat(values, 120)

                # if this is the first message, initialise our final array
                if (msgCount == 0):

                    arr = da.from_array([values], chunks=CHUNK_SIZE)
                else:
                    tmpArr = da.from_array([values], chunks=CHUNK_SIZE)

                    arr = da.concatenate((arr, tmpArr), axis=0)

                msgCount = msgCount+1
                ec.codes_release(bufr)

        if arr.size == 1:
            arr = arr[0]

        return arr 
Example #14
Source File: test__diff.py    From dask-image with BSD 3-Clause "New" or "Revised" License
def test_laplace_comprehensions():
    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    l2s = [da_ndf.laplace(d[i]) for i in range(len(d))]
    l2c = [da_ndf.laplace(d[i])[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c)) 
Example #15
Source File: test__conv.py    From dask-image with BSD 3-Clause "New" or "Revised" License
def test_convolutions_comprehensions(da_func):
    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    weights = np.ones((1, 1))

    l2s = [da_func(d[i], weights) for i in range(len(d))]
    l2c = [da_func(d[i], weights)[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c)) 
Example #16
Source File: test__generic.py    From dask-image with BSD 3-Clause "New" or "Revised" License
def test_generic_filter_comprehensions(da_func):
    da_wfunc = lambda arr: da_func(arr, lambda x: x, 1)  # noqa: E731

    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    l2s = [da_wfunc(d[i]) for i in range(len(d))]
    l2c = [da_wfunc(d[i])[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c)) 
Example #17
Source File: test__edge.py    From dask-image with BSD 3-Clause "New" or "Revised" License
def test_edge_comprehensions(da_func):
    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    l2s = [da_func(d[i]) for i in range(len(d))]
    l2c = [da_func(d[i])[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c)) 
Example #18
Source File: test__smooth.py    From dask-image with BSD 3-Clause "New" or "Revised" License
def test_uniform_comprehensions():
    da_func = lambda arr: da_ndf.uniform_filter(arr, 1, origin=0)  # noqa: E731

    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    l2s = [da_func(d[i]) for i in range(len(d))]
    l2c = [da_func(d[i])[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c)) 
Example #19
Source File: utils.py    From xmitgcm with MIT License
def llc_facets_3d_spatial_to_compact(facets, dimname, extra_metadata):
    """ Write in compact form a list of 3d facets

    PARAMETERS
    ----------
    facets : dict
        dict of xarray.dataarrays for the facets
    dimname : str
        name of the vertical dimension (e.g. 'k')
    extra_metadata : dict
        extra_metadata from get_extra_metadata

    RETURNS
    -------
    flatdata : numpy.array
        all the data in vector form
    """

    nz = len(facets['facet0'][dimname])
    nfacets = len(facets)
    flatdata = np.array([])

    for kz in range(nz):
        # rebuild the dict
        tmpdict = {}
        for kfacet in range(nfacets):
            this_facet = facets['facet' + str(kfacet)]
            if this_facet is not None:
                tmpdict['facet' + str(kfacet)] = this_facet.isel(k=kz)
            else:
                tmpdict['facet' + str(kfacet)] = None
        # concatenate all 2d arrays
        compact2d = llc_facets_2d_to_compact(tmpdict, extra_metadata)
        flatdata = np.concatenate([flatdata, compact2d])

    return flatdata 
Example #20
Source File: utils.py    From xmitgcm with MIT License
def find_concat_dim_facet(da, facet, extra_metadata):
    """ In llc grids, find along which horizontal dimension to concatenate
    facet between i, i_g and j, j_g. If the order of the facet is F, concat
    along i or i_g. If order is C, concat along j or j_g. Also return
    horizontal dim not to concatenate

    PARAMETERS
    ----------
    da : xarray.DataArray
        xmitgcm llc data array
    facet : int
        facet number
    extra_metadata : dict
        dict of extra_metadata from get_extra_metadata

    RETURNS
    -------
    concat_dim, nonconcat_dim : str, str
        names of the dimensions for concatenation or not

    """
    order = extra_metadata['facet_orders'][facet]
    if order == 'C':
        possible_concat_dims = ['j', 'j_g']
    elif order == 'F':
        possible_concat_dims = ['i', 'i_g']

    concat_dim = find_concat_dim(da, possible_concat_dims)

    # we also need the other horizontal dimension for vector indexing
    all_dims = list(da.dims)
    # discard face
    all_dims.remove('face')
    # remove the concat_dim to find horizontal non_concat dimension
    all_dims.remove(concat_dim)
    non_concat_dim = all_dims[0]
    return concat_dim, non_concat_dim 
Example #21
Source File: __init__.py    From pyresample with GNU Lesser General Public License v3.0
def get_border_lonlats(geo_def):
    """Get the border x- and y-coordinates."""
    if geo_def.proj_dict['proj'] == 'geos':
        lon_b, lat_b = get_geostationary_bounding_box(geo_def, 3600)
    else:
        lons, lats = geo_def.get_boundary_lonlats()
        lon_b = np.concatenate((lons.side1, lons.side2, lons.side3, lons.side4))
        lat_b = np.concatenate((lats.side1, lats.side2, lats.side3, lats.side4))

    return lon_b, lat_b 
Example #22
Source File: _bed_read.py    From pandas-plink with MIT License
def read_bed(filepath, nrows, ncols):
    from dask.array import concatenate, from_delayed
    from dask.delayed import delayed

    chunk_size = 1024

    row_start = 0
    col_xs = []
    while row_start < nrows:
        row_end = min(row_start + chunk_size, nrows)
        col_start = 0
        row_xs = []
        while col_start < ncols:
            col_end = min(col_start + chunk_size, ncols)

            x = delayed(_read_bed_chunk)(
                filepath, nrows, ncols, row_start, row_end, col_start, col_end
            )

            shape = (row_end - row_start, col_end - col_start)
            row_xs += [from_delayed(x, shape, float64)]
            col_start = col_end
        col_xs += [concatenate(row_xs, axis=1)]
        row_start = row_end
    X = concatenate(col_xs, axis=0)
    return X 
Example #23
Source File: meta.py    From gbdxtools with MIT License
def _slice_padded(self, _bounds):
        pads = (max(-_bounds[0], 0), max(-_bounds[1], 0),
                max(_bounds[2]-self.shape[2], 0), max(_bounds[3]-self.shape[1], 0))
        bounds = (max(_bounds[0], 0),
                  max(_bounds[1], 0),
                  max(min(_bounds[2], self.shape[2]), 0),
                  max(min(_bounds[3], self.shape[1]), 0))
        result = self[:, bounds[1]:bounds[3], bounds[0]:bounds[2]]
        if pads[0] > 0:
            dims = (result.shape[0], result.shape[1], pads[0])
            result = da.concatenate([da.zeros(dims, chunks=dims, dtype=result.dtype),
                                     result], axis=2)
        if pads[2] > 0:
            dims = (result.shape[0], result.shape[1], pads[2])
            result = da.concatenate([result,
                                     da.zeros(dims, chunks=dims, dtype=result.dtype)], axis=2)
        if pads[1] > 0:
            dims = (result.shape[0], pads[1], result.shape[2])
            result = da.concatenate([da.zeros(dims, chunks=dims, dtype=result.dtype),
                                     result], axis=1)
        if pads[3] > 0:
            dims = (result.shape[0], pads[3], result.shape[2])
            result = da.concatenate([result,
                                     da.zeros(dims, chunks=dims, dtype=result.dtype)], axis=1)

        return (result, _bounds[0], _bounds[1]) 
Example #24
Source File: transform.py    From nbodykit with GNU General Public License v3.0
def ConcatenateSources(*sources, **kwargs):
    """
    Concatenate CatalogSource objects together, optionally including only
    certain columns in the returned source.

    .. note::
        The returned catalog object carries the meta-data from only
        the first catalog supplied to this function (in the ``attrs`` dict).

    Parameters
    ----------
    *sources : subclass of :class:`~nbodykit.base.catalog.CatalogSource`
        the catalog source objects to concatenate together
    columns : str, list of str, optional
        the columns to include in the concatenated catalog

    Returns
    -------
    CatalogSource :
        the concatenated catalog source object

    Examples
    --------
    >>> from nbodykit.lab import *
    >>> source1 = UniformCatalog(nbar=100, BoxSize=1.0)
    >>> source2 = UniformCatalog(nbar=100, BoxSize=1.0)
    >>> print(source1.csize, source2.csize)
    >>> combined = transform.ConcatenateSources(source1, source2, columns=['Position', 'Velocity'])
    >>> print(combined.csize)
    """
    from nbodykit.base.catalog import CatalogSource

    columns = kwargs.get('columns', None)
    if isinstance(columns, string_types):
        columns = [columns]

    # concatenate all columns, if none provided
    if columns is None or columns == []:
        columns = sources[0].columns

    # check comms
    if not all(src.comm == sources[0].comm for src in sources):
        raise ValueError("cannot concatenate sources: comm mismatch")

    # check all columns are there
    for source in sources:
        if not all(col in source for col in columns):
            raise ValueError(("cannot concatenate sources: columns are missing "
                              "from some sources"))
    # the total size
    size = numpy.sum([src.size for src in sources], dtype='intp')

    data = {}
    for col in columns:
        data[col] = da.concatenate([src[col] for src in sources], axis=0)

    toret = CatalogSource._from_columns(size, sources[0].comm, **data)
    toret.attrs.update(sources[0].attrs)
    return toret 
Example #25
Source File: utilities.py    From minian with GNU General Public License v3.0
def save_video(movpath, fname_mov_orig, fname_mov_rig, fname_AC, fname_ACbf,
               dsratio):
    """

    Parameters
    ----------
    movpath :

    fname_mov_orig :

    fname_mov_rig :

    fname_AC :

    fname_ACbf :

    dsratio :


    Returns
    -------


    """
    mov_orig = np.load(fname_mov_orig, mmap_mode='r')
    mov_rig = np.load(fname_mov_rig, mmap_mode='r')
    mov_ac = np.load(fname_AC, mmap_mode='r')
    mov_acbf = np.load(fname_ACbf, mmap_mode='r')
    vw = skv.FFmpegWriter(
        movpath, inputdict={'-framerate': '30'}, outputdict={'-r': '30'})
    for fidx in range(0, mov_orig.shape[0], dsratio):
        print("writing frame: " + str(fidx))
        fm_orig = mov_orig[fidx, :, :] * 255
        fm_rig = mov_rig[fidx, :, :] * 255
        fm_acbf = mov_acbf[fidx, :, :] * 255
        fm_ac = mov_ac[fidx, :, :] * 255
        fm = np.concatenate(
            [
                np.concatenate([fm_orig, fm_rig], axis=1),
                np.concatenate([fm_acbf, fm_ac], axis=1)
            ],
            axis=0)
        vw.writeFrame(fm)
    vw.close() 
Example #26
Source File: utils.py    From xmitgcm with MIT License
def _pad_array(data, file_metadata, face=0):
    """
    Return a padded array. If input data is a numpy.memmap and no padding
    is necessary, the function preserves its type. Otherwise, the concatenate
    forces it to load into memory.

    Parameters
    ----------

    data          : numpy array or memmap
                    input data
    file_metadata : dict
                    metadata for file
    face          : int, optional
                    llc face if applicable

    Returns
    -------
    numpy.array or numpy.memmap

    """

    # Pad data before in y direction
    if 'pad_before_y' in file_metadata:
        if file_metadata['has_faces']:
            facet_origin = file_metadata['face_facets'][face]
            nypad_before = file_metadata['pad_before_y'][facet_origin]
        else:
            nypad_before = file_metadata['pad_before_y']

        pad_before = np.zeros((nypad_before, file_metadata['nx']))
        data_padded_before = np.concatenate(
            (pad_before, data), axis=0)
    else:
        data_padded_before = data

    # Pad data after in y direction
    if 'pad_after_y' in file_metadata:
        if file_metadata['has_faces']:
            facet_origin = file_metadata['face_facets'][face]
            nypad_after = file_metadata['pad_after_y'][facet_origin]
        else:
            nypad_after = file_metadata['pad_after_y']

        pad_after = np.zeros((nypad_after, file_metadata['nx']))
        data_padded_after = np.concatenate(
            (data_padded_before, pad_after), axis=0)
    else:
        data_padded_after = data_padded_before

    return data_padded_after 
Example #27
Source File: utils.py    From xmitgcm with MIT License
def _reshape_llc_data(data, jdim):  # pragma: no cover
    """Fix the weird problem with llc data array order."""
    # Can we do this without copying any data?
    # If not, we need to go upstream and implement this at the MDS level
    # Or can we fudge it with dask?
    # this is all very specific to the llc file output
    # would be nice to generalize more, but how?
    nside = data.shape[jdim] // LLC_NUM_FACES
    # how the LLC data is laid out along the j dimension
    strides = ((0,3), (3,6), (6,7), (7,10), (10,13))
    # whether to reshape each face
    reshape = (False, False, False, True, True)
    # this will slice the data into 5 facets
    slices = [jdim * (slice(None),) + (slice(nside*st[0], nside*st[1]),)
              for st in strides]
    facet_arrays = [data[sl] for sl in slices]
    face_arrays = []
    for ar, rs, st in zip(facet_arrays, reshape, strides):
        nfaces_in_facet = st[1] - st[0]
        shape = list(ar.shape)
        if rs:
            # we assume the other horizontal dimension is immediately after jdim
            shape[jdim] = ar.shape[jdim+1]
            shape[jdim+1] = ar.shape[jdim]
        # insert a length-1 dimension along which to concatenate
        shape.insert(jdim, 1)
        # this modifies the array shape in place, with no copies allowed
        # but it doesn't work with dask arrays
        # ar.shape = shape
        ar = ar.reshape(shape)
        # now ar is properly shaped, but we still need to slice it into faces
        face_slice_dim = jdim + 1 + rs
        for n in range(nfaces_in_facet):
            face_slice = (face_slice_dim * (slice(None),) +
                          (slice(nside*n, nside*(n+1)),))
            data_face = ar[face_slice]
            face_arrays.append(data_face)

    # We can't concatenate using numpy (hcat etc.) because it makes a copy,
    # presumably loading the memmaps into memory.
    # Using dask gets around this.
    # But what if we want different chunks, or already chunked the data
    # upstream? Doesn't seem like this is ideal
    # TODO: Refactor handling of dask arrays and chunking
    #return np.concatenate(face_arrays, axis=jdim)
    # the dask version doesn't work because of this:
    # https://github.com/dask/dask/issues/1645
    face_arrays_dask = [dsa.from_array(fa, chunks=fa.shape)
                        for fa in face_arrays]
    concat = dsa.concatenate(face_arrays_dask, axis=jdim)
    return concat 
Example #28
Source File: _split.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _split(self, test_start, test_stop, n_samples, chunks, seeds):
        train_objs = []
        test_objs = []
        train_sizes = []
        test_sizes = []

        offset = 0
        for chunk, seed in zip(chunks, seeds):
            start, stop = offset, offset + chunk

            test_id_start = max(test_start, start)
            test_id_stop = min(test_stop, stop)

            if test_id_start < test_id_stop:
                test_objs.append(
                    dask.delayed(_generate_offset_idx)(
                        chunk, test_id_start, test_id_stop, offset, seed
                    )
                )
                test_sizes.append(test_id_stop - test_id_start)

            train_id_stop = min(test_id_start, stop)
            if train_id_stop > start:
                train_objs.append(
                    dask.delayed(_generate_offset_idx)(
                        chunk, start, train_id_stop, offset, seed
                    )
                )
                train_sizes.append(train_id_stop - start)

            train_id_start = max(test_id_stop, start)
            if train_id_start < stop:
                train_objs.append(
                    dask.delayed(_generate_offset_idx)(
                        chunk, train_id_start, stop, offset, seed
                    )
                )
                train_sizes.append(stop - train_id_start)
            offset = stop

        train_idx = da.concatenate(
            [
                da.from_delayed(obj, (train_size,), np.dtype("int"))
                for obj, train_size in zip(train_objs, train_sizes)
            ]
        )

        test_idx = da.concatenate(
            [
                da.from_delayed(obj, (test_size,), np.dtype("int"))
                for obj, test_size in zip(test_objs, test_sizes)
            ]
        )

        return train_idx, test_idx 
Example #29
Source File: io_utils.py    From pyxem with GNU General Public License v3.0
def _untangle_raw(data, hdr_info, stack_size):
    """
    Corrects for the tangled raw mib format - Only the case for quad chip is considered here.

    Parameters
    --------
        data: dask array
            as a stack with the detector array unreshaped, e.g. for a single frame 512*512: (1, 262144)
        hdr_info: dict
            info read from the header - output of the _parse_hdr function
        stack_size: int
            The number of frames in the data

    Outputs
    ----------
    untangled_data: dask array
        corrected dask array object reshaped on the detector plane, e.g. for a single frame case
        as above: (1, 512, 512)
    """
    width = hdr_info["width"]
    height = hdr_info["height"]
    width_height = width * height
    if (
        hdr_info["Counter Depth (number)"] == 24
        or hdr_info["Counter Depth (number)"] == 12
    ):
        cols = 4

    elif hdr_info["Counter Depth (number)"] == 1:
        cols = 64

    elif hdr_info["Counter Depth (number)"] == 6:
        cols = 8

    data = data.reshape((stack_size * width_height))

    data = data.reshape(stack_size, height * (height // cols), cols)

    data = da.flip(data, 2)

    if hdr_info["Assembly Size"] == "2x2":
        data = data.reshape((stack_size * width_height))
        data = data.reshape(stack_size, 512 // 2, 512 * 2)

        det1 = data[:, :, 0:256]
        det2 = data[:, :, 256:512]
        det3 = data[:, :, 512 : 512 + 256]
        det4 = data[:, :, 512 + 256 :]

        det3 = da.flip(det3, 2)
        det3 = da.flip(det3, 1)

        det4 = da.flip(det4, 2)
        det4 = da.flip(det4, 1)

        untangled_data = da.concatenate(
            (da.concatenate((det1, det3), 1), da.concatenate((det2, det4), 1)), 2
        )
    return untangled_data 
Example #30
Source File: _encoders.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _transform(
        self, X: Union[ArrayLike, DataFrameType], handle_unknown: str = "error"
    ) -> Union[ArrayLike, DataFrameType]:
        X = check_array(
            X, accept_dask_dataframe=True, dtype=None, preserve_pandas_dataframe=True
        )

        is_array = isinstance(X, da.Array)

        if is_array:
            _, n_features = X.shape
        else:
            n_features = len(X.columns)

        if is_array:
            # We encode each column independently, as they have different categories.
            Xs = [
                _encode_dask_array(
                    X[:, i],
                    uniques=self.categories_[i],
                    encode=True,
                    onehot_dtype=self.dtype,
                )[1]
                for i in range(n_features)
            ]
            X = da.concatenate(Xs, axis=1)

            if not self.sparse:
                X = X.map_blocks(lambda x: x.toarray(), dtype=self.dtype)

        else:
            import dask.dataframe as dd

            # Validate that all are categorical.
            if not (X.dtypes == "category").all():
                raise ValueError("Must be all categorical.")

            if not len(X.columns) == len(self.categories_):
                raise ValueError(
                    "Number of columns ({}) does not match number "
                    "of categories_ ({})".format(len(X.columns), len(self.categories_))
                )

            for col, dtype in zip(X.columns, self.dtypes_):
                if not (X[col].dtype == dtype):
                    raise ValueError(
                        "Different CategoricalDtype for fit and "
                        "transform. '{}' != {}'".format(dtype, X[col].dtype)
                    )

            return dd.get_dummies(X, sparse=self.sparse, dtype=self.dtype)

        return X