Python dask.array.stack() Examples

The following are 28 code examples of dask.array.stack(), drawn from open-source projects. The source file, project, and license are noted above each example.
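
Before diving into the examples, here is a minimal sketch of what dask.array.stack() does: like numpy.stack, it joins a sequence of equally shaped arrays along a new axis, but lazily, returning a dask array (the values below are illustrative only):

import numpy as np
import dask.array as da

x = da.from_array(np.arange(4).reshape(2, 2), chunks=2)
y = da.from_array(np.arange(4, 8).reshape(2, 2), chunks=2)

stacked = da.stack([x, y], axis=0)  # new leading axis -> shape (2, 2, 2)
assert stacked.shape == (2, 2, 2)
assert (stacked.compute()[1] == y.compute()).all()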
Example #1
Source File: transform.py    From nbodykit with GNU General Public License v3.0
def StackColumns(*cols):
    """
    Stack the input dask arrays so that each input array becomes one
    column of the output.

    This uses :func:`dask.array.vstack` followed by a transpose.

    Parameters
    ----------
    *cols : :class:`dask.array.Array`
        the dask arrays to stack vertically together

    Returns
    -------
    :class:`dask.array.Array` :
        the dask array where columns correspond to the input arrays

    Raises
    ------
    TypeError
        If the input columns are not dask arrays
    """
    cols = da.broadcast_arrays(*cols)
    return da.vstack(cols).T 
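
A short usage sketch (not from nbodykit) showing the resulting column layout; the input arrays are made up for illustration:

import dask.array as da

x = da.arange(4, chunks=2)        # becomes column 0
y = da.arange(4, chunks=2) * 10   # becomes column 1
cols = StackColumns(x, y)
assert cols.shape == (4, 2)
assert (cols.compute()[:, 1] == [0, 10, 20, 30]).all()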
Example #2
Source File: test_dask_layers.py    From napari with BSD 3-Clause "New" or "Revised" License
def delayed_dask_stack():
    """A 4D (20, 10, 10, 10) delayed dask array, simulates disk io."""
    # we will return a dict with a 'calls' variable that tracks call count
    output = {'calls': 0}

    # create a delayed version of a function that simply generates np.arrays
    # but also counts when it has been called
    @dask.delayed
    def get_array():
        nonlocal output
        output['calls'] += 1
        return np.random.rand(10, 10, 10)

    # then make a mock "timelapse" of 3D stacks
    # see https://napari.org/tutorials/applications/dask.html for details
    _list = [get_array() for _ in range(20)]
    output['stack'] = da.stack(
        [da.from_delayed(i, shape=(10, 10, 10), dtype=float) for i in _list]
    )
    assert output['stack'].shape == (20, 10, 10, 10)
    return output 
Example #3
Source File: test_dask_layers.py    From napari with BSD 3-Clause "New" or "Revised" License
def test_prevent_dask_cache(delayed_dask_stack):
    """Test that pre-emptively setting cache to zero keeps it off"""
    # the del is not required; it just shows that the prior state of the cache
    # does not matter: calling resize_dask_cache(0) disables it permanently
    del utils.dask_cache
    utils.resize_dask_cache(0)

    v = viewer.ViewerModel()
    dask_stack = delayed_dask_stack['stack']
    # adding a new stack will not increase the cache size
    v.add_image(dask_stack, multiscale=False, contrast_limits=(0, 1))
    assert utils.dask_cache.cache.available_bytes == 0
    # and the cache will not be populated
    for i in range(3):
        v.dims.set_point(0, i)
    assert len(utils.dask_cache.cache.heap.heap) == 0 
Example #4
Source File: resample.py    From satpy with GNU General Public License v3.0
def _call_fornav(self, cols, rows, target_geo_def, data,
                     grid_coverage=0, **kwargs):
        """Wrap fornav() to run as a dask delayed."""
        num_valid_points, res = fornav(cols, rows, target_geo_def,
                                       data, **kwargs)

        if isinstance(data, tuple):
            # convert 'res' from tuple of arrays to one array
            res = np.stack(res)
            num_valid_points = sum(num_valid_points)

        grid_covered_ratio = num_valid_points / float(res.size)
        grid_covered = grid_covered_ratio > grid_coverage
        if not grid_covered:
            msg = "EWA resampling only found %f%% of the grid covered " \
                  "(need %f%%)" % (grid_covered_ratio * 100,
                                   grid_coverage * 100)
            raise RuntimeError(msg)
        LOG.debug("EWA resampling found %f%% of the grid covered" %
                  (grid_covered_ratio * 100))

        return res 
Example #5
Source File: resample.py    From satpy with GNU General Public License v3.0
def _call_ll2cr(self, lons, lats, target_geo_def, swath_usage=0):
        """Wrap ll2cr() for handling dask delayed calls better."""
        new_src = SwathDefinition(lons, lats)

        swath_points_in_grid, cols, rows = ll2cr(new_src, target_geo_def)
        # FIXME: How do we check swath usage/coverage if we only do this
        #        per-block
        # # Determine if enough of the input swath was used
        # grid_name = getattr(self.target_geo_def, "name", "N/A")
        # fraction_in = swath_points_in_grid / float(lons.size)
        # swath_used = fraction_in > swath_usage
        # if not swath_used:
        #     LOG.info("Data does not fit in grid %s because it only %f%% of "
        #              "the swath is used" %
        #              (grid_name, fraction_in * 100))
        #     raise RuntimeError("Data does not fit in grid %s" % (grid_name,))
        # else:
        #     LOG.debug("Data fits in grid %s and uses %f%% of the swath",
        #               grid_name, fraction_in * 100)

        return np.stack([cols, rows], axis=0) 
Example #6
Source File: test__diff.py    From dask-image with BSD 3-Clause "New" or "Revised" License
def test_laplace_comprehensions():
    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    l2s = [da_ndf.laplace(d[i]) for i in range(len(d))]
    l2c = [da_ndf.laplace(d[i])[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c)) 
Example #7
Source File: test_dask_rasterio.py    From dask-rasterio with BSD 3-Clause "New" or "Revised" License
def test_read_raster_multi_band(some_raster_path):
    array = read_raster(some_raster_path, band=(1, 3))
    assert isinstance(array, da.Array)

    expected_array = da.stack([
        read_raster_band(some_raster_path, band=1),
        read_raster_band(some_raster_path, band=3)
    ])
    assert array.shape == expected_array.shape
    assert array.dtype == expected_array.dtype
    assert_array_equal(array.compute(), expected_array.compute()) 
Example #8
Source File: read.py    From dask-rasterio with BSD 3-Clause "New" or "Revised" License
def read_raster(path, band=None, block_size=1):
    """Read all or some bands from raster

    Arguments:
        path {string} -- path to raster file

    Keyword Arguments:
        band {int, iterable(int)} -- band number or iterable of bands.
            When passing None, it reads all bands (default: {None})
        block_size {int} -- block size multiplier (default: {1})

    Returns:
        dask.array.Array -- a Dask array
    """

    if isinstance(band, int):
        return read_raster_band(path, band=band, block_size=block_size)
    else:
        if band is None:
            bands = range(1, get_band_count(path) + 1)
        else:
            bands = list(band)
        return da.stack([
            read_raster_band(path, band=band, block_size=block_size)
            for band in bands
        ]) 
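
A hypothetical usage sketch, assuming a 3-band raster at "scene.tif" (the path and band count are made up):

full = read_raster("scene.tif")               # all bands -> shape (3, height, width)
pair = read_raster("scene.tif", band=(1, 3))  # selected bands -> shape (2, height, width)
band2 = read_raster("scene.tif", band=2)      # single band -> shape (height, width)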
Example #9
Source File: naive_bayes.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _joint_log_likelihood(self, X):
        jll = []
        for i in range(np.size(self.classes_)):
            jointi = da.log(self.class_prior_[i])
            n_ij = -0.5 * da.sum(da.log(2.0 * np.pi * self.sigma_[i, :]))
            n_ij -= 0.5 * da.sum(
                ((X - self.theta_[i, :]) ** 2) / (self.sigma_[i, :]), 1
            )
            jll.append(jointi + n_ij)

        joint_log_likelihood = da.stack(jll).T
        return joint_log_likelihood 
Example #10
Source File: _blockwise.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _predict_proba_stack(part, estimators):
    # predict for a batch of estimators and stack up the results.
    batches = [estimator.predict_proba(part) for estimator in estimators]
    return np.stack(batches) 
Example #11
Source File: _blockwise.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _predict_stack(part, estimators):
    # predict for a batch of estimators and stack up the results.
    batches = [estimator.predict(part) for estimator in estimators]
    return np.vstack(batches).T 
Example #12
Source File: _blockwise.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _collect_probas(self, X):
        if isinstance(X, da.Array):
            chunks = (len(self.estimators_), X.chunks[0], len(self.classes_))
            meta = np.array([], dtype="float64")
            # (n_estimators, len(X), n_classes)
            combined = X.map_blocks(
                _predict_proba_stack,
                estimators=self.estimators_,
                chunks=chunks,
                meta=meta,
            )
        elif isinstance(X, dd._Frame):
            # TODO: replace with a _predict_proba_stack version.
            # This currently raises; dask.dataframe doesn't like map_partitions that
            # return new axes.
            # meta = np.empty((len(self.estimators_), 0, len(self.classes_)),
            #                 dtype="float64")
            # combined = X.map_partitions(_predict_proba_stack, meta=meta,
            #                             estimators=self.estimators_)
            # combined._chunks = ((len(self.estimators_),),
            #                     (np.nan,) * X.npartitions,
            #                     (len(X.columns),))
            meta = np.empty((0, len(self.classes_)), dtype="float64")
            probas = [
                X.map_partitions(_predict_proba, meta=meta, estimator=estimator)
                for estimator in self.estimators_
            ]
            # TODO(https://github.com/dask/dask/issues/6177): replace with da.stack
            chunks = probas[0]._chunks
            for proba in probas:
                proba._chunks = ((1,) * len(chunks[0]), chunks[1])

            combined = da.stack(probas)
            combined._chunks = ((1,) * len(self.estimators_),) + chunks
        else:
            # ndarray, etc.
            combined = np.stack(
                [estimator.predict_proba(X) for estimator in self.estimators_]
            )

        return combined 
Example #13
Source File: resample.py    From satpy with GNU General Public License v3.0
def compute(self, data, **kwargs):
        """Call the resampling."""
        LOG.debug("Resampling %s", str(data.name))
        results = []
        if data.ndim == 3:
            for _i in range(data.shape[0]):
                res = self.resampler.get_count()
                results.append(res)
        else:
            res = self.resampler.get_count()
            results.append(res)

        return da.stack(results) 
Example #14
Source File: resample.py    From satpy with GNU General Public License v3.0
def compute(self, data, mask_all_nan=False, **kwargs):
        """Call the resampling."""
        LOG.debug("Resampling %s", str(data.name))
        results = []
        if data.ndim == 3:
            for i in range(data.shape[0]):
                res = self.resampler.get_sum(data[i, :, :],
                                             mask_all_nan=mask_all_nan)
                results.append(res)
        else:
            res = self.resampler.get_sum(data, mask_all_nan=mask_all_nan)
            results.append(res)

        return da.stack(results) 
Example #15
Source File: generic_image.py    From satpy with GNU General Public License v3.0
def mask_image_data(data):
    """Mask image data if alpha channel is present."""
    if data.bands.size in (2, 4):
        if not np.issubdtype(data.dtype, np.integer):
            raise ValueError("Only integer datatypes can be used as a mask.")
        mask = data.data[-1, :, :] == np.iinfo(data.dtype).min
        data = data.astype(np.float64)
        masked_data = da.stack([da.where(mask, np.nan, data.data[i, :, :])
                                for i in range(data.shape[0])])
        data.data = masked_data
        data = data.sel(bands=BANDS[data.bands.size - 1])
    return data 
Example #16
Source File: seviri_base.py    From satpy with GNU General Public License v3.0
def dec10216(inbuf):
    """Decode 10 bits data into 16 bits words.

    ::

        /*
         * pack 4 10-bit words in 5 bytes into 4 16-bit words
         *
         * 0       1       2       3       4       5
         * 01234567890123456789012345678901234567890
         * 0         1         2         3         4
         */
        ip = &in_buffer[i];
        op = &out_buffer[j];
        op[0] = ip[0]*4 + ip[1]/64;
        op[1] = (ip[1] & 0x3F)*16 + ip[2]/16;
        op[2] = (ip[2] & 0x0F)*64 + ip[3]/4;
        op[3] = (ip[3] & 0x03)*256 +ip[4];

    """
    arr10 = inbuf.astype(np.uint16)
    arr16_len = int(len(arr10) * 4 / 5)
    arr10_len = int((arr16_len * 5) / 4)
    arr10 = arr10[:arr10_len]  # adjust size

    # dask is slow with indexing
    arr10_0 = arr10[::5]
    arr10_1 = arr10[1::5]
    arr10_2 = arr10[2::5]
    arr10_3 = arr10[3::5]
    arr10_4 = arr10[4::5]

    arr16_0 = (arr10_0 << 2) + (arr10_1 >> 6)
    arr16_1 = ((arr10_1 & 63) << 4) + (arr10_2 >> 4)
    arr16_2 = ((arr10_2 & 15) << 6) + (arr10_3 >> 2)
    arr16_3 = ((arr10_3 & 3) << 8) + arr10_4
    arr16 = da.stack([arr16_0, arr16_1, arr16_2, arr16_3], axis=-1).ravel()
    arr16 = da.rechunk(arr16, arr16.shape[0])

    return arr16 
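
A worked check of the bit arithmetic above (the input bytes are made up): five input bytes carry four 10-bit words, so a buffer whose first ten bits are all ones decodes to 1023 followed by three zeros:

import numpy as np
import dask.array as da

buf = da.from_array(np.array([0xFF, 0xC0, 0x00, 0x00, 0x00], dtype=np.uint8),
                    chunks=5)
out = dec10216(buf).compute()
# 0xFF plus the top two bits of 0xC0 form 0b1111111111 == 1023
assert list(out) == [1023, 0, 0, 0]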
Example #17
Source File: app.py    From Gather-Deployment with MIT License
def classify(texts):
    batch_x_text = [clearstring(t) for t in texts]
    batch_x = str_idx(batch_x_text, dict_sentiment['dictionary'], 100)
    output_sentiment = sess_sentiment.run(
        logits_sentiment, feed_dict = {x_sentiment: batch_x}
    )
    labels = [sentiment_label[l] for l in np.argmax(output_sentiment, 1)]
    return da.stack(labels, axis = 0) 
Example #18
Source File: test__conv.py    From dask-image with BSD 3-Clause "New" or "Revised" License
def test_convolutions_comprehensions(da_func):
    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    weights = np.ones((1, 1))

    l2s = [da_func(d[i], weights) for i in range(len(d))]
    l2c = [da_func(d[i], weights)[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c)) 
Example #19
Source File: test__generic.py    From dask-image with BSD 3-Clause "New" or "Revised" License
def test_generic_filter_comprehensions(da_func):
    da_wfunc = lambda arr: da_func(arr, lambda x: x, 1)  # noqa: E731

    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    l2s = [da_wfunc(d[i]) for i in range(len(d))]
    l2c = [da_wfunc(d[i])[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c)) 
Example #20
Source File: test__edge.py    From dask-image with BSD 3-Clause "New" or "Revised" License
def test_edge_comprehensions(da_func):
    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    l2s = [da_func(d[i]) for i in range(len(d))]
    l2c = [da_func(d[i])[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c)) 
Example #21
Source File: test__order.py    From dask-image with BSD 3-Clause "New" or "Revised" License
def test_order_comprehensions(da_func, kwargs):
    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    l2s = [da_func(d[i], **kwargs) for i in range(len(d))]
    l2c = [da_func(d[i], **kwargs)[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c)) 
Example #22
Source File: test__smooth.py    From dask-image with BSD 3-Clause "New" or "Revised" License
def test_uniform_comprehensions():
    da_func = lambda arr: da_ndf.uniform_filter(arr, 1, origin=0)  # noqa: E731

    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    l2s = [da_func(d[i]) for i in range(len(d))]
    l2c = [da_func(d[i])[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c)) 
Example #23
Source File: kd_tree.py    From pyresample with GNU Lesser General Public License v3.0
def lonlat2xyz(lons, lats):
    R = 6370997.0
    x_coords = R * np.cos(np.deg2rad(lats)) * np.cos(np.deg2rad(lons))
    y_coords = R * np.cos(np.deg2rad(lats)) * np.sin(np.deg2rad(lons))
    z_coords = R * np.sin(np.deg2rad(lats))

    stack = np.stack if isinstance(lons, np.ndarray) else da.stack
    return stack(
        (x_coords.ravel(), y_coords.ravel(), z_coords.ravel()), axis=-1) 
Example #24
Source File: xarr.py    From pyresample with GNU Lesser General Public License v3.0
def lonlat2xyz(lons, lats):
    """Convert geographic coordinates to cartesian 3D coordinates."""
    R = 6370997.0
    x_coords = R * da.cos(da.deg2rad(lats)) * da.cos(da.deg2rad(lons))
    y_coords = R * da.cos(da.deg2rad(lats)) * da.sin(da.deg2rad(lons))
    z_coords = R * da.sin(da.deg2rad(lats))

    return da.stack(
        (x_coords.ravel(), y_coords.ravel(), z_coords.ravel()), axis=-1) 
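
A quick sanity check of the conversion (inputs made up): the north pole should map to (0, 0, R) for the Earth radius used above:

import numpy as np
import dask.array as da

lons = da.from_array(np.array([0.0]), chunks=1)
lats = da.from_array(np.array([90.0]), chunks=1)
xyz = lonlat2xyz(lons, lats).compute()  # shape (1, 3)
assert np.allclose(xyz[0], [0.0, 0.0, 6370997.0])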
Example #25
Source File: test_dask_layers.py    From napari with BSD 3-Clause "New" or "Revised" License
def test_dask_optimized_slicing(delayed_dask_stack, monkeypatch):
    """Test that dask_configure reduces compute with dask stacks."""

    # add dask stack to the viewer, making sure to pass multiscale and clims
    v = viewer.ViewerModel()
    dask_stack = delayed_dask_stack['stack']
    v.add_image(dask_stack, multiscale=False, contrast_limits=(0, 1))
    assert delayed_dask_stack['calls'] == 1  # the first stack will be loaded

    # changing the Z plane should never incur calls
    # since the stack has already been loaded (& it is chunked as a 3D array)
    for i in range(3):
        v.dims.set_point(1, i)
        assert delayed_dask_stack['calls'] == 1  # still just the first call

    # changing the timepoint will, of course, incur some compute calls
    v.dims.set_point(0, 1)
    assert delayed_dask_stack['calls'] == 2
    v.dims.set_point(0, 2)
    assert delayed_dask_stack['calls'] == 3

    # but going back to previous timepoints should not, since they are cached
    v.dims.set_point(0, 1)
    v.dims.set_point(0, 0)
    assert delayed_dask_stack['calls'] == 3
    v.dims.set_point(0, 3)
    assert delayed_dask_stack['calls'] == 4 
Example #26
Source File: npy.py    From intake with BSD 2-Clause "Simplified" License
def _get_schema(self):
        from fsspec import open_files
        import dask.array as da
        if self._arr is None:
            path = self._get_cache(self.path)[0]

            files = open_files(path, 'rb', compression=None,
                               **self.storage)
            if self.shape is None:
                arr = NumpyAccess(files[0])
                self.shape = arr.shape
                self.dtype = arr.dtype
                arrs = [arr] + [NumpyAccess(f, self.shape, self.dtype,
                                            offset=arr.offset)
                                for f in files[1:]]
            else:
                arrs = [NumpyAccess(f, self.shape, self.dtype)
                        for f in files]
            self.chunks = (self._chunks, ) + (-1, ) * (len(self.shape) - 1)
            self._arrs = [da.from_array(arr, self.chunks) for arr in arrs]

            if len(self._arrs) > 1:
                self._arr = da.stack(self._arrs)
            else:
                self._arr = self._arrs[0]
            self.chunks = self._arr.chunks
        return Schema(dtype=str(self.dtype), shape=self.shape,
                      extra_metadata=self.metadata,
                      npartitions=self._arr.npartitions,
                      chunks=self.chunks) 
Example #27
Source File: resample.py    From satpy with GNU General Public License v3.0
def resample(self, data, **kwargs):
        """Resample `data` by calling `precompute` and `compute` methods.

        Args:
            data (xarray.DataArray): Data to be resampled

        Returns (xarray.DataArray): Data resampled to the target area

        """
        self.precompute(**kwargs)
        attrs = data.attrs.copy()
        data_arr = data.data
        if data.ndim == 3 and data.dims[0] == 'bands':
            dims = ('bands', 'y', 'x')
        # Both one- and two-dimensional input data result in 2D output
        elif data.ndim in (1, 2):
            dims = ('y', 'x')
        else:
            dims = data.dims
        result = self.compute(data_arr, **kwargs)
        coords = {}
        if 'bands' in data.coords:
            coords['bands'] = data.coords['bands']
        # Fractions are returned in a dict
        elif isinstance(result, dict):
            coords['categories'] = sorted(result.keys())
            dims = ('categories', 'y', 'x')
            new_result = []
            for cat in coords['categories']:
                new_result.append(result[cat])
            result = da.stack(new_result)
        if result.ndim > len(dims):
            result = da.squeeze(result)

        # Adjust some attributes
        if "BucketFraction" in str(self):
            attrs['units'] = ''
            attrs['calibration'] = ''
            attrs['standard_name'] = 'area_fraction'
        elif "BucketCount" in str(self):
            attrs['units'] = ''
            attrs['calibration'] = ''
            attrs['standard_name'] = 'number_of_observations'

        result = xr.DataArray(result, dims=dims, coords=coords,
                              attrs=attrs)

        return result 
Example #28
Source File: test_dask_layers.py    From napari with BSD 3-Clause "New" or "Revised" License
def test_dask_cache_resizing(delayed_dask_stack):
    """Test that we can spin up, resize, and spin down the cache."""
    # add dask stack to the viewer, making sure to pass multiscale and clims
    utils.dask_cache = None

    v = viewer.ViewerModel()
    dask_stack = delayed_dask_stack['stack']

    # adding a new stack should spin up a cache
    v.add_image(dask_stack, multiscale=False, contrast_limits=(0, 1))
    assert utils.dask_cache.cache.available_bytes > 0
    # make sure the cache actually has been populated
    assert len(utils.dask_cache.cache.heap.heap) > 0

    # we can resize that cache back to 0 bytes
    utils.resize_dask_cache(0)
    assert utils.dask_cache.cache.available_bytes == 0

    # adding a 2nd stack should not adjust the cache size once created
    v.add_image(dask_stack, multiscale=False, contrast_limits=(0, 1))
    assert utils.dask_cache.cache.available_bytes == 0
    # and the cache will remain empty regardless of what we do
    for i in range(3):
        v.dims.set_point(1, i)
    assert len(utils.dask_cache.cache.heap.heap) == 0

    # but we can always spin it up again
    utils.resize_dask_cache(1e4)
    assert utils.dask_cache.cache.available_bytes == 1e4
    # and adding a new image doesn't change the size
    v.add_image(dask_stack, multiscale=False, contrast_limits=(0, 1))
    assert utils.dask_cache.cache.available_bytes == 1e4
    # but the cache heap is getting populated again
    for i in range(3):
        v.dims.set_point(0, i)
    assert len(utils.dask_cache.cache.heap.heap) > 0

    # however, if the dask_cache attribute is deleted entirely (or set to None)
    # we will have no memory of it ever having been created.
    # and adding a new stack will spin up a cache
    del utils.dask_cache
    v.add_image(dask_stack, multiscale=False, contrast_limits=(0, 1))
    assert utils.dask_cache.cache.available_bytes > 0