Python dask.array.stack() Examples

The following are 28 code examples of dask.array.stack(), drawn from open-source projects. The source file, project, and license are noted above each example.
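
Before diving into the examples, here is a minimal sketch of what dask.array.stack() does: like numpy.stack, it joins a sequence of equally shaped arrays along a new axis, but lazily, returning a dask array (the values below are illustrative only):

import numpy as np
import dask.array as da

x = da.from_array(np.arange(4).reshape(2, 2), chunks=2)
y = da.from_array(np.arange(4, 8).reshape(2, 2), chunks=2)

stacked = da.stack([x, y], axis=0)  # new leading axis -> shape (2, 2, 2)
assert stacked.shape == (2, 2, 2)
assert (stacked.compute()[1] == y.compute()).all()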
Example #1
Source File: transform.py    From nbodykit with GNU General Public License v3.0
def StackColumns(*cols):
    """
    Stack the input dask arrays so that each input array becomes one
    column of the output.

    This uses :func:`dask.array.vstack` followed by a transpose.

    Parameters
    ----------
    *cols : :class:`dask.array.Array`
        the dask arrays to stack vertically together

    Returns
    -------
    :class:`dask.array.Array` :
        the dask array where columns correspond to the input arrays

    Raises
    ------
    TypeError
        If the input columns are not dask arrays
    """
    cols = da.broadcast_arrays(*cols)
    return da.vstack(cols).T 
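
A short usage sketch (not from nbodykit) showing the resulting column layout; the input arrays are made up for illustration:

import dask.array as da

x = da.arange(4, chunks=2)        # becomes column 0
y = da.arange(4, chunks=2) * 10   # becomes column 1
cols = StackColumns(x, y)
assert cols.shape == (4, 2)
assert (cols.compute()[:, 1] == [0, 10, 20, 30]).all()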
Example #2
Source File: test_dask_layers.py    From napari with BSD 3-Clause "New" or "Revised" License
def delayed_dask_stack():
    """A 4D (20, 10, 10, 10) delayed dask array, simulates disk io."""
    # we will return a dict with a 'calls' variable that tracks call count
    output = {'calls': 0}

    # create a delayed version of a function that simply generates np.arrays
    # but also counts when it has been called
    @dask.delayed
    def get_array():
        nonlocal output
        output['calls'] += 1
        return np.random.rand(10, 10, 10)

    # then make a mock "timelapse" of 3D stacks
    # see https://napari.org/tutorials/applications/dask.html for details
    _list = [get_array() for _ in range(20)]
    output['stack'] = da.stack(
        [da.from_delayed(i, shape=(10, 10, 10), dtype=float) for i in _list]
    )
    assert output['stack'].shape == (20, 10, 10, 10)
    return output 
Example #3
Source File: test_dask_layers.py    From napari with BSD 3-Clause "New" or "Revised" License
def test_prevent_dask_cache(delayed_dask_stack):
    """Test that pre-emptively setting cache to zero keeps it off"""
    # the del is not required; it just shows that the prior state of the cache
    # does not matter: calling resize_dask_cache(0) disables it permanently
    del utils.dask_cache
    utils.resize_dask_cache(0)

    v = viewer.ViewerModel()
    dask_stack = delayed_dask_stack['stack']
    # adding a new stack will not increase the cache size
    v.add_image(dask_stack, multiscale=False, contrast_limits=(0, 1))
    assert utils.dask_cache.cache.available_bytes == 0
    # and the cache will not be populated
    for i in range(3):
        v.dims.set_point(0, i)
    assert len(utils.dask_cache.cache.heap.heap) == 0 
Example #4
Source File: resample.py    From satpy with GNU General Public License v3.0
def _call_fornav(self, cols, rows, target_geo_def, data,
                     grid_coverage=0, **kwargs):
        """Wrap fornav() to run as a dask delayed."""
        num_valid_points, res = fornav(cols, rows, target_geo_def,
                                       data, **kwargs)

        if isinstance(data, tuple):
            # convert 'res' from tuple of arrays to one array
            res = np.stack(res)
            num_valid_points = sum(num_valid_points)

        grid_covered_ratio = num_valid_points / float(res.size)
        grid_covered = grid_covered_ratio > grid_coverage
        if not grid_covered:
            msg = "EWA resampling only found %f%% of the grid covered " \
                  "(need %f%%)" % (grid_covered_ratio * 100,
                                   grid_coverage * 100)
            raise RuntimeError(msg)
        LOG.debug("EWA resampling found %f%% of the grid covered" %
                  (grid_covered_ratio * 100))

        return res 
Example #5
Source File: resample.py    From satpy with GNU General Public License v3.0
def _call_ll2cr(self, lons, lats, target_geo_def, swath_usage=0):
        """Wrap ll2cr() for handling dask delayed calls better."""
        new_src = SwathDefinition(lons, lats)

        swath_points_in_grid, cols, rows = ll2cr(new_src, target_geo_def)
        # FIXME: How do we check swath usage/coverage if we only do this
        #        per-block
        # # Determine if enough of the input swath was used
        # grid_name = getattr(self.target_geo_def, "name", "N/A")
        # fraction_in = swath_points_in_grid / float(lons.size)
        # swath_used = fraction_in > swath_usage
        # if not swath_used:
        #     LOG.info("Data does not fit in grid %s because it only %f%% of "
        #              "the swath is used" %
        #              (grid_name, fraction_in * 100))
        #     raise RuntimeError("Data does not fit in grid %s" % (grid_name,))
        # else:
        #     LOG.debug("Data fits in grid %s and uses %f%% of the swath",
        #               grid_name, fraction_in * 100)

        return np.stack([cols, rows], axis=0) 
Example #6
Source File: test__diff.py    From dask-image with BSD 3-Clause "New" or "Revised" License
def test_laplace_comprehensions():
    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    l2s = [da_ndf.laplace(d[i]) for i in range(len(d))]
    l2c = [da_ndf.laplace(d[i])[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c)) 
Example #7
Source File: test_dask_rasterio.py    From dask-rasterio with BSD 3-Clause "New" or "Revised" License
def test_read_raster_multi_band(some_raster_path):
    array = read_raster(some_raster_path, band=(1, 3))
    assert isinstance(array, da.Array)

    expected_array = da.stack([
        read_raster_band(some_raster_path, band=1),
        read_raster_band(some_raster_path, band=3)
    ])
    assert array.shape == expected_array.shape
    assert array.dtype == expected_array.dtype
    assert_array_equal(array.compute(), expected_array.compute()) 
Example #8
Source File: read.py    From dask-rasterio with BSD 3-Clause "New" or "Revised" License
def read_raster(path, band=None, block_size=1):
    """Read all or some bands from raster

    Arguments:
        path {string} -- path to raster file

    Keyword Arguments:
        band {int, iterable(int)} -- band number or iterable of bands.
            When passing None, it reads all bands (default: {None})
        block_size {int} -- block size multiplier (default: {1})

    Returns:
        dask.array.Array -- a Dask array
    """

    if isinstance(band, int):
        return read_raster_band(path, band=band, block_size=block_size)
    else:
        if band is None:
            bands = range(1, get_band_count(path) + 1)
        else:
            bands = list(band)
        return da.stack([
            read_raster_band(path, band=band, block_size=block_size)
            for band in bands
        ]) 
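
A hypothetical usage sketch, assuming a 3-band raster at "scene.tif" (the path and band count are made up):

full = read_raster("scene.tif")               # all bands -> shape (3, height, width)
pair = read_raster("scene.tif", band=(1, 3))  # selected bands -> shape (2, height, width)
band2 = read_raster("scene.tif", band=2)      # single band -> shape (height, width)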
Example #9
Source File: naive_bayes.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _joint_log_likelihood(self, X):
        jll = []
        for i in range(np.size(self.classes_)):
            jointi = da.log(self.class_prior_[i])
            n_ij = -0.5 * da.sum(da.log(2.0 * np.pi * self.sigma_[i, :]))
            n_ij -= 0.5 * da.sum(
                ((X - self.theta_[i, :]) ** 2) / (self.sigma_[i, :]), 1
            )
            jll.append(jointi + n_ij)

        joint_log_likelihood = da.stack(jll).T
        return joint_log_likelihood 
Example #10
Source File: _blockwise.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _predict_proba_stack(part, estimators):
    # predict for a batch of estimators and stack up the results.
    batches = [estimator.predict_proba(part) for estimator in estimators]
    return np.stack(batches) 
Example #11
Source File: _blockwise.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _predict_stack(part, estimators):
    # predict for a batch of estimators and stack up the results.
    batches = [estimator.predict(part) for estimator in estimators]
    return np.vstack(batches).T 
Example #12
Source File: _blockwise.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _collect_probas(self, X):
        if isinstance(X, da.Array):
            chunks = (len(self.estimators_), X.chunks[0], len(self.classes_))
            meta = np.array([], dtype="float64")
            # (n_estimators, len(X), n_classes)
            combined = X.map_blocks(
                _predict_proba_stack,
                estimators=self.estimators_,
                chunks=chunks,
                meta=meta,
            )
        elif isinstance(X, dd._Frame):
            # TODO: replace with a _predict_proba_stack version.
            # This currently raises; dask.dataframe doesn't like map_partitions that
            # return new axes.
            # meta = np.empty((len(self.estimators_), 0, len(self.classes_)),
            #                 dtype="float64")
            # combined = X.map_partitions(_predict_proba_stack, meta=meta,
            #                             estimators=self.estimators_)
            # combined._chunks = ((len(self.estimators_),),
            #                     (np.nan,) * X.npartitions,
            #                     (len(X.columns),))
            meta = np.empty((0, len(self.classes_)), dtype="float64")
            probas = [
                X.map_partitions(_predict_proba, meta=meta, estimator=estimator)
                for estimator in self.estimators_
            ]
            # TODO(https://github.com/dask/dask/issues/6177): replace with da.stack
            chunks = probas[0]._chunks
            for proba in probas:
                proba._chunks = ((1,) * len(chunks[0]), chunks[1])

            combined = da.stack(probas)
            combined._chunks = ((1,) * len(self.estimators_),) + chunks
        else:
            # ndarray, etc.
            combined = np.stack(
                [estimator.predict_proba(X) for estimator in self.estimators_]
            )

        return combined 
Example #13
Source File: resample.py    From satpy with GNU General Public License v3.0
def compute(self, data, **kwargs):
        """Call the resampling."""
        LOG.debug("Resampling %s", str(data.name))
        results = []
        if data.ndim == 3:
            for _i in range(data.shape[0]):
                res = self.resampler.get_count()
                results.append(res)
        else:
            res = self.resampler.get_count()
            results.append(res)

        return da.stack(results) 
Example #14
Source File: resample.py    From satpy with GNU General Public License v3.0
def compute(self, data, mask_all_nan=False, **kwargs):
        """Call the resampling."""
        LOG.debug("Resampling %s", str(data.name))
        results = []
        if data.ndim == 3:
            for i in range(data.shape[0]):
                res = self.resampler.get_sum(data[i, :, :],
                                             mask_all_nan=mask_all_nan)
                results.append(res)
        else:
            res = self.resampler.get_sum(data, mask_all_nan=mask_all_nan)
            results.append(res)

        return da.stack(results) 
Example #15
Source File: generic_image.py    From satpy with GNU General Public License v3.0
def mask_image_data(data):
    """Mask image data if alpha channel is present."""
    if data.bands.size in (2, 4):
        if not np.issubdtype(data.dtype, np.integer):
            raise ValueError("Only integer datatypes can be used as a mask.")
        mask = data.data[-1, :, :] == np.iinfo(data.dtype).min
        data = data.astype(np.float64)
        masked_data = da.stack([da.where(mask, np.nan, data.data[i, :, :])
                                for i in range(data.shape[0])])
        data.data = masked_data
        data = data.sel(bands=BANDS[data.bands.size - 1])
    return data 
Example #16
Source File: seviri_base.py    From satpy with GNU General Public License v3.0
def dec10216(inbuf):
    """Decode 10 bits data into 16 bits words.

    ::

        /*
         * pack 4 10-bit words in 5 bytes into 4 16-bit words
         *
         * 0       1       2       3       4       5
         * 01234567890123456789012345678901234567890
         * 0         1         2         3         4
         */
        ip = &in_buffer[i];
        op = &out_buffer[j];
        op[0] = ip[0]*4 + ip[1]/64;
        op[1] = (ip[1] & 0x3F)*16 + ip[2]/16;
        op[2] = (ip[2] & 0x0F)*64 + ip[3]/4;
        op[3] = (ip[3] & 0x03)*256 +ip[4];

    """
    arr10 = inbuf.astype(np.uint16)
    arr16_len = int(len(arr10) * 4 / 5)
    arr10_len = int((arr16_len * 5) / 4)
    arr10 = arr10[:arr10_len]  # adjust size

    # dask is slow with indexing
    arr10_0 = arr10[::5]
    arr10_1 = arr10[1::5]
    arr10_2 = arr10[2::5]
    arr10_3 = arr10[3::5]
    arr10_4 = arr10[4::5]

    arr16_0 = (arr10_0 << 2) + (arr10_1 >> 6)
    arr16_1 = ((arr10_1 & 63) << 4) + (arr10_2 >> 4)
    arr16_2 = ((arr10_2 & 15) << 6) + (arr10_3 >> 2)
    arr16_3 = ((arr10_3 & 3) << 8) + arr10_4
    arr16 = da.stack([arr16_0, arr16_1, arr16_2, arr16_3], axis=-1).ravel()
    arr16 = da.rechunk(arr16, arr16.shape[0])

    return arr16 
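
A worked check of the bit arithmetic above (the input bytes are made up): five input bytes carry four 10-bit words, so a buffer whose first ten bits are all ones decodes to 1023 followed by three zeros:

import numpy as np
import dask.array as da

buf = da.from_array(np.array([0xFF, 0xC0, 0x00, 0x00, 0x00], dtype=np.uint8),
                    chunks=5)
out = dec10216(buf).compute()
# 0xFF plus the top two bits of 0xC0 form 0b1111111111 == 1023
assert list(out) == [1023, 0, 0, 0]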
Example #17
Source File: app.py    From Gather-Deployment with MIT License
def classify(texts):
    batch_x_text = [clearstring(t) for t in texts]
    batch_x = str_idx(batch_x_text, dict_sentiment['dictionary'], 100)
    output_sentiment = sess_sentiment.run(
        logits_sentiment, feed_dict = {x_sentiment: batch_x}
    )
    labels = [sentiment_label[l] for l in np.argmax(output_sentiment, 1)]
    return da.stack(labels, axis = 0) 
Example #18
Source File: test__conv.py    From dask-image with BSD 3-Clause "New" or "Revised" License
def test_convolutions_comprehensions(da_func):
    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    weights = np.ones((1, 1))

    l2s = [da_func(d[i], weights) for i in range(len(d))]
    l2c = [da_func(d[i], weights)[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c)) 
Example #19
Source File: test__generic.py    From dask-image with BSD 3-Clause "New" or "Revised" License
def test_generic_filter_comprehensions(da_func):
    da_wfunc = lambda arr: da_func(arr, lambda x: x, 1)  # noqa: E731

    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    l2s = [da_wfunc(d[i]) for i in range(len(d))]
    l2c = [da_wfunc(d[i])[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c)) 
Example #20
Source File: test__edge.py    From dask-image with BSD 3-Clause "New" or "Revised" License
def test_edge_comprehensions(da_func):
    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    l2s = [da_func(d[i]) for i in range(len(d))]
    l2c = [da_func(d[i])[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c)) 
Example #21
Source File: test__order.py    From dask-image with BSD 3-Clause "New" or "Revised" License
def test_order_comprehensions(da_func, kwargs):
    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    l2s = [da_func(d[i], **kwargs) for i in range(len(d))]
    l2c = [da_func(d[i], **kwargs)[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c)) 
Example #22
Source File: test__smooth.py    From dask-image with BSD 3-Clause "New" or "Revised" License
def test_uniform_comprehensions():
    da_func = lambda arr: da_ndf.uniform_filter(arr, 1, origin=0)  # noqa: E731

    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    l2s = [da_func(d[i]) for i in range(len(d))]
    l2c = [da_func(d[i])[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c)) 
Example #23
Source File: kd_tree.py    From pyresample with GNU Lesser General Public License v3.0
def lonlat2xyz(lons, lats):
    R = 6370997.0
    x_coords = R * np.cos(np.deg2rad(lats)) * np.cos(np.deg2rad(lons))
    y_coords = R * np.cos(np.deg2rad(lats)) * np.sin(np.deg2rad(lons))
    z_coords = R * np.sin(np.deg2rad(lats))

    stack = np.stack if isinstance(lons, np.ndarray) else da.stack
    return stack(
        (x_coords.ravel(), y_coords.ravel(), z_coords.ravel()), axis=-1) 
Example #24
Source File: xarr.py    From pyresample with GNU Lesser General Public License v3.0
def lonlat2xyz(lons, lats):
    """Convert geographic coordinates to cartesian 3D coordinates."""
    R = 6370997.0
    x_coords = R * da.cos(da.deg2rad(lats)) * da.cos(da.deg2rad(lons))
    y_coords = R * da.cos(da.deg2rad(lats)) * da.sin(da.deg2rad(lons))
    z_coords = R * da.sin(da.deg2rad(lats))

    return da.stack(
        (x_coords.ravel(), y_coords.ravel(), z_coords.ravel()), axis=-1) 
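
A quick sanity check of the conversion (inputs made up): the north pole should map to (0, 0, R) for the Earth radius used above:

import numpy as np
import dask.array as da

lons = da.from_array(np.array([0.0]), chunks=1)
lats = da.from_array(np.array([90.0]), chunks=1)
xyz = lonlat2xyz(lons, lats).compute()  # shape (1, 3)
assert np.allclose(xyz[0], [0.0, 0.0, 6370997.0])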
Example #25
Source File: test_dask_layers.py    From napari with BSD 3-Clause "New" or "Revised" License
def test_dask_optimized_slicing(delayed_dask_stack, monkeypatch):
    """Test that dask_configure reduces compute with dask stacks."""

    # add dask stack to the viewer, making sure to pass multiscale and clims
    v = viewer.ViewerModel()
    dask_stack = delayed_dask_stack['stack']
    v.add_image(dask_stack, multiscale=False, contrast_limits=(0, 1))
    assert delayed_dask_stack['calls'] == 1  # the first stack will be loaded

    # changing the Z plane should never incur calls
    # since the stack has already been loaded (& it is chunked as a 3D array)
    for i in range(3):
        v.dims.set_point(1, i)
        assert delayed_dask_stack['calls'] == 1  # still just the first call

    # changing the timepoint will, of course, incur some compute calls
    v.dims.set_point(0, 1)
    assert delayed_dask_stack['calls'] == 2
    v.dims.set_point(0, 2)
    assert delayed_dask_stack['calls'] == 3

    # but going back to previous timepoints should not, since they are cached
    v.dims.set_point(0, 1)
    v.dims.set_point(0, 0)
    assert delayed_dask_stack['calls'] == 3
    v.dims.set_point(0, 3)
    assert delayed_dask_stack['calls'] == 4 
Example #26
Source File: npy.py    From intake with BSD 2-Clause "Simplified" License
def _get_schema(self):
        from fsspec import open_files
        import dask.array as da
        if self._arr is None:
            path = self._get_cache(self.path)[0]

            files = open_files(path, 'rb', compression=None,
                               **self.storage)
            if self.shape is None:
                arr = NumpyAccess(files[0])
                self.shape = arr.shape
                self.dtype = arr.dtype
                arrs = [arr] + [NumpyAccess(f, self.shape, self.dtype,
                                            offset=arr.offset)
                                for f in files[1:]]
            else:
                arrs = [NumpyAccess(f, self.shape, self.dtype)
                        for f in files]
            self.chunks = (self._chunks, ) + (-1, ) * (len(self.shape) - 1)
            self._arrs = [da.from_array(arr, self.chunks) for arr in arrs]

            if len(self._arrs) > 1:
                self._arr = da.stack(self._arrs)
            else:
                self._arr = self._arrs[0]
            self.chunks = self._arr.chunks
        return Schema(dtype=str(self.dtype), shape=self.shape,
                      extra_metadata=self.metadata,
                      npartitions=self._arr.npartitions,
                      chunks=self.chunks) 
Example #27
Source File: resample.py    From satpy with GNU General Public License v3.0
def resample(self, data, **kwargs):
        """Resample `data` by calling `precompute` and `compute` methods.

        Args:
            data (xarray.DataArray): Data to be resampled

        Returns (xarray.DataArray): Data resampled to the target area

        """
        self.precompute(**kwargs)
        attrs = data.attrs.copy()
        data_arr = data.data
        if data.ndim == 3 and data.dims[0] == 'bands':
            dims = ('bands', 'y', 'x')
        # Both one- and two-dimensional input data result in 2D output
        elif data.ndim in (1, 2):
            dims = ('y', 'x')
        else:
            dims = data.dims
        result = self.compute(data_arr, **kwargs)
        coords = {}
        if 'bands' in data.coords:
            coords['bands'] = data.coords['bands']
        # Fractions are returned in a dict
        elif isinstance(result, dict):
            coords['categories'] = sorted(result.keys())
            dims = ('categories', 'y', 'x')
            new_result = []
            for cat in coords['categories']:
                new_result.append(result[cat])
            result = da.stack(new_result)
        if result.ndim > len(dims):
            result = da.squeeze(result)

        # Adjust some attributes
        if "BucketFraction" in str(self):
            attrs['units'] = ''
            attrs['calibration'] = ''
            attrs['standard_name'] = 'area_fraction'
        elif "BucketCount" in str(self):
            attrs['units'] = ''
            attrs['calibration'] = ''
            attrs['standard_name'] = 'number_of_observations'

        result = xr.DataArray(result, dims=dims, coords=coords,
                              attrs=attrs)

        return result 
Example #28
Source File: test_dask_layers.py    From napari with BSD 3-Clause "New" or "Revised" License
def test_dask_cache_resizing(delayed_dask_stack):
    """Test that we can spin up, resize, and spin down the cache."""
    # add dask stack to the viewer, making sure to pass multiscale and clims
    utils.dask_cache = None

    v = viewer.ViewerModel()
    dask_stack = delayed_dask_stack['stack']

    # adding a new stack should spin up a cache
    v.add_image(dask_stack, multiscale=False, contrast_limits=(0, 1))
    assert utils.dask_cache.cache.available_bytes > 0
    # make sure the cache actually has been populated
    assert len(utils.dask_cache.cache.heap.heap) > 0

    # we can resize that cache back to 0 bytes
    utils.resize_dask_cache(0)
    assert utils.dask_cache.cache.available_bytes == 0

    # adding a 2nd stack should not adjust the cache size once created
    v.add_image(dask_stack, multiscale=False, contrast_limits=(0, 1))
    assert utils.dask_cache.cache.available_bytes == 0
    # and the cache will remain empty regardless of what we do
    for i in range(3):
        v.dims.set_point(1, i)
    assert len(utils.dask_cache.cache.heap.heap) == 0

    # but we can always spin it up again
    utils.resize_dask_cache(1e4)
    assert utils.dask_cache.cache.available_bytes == 1e4
    # and adding a new image doesn't change the size
    v.add_image(dask_stack, multiscale=False, contrast_limits=(0, 1))
    assert utils.dask_cache.cache.available_bytes == 1e4
    # but the cache heap is getting populated again
    for i in range(3):
        v.dims.set_point(0, i)
    assert len(utils.dask_cache.cache.heap.heap) > 0

    # however, if the dask_cache attribute is deleted entirely (or set to None)
    # we will have no memory of it ever having been created.
    # and adding a new stack will spin up a cache
    del utils.dask_cache
    v.add_image(dask_stack, multiscale=False, contrast_limits=(0, 1))
    assert utils.dask_cache.cache.available_bytes > 0