Python dask.array.stack() Examples
The following are 28 code examples of dask.array.stack(), collected from open source projects. The original project, source file, and license are noted above each example. You may also want to check out all available functions/classes of the module dask.array.
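Before the project examples, here is a minimal standalone sketch (mine, not drawn from any of the projects below) of what dask.array.stack() does: it joins same-shaped arrays along a new axis, axis 0 by default.

import numpy as np
import dask.array as da

# two 2x3 blocks stacked along a new leading axis -> shape (2, 2, 3)
x = da.from_array(np.arange(6).reshape(2, 3), chunks=(2, 3))
y = da.from_array(np.arange(6, 12).reshape(2, 3), chunks=(2, 3))
print(da.stack([x, y]).shape)           # (2, 2, 3)
print(da.stack([x, y], axis=-1).shape)  # (2, 3, 2)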
Example #1
Source File: transform.py From nbodykit with GNU General Public License v3.0

def StackColumns(*cols):
    """
    Stack the input dask arrays vertically, column by column.

    This uses :func:`dask.array.vstack`.

    Parameters
    ----------
    *cols : :class:`dask.array.Array`
        the dask arrays to stack vertically together

    Returns
    -------
    :class:`dask.array.Array` :
        the dask array where columns correspond to the input arrays

    Raises
    ------
    TypeError
        If the input columns are not dask arrays
    """
    cols = da.broadcast_arrays(*cols)
    return da.vstack(cols).T
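A quick usage sketch (mine, not from nbodykit), assuming StackColumns above is in scope: two length-5 dask arrays become the columns of a (5, 2) array.

import dask.array as da

x = da.ones(5, chunks=5)
y = da.zeros(5, chunks=5)
cols = StackColumns(x, y)  # function from the example above
print(cols.shape)          # (5, 2)
print(cols.compute()[0])   # [1. 0.]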
Example #2
Source File: test_dask_layers.py From napari with BSD 3-Clause "New" or "Revised" License

def delayed_dask_stack():
    """A 4D (20, 10, 10, 10) delayed dask array, simulates disk io."""
    # we will return a dict with a 'calls' variable that tracks call count
    output = {'calls': 0}

    # create a delayed version of function that simply generates np.arrays
    # but also counts when it has been called
    @dask.delayed
    def get_array():
        nonlocal output
        output['calls'] += 1
        return np.random.rand(10, 10, 10)

    # then make a mock "timelapse" of 3D stacks
    # see https://napari.org/tutorials/applications/dask.html for details
    _list = [get_array() for fn in range(20)]
    output['stack'] = da.stack(
        [da.from_delayed(i, shape=(10, 10, 10), dtype=float) for i in _list]
    )
    assert output['stack'].shape == (20, 10, 10, 10)
    return output
Example #3
Source File: test_dask_layers.py From napari with BSD 3-Clause "New" or "Revised" License

def test_prevent_dask_cache(delayed_dask_stack):
    """Test that pre-emptively setting cache to zero keeps it off"""
    # the del is not required, it just shows that prior state of the cache
    # does not matter... calling resize_dask_cache(0) will permanently disable it
    del utils.dask_cache
    utils.resize_dask_cache(0)

    v = viewer.ViewerModel()
    dask_stack = delayed_dask_stack['stack']
    # adding a new stack will not increase the cache size
    v.add_image(dask_stack, multiscale=False, contrast_limits=(0, 1))
    assert utils.dask_cache.cache.available_bytes == 0
    # and the cache will not be populated
    for i in range(3):
        v.dims.set_point(0, i)
    assert len(utils.dask_cache.cache.heap.heap) == 0
Example #4
Source File: resample.py From satpy with GNU General Public License v3.0

def _call_fornav(self, cols, rows, target_geo_def, data,
                 grid_coverage=0, **kwargs):
    """Wrap fornav() to run as a dask delayed."""
    num_valid_points, res = fornav(cols, rows, target_geo_def, data, **kwargs)

    if isinstance(data, tuple):
        # convert 'res' from tuple of arrays to one array
        res = np.stack(res)
        num_valid_points = sum(num_valid_points)

    grid_covered_ratio = num_valid_points / float(res.size)
    grid_covered = grid_covered_ratio > grid_coverage
    if not grid_covered:
        msg = "EWA resampling only found %f%% of the grid covered " \
              "(need %f%%)" % (grid_covered_ratio * 100, grid_coverage * 100)
        raise RuntimeError(msg)
    LOG.debug("EWA resampling found %f%% of the grid covered" %
              (grid_covered_ratio * 100))

    return res
Example #5
Source File: resample.py From satpy with GNU General Public License v3.0

def _call_ll2cr(self, lons, lats, target_geo_def, swath_usage=0):
    """Wrap ll2cr() for handling dask delayed calls better."""
    new_src = SwathDefinition(lons, lats)
    swath_points_in_grid, cols, rows = ll2cr(new_src, target_geo_def)
    # FIXME: How do we check swath usage/coverage if we only do this
    #        per-block
    # # Determine if enough of the input swath was used
    # grid_name = getattr(self.target_geo_def, "name", "N/A")
    # fraction_in = swath_points_in_grid / float(lons.size)
    # swath_used = fraction_in > swath_usage
    # if not swath_used:
    #     LOG.info("Data does not fit in grid %s because it only %f%% of "
    #              "the swath is used" %
    #              (grid_name, fraction_in * 100))
    #     raise RuntimeError("Data does not fit in grid %s" % (grid_name,))
    # else:
    #     LOG.debug("Data fits in grid %s and uses %f%% of the swath",
    #               grid_name, fraction_in * 100)

    return np.stack([cols, rows], axis=0)
Example #6
Source File: test__diff.py From dask-image with BSD 3-Clause "New" or "Revised" License

def test_laplace_comprehensions():
    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    l2s = [da_ndf.laplace(d[i]) for i in range(len(d))]
    l2c = [da_ndf.laplace(d[i])[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c))
Example #7
Source File: test_dask_rasterio.py From dask-rasterio with BSD 3-Clause "New" or "Revised" License

def test_read_raster_multi_band(some_raster_path):
    array = read_raster(some_raster_path, band=(1, 3))
    assert isinstance(array, da.Array)
    expected_array = da.stack([
        read_raster_band(some_raster_path, band=1),
        read_raster_band(some_raster_path, band=3)
    ])
    assert array.shape == expected_array.shape
    assert array.dtype == expected_array.dtype
    assert_array_equal(array.compute(), expected_array.compute())
Example #8
Source File: read.py From dask-rasterio with BSD 3-Clause "New" or "Revised" License

def read_raster(path, band=None, block_size=1):
    """Read all or some bands from raster

    Arguments:
        path {string} -- path to raster file

    Keyword Arguments:
        band {int, iterable(int)} -- band number or iterable of bands.
            When passing None, it reads all bands (default: {None})
        block_size {int} -- block size multiplier (default: {1})

    Returns:
        dask.array.Array -- a Dask array
    """
    if isinstance(band, int):
        return read_raster_band(path, band=band, block_size=block_size)
    else:
        if band is None:
            bands = range(1, get_band_count(path) + 1)
        else:
            bands = list(band)
        return da.stack([
            read_raster_band(path, band=band, block_size=block_size)
            for band in bands
        ])
Example #9
Source File: naive_bayes.py From dask-ml with BSD 3-Clause "New" or "Revised" License

def _joint_log_likelihood(self, X):
    jll = []
    for i in range(np.size(self.classes_)):
        jointi = da.log(self.class_prior_[i])
        n_ij = -0.5 * da.sum(da.log(2.0 * np.pi * self.sigma_[i, :]))
        n_ij -= 0.5 * da.sum(
            ((X - self.theta_[i, :]) ** 2) / (self.sigma_[i, :]), 1
        )
        jll.append(jointi + n_ij)

    joint_log_likelihood = da.stack(jll).T
    return joint_log_likelihood
Example #10
Source File: _blockwise.py From dask-ml with BSD 3-Clause "New" or "Revised" License

def _predict_proba_stack(part, estimators):
    # predict for a batch of estimators and stack up the results.
    batches = [estimator.predict_proba(part) for estimator in estimators]
    return np.stack(batches)
Example #11
Source File: _blockwise.py From dask-ml with BSD 3-Clause "New" or "Revised" License

def _predict_stack(part, estimators):
    # predict for a batch of estimators and stack up the results.
    batches = [estimator.predict(part) for estimator in estimators]
    return np.vstack(batches).T
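A small illustration (mine, not from dask-ml) of the output layout of _predict_stack above, using a few scikit-learn trees as stand-in estimators: np.vstack gives one row per estimator, and the transpose turns that into one column per estimator.

import numpy as np
from sklearn.tree import DecisionTreeClassifier

rng = np.random.RandomState(0)
X = rng.rand(20, 3)
y = rng.randint(2, size=20)
estimators = [DecisionTreeClassifier(max_depth=2).fit(X, y) for _ in range(4)]
print(_predict_stack(X, estimators).shape)  # (20, 4): one column per estimator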
Example #12
Source File: _blockwise.py From dask-ml with BSD 3-Clause "New" or "Revised" License

def _collect_probas(self, X):
    if isinstance(X, da.Array):
        chunks = (len(self.estimators_), X.chunks[0], len(self.classes_))
        meta = np.array([], dtype="float64")
        # (n_estimators, len(X), n_classes)
        combined = X.map_blocks(
            _predict_proba_stack,
            estimators=self.estimators_,
            chunks=chunks,
            meta=meta,
        )
    elif isinstance(X, dd._Frame):
        # TODO: replace with a _predict_proba_stack version.
        # This currently raises; dask.dataframe doesn't like map_partitions
        # that return new axes.
        # meta = np.empty((len(self.estimators_), 0, len(self.classes_)),
        #                 dtype="float64")
        # combined = X.map_partitions(_predict_proba_stack, meta=meta,
        #                             estimators=self.estimators_)
        # combined._chunks = ((len(self.estimators_),),
        #                     (np.nan,) * X.npartitions,
        #                     (len(X.columns),))
        meta = np.empty((0, len(self.classes_)), dtype="float64")
        probas = [
            X.map_partitions(_predict_proba, meta=meta, estimator=estimator)
            for estimator in self.estimators_
        ]
        # TODO(https://github.com/dask/dask/issues/6177): replace with da.stack
        chunks = probas[0]._chunks
        for proba in probas:
            proba._chunks = ((1,) * len(chunks[0]), chunks[1])

        combined = da.stack(probas)
        combined._chunks = ((1,) * len(self.estimators_),) + chunks
    else:
        # ndarray, etc.
        combined = np.stack(
            [estimator.predict_proba(X) for estimator in self.estimators_]
        )

    return combined
Example #13
Source File: resample.py From satpy with GNU General Public License v3.0

def compute(self, data, **kwargs):
    """Call the resampling."""
    LOG.debug("Resampling %s", str(data.name))
    results = []
    if data.ndim == 3:
        for _i in range(data.shape[0]):
            res = self.resampler.get_count()
            results.append(res)
    else:
        res = self.resampler.get_count()
        results.append(res)

    return da.stack(results)
Example #14
Source File: resample.py From satpy with GNU General Public License v3.0

def compute(self, data, mask_all_nan=False, **kwargs):
    """Call the resampling."""
    LOG.debug("Resampling %s", str(data.name))
    results = []
    if data.ndim == 3:
        for i in range(data.shape[0]):
            res = self.resampler.get_sum(data[i, :, :],
                                         mask_all_nan=mask_all_nan)
            results.append(res)
    else:
        res = self.resampler.get_sum(data, mask_all_nan=mask_all_nan)
        results.append(res)

    return da.stack(results)
Example #15
Source File: generic_image.py From satpy with GNU General Public License v3.0

def mask_image_data(data):
    """Mask image data if alpha channel is present."""
    if data.bands.size in (2, 4):
        if not np.issubdtype(data.dtype, np.integer):
            raise ValueError("Only integer datatypes can be used as a mask.")
        mask = data.data[-1, :, :] == np.iinfo(data.dtype).min
        data = data.astype(np.float64)
        masked_data = da.stack([da.where(mask, np.nan, data.data[i, :, :])
                                for i in range(data.shape[0])])
        data.data = masked_data
        data = data.sel(bands=BANDS[data.bands.size - 1])

    return data
Example #16
Source File: seviri_base.py From satpy with GNU General Public License v3.0

def dec10216(inbuf):
    """Decode 10 bits data into 16 bits words.

    ::

        /*
         * pack 4 10-bit words in 5 bytes into 4 16-bit words
         *
         * 0       1       2       3       4       5
         * 01234567890123456789012345678901234567890
         * 0         1         2         3         4
         */

        ip = &in_buffer[i];
        op = &out_buffer[j];

        op[0] = ip[0]*4 + ip[1]/64;
        op[1] = (ip[1] & 0x3F)*16 + ip[2]/16;
        op[2] = (ip[2] & 0x0F)*64 + ip[3]/4;
        op[3] = (ip[3] & 0x03)*256 + ip[4];

    """
    arr10 = inbuf.astype(np.uint16)
    arr16_len = int(len(arr10) * 4 / 5)
    arr10_len = int((arr16_len * 5) / 4)
    arr10 = arr10[:arr10_len]  # adjust size

    # dask is slow with indexing
    arr10_0 = arr10[::5]
    arr10_1 = arr10[1::5]
    arr10_2 = arr10[2::5]
    arr10_3 = arr10[3::5]
    arr10_4 = arr10[4::5]

    arr16_0 = (arr10_0 << 2) + (arr10_1 >> 6)
    arr16_1 = ((arr10_1 & 63) << 4) + (arr10_2 >> 4)
    arr16_2 = ((arr10_2 & 15) << 6) + (arr10_3 >> 2)
    arr16_3 = ((arr10_3 & 3) << 8) + arr10_4
    arr16 = da.stack([arr16_0, arr16_1, arr16_2, arr16_3], axis=-1).ravel()
    arr16 = da.rechunk(arr16, arr16.shape[0])

    return arr16
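A quick sanity check of the bit arithmetic (mine, not from satpy), assuming dec10216 above is in scope: five bytes of 0xFF pack four 10-bit words, each equal to the 10-bit maximum 1023.

import numpy as np
import dask.array as da

inbuf = da.from_array(np.full(5, 0xFF, dtype=np.uint8), chunks=5)
print(dec10216(inbuf).compute())  # [1023 1023 1023 1023]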
Example #17
Source File: app.py From Gather-Deployment with MIT License

def classify(texts):
    batch_x_text = [clearstring(t) for t in texts]
    batch_x = str_idx(batch_x_text, dict_sentiment['dictionary'], 100)
    output_sentiment = sess_sentiment.run(
        logits_sentiment, feed_dict={x_sentiment: batch_x}
    )
    labels = [sentiment_label[l] for l in np.argmax(output_sentiment, 1)]
    return da.stack(labels, axis=0)
Example #18
Source File: test__conv.py From dask-image with BSD 3-Clause "New" or "Revised" License

def test_convolutions_comprehensions(da_func):
    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    weights = np.ones((1, 1))

    l2s = [da_func(d[i], weights) for i in range(len(d))]
    l2c = [da_func(d[i], weights)[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c))
Example #19
Source File: test__generic.py From dask-image with BSD 3-Clause "New" or "Revised" License

def test_generic_filter_comprehensions(da_func):
    da_wfunc = lambda arr: da_func(arr, lambda x: x, 1)  # noqa: E731
    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    l2s = [da_wfunc(d[i]) for i in range(len(d))]
    l2c = [da_wfunc(d[i])[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c))
Example #20
Source File: test__edge.py From dask-image with BSD 3-Clause "New" or "Revised" License

def test_edge_comprehensions(da_func):
    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    l2s = [da_func(d[i]) for i in range(len(d))]
    l2c = [da_func(d[i])[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c))
Example #21
Source File: test__order.py From dask-image with BSD 3-Clause "New" or "Revised" License

def test_order_comprehensions(da_func, kwargs):
    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    l2s = [da_func(d[i], **kwargs) for i in range(len(d))]
    l2c = [da_func(d[i], **kwargs)[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c))
Example #22
Source File: test__smooth.py From dask-image with BSD 3-Clause "New" or "Revised" License

def test_uniform_comprehensions():
    da_func = lambda arr: da_ndf.uniform_filter(arr, 1, origin=0)  # noqa: E731
    np.random.seed(0)

    a = np.random.random((3, 12, 14))
    d = da.from_array(a, chunks=(3, 6, 7))

    l2s = [da_func(d[i]) for i in range(len(d))]
    l2c = [da_func(d[i])[None] for i in range(len(d))]

    dau.assert_eq(np.stack(l2s), da.stack(l2s))
    dau.assert_eq(np.concatenate(l2c), da.concatenate(l2c))
Example #23
Source File: kd_tree.py From pyresample with GNU Lesser General Public License v3.0

def lonlat2xyz(lons, lats):
    R = 6370997.0
    x_coords = R * np.cos(np.deg2rad(lats)) * np.cos(np.deg2rad(lons))
    y_coords = R * np.cos(np.deg2rad(lats)) * np.sin(np.deg2rad(lons))
    z_coords = R * np.sin(np.deg2rad(lats))

    stack = np.stack if isinstance(lons, np.ndarray) else da.stack
    return stack(
        (x_coords.ravel(), y_coords.ravel(), z_coords.ravel()), axis=-1)
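A small spot check (mine, not from pyresample), assuming the lonlat2xyz above is in scope: (lon, lat) = (0, 0) lands on (R, 0, 0) and (90, 0) lands on (~0, R, 0), with R = 6370997.0 as defined in the function.

import numpy as np

xyz = lonlat2xyz(np.array([0.0, 90.0]), np.array([0.0, 0.0]))
print(xyz[0])  # [6370997. 0. 0.]
print(xyz[1])  # x is ~4e-10 rather than exactly 0; y is 6370997.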
Example #24
Source File: xarr.py From pyresample with GNU Lesser General Public License v3.0

def lonlat2xyz(lons, lats):
    """Convert geographic coordinates to cartesian 3D coordinates."""
    R = 6370997.0
    x_coords = R * da.cos(da.deg2rad(lats)) * da.cos(da.deg2rad(lons))
    y_coords = R * da.cos(da.deg2rad(lats)) * da.sin(da.deg2rad(lons))
    z_coords = R * da.sin(da.deg2rad(lats))

    return da.stack(
        (x_coords.ravel(), y_coords.ravel(), z_coords.ravel()), axis=-1)
Example #25
Source File: test_dask_layers.py From napari with BSD 3-Clause "New" or "Revised" License

def test_dask_optimized_slicing(delayed_dask_stack, monkeypatch):
    """Test that dask_configure reduces compute with dask stacks."""
    # add dask stack to the viewer, making sure to pass multiscale and clims
    v = viewer.ViewerModel()
    dask_stack = delayed_dask_stack['stack']
    v.add_image(dask_stack, multiscale=False, contrast_limits=(0, 1))
    assert delayed_dask_stack['calls'] == 1  # the first stack will be loaded

    # changing the Z plane should never incur calls
    # since the stack has already been loaded (& it is chunked as a 3D array)
    for i in range(3):
        v.dims.set_point(1, i)
    assert delayed_dask_stack['calls'] == 1  # still just the first call

    # changing the timepoint will, of course, incur some compute calls
    v.dims.set_point(0, 1)
    assert delayed_dask_stack['calls'] == 2
    v.dims.set_point(0, 2)
    assert delayed_dask_stack['calls'] == 3

    # but going back to previous timepoints should not, since they are cached
    v.dims.set_point(0, 1)
    v.dims.set_point(0, 0)
    assert delayed_dask_stack['calls'] == 3
    v.dims.set_point(0, 3)
    assert delayed_dask_stack['calls'] == 4
Example #26
Source File: npy.py From intake with BSD 2-Clause "Simplified" License

def _get_schema(self):
    from fsspec import open_files
    import dask.array as da
    if self._arr is None:
        path = self._get_cache(self.path)[0]

        files = open_files(path, 'rb', compression=None, **self.storage)
        if self.shape is None:
            arr = NumpyAccess(files[0])
            self.shape = arr.shape
            self.dtype = arr.dtype
            arrs = [arr] + [NumpyAccess(f, self.shape, self.dtype,
                                        offset=arr.offset)
                            for f in files[1:]]
        else:
            arrs = [NumpyAccess(f, self.shape, self.dtype) for f in files]
        self.chunks = (self._chunks, ) + (-1, ) * (len(self.shape) - 1)
        self._arrs = [da.from_array(arr, self.chunks) for arr in arrs]

        if len(self._arrs) > 1:
            self._arr = da.stack(self._arrs)
        else:
            self._arr = self._arrs[0]
        self.chunks = self._arr.chunks
    return Schema(dtype=str(self.dtype), shape=self.shape,
                  extra_metadata=self.metadata,
                  npartitions=self._arr.npartitions,
                  chunks=self.chunks)
Example #27
Source File: resample.py From satpy with GNU General Public License v3.0

def resample(self, data, **kwargs):
    """Resample `data` by calling `precompute` and `compute` methods.

    Args:
        data (xarray.DataArray): Data to be resampled

    Returns (xarray.DataArray): Data resampled to the target area

    """
    self.precompute(**kwargs)
    attrs = data.attrs.copy()
    data_arr = data.data
    if data.ndim == 3 and data.dims[0] == 'bands':
        dims = ('bands', 'y', 'x')
    # Both one and two dimensional input data results in 2D output
    elif data.ndim in (1, 2):
        dims = ('y', 'x')
    else:
        dims = data.dims
    result = self.compute(data_arr, **kwargs)
    coords = {}
    if 'bands' in data.coords:
        coords['bands'] = data.coords['bands']
    # Fractions are returned in a dict
    elif isinstance(result, dict):
        coords['categories'] = sorted(result.keys())
        dims = ('categories', 'y', 'x')
        new_result = []
        for cat in coords['categories']:
            new_result.append(result[cat])
        result = da.stack(new_result)
    if result.ndim > len(dims):
        result = da.squeeze(result)

    # Adjust some attributes
    if "BucketFraction" in str(self):
        attrs['units'] = ''
        attrs['calibration'] = ''
        attrs['standard_name'] = 'area_fraction'
    elif "BucketCount" in str(self):
        attrs['units'] = ''
        attrs['calibration'] = ''
        attrs['standard_name'] = 'number_of_observations'

    result = xr.DataArray(result, dims=dims, coords=coords, attrs=attrs)

    return result
Example #28
Source File: test_dask_layers.py From napari with BSD 3-Clause "New" or "Revised" License

def test_dask_cache_resizing(delayed_dask_stack):
    """Test that we can spin up, resize, and spin down the cache."""
    # add dask stack to the viewer, making sure to pass multiscale and clims
    utils.dask_cache = None
    v = viewer.ViewerModel()
    dask_stack = delayed_dask_stack['stack']

    # adding a new stack should spin up a cache
    v.add_image(dask_stack, multiscale=False, contrast_limits=(0, 1))
    assert utils.dask_cache.cache.available_bytes > 0
    # make sure the cache actually has been populated
    assert len(utils.dask_cache.cache.heap.heap) > 0

    # we can resize that cache back to 0 bytes
    utils.resize_dask_cache(0)
    assert utils.dask_cache.cache.available_bytes == 0

    # adding a 2nd stack should not adjust the cache size once created
    v.add_image(dask_stack, multiscale=False, contrast_limits=(0, 1))
    assert utils.dask_cache.cache.available_bytes == 0

    # and the cache will remain empty regardless of what we do
    for i in range(3):
        v.dims.set_point(1, i)
    assert len(utils.dask_cache.cache.heap.heap) == 0

    # but we can always spin it up again
    utils.resize_dask_cache(1e4)
    assert utils.dask_cache.cache.available_bytes == 1e4

    # and adding a new image doesn't change the size
    v.add_image(dask_stack, multiscale=False, contrast_limits=(0, 1))
    assert utils.dask_cache.cache.available_bytes == 1e4
    # but the cache heap is getting populated again
    for i in range(3):
        v.dims.set_point(0, i)
    assert len(utils.dask_cache.cache.heap.heap) > 0

    # however, if the dask_cache attribute is deleted entirely (or set to None)
    # we will have no memory of it ever having been created.
    # and adding a new stack will spin up a cache
    del utils.dask_cache
    v.add_image(dask_stack, multiscale=False, contrast_limits=(0, 1))
    assert utils.dask_cache.cache.available_bytes > 0