Python dask.array.from_delayed() Examples
The following are 24 code examples of dask.array.from_delayed(), collected from open-source projects. The project, source file, and license are noted above each example. You may also want to check out all available functions/classes of the module dask.array.
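Every example below follows the same core pattern, so here is a minimal, self-contained sketch of it first. This snippet is illustrative only and is not taken from any of the projects below: wrap a computation in dask.delayed, then pass the delayed object to dask.array.from_delayed along with the shape and dtype the computation will produce, since Dask cannot inspect a lazy result to infer them.

import dask
import dask.array as da
import numpy as np

@dask.delayed
def load_chunk():
    # Stand-in for expensive work such as disk or network I/O;
    # any function that returns an ndarray works here.
    return np.random.rand(10, 10)

# shape and dtype must be declared up front and must match what the
# delayed function actually returns; Dask takes them on trust.
arr = da.from_delayed(load_chunk(), shape=(10, 10), dtype=float)
assert arr.compute().shape == (10, 10)

Many of the examples extend this pattern by creating one such array per chunk or partition and assembling the pieces with da.stack, da.concatenate, or da.block.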
Example #1
Source File: test_dask_layers.py From napari with BSD 3-Clause "New" or "Revised" License
def delayed_dask_stack():
    """A 4D (20, 10, 10, 10) delayed dask array, simulates disk io."""
    # we will return a dict with a 'calls' variable that tracks call count
    output = {'calls': 0}

    # create a delayed version of function that simply generates np.arrays
    # but also counts when it has been called
    @dask.delayed
    def get_array():
        nonlocal output
        output['calls'] += 1
        return np.random.rand(10, 10, 10)

    # then make a mock "timelapse" of 3D stacks
    # see https://napari.org/tutorials/applications/dask.html for details
    _list = [get_array() for fn in range(20)]
    # note: the original used dtype=np.float, an alias of the builtin float
    # that was removed in NumPy 1.24
    output['stack'] = da.stack(
        [da.from_delayed(i, shape=(10, 10, 10), dtype=float) for i in _list]
    )
    assert output['stack'].shape == (20, 10, 10, 10)
    return output
Example #2
Source File: viirs.py From satpy with GNU General Public License v3.0
def __call__(self, datasets, **info):
    """Create the composite by scaling the DNB data using a histogram equalization method.

    :param datasets: 2-element tuple (Day/Night Band data, Solar Zenith Angle data)
    :param **info: Miscellaneous metadata for the newly produced composite
    """
    if len(datasets) != 2:
        raise ValueError("Expected 2 datasets, got %d" % (len(datasets), ))

    dnb_data = datasets[0]
    sza_data = datasets[1]
    delayed = dask.delayed(self._run_dnb_normalization)(dnb_data.data, sza_data.data)
    output_dataset = dnb_data.copy()
    output_data = da.from_delayed(delayed, dnb_data.shape, dnb_data.dtype)
    output_dataset.data = output_data.rechunk(dnb_data.data.chunks)

    info = dnb_data.attrs.copy()
    info.update(self.attrs)
    info["standard_name"] = "equalized_radiance"
    info["mode"] = "L"
    output_dataset.attrs = info
    return output_dataset
Example #3
Source File: eps_l1b.py From satpy with GNU General Public License v3.0
def get_full_lonlats(self):
    """Get the interpolated lons/lats."""
    if self.lons is not None and self.lats is not None:
        return self.lons, self.lats

    raw_lats = np.hstack((self["EARTH_LOCATION_FIRST"][:, [0]],
                          self["EARTH_LOCATIONS"][:, :, 0],
                          self["EARTH_LOCATION_LAST"][:, [0]]))

    raw_lons = np.hstack((self["EARTH_LOCATION_FIRST"][:, [1]],
                          self["EARTH_LOCATIONS"][:, :, 1],
                          self["EARTH_LOCATION_LAST"][:, [1]]))

    self.lons, self.lats = self._get_full_lonlats(raw_lons, raw_lats)
    self.lons = da.from_delayed(self.lons, dtype=self["EARTH_LOCATIONS"].dtype,
                                shape=(self.scanlines, self.pixels))
    self.lats = da.from_delayed(self.lats, dtype=self["EARTH_LOCATIONS"].dtype,
                                shape=(self.scanlines, self.pixels))
    return self.lons, self.lats
Example #4
Source File: xrft.py From xrft with MIT License
def _apply_window(da, dims, window_type='hanning'):
    """Creating windows in dimensions dims."""

    if window_type not in ['hanning']:
        raise NotImplementedError("Only hanning window is supported for now.")

    numpy_win_func = getattr(np, window_type)

    if da.chunks:
        def dask_win_func(n):
            return dsar.from_delayed(
                delayed(numpy_win_func, pure=True)(n),
                (n,), float)
        win_func = dask_win_func
    else:
        win_func = numpy_win_func

    windows = [xr.DataArray(win_func(len(da[d])),
                            dims=da[d].dims, coords=da[d].coords) for d in dims]

    return da * reduce(operator.mul, windows[::-1])
Example #5
Source File: __init__.py From satpy with GNU General Public License v3.0
def three_d_effect(img, **kwargs):
    """Create 3D effect using convolution."""
    w = kwargs.get('weight', 1)
    LOG.debug("Applying 3D effect with weight %.2f", w)
    kernel = np.array([[-w, 0, w],
                       [-w, 1, w],
                       [-w, 0, w]])
    mode = kwargs.get('convolve_mode', 'same')

    def func(band_data, kernel=kernel, mode=mode, index=None):
        del index

        delay = dask.delayed(_three_d_effect_delayed)(band_data, kernel, mode)
        new_data = da.from_delayed(delay, shape=band_data.shape, dtype=band_data.dtype)
        return new_data

    return apply_enhancement(img.data, func, separate=True, pass_dask=True)
Example #6
Source File: wrappers.py From dask-ml with BSD 3-Clause "New" or "Revised" License
def _first_block(dask_object):
    """Extract the first block / partition from a dask object"""
    if isinstance(dask_object, da.Array):
        if dask_object.ndim > 1 and dask_object.numblocks[-1] != 1:
            raise NotImplementedError(
                "IID estimators require that the array "
                "blocked only along the first axis. "
                "Rechunk your array before fitting."
            )
        shape = (dask_object.chunks[0][0],)
        if dask_object.ndim > 1:
            shape = shape + (dask_object.chunks[1][0],)

        return da.from_delayed(
            dask_object.to_delayed().flatten()[0], shape, dask_object.dtype
        )

    if isinstance(dask_object, dd._Frame):
        return dask_object.get_partition(0)
    else:
        return dask_object
Example #7
Source File: __init__.py From satpy with GNU General Public License v3.0
def lookup(img, **kwargs):
    """Assign values to channels based on a table."""
    luts = np.array(kwargs['luts'], dtype=np.float32) / 255.0

    def func(band_data, luts=luts, index=-1):
        # NaN/null values will become 0
        lut = luts[:, index] if len(luts.shape) == 2 else luts
        band_data = band_data.clip(0, lut.size - 1).astype(np.uint8)

        new_delay = dask.delayed(_lookup_delayed)(lut, band_data)
        new_data = da.from_delayed(new_delay, shape=band_data.shape,
                                   dtype=luts.dtype)
        return new_data

    return apply_enhancement(img.data, func, separate=True, pass_dask=True)
Example #8
Source File: naive_bayes.py From dask-ml with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y=None):
    if self.classes is None:
        # TODO: delayed
        (self.classes_,) = dask.compute(np.unique(y))
    thetas = []
    sigmas = []
    counts = []
    N, P = X.shape
    K = len(self.classes_)

    for i, c in enumerate(self.classes_):
        X_c = X[y == c]
        thetas.append(X_c.mean(axis=0))
        sigmas.append(X_c.var(axis=0))
        counts.append(delayed(len)(X_c))

    thetas = da.from_delayed(delayed(np.array)(thetas), (K, P), np.float64)
    sigmas = da.from_delayed(delayed(np.array)(sigmas), (K, P), np.float64)
    counts = da.from_delayed(
        delayed(np.array)(counts, np.float64), (P,), np.float64
    )
    priors = counts / N

    # Should these be explicitly cached on self?
    self.theta_ = thetas
    self.sigma_ = sigmas
    self.class_count_ = counts
    self.class_prior_ = priors
    return self
Example #9
Source File: data.py From dask-ml with BSD 3-Clause "New" or "Revised" License
def fit(
    self,
    X: Union[ArrayLike, DataFrameType],
    y: Optional[Union[ArrayLike, SeriesType]] = None,
) -> "RobustScaler":
    q_min, q_max = self.quantile_range
    if not 0 <= q_min <= q_max <= 100:
        raise ValueError("Invalid quantile range: %s" % str(self.quantile_range))

    if isinstance(X, dd.DataFrame):
        n_columns = len(X.columns)
        partition_lengths = X.map_partitions(len).compute()
        dtype = np.find_common_type(X.dtypes, [])
        blocks = X.to_delayed()
        X = da.vstack(
            [
                da.from_delayed(
                    block.values, shape=(length, n_columns), dtype=dtype
                )
                for block, length in zip(blocks, partition_lengths)
            ]
        )

    quantiles: Any = [da.percentile(col, [q_min, 50.0, q_max]) for col in X.T]
    quantiles = da.vstack(quantiles).compute()
    self.center_: List[float] = quantiles[:, 1]
    self.scale_: List[float] = quantiles[:, 2] - quantiles[:, 0]
    self.scale_ = _handle_zeros_in_scale(self.scale_, copy=False)
    self.n_features_in_ = X.shape[1]
    return self
Example #10
Source File: incremental_pca.py From dask-ml with BSD 3-Clause "New" or "Revised" License
def svd_flip(u, v):
    """
    This is a replicate of svd_flip() which calls svd_flip_fixed()
    instead of skm.svd_flip()
    """
    u2, v2 = delayed(_svd_flip_copy, nout=2)(u, v, False)
    u = da.from_delayed(u2, shape=u.shape, dtype=u.dtype)
    v = da.from_delayed(v2, shape=v.shape, dtype=v.dtype)
    return u, v
Example #11
Source File: _split.py From dask-ml with BSD 3-Clause "New" or "Revised" License
def _blockwise_slice(arr, idx):
    """Slice an array that is blockwise-aligned with idx.

    Parameters
    ----------
    arr : Dask array
    idx : Dask array
        Should have the following properties

        * Same blocks as `arr` along the first dimension
        * Contains only integers
        * Each block's values should be between ``[0, len(block))``

    Returns
    -------
    sliced : dask.Array
    """
    objs = []
    offsets = np.hstack([0, np.cumsum(arr.chunks[0])[:-1]])

    for i, (x, idx2) in enumerate(
        zip(arr.to_delayed().ravel(), idx.to_delayed().ravel())
    ):
        idx3 = idx2 - offsets[i]
        objs.append(x[idx3])

    shapes = idx.chunks[0]
    if arr.ndim == 2:
        P = arr.shape[1]
        shapes = [(x, P) for x in shapes]
    else:
        shapes = [(x,) for x in shapes]

    sliced = da.concatenate(
        [
            da.from_delayed(x, shape=shape, dtype=arr.dtype)
            for x, shape in zip(objs, shapes)
        ]
    )
    return sliced
Example #12
Source File: _split.py From dask-ml with BSD 3-Clause "New" or "Revised" License
def _split_blockwise(self, X, seeds):
    chunks = X.chunks[0]
    train_pct, test_pct = _maybe_normalize_split_sizes(
        self.train_size, self.test_size
    )
    sizes = [_validate_shuffle_split(c, test_pct, train_pct) for c in chunks]

    objs = [
        dask.delayed(_generate_idx, nout=2)(chunksize, seed, n_train, n_test)
        for chunksize, seed, (n_train, n_test) in zip(chunks, seeds, sizes)
    ]

    train_objs, test_objs = zip(*objs)
    offsets = np.hstack([0, np.cumsum(chunks)])
    train_idx = da.concatenate(
        [
            da.from_delayed(x + offset, (train_size,), np.dtype("int"))
            for x, chunksize, (train_size, _), offset in zip(
                train_objs, chunks, sizes, offsets
            )
        ]
    )

    test_idx = da.concatenate(
        [
            da.from_delayed(x + offset, (test_size,), np.dtype("int"))
            for x, chunksize, (_, test_size), offset in zip(
                test_objs, chunks, sizes, offsets
            )
        ]
    )

    return train_idx, test_idx
Example #13
Source File: utils.py From dask-ml with BSD 3-Clause "New" or "Revised" License
def svd_flip(u, v):
    u2, v2 = delayed(_svd_flip_copy, nout=2)(u, v)
    u = da.from_delayed(u2, shape=u.shape, dtype=u.dtype)
    v = da.from_delayed(v2, shape=v.shape, dtype=v.dtype)
    return u, v
Example #14
Source File: text.py From dask-ml with BSD 3-Clause "New" or "Revised" License
def transform(self, raw_X):
    msg = "'X' should be a 1-dimensional array with length 'num_samples'."

    if not dask.is_dask_collection(raw_X):
        return self._hasher(**self.get_params()).transform(raw_X)

    if isinstance(raw_X, db.Bag):
        bag2 = raw_X.map_partitions(self._transformer)
        objs = bag2.to_delayed()
        arrs = [
            da.from_delayed(obj, (np.nan, self.n_features), self.dtype)
            for obj in objs
        ]
        result = da.concatenate(arrs, axis=0)
    elif isinstance(raw_X, dd.Series):
        result = raw_X.map_partitions(self._transformer)
    elif isinstance(raw_X, da.Array):
        # dask.Array
        chunks = ((np.nan,) * raw_X.numblocks[0], (self.n_features,))
        if raw_X.ndim == 1:
            result = raw_X.map_blocks(
                self._transformer, dtype="f8", chunks=chunks, new_axis=1
            )
        else:
            raise ValueError(msg)
    else:
        raise ValueError(msg)

    meta = scipy.sparse.eye(0, format="csr")
    result._meta = meta
    return result
Example #15
Source File: pairwise.py From dask-ml with BSD 3-Clause "New" or "Revised" License
def pairwise_distances_argmin_min(
    X: ArrayLike,
    Y: ArrayLike,
    axis: int = 1,
    metric: Union[str, Callable[[ArrayLike, ArrayLike], float]] = "euclidean",
    batch_size: Optional[int] = None,
    metric_kwargs: Optional[Dict[str, Any]] = None,
):
    if batch_size is not None:
        msg = "'batch_size' is deprecated. Use sklearn.config_context instead."
        warnings.warn(msg, FutureWarning)
    XD = X.to_delayed().flatten().tolist()
    func = delayed(metrics.pairwise_distances_argmin_min, pure=True, nout=2)
    blocks = [func(x, Y, metric=metric, metric_kwargs=metric_kwargs) for x in XD]
    argmins, mins = zip(*blocks)

    argmins = [
        da.from_delayed(block, (chunksize,), np.int64)
        for block, chunksize in zip(argmins, X.chunks[0])
    ]
    # Scikit-learn seems to always use float64
    mins = [
        da.from_delayed(block, (chunksize,), "f8")
        for block, chunksize in zip(mins, X.chunks[0])
    ]
    argmins = da.concatenate(argmins)
    mins = da.concatenate(mins)

    return argmins, mins
Example #16
Source File: msi_safe.py From satpy with GNU General Public License v3.0
def get_dataset(self, key, info):
    """Load a dataset."""
    if self._channel != key.name:
        return

    logger.debug('Reading %s.', key.name)
    # FIXME: get this from MTD_MSIL1C.xml
    quantification_value = 10000.
    jp2 = glymur.Jp2k(self.filename)
    bitdepth = 0
    for seg in jp2.codestream.segment:
        try:
            bitdepth = max(bitdepth, seg.bitdepth[0])
        except AttributeError:
            pass

    jp2.dtype = (np.uint8 if bitdepth <= 8 else np.uint16)

    # Initialize the jp2 reader / doesn't work in a multi-threaded context.
    # jp2[0, 0]
    # data = da.from_array(jp2, chunks=CHUNK_SIZE) / quantification_value * 100

    data = da.from_delayed(delayed(jp2.read)(), jp2.shape, jp2.dtype)
    data = data.rechunk(CHUNK_SIZE) / quantification_value * 100

    proj = DataArray(data, dims=['y', 'x'])
    proj.attrs = info.copy()
    proj.attrs['units'] = '%'
    proj.attrs['platform_name'] = self.platform_name
    return proj
Example #17
Source File: aapp_l1b.py From satpy with GNU General Public License v3.0
def navigate(self):
    """Get the longitudes and latitudes of the scene."""
    lons40km = self._data["pos"][:, :, 1] * 1e-4
    lats40km = self._data["pos"][:, :, 0] * 1e-4

    try:
        from geotiepoints import SatelliteInterpolator
    except ImportError:
        logger.warning("Could not interpolate lon/lats, "
                       "python-geotiepoints missing.")
        self.lons, self.lats = lons40km, lats40km
    else:
        cols40km = np.arange(24, 2048, 40)
        cols1km = np.arange(2048)
        lines = lons40km.shape[0]
        rows40km = np.arange(lines)
        rows1km = np.arange(lines)

        along_track_order = 1
        cross_track_order = 3

        satint = SatelliteInterpolator((lons40km, lats40km),
                                       (rows40km, cols40km),
                                       (rows1km, cols1km),
                                       along_track_order,
                                       cross_track_order)
        self.lons, self.lats = delayed(satint.interpolate, nout=2)()
        self.lons = da.from_delayed(self.lons, (lines, 2048), lons40km.dtype)
        self.lats = da.from_delayed(self.lats, (lines, 2048), lats40km.dtype)
Example #18
Source File: aapp_l1b.py From satpy with GNU General Public License v3.0
def get_angles(self, angle_id):
    """Get sun-satellite viewing angles."""
    sunz40km = self._data["ang"][:, :, 0] * 1e-2
    satz40km = self._data["ang"][:, :, 1] * 1e-2
    azidiff40km = self._data["ang"][:, :, 2] * 1e-2

    try:
        from geotiepoints.interpolator import Interpolator
    except ImportError:
        logger.warning("Could not interpolate sun-sat angles, "
                       "python-geotiepoints missing.")
        self.sunz, self.satz, self.azidiff = sunz40km, satz40km, azidiff40km
    else:
        cols40km = np.arange(24, 2048, 40)
        cols1km = np.arange(2048)
        lines = sunz40km.shape[0]
        rows40km = np.arange(lines)
        rows1km = np.arange(lines)

        along_track_order = 1
        cross_track_order = 3

        satint = Interpolator([sunz40km, satz40km, azidiff40km],
                              (rows40km, cols40km),
                              (rows1km, cols1km),
                              along_track_order,
                              cross_track_order)
        self.sunz, self.satz, self.azidiff = delayed(satint.interpolate, nout=3)()
        self.sunz = da.from_delayed(self.sunz, (lines, 2048), sunz40km.dtype)
        self.satz = da.from_delayed(self.satz, (lines, 2048), satz40km.dtype)
        self.azidiff = da.from_delayed(self.azidiff, (lines, 2048), azidiff40km.dtype)

    return create_xarray(getattr(self, ANGLES[angle_id]))
Example #19
Source File: __init__.py From pyresample with GNU Lesser General Public License v3.0
def parallel_gradient_search(data, src_x, src_y, dst_x, dst_y,
                             src_gradient_xl, src_gradient_xp,
                             src_gradient_yl, src_gradient_yp,
                             dst_mosaic_locations, dst_slices,
                             **kwargs):
    """Run gradient search in parallel in input area coordinates."""
    method = kwargs.get('method', 'bilinear')
    # Determine the number of bands
    bands = np.array([arr.shape[0] for arr in data if arr is not None])
    num_bands = np.max(bands)
    if np.any(bands != num_bands):
        raise ValueError("All source data chunks have to have the same number of bands")
    chunks = {}
    is_pad = False
    # Collect co-located target chunks
    for i, arr in enumerate(data):
        if arr is None:
            is_pad = True
            res = da.full((num_bands, dst_slices[i][1] - dst_slices[i][0],
                           dst_slices[i][3] - dst_slices[i][2]), np.nan)
        else:
            is_pad = False
            res = dask.delayed(_gradient_resample_data)(
                arr.astype(np.float64),
                src_x[i], src_y[i],
                src_gradient_xl[i], src_gradient_xp[i],
                src_gradient_yl[i], src_gradient_yp[i],
                dst_x[i], dst_y[i],
                method=method)
            res = da.from_delayed(res, (num_bands, ) + dst_x[i].shape,
                                  dtype=np.float64)
        if dst_mosaic_locations[i] in chunks:
            if not is_pad:
                chunks[dst_mosaic_locations[i]].append(res)
        else:
            chunks[dst_mosaic_locations[i]] = [res, ]

    return _concatenate_chunks(chunks)
Example #20
Source File: _bed_read.py From pandas-plink with MIT License
def read_bed(filepath, nrows, ncols):
    from dask.array import concatenate, from_delayed
    from dask.delayed import delayed
    from numpy import float64  # imported at module level in the original file

    chunk_size = 1024

    row_start = 0
    col_xs = []
    while row_start < nrows:
        row_end = min(row_start + chunk_size, nrows)
        col_start = 0
        row_xs = []
        while col_start < ncols:
            col_end = min(col_start + chunk_size, ncols)

            x = delayed(_read_bed_chunk)(
                filepath, nrows, ncols, row_start, row_end, col_start, col_end
            )

            shape = (row_end - row_start, col_end - col_start)
            row_xs += [from_delayed(x, shape, float64)]
            col_start = col_end
        col_xs += [concatenate(row_xs, axis=1)]
        row_start = row_end
    X = concatenate(col_xs, axis=0)
    return X
Example #21
Source File: fixtures.py From xhistogram with MIT License
def empty_dask_array(shape, dtype=float, chunks=None):
    # a dask array that errors if you try to compute it
    def raise_if_computed():
        raise ValueError('Triggered forbidden computation')

    a = dsa.from_delayed(dask.delayed(raise_if_computed)(), shape, dtype)

    if chunks is not None:
        a = a.rechunk(chunks)

    return a
Example #22
Source File: default_reader.py From aicsimageio with BSD 3-Clause "New" or "Revised" License
def _read_delayed(self) -> da.core.Array:
    with imageio.get_reader(self._file) as reader:
        # Store length as it is used a bunch
        image_length = reader.get_length()

        # Handle single image formats like png, jpeg, etc
        if image_length == 1:
            return da.from_array(self._get_data(self._file, 0))

        # Handle many image formats like gif, mp4, etc
        elif image_length > 1:
            # Get a sample image
            sample = self._get_data(self._file, 0)

            # Create operating shape for the final dask array by prepending
            # image length to a tuple of ones that is the same length as
            # the sample shape
            operating_shape = (image_length,) + ((1,) * len(sample.shape))

            # Create numpy array of empty arrays for delayed get data
            # functions
            lazy_arrays = np.ndarray(operating_shape, dtype=object)
            for indicies, _ in np.ndenumerate(lazy_arrays):
                lazy_arrays[indicies] = da.from_delayed(
                    delayed(self._get_data)(self._file, indicies[0]),
                    shape=sample.shape,
                    dtype=sample.dtype,
                )

            # Block them into a single dask array
            return da.block(lazy_arrays.tolist())

        # Catch all other image types as unsupported
        # https://imageio.readthedocs.io/en/stable/userapi.html#imageio.core.format.Reader.get_length
        else:
            raise exceptions.UnsupportedFileFormatError(self._file)
Example #23
Source File: resample.py From satpy with GNU General Public License v3.0
def compute(self, data, cache_id=None, fill_value=0, weight_count=10000,
            weight_min=0.01, weight_distance_max=1.0,
            weight_delta_max=1.0, weight_sum_min=-1.0,
            maximum_weight_mode=False, grid_coverage=0, **kwargs):
    """Resample the data according to the precomputed X/Y coordinates."""
    rows = self.cache["rows"]
    cols = self.cache["cols"]

    # if the data is scan based then check its metadata or the passed
    # kwargs otherwise assume the entire input swath is one large
    # "scanline"
    rows_per_scan = kwargs.get('rows_per_scan',
                               data.attrs.get("rows_per_scan", data.shape[0]))

    if data.ndim == 3 and 'bands' in data.dims:
        data_in = tuple(data.sel(bands=band).data for band in data['bands'])
    elif data.ndim == 2:
        data_in = data.data
    else:
        raise ValueError("Unsupported data shape for EWA resampling.")

    res = dask.delayed(self._call_fornav)(
        cols, rows, self.target_geo_def, data_in,
        grid_coverage=grid_coverage,
        rows_per_scan=rows_per_scan, weight_count=weight_count,
        weight_min=weight_min, weight_distance_max=weight_distance_max,
        weight_delta_max=weight_delta_max, weight_sum_min=weight_sum_min,
        maximum_weight_mode=maximum_weight_mode)
    if isinstance(data_in, tuple):
        new_shape = (len(data_in),) + self.target_geo_def.shape
    else:
        new_shape = self.target_geo_def.shape
    data_arr = da.from_delayed(res, new_shape, data.dtype)
    # from delayed creates one large chunk, break it up a bit if we can
    data_arr = data_arr.rechunk([CHUNK_SIZE] * data_arr.ndim)
    if data.ndim == 3 and data.dims[0] == 'bands':
        dims = ('bands', 'y', 'x')
    elif data.ndim == 2:
        dims = ('y', 'x')
    else:
        dims = data.dims

    res = xr.DataArray(data_arr, dims=dims, attrs=data.attrs.copy())
    return update_resampled_coords(data, res, self.target_geo_def)
Example #24
Source File: tiff_reader.py From aicsimageio with BSD 3-Clause "New" or "Revised" License
def _read_delayed(self) -> da.core.Array:
    # Load Tiff
    with TiffFile(self._file) as tiff:
        # Check each scene has the same shape
        # If scene shape checking fails, use the specified scene and update
        # operating shape
        scenes = tiff.series
        operating_shape = scenes[0].shape
        if not self._scene_shape_is_consistent(tiff, S=self.specific_s_index):
            operating_shape = scenes[self.specific_s_index].shape
            scenes = [scenes[self.specific_s_index]]

        # Get sample yx plane
        sample = scenes[0].pages[0].asarray()

        # Combine length of scenes and operating shape
        # Replace YX dims with empty dimensions
        operating_shape = (len(scenes), *operating_shape)
        operating_shape = operating_shape[:-2] + (1, 1)

        # Make ndarray for lazy arrays to fill
        lazy_arrays = np.ndarray(operating_shape, dtype=object)
        for all_page_index, (np_index, _) in enumerate(np.ndenumerate(lazy_arrays)):
            # Scene index is the first index in np_index
            scene_index = np_index[0]

            # This page index is current enumeration divided by scene index + 1
            # For example if the image has 10 Z slices and 5 scenes, there
            # would be 50 total pages
            this_page_index = all_page_index // (scene_index + 1)

            # Fill the numpy array with the delayed arrays
            lazy_arrays[np_index] = da.from_delayed(
                delayed(TiffReader._imread)(
                    self._file, scene_index, this_page_index
                ),
                shape=sample.shape,
                dtype=sample.dtype,
            )

        # Convert the numpy array of lazy readers into a dask array
        data = da.block(lazy_arrays.tolist())

        # Only return the scene dimension if multiple scenes are present
        if len(scenes) == 1:
            data = data[0, :]

        return data