Python dask.array.Array() Examples
The following are 30
code examples of dask.array.Array().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module dask.array,
or try the search function.
Example #1
Source File: From satpy with GNU General Public License v3.0 | 6 votes |
def interpolate_xarray(xpoints, ypoints, values, shape, kind='cubic',
                       blocksize=CHUNK_SIZE):
    """Build a chunked dask array whose blocks are interpolated on demand.

    A single ``interp2d`` interpolator is created from ``xpoints``,
    ``ypoints`` and ``values``.  The output grid of size ``shape`` is cut
    into ``blocksize`` x ``blocksize`` tiles (edge tiles are clipped to
    ``shape``), and each tile becomes one task calling
    ``interpolate_slice`` with its row slice, its column slice and the
    shared interpolator.

    Returns a ``DataArray`` with dims ``('y', 'x')`` wrapping the dask
    array, whose dtype is ``values.dtype``.
    """
    n_rows, n_cols = shape[0], shape[1]
    row_starts = range(0, n_rows, blocksize)
    col_starts = range(0, n_cols, blocksize)

    # The token makes the graph name depend on every input.
    token = tokenize(blocksize, xpoints, ypoints, values, kind, shape)
    name = 'interpolate-' + token

    from scipy.interpolate import interp2d
    interpolator = interp2d(xpoints, ypoints, values, kind=kind)

    graph = {}
    for i, row0 in enumerate(row_starts):
        row_slice = slice(row0, min(row0 + blocksize, n_rows))
        for j, col0 in enumerate(col_starts):
            col_slice = slice(col0, min(col0 + blocksize, n_cols))
            graph[(name, i, j)] = (interpolate_slice, row_slice, col_slice,
                                   interpolator)

    res = da.Array(graph, name, shape=list(shape),
                   chunks=(blocksize, blocksize), dtype=values.dtype)
    return DataArray(res, dims=('y', 'x'))
Example #2
Source File: From xgcm with MIT License | 6 votes |
def _dask_or_eager_func(name, eager_module=np, list_of_args=False,
                        n_array_args=1):
    """Return a wrapper around ``name`` that chooses its module per call.

    When ``has_dask`` is true the wrapper inspects the first
    ``n_array_args`` dispatch arguments (the elements of ``args[0]`` if
    ``list_of_args`` is set, otherwise ``args`` itself).  If any of them
    is a ``dsa.Array`` the function is looked up on ``dsa``; otherwise on
    ``eager_module``.  When ``has_dask`` is false the wrapper always uses
    ``eager_module`` and requires at least one positional argument.
    """
    if not has_dask:
        def f(data, *args, **kwargs):
            return getattr(eager_module, name)(data, *args, **kwargs)
        return f

    def f(*args, **kwargs):
        candidates = args[0] if list_of_args else args
        uses_dask = any(
            isinstance(arg, dsa.Array) for arg in candidates[:n_array_args]
        )
        module = dsa if uses_dask else eager_module
        return getattr(module, name)(*args, **kwargs)

    return f
Example #3
Source File: From nbodykit with GNU General Public License v3.0 | 6 votes |
def StackColumns(*cols):
    """
    Combine the input arrays into one array with one column per input.

    The inputs are first broadcast against each other with
    :func:`dask.array.broadcast_arrays`, then stacked with
    :func:`dask.array.vstack`, and the stack is transposed.

    Parameters
    ----------
    *cols : :class:`dask.array.Array`
        the arrays that become the columns of the result

    Returns
    -------
    :class:`dask.array.Array` :
        the transpose of the vertical stack of the broadcast inputs

    Notes
    -----
    This function performs no explicit type check itself; any error for
    unsuitable inputs comes from the dask calls it makes.
    """
    broadcast = da.broadcast_arrays(*cols)
    stacked = da.vstack(broadcast)
    return stacked.T
Example #4
Source File: From xhistogram with MIT License | 6 votes |
def _dask_or_eager_func(name, eager_module=np, list_of_args=False,
                        n_array_args=1):
    """Build a dispatcher for ``name`` that prefers dask for dask inputs.

    With ``has_dask`` true, the returned function looks at the first
    ``n_array_args`` entries of its dispatch arguments (``args[0]`` when
    ``list_of_args`` is set, else ``args``) and resolves ``name`` on
    ``dsa`` if any entry is a ``dsa.Array``, falling back to
    ``eager_module`` otherwise.  With ``has_dask`` false, ``name`` is
    always resolved on ``eager_module``.
    """
    if has_dask:
        def f(*args, **kwargs):
            to_inspect = args[0] if list_of_args else args
            module = eager_module
            for candidate in to_inspect[:n_array_args]:
                if isinstance(candidate, dsa.Array):
                    module = dsa
                    break
            return getattr(module, name)(*args, **kwargs)
    else:
        def f(data, *args, **kwargs):
            eager_func = getattr(eager_module, name)
            return eager_func(data, *args, **kwargs)
    return f
Example #5
Source File: From gbdxtools with MIT License | 6 votes |
def __new__(cls, dm, **kwargs):
    """Create the array from a dask array, a dict, or a DaskMeta-like object.

    ``dm`` is normalised to a ``DaskMeta``:

    * a ``da.Array`` goes through ``DaskMeta.from_darray``;
    * a ``dict`` is expanded as keyword arguments to ``DaskMeta``;
    * a ``DaskMeta`` is used as is;
    * an object whose class is named "Op", "GraphMeta", "TmsMeta" or
      "TemplateMeta" has its fields copied via ``DaskMeta._make``.

    Anything else raises ``ValueError``.  ``__geo_transform__`` and
    ``__geo_interface__`` are copied from ``kwargs`` onto the new
    instance when present.
    """
    if isinstance(dm, da.Array):
        dm = DaskMeta.from_darray(dm)
    elif isinstance(dm, dict):
        dm = DaskMeta(**dm)
    elif isinstance(dm, DaskMeta):
        pass
    elif dm.__class__.__name__ in ("Op", "GraphMeta", "TmsMeta", "TemplateMeta"):
        # NOTE(review): the second field was missing in the source
        # (`dm.dask,, dm.chunks`); `dm.name` is restored here on the
        # assumption that it fills the `name` slot of da.Array — confirm.
        itr = [dm.dask, dm.name, dm.chunks, dm.dtype, dm.shape]
        dm = DaskMeta._make(itr)
    else:
        raise ValueError("{} must be initialized with a DaskMeta, a dask array, or a dict with DaskMeta fields".format(cls.__name__))
    # NOTE(review): `dm.name` restored here as well, for the same reason.
    self = da.Array.__new__(cls, dm.dask, dm.name, dm.chunks,
                            dtype=dm.dtype, shape=dm.shape)
    if "__geo_transform__" in kwargs:
        self.__geo_transform__ = kwargs["__geo_transform__"]
    if "__geo_interface__" in kwargs:
        self.__geo_interface__ = kwargs["__geo_interface__"]
    return self
Example #6
Source File: From gbdxtools with MIT License | 6 votes |
def _build_image_layer(self, image, image_bounds, cmap='viridis'): if image is not None: if isinstance(image, da.Array): if len(image.shape) == 2 or \ (image.shape[0] == 1 and len(image.shape) == 3): arr = image.compute() else: arr = image.rgb() coords = box(*image.bounds) else: assert image_bounds is not None, "Must pass image_bounds with ndarray images" arr = image coords = box(*image_bounds) b64 = self._encode_image(arr, cmap) return ImageLayer(b64, self._polygon_coords(coords)) else: return 'false';
Example #7
Source File: From psyplot with GNU General Public License v2.0 | 6 votes |
def _load_GeoTransform(self):
    """Build the latitude and longitude variables from the geotransform.

    The coefficients come from ``self.ds.GetGeoTransform()``.  Longitude
    is ``arange(RasterXSize) * b[1] + b[0]`` and latitude is
    ``arange(RasterYSize) * b[5] + b[3]``.  With ``with_dask`` set, both
    are wrapped as single-chunk dask arrays that evaluate lazily;
    otherwise they are computed immediately.

    Returns a ``(lat, lon)`` pair of ``Variable`` objects.
    """
    ds = self.ds
    geotransform = self.ds.GetGeoTransform()  # bbox, interval

    def load_lon():
        return arange(ds.RasterXSize)*geotransform[1]+geotransform[0]

    def load_lat():
        return arange(ds.RasterYSize)*geotransform[5]+geotransform[3]

    if not with_dask:
        lat = load_lat()
        lon = load_lon()
    else:
        n_lat = self.ds.RasterYSize
        lat = Array({('lat', 0): (load_lat,)}, 'lat', (n_lat,),
                    shape=(n_lat,), dtype=float)
        n_lon = self.ds.RasterXSize
        lon = Array({('lon', 0): (load_lon,)}, 'lon', (n_lon,),
                    shape=(n_lon,), dtype=float)
    return Variable(('lat',), lat), Variable(('lon',), lon)
Example #8
Source File: From pyresample with GNU Lesser General Public License v3.0 | 6 votes |
def test_nearest_swath_1d_mask_to_grid_1n(self):
    """Test 1D swath definition to 2D grid definition; 1 neighbor."""
    from pyresample.kd_tree import XArrayResamplerNN
    import xarray as xr
    import dask.array as da
    resampler = XArrayResamplerNN(self.tswath_1d, self.tgrid,
                                  radius_of_influence=100000,
                                  neighbours=1)
    data = self.tdata_1d
    ninfo = resampler.get_neighbour_info(mask=data.isnull())
    for val in ninfo[:3]:
        # vii, ia, voi
        self.assertIsInstance(val, da.Array)
    res = resampler.get_sample_from_neighbour_info(data)
    self.assertIsInstance(res, xr.DataArray)
    # NOTE(review): the first argument was missing in the source;
    # `res.data` (the array backing the DataArray) is restored — confirm.
    self.assertIsInstance(res.data, da.Array)
    actual = res.values
    expected = np.array([
        [1., 2., 2.],
        [1., 2., 2.],
        [1., np.nan, 2.],
        [1., 2., 2.],
    ])
    np.testing.assert_allclose(actual, expected)
Example #9
Source File: From pyresample with GNU Lesser General Public License v3.0 | 6 votes |
def test_nearest_swath_2d_mask_to_area_1n(self):
    """Test 2D swath definition to 2D area definition; 1 neighbor."""
    from pyresample.kd_tree import XArrayResamplerNN
    import xarray as xr
    import dask.array as da
    swath_def = self.swath_def_2d
    data = self.data_2d
    resampler = XArrayResamplerNN(swath_def, self.area_def,
                                  radius_of_influence=50000,
                                  neighbours=1)
    ninfo = resampler.get_neighbour_info(mask=data.isnull())
    for val in ninfo[:3]:
        # vii, ia, voi
        self.assertIsInstance(val, da.Array)
    res = resampler.get_sample_from_neighbour_info(data)
    self.assertIsInstance(res, xr.DataArray)
    # NOTE(review): the first argument was missing in the source;
    # `res.data` (the array backing the DataArray) is restored — confirm.
    self.assertIsInstance(res.data, da.Array)
    res = res.values
    cross_sum = np.nansum(res)
    expected = 15874591.0
    self.assertEqual(cross_sum, expected)
Example #10
Source File: From pyresample with GNU Lesser General Public License v3.0 | 6 votes |
def test_nearest_swath_1d_mask_to_grid_8n(self):
    """Test 1D swath definition to 2D grid definition; 8 neighbors."""
    from pyresample.kd_tree import XArrayResamplerNN
    import xarray as xr
    import dask.array as da
    resampler = XArrayResamplerNN(self.tswath_1d, self.tgrid,
                                  radius_of_influence=100000,
                                  neighbours=8)
    data = self.tdata_1d
    ninfo = resampler.get_neighbour_info(mask=data.isnull())
    for val in ninfo[:3]:
        # vii, ia, voi
        self.assertIsInstance(val, da.Array)
    res = resampler.get_sample_from_neighbour_info(data)
    self.assertIsInstance(res, xr.DataArray)
    # NOTE(review): the first argument was missing in the source;
    # `res.data` (the array backing the DataArray) is restored — confirm.
    self.assertIsInstance(res.data, da.Array)
    # The value comparison is still disabled because no expected array
    # has been defined yet:
    # actual = res.values
    # expected = TODO
    # np.testing.assert_allclose(actual, expected)
Example #11
Source File: From xmitgcm with MIT License | 6 votes |
def _dask_or_eager_func(name, eager_module=np, list_of_args=False,
                        n_array_args=1):
    """Create a function named ``name`` that routes dask inputs to dask.

    If ``has_dask`` is true, each call checks the leading
    ``n_array_args`` dispatch arguments (taken from ``args[0]`` when
    ``list_of_args`` is true, from ``args`` otherwise); a ``dsa.Array``
    among them selects the ``dsa`` implementation, anything else the
    ``eager_module`` one.  If ``has_dask`` is false the ``eager_module``
    implementation is always used.
    """
    def _dispatching(*args, **kwargs):
        heads = (args[0] if list_of_args else args)[:n_array_args]
        found_dask = any(isinstance(head, dsa.Array) for head in heads)
        chosen = dsa if found_dask else eager_module
        return getattr(chosen, name)(*args, **kwargs)

    def _eager_only(data, *args, **kwargs):
        return getattr(eager_module, name)(data, *args, **kwargs)

    f = _dispatching if has_dask else _eager_only
    return f
Example #12
Source File: From xmitgcm with MIT License | 6 votes |
def _dask_array_vgrid(self, varname, klevels, k_chunksize):
    """Return a single-chunk dask array for a 1D vertical grid variable.

    The graph holds one task that calls ``_get_1d_chunk``; the array has
    length ``len(klevels)`` and dtype ``self.dtype``.  ``k_chunksize`` is
    accepted but not used in this body.

    NOTE(review): the source of this method was truncated in three
    places (the second ``tokenize`` argument, both operands of the
    ``nz`` conditional, and the second task element).  The restored
    expressions below are assumptions and must be checked against the
    upstream project.
    """
    # single chunk for 1D variables
    chunks = ((len(klevels),),)

    # manually build dask graph
    dsk = {}
    # NOTE(review): `self.store` is assumed for the missing argument.
    token = tokenize(varname, self.store)
    name = '-'.join([varname, token])

    # NOTE(review): `self.nz` / `self.nz + 1` are assumed; presumably
    # 'k_p1' variables have one more level than the others — confirm.
    if _VAR_METADATA[varname]['dims'] != ['k_p1']:
        nz = self.nz
    else:
        nz = self.nz + 1
    # NOTE(review): `self.store` is assumed for the missing element.
    task = (_get_1d_chunk, self.store, varname,
            list(klevels), nz, self.dtype)

    key = name, 0
    dsk[key] = task

    return dsa.Array(dsk, name, chunks, self.dtype)
Example #13
Source File: From cooltools with MIT License | 6 votes |
def compute_scaling(df, region1, region2=None, dmin=int(1e1), dmax=int(1e7), n_bins=50):
    """Histogram the pos2 - pos1 separations of rows inside two regions.

    Rows are kept when ``pos1`` lies in ``[region1[0], region1[1])`` and
    ``pos2`` lies in ``[region2[0], region2[1])``; ``region2`` defaults
    to ``region1``.  Separations in ``[dmin, dmax)`` are binned on the
    edges from ``numutils.logbins(dmin, dmax, N=n_bins)``, using the
    dask histogram when the separations are a dask array and the NumPy
    one otherwise.

    Returns ``(distbins, obs, areas)`` where ``areas`` is the result of
    ``contact_areas(distbins, region1, region2)``.
    """
    import dask.array as da

    if region2 is None:
        region2 = region1

    distbins = numutils.logbins(dmin, dmax, N=n_bins)
    areas = contact_areas(distbins, region1, region2)

    in_region1 = (df["pos1"] >= region1[0]) & (df["pos1"] < region1[1])
    in_region2 = (df["pos2"] >= region2[0]) & (df["pos2"] < region2[1])
    df = df[in_region1 & in_region2]

    dists = (df["pos2"] - df["pos1"]).values
    histogram = da.histogram if isinstance(dists, da.Array) else np.histogram
    in_range = (dists >= dmin) & (dists < dmax)
    obs, _ = histogram(dists[in_range], bins=distbins)
    return distbins, obs, areas
Example #14
Source File: From scanpy with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_write_zarr(self, adata, adata_dist):
    """Round-trip a log1p-transformed distributed X through a zarr store.

    The metadata is written from the regular ``adata``; the ``X`` array
    is then written from ``adata_dist``.  The store is read back and its
    ``X`` must match ``adata.X`` after ``log1p`` is applied to ``adata``.
    """
    import dask.array as da
    import zarr

    log1p(adata_dist)
    temp_store = zarr.TempStore()

    chunks = adata_dist.X.chunks
    if isinstance(chunks[0], tuple):
        # Per-block chunk tuples: take the first block size of axis 0 and
        # append the axis-1 tuple.
        chunks = (chunks[0][0],) + chunks[1]

    # write metadata using regular anndata
    adata.write_zarr(temp_store, chunks)

    if not isinstance(adata_dist.X, da.Array):
        adata_dist.X.to_zarr(temp_store.dir_path("X"), chunks)
    else:
        adata_dist.X.to_zarr(temp_store.dir_path("X"), overwrite=True)

    # read back as zarr directly and check it is the same as adata.X
    adata_log1p = ad.read_zarr(temp_store)
    log1p(adata)
    npt.assert_allclose(adata_log1p.X, adata.X)
Example #15
Source File: From xarrayutils with MIT License | 5 votes |
# NOTE(review): this snippet is incomplete. Three expressions are missing:
# the first argument of `isinstance(, da.Array)`, the assignment target in
# `if redask: = ...`, and the arguments of the unterminated
# `da.from_array(` call. Presumably the first is the array backing `data`
# and the last re-wraps the result as a dask array, but the chunking
# argument cannot be inferred from what is visible here. Restore these from
# the upstream project before using the function; it does not parse as is.
def wrap_func(grid, data, dim, wrap, func="diff", idx=0): """interpolates data over discontuity (e.g. longitude values) TODO ---- Write tests that runs np and dask arrays through this """ if isinstance(, da.Array): data.load() redask = 1 else: redask = 0 if func == "diff": out = grid.diff(data, dim) elif func == "interp": out = grid.interp(data, dim) # when interpolating the discontinuty gets halved wrap = -wrap / 2 else: raise RuntimeError("`func` argument not recognized") target_dim = [ a for a in out.dims if a in xgcm.comodo.get_axis_coords(grid._ds, dim) ] if len(target_dim) == 1: target_dim = target_dim[0] else: raise RuntimeError("more then one target dim found") # TODO the idx should be determined by a combo of the c grid shift and func out[{target_dim: idx}] = out[{target_dim: idx}] + wrap if redask: = da.from_array(, return out
Example #16
Source File: From dask-lightgbm with BSD 3-Clause "New" or "Revised" License | 5 votes |
def predict(client, model, data, proba=False, dtype=np.float32, **kwargs):
    """Apply ``_predict_part`` to every partition or block of ``data``.

    A dask dataframe (``dd._Frame``) is mapped with ``map_partitions``
    and the ``.values`` of the result is returned.  A dask array is
    mapped with ``map_blocks``: when ``proba`` is true the output chunks
    are set to ``(data.chunks[0], (model.n_classes_,))``, otherwise
    axis 1 is dropped.  Extra ``kwargs`` are forwarded to the mapping
    call.  ``client`` is not used in this body.

    Raises ``TypeError`` for any other ``data`` type.
    """
    if isinstance(data, dd._Frame):
        mapped = data.map_partitions(_predict_part, model=model,
                                     proba=proba, **kwargs)
        return mapped.values

    if not isinstance(data, da.Array):
        raise TypeError(f'Data must be either Dask array or dataframe. Got {type(data)}.')

    if proba:
        kwargs['chunks'] = (data.chunks[0], (model.n_classes_,))
    else:
        kwargs['drop_axis'] = 1
    return data.map_blocks(_predict_part, model=model, proba=proba,
                           dtype=dtype, **kwargs)
Example #17
Source File: From holoviews with BSD 3-Clause "New" or "Revised" License | 5 votes |
def is_dask_array(data):
    """Tell whether ``data`` is a dask array without importing dask eagerly.

    ``dask.array`` is only imported when it is already present in
    ``sys.modules``; if it is not, the answer is ``False``.
    """
    if 'dask.array' not in sys.modules:
        return False
    import dask.array as dask_array
    return isinstance(data, dask_array.Array)
Example #18
Source File: From holoviews with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_select_lazy(self):
    """Check that a selection on a dask-backed Dataset keeps 'y' as a dask array."""
    import dask.array as da
    arr = da.from_array(np.arange(1, 12), 3)
    ds = Dataset({'x': range(11), 'y': arr}, 'x', 'y')
    # NOTE(review): the start of this expression was missing in the
    # source (only `, 5)).data['y']` survived); `ds.select(x=(0` is
    # restored from the remaining tokens — confirm the lower bound.
    self.assertIsInstance(ds.select(x=(0, 5)).data['y'], da.Array)
Example #19
Source File: From holoviews with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_dataset_weighted_histogram_dask(self):
    """Check the weighted histogram of a dask-backed Dataset."""
    import dask.array as da
    ds = Dataset((da.from_array(np.array(range(10), dtype='f'), chunks=3),
                  da.from_array([i/10. for i in range(10)], chunks=3)),
                 ['x', 'y'], datatype=['dask'])
    op_hist = histogram(ds, weight_dimension='y', num_bins=3)

    hist = Histogram(([0, 3, 6, 9], [0.022222, 0.088889, 0.222222]),
                     vdims='y')
    # NOTE(review): the source asserted on the list literal `['y']`,
    # which can never be a dask array; `op_hist.data` is restored as
    # the object being indexed — confirm.
    self.assertIsInstance(op_hist.data['y'], da.Array)
    self.assertEqual(op_hist, hist)
Example #20
Source File: From holoviews with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_dataset_cumulative_histogram_dask(self):
    """Check the cumulative histogram of a dask-backed Dataset."""
    import dask.array as da
    ds = Dataset((da.from_array(np.array(range(10), dtype='f'), chunks=(3)),),
                 ['x'], datatype=['dask'])
    op_hist = histogram(ds, num_bins=3, cumulative=True)

    hist = Histogram(([0, 3, 6, 9], [0.3, 0.6, 1]),
                     vdims=('x_frequency', 'Frequency'))
    # NOTE(review): the source asserted on the list literal
    # `['x_frequency']`, which can never be a dask array; `op_hist.data`
    # is restored as the object being indexed — confirm.
    self.assertIsInstance(op_hist.data['x_frequency'], da.Array)
    self.assertEqual(op_hist, hist)
Example #21
Source File: From holoviews with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_dataset_histogram_dask(self):
    """Check the plain histogram of a dask-backed Dataset."""
    import dask.array as da
    ds = Dataset((da.from_array(np.array(range(10), dtype='f'), chunks=(3)),),
                 ['x'], datatype=['dask'])
    op_hist = histogram(ds, num_bins=3)

    hist = Histogram(([0, 3, 6, 9], [0.1, 0.1, 0.133333]),
                     vdims=('x_frequency', 'Frequency'))
    # NOTE(review): the source asserted on the list literal
    # `['x_frequency']`, which can never be a dask array; `op_hist.data`
    # is restored as the object being indexed — confirm.
    self.assertIsInstance(op_hist.data['x_frequency'], da.Array)
    self.assertEqual(op_hist, hist)
Example #22
Source File: From intake-xarray with BSD 2-Clause "Simplified" License | 5 votes |
def test_rasterio_glob():
    """Check shape and laziness of the globbed TIFF catalog source.

    NOTE(review): three expressions were missing in the source (the
    value assigned to ``info``, the object passed to ``isinstance`` and
    the second value assigned to ``x`` together with the left side of
    the final comparison).  They are restored below as
    ``s.discover()``, ``x.data``, ``s.read()`` and ``x.data.shape`` —
    confirm against the upstream test.
    """
    import dask.array as da
    pytest.importorskip('rasterio')
    cat = intake.open_catalog(os.path.join(here, 'data', 'catalog.yaml'))
    s = cat.tiff_glob_source
    info = s.discover()
    assert info['shape'] == (1, 3, 718, 791)
    x = s.to_dask()
    assert isinstance(x.data, da.Array)
    x = s.read()
    assert x.data.shape == (1, 3, 718, 791)
Example #23
Source File: From intake-xarray with BSD 2-Clause "Simplified" License | 5 votes |
def test_rasterio():
    """Check shape and laziness of the single-file TIFF catalog source.

    NOTE(review): three expressions were missing in the source (the
    value assigned to ``info``, the object passed to ``isinstance`` and
    the second value assigned to ``x`` together with the left side of
    the final comparison).  They are restored below as
    ``s.discover()``, ``x.data``, ``s.read()`` and ``x.data.shape`` —
    confirm against the upstream test.
    """
    import dask.array as da
    pytest.importorskip('rasterio')
    cat = intake.open_catalog(os.path.join(here, 'data', 'catalog.yaml'))
    s = cat.tiff_source
    info = s.discover()
    assert info['shape'] == (3, 718, 791)
    x = s.to_dask()
    assert isinstance(x.data, da.Array)
    x = s.read()
    assert x.data.shape == (3, 718, 791)
Example #24
Source File: From intake-xarray with BSD 2-Clause "Simplified" License | 5 votes |
def test_grib_dask():
    """Check that the GRIB source loads lazily and matches an eager read.

    NOTE(review): three expressions were missing in the source (the
    object passed to ``isinstance`` and the values assigned to
    ``values`` and ``x2``).  They are restored below using the only
    variable name that survives in the snippet
    (``APCP_P8_L1_GLL0_acc6h``); treat all three as assumptions and
    confirm against the upstream test.
    """
    pytest.importorskip('Nio')
    import dask.array as da
    cat = intake.open_catalog(os.path.join(here, 'data', 'catalog.yaml'))
    x = cat.grib.to_dask()
    assert len(x.fileno) == 2
    assert isinstance(x.APCP_P8_L1_GLL0_acc6h.data, da.Array)
    values = x.APCP_P8_L1_GLL0_acc6h.data.compute()
    x2 = cat.grib.read()
    assert (values == x2.APCP_P8_L1_GLL0_acc6h.values).all()
Example #25
Source File: From xclim with Apache License 2.0 | 5 votes |
def normalize(
    x: xr.DataArray,
    *,
    group: Union[str, Grouper] = "time",
    kind: str = ADDITIVE,
    norm: Optional[xr.DataArray] = None,
):
    """Remove the group-wise mean from an array.

    Parameters
    ----------
    x : xr.DataArray
      Array to be normalized.
    group : Union[str, Grouper]
      Grouping information. See :py:class:`xclim.sdba.base.Grouper` for details.
    kind : {'+', '*'}
      Whether the adjustment is applied additively or multiplicatively.
    norm : xr.DataArray
      A precomputed norm (for example from `group.apply("mean", x)`).
      When given, it is inverted, broadcast onto `x` with nearest
      interpolation and applied directly instead of computing group means.

    Returns
    -------
    xr.DataArray or xr.Dataset
      Group-wise anomaly of x

    Notes
    -----
    NOTE(review): `group.apply` is called on `group` directly, so a plain
    string is presumably converted to a Grouper before this body runs
    (e.g. by a decorator not shown here) — confirm.
    """

    def _normalize_group(grp, dim=["time"]):
        group_mean = grp.mean(dim=dim)
        return apply_correction(grp, invert(group_mean, kind), kind)

    if norm is not None:
        correction = broadcast(
            invert(norm, kind), x, group=group, interp="nearest"
        )
        return apply_correction(x, correction, kind)

    return group.apply(_normalize_group, x)
Example #26
Source File: From xclim with Apache License 2.0 | 5 votes |
def rle_1d(
    arr: Union[int, float, bool, Sequence[Union[int, float, bool]]]
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Return the value, length and starting position of each run of identical values.

    Parameters
    ----------
    arr : Union[int, float, bool, Sequence[Union[int, float, bool]]]
      Values to be parsed. A scalar is treated as a one-element sequence.

    Returns
    -------
    values : np.ndarray
      The value taken by arr over each run.
    run lengths : np.ndarray
      The length of each run.
    start position : np.ndarray
      The starting index of each run.

    Notes
    -----
    For empty input a warning is issued and
    ``(np.array(np.nan), 0, np.array(np.nan))`` is returned.

    Examples
    --------
    >>> a = [1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3]
    >>> rle_1d(a)
    (array([1, 2, 3]), array([2, 4, 6]), array([0, 2, 6]))
    """
    # atleast_1d lets a scalar through: len() of a 0-d array raises
    # TypeError, although the annotation allows scalar input.
    ia = np.atleast_1d(np.asarray(arr))
    n = len(ia)

    if n == 0:
        e = "run length array empty"
        warn(e)
        # Returning None makes some other 1d func below fail.
        return np.array(np.nan), 0, np.array(np.nan)

    y = np.array(ia[1:] != ia[:-1])  # pairwise unequal (string safe)
    i = np.append(np.where(y), n - 1)  # must include last element position
    rl = np.diff(np.append(-1, i))  # run lengths
    pos = np.cumsum(np.append(0, rl))[:-1]  # positions
    return ia[i], rl, pos
Example #27
Source File: From xcube with MIT License | 5 votes |
def _compute_ij_images_xarray_dask_block(dtype: np.dtype,
                                         block_id: int,
                                         block_shape: Tuple[int, int],
                                         block_slices: Tuple[Tuple[int, int], Tuple[int, int], Tuple[int, int]],
                                         src_x: xr.DataArray,
                                         src_y: xr.DataArray,
                                         src_ij_bboxes: np.ndarray,
                                         dst_x_min: float,
                                         dst_y_min: float,
                                         dst_xy_res: float,
                                         uv_delta: float) -> np.ndarray:
    """Fill one destination block with source pixel i,j coordinates.

    The block starts out filled with NaN.  The source bounding box for
    ``block_id`` is read from ``src_ij_bboxes``; if its first entry is
    -1 the NaN block is returned unchanged.  Otherwise the matching
    windows of ``src_x`` and ``src_y`` are loaded and handed to
    ``_compute_ij_images_numpy_sequential`` together with the block and
    the block's destination origin, which is derived from the y and x
    slice starts in ``block_slices``.
    """
    block = np.full(block_shape, np.nan, dtype=dtype)
    _, (y_start, _), (x_start, _) = block_slices

    i_min, j_min, i_max, j_max = src_ij_bboxes[block_id]
    if src_bbox_is_empty := (i_min == -1):
        return block

    window = (slice(j_min, j_max + 1), slice(i_min, i_max + 1))
    x_values = src_x[window].values
    y_values = src_y[window].values

    block_x_min = dst_x_min + x_start * dst_xy_res
    block_y_min = dst_y_min + y_start * dst_xy_res
    _compute_ij_images_numpy_sequential(x_values,
                                        y_values,
                                        i_min,
                                        j_min,
                                        block,
                                        block_x_min,
                                        block_y_min,
                                        dst_xy_res,
                                        uv_delta)
    return block
Example #28
Source File: From xcube with MIT License | 5 votes |
def _compute_var_image_xarray_dask(src_var: xr.DataArray,
                                   dst_src_ij_images: np.ndarray,
                                   fill_value: Union[int, float, complex] = np.nan) -> da.Array:
    """Map ``_compute_var_image_xarray_dask_block`` over the source pixels.

    ``src_var.values``, ``dst_src_ij_images`` and ``fill_value`` are
    passed positionally to ``da.map_blocks``; the result keeps
    ``src_var.dtype`` and has axis 0 dropped.
    """
    src_values = src_var.values
    map_options = dict(dtype=src_var.dtype, drop_axis=0)
    return da.map_blocks(_compute_var_image_xarray_dask_block,
                         src_values,
                         dst_src_ij_images,
                         fill_value,
                         **map_options)
Example #29
Source File: From psyplot with GNU General Public License v2.0 | 5 votes |
def get_variables(self):
    """Return one variable per raster band plus 'lat' and 'lon'.

    Each band ``i`` (1-based) becomes ``'Band<i>'`` with dims
    ``['lat', 'lon']`` and the band's metadata dict as attributes.  With
    ``with_dask`` set, the band data is a single-chunk dask array that
    reads the band lazily; otherwise it is read immediately.  When
    reading, cells equal to the band's NoData value are replaced by NaN
    if the array dtype can hold NaN.

    ``_FillValue`` is NaN when the band dtype can represent NaN, and the
    band's NoData value otherwise (omitted only when the band has none).
    """
    def load(band):
        band = ds.GetRasterBand(band)
        a = band.ReadAsArray()
        no_data = band.GetNoDataValue()
        if no_data is not None:
            try:
                a[a == no_data] = a.dtype.type(nan)
            except ValueError:
                # dtype cannot represent NaN; leave the data untouched
                pass
        return a

    ds = self.ds
    dims = ['lat', 'lon']
    chunks = ((ds.RasterYSize,), (ds.RasterXSize,))
    shape = (ds.RasterYSize, ds.RasterXSize)
    variables = OrderedDict()
    for iband in range(1, ds.RasterCount+1):
        band = ds.GetRasterBand(iband)
        # NOTE(review): the source read `dtype([band.DataType])`, which
        # looks truncated; the GDAL-code-to-NumPy-type lookup
        # `gdal_array.codes[...]` is assumed here — confirm, and make
        # sure `gdal_array` is imported in this module.
        dt = dtype(gdal_array.codes[band.DataType])
        if with_dask:
            # NOTE(review): every band uses the same dask name 'x' and
            # key ('x', 0, 0); if several bands are computed in one
            # merged graph these keys would collide — consider a
            # per-band name.
            dsk = {('x', 0, 0): (load, iband)}
            arr = Array(dsk, 'x', chunks, shape=shape, dtype=dt)
        else:
            arr = load(iband)
        attrs = band.GetMetadata_Dict()
        try:
            dt.type(nan)
            attrs['_FillValue'] = nan
        except ValueError:
            no_data = band.GetNoDataValue()
            # Compare against None, as `load` does: a NoData value of 0
            # is valid and must not be dropped by a truthiness test.
            if no_data is not None:
                attrs['_FillValue'] = no_data
        variables['Band%i' % iband] = Variable(dims, arr, attrs)
    variables['lat'], variables['lon'] = self._load_GeoTransform()
    return FrozenOrderedDict(variables)
Example #30
Source File: From holoviews with BSD 3-Clause "New" or "Revised" License | 5 votes |
def is_dask(array):
    """Tell whether ``array`` is a dask array.

    The dask array module is obtained from ``dask_array_module()``; when
    that returns ``None`` the answer is ``False``.
    """
    dask_array = dask_array_module()
    if dask_array is not None:
        return dask_array and isinstance(array, dask_array.Array)
    return False