Python dask.array() Examples
The following are 30 code examples of dask.array(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module dask, or try the search function.
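As a quick orientation before the examples: dask.array wraps NumPy-like data in a lazy, chunked representation. A minimal sketch, not taken from any of the projects below:

import dask.array as da
import numpy as np

# Build a chunked dask array from an in-memory NumPy array.
x = da.from_array(np.arange(12).reshape(3, 4), chunks=(2, 2))

# Operations are lazy; .compute() materializes the result as NumPy.
print(x.sum(axis=0).compute())  # -> [12 15 18 21]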
Example #1
Source File: catalog.py From nbodykit with GNU General Public License v3.0 | 6 votes |
def persist(self, columns=None):
    """
    Return a CatalogSource, where the selected columns are
    computed and persist in memory.
    """
    import dask.array as da

    if columns is None:
        columns = self.columns

    r = {}
    for key in columns:
        r[key] = self[key]

    r = da.compute(r)[0]  # particularity of dask

    from nbodykit.source.catalog.array import ArrayCatalog

    c = ArrayCatalog(r, comm=self.comm)
    c.attrs.update(self.attrs)
    return c
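The single da.compute(r) call above evaluates every selected column in one pass over the task graph, so shared inputs are not read twice. A minimal sketch of the same pattern outside nbodykit (the dict and column names here are made up):

import dask
import dask.array as da

r = {'Position': da.ones((4, 3), chunks=2), 'Mass': da.zeros(4, chunks=2)}
computed = dask.compute(r)[0]  # a dict of plain NumPy arrays
print(type(computed['Mass']))  # -> <class 'numpy.ndarray'>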
Example #2
Source File: meta.py From gbdxtools with MIT License | 6 votes |
def aoi(self, **kwargs):
    """Subsets the Image by the given bounds.

    Args:
        bbox (list): optional. A bounding box array [minx, miny, maxx, maxy]
        wkt (str): optional. A WKT geometry string
        geojson (str): optional. A GeoJSON geometry dictionary

    Returns:
        image: an image instance of the same type
    """
    g = self._parse_geoms(**kwargs)
    if g is None:
        return self
    else:
        return self[g]
Example #3
Source File: meta.py From gbdxtools with MIT License | 6 votes |
def __new__(cls, dm, **kwargs):
    if isinstance(dm, da.Array):
        dm = DaskMeta.from_darray(dm)
    elif isinstance(dm, dict):
        dm = DaskMeta(**dm)
    elif isinstance(dm, DaskMeta):
        pass
    elif dm.__class__.__name__ in ("Op", "GraphMeta", "TmsMeta", "TemplateMeta"):
        itr = [dm.dask, dm.name, dm.chunks, dm.dtype, dm.shape]
        dm = DaskMeta._make(itr)
    else:
        raise ValueError("{} must be initialized with a DaskMeta, a dask array, or a dict with DaskMeta fields".format(cls.__name__))
    self = da.Array.__new__(cls, dm.dask, dm.name, dm.chunks, dtype=dm.dtype, shape=dm.shape)
    if "__geo_transform__" in kwargs:
        self.__geo_transform__ = kwargs["__geo_transform__"]
    if "__geo_interface__" in kwargs:
        self.__geo_interface__ = kwargs["__geo_interface__"]
    return self
Example #4
Source File: xarray_container.py From intake-xarray with BSD 2-Clause "Simplified" License | 6 votes |
def __init__(self, url, headers, **kwargs):
    """
    Initialise local xarray, whose dask arrays contain tasks that pull data

    The metadata contains a key "internal", which is a result of running
    ``serialize_zarr_ds`` on the xarray on the server. It is a dict
    containing the metadata parts of the original dataset (i.e., the
    keys with names like ".z*"). This can be opened by xarray as-is, and
    will make a local xarray object. In ``._get_schema()``, the numpy
    parts (coordinates) are fetched and the dask-array parts (variables)
    have their dask graphs redefined to tasks that fetch data from the
    server.
    """
    import xarray as xr
    super(RemoteXarray, self).__init__(url, headers, **kwargs)
    self._schema = None
    self._ds = xr.open_zarr(self.metadata['internal'])
Example #5
Source File: data.py From psyplot with GNU General Public License v2.0 | 6 votes |
def to_slice(arr):
    """Test whether `arr` is an integer array that can be replaced by a slice

    Parameters
    ----------
    arr: numpy.array
        Numpy integer array

    Returns
    -------
    slice or None
        If `arr` could be converted to a slice, this is returned,
        otherwise `None` is returned

    See Also
    --------
    get_index_from_coord"""
    if isinstance(arr, slice):
        return arr
    if len(arr) == 1:
        return slice(arr[0], arr[0] + 1)
    step = np.unique(arr[1:] - arr[:-1])
    if len(step) == 1:
        return slice(arr[0], arr[-1] + step[0], step[0])
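A short worked example of the function above (assuming numpy is imported as np, as in the source module):

import numpy as np

to_slice(np.array([2, 4, 6]))  # -> slice(2, 8, 2): evenly spaced with step 2
to_slice(np.array([1, 4, 6]))  # -> None: the steps differ, no equivalent slice
to_slice(np.array([5]))        # -> slice(5, 6): a single index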
Example #6
Source File: __init__.py From pyresample with GNU Lesser General Public License v3.0 | 6 votes |
def _filter_data(self, data, is_src=True, add_dim=False):
    """Filter unused chunks from the given array."""
    if add_dim:
        if data.ndim not in [2, 3]:
            raise NotImplementedError('Gradient search resampling only '
                                      'supports 2D or 3D arrays.')
        if data.ndim == 2:
            data = data[np.newaxis, :, :]

    data_out = []
    for i, covers in enumerate(self.coverage_status):
        if covers:
            if is_src:
                y_start, y_end, x_start, x_end = self.src_slices[i]
            else:
                y_start, y_end, x_start, x_end = self.dst_slices[i]
            try:
                val = data[:, y_start:y_end, x_start:x_end]
            except IndexError:
                val = data[y_start:y_end, x_start:x_end]
        else:
            val = None
        data_out.append(val)

    return data_out
Example #7
Source File: test_dask_layers.py From napari with BSD 3-Clause "New" or "Revised" License | 6 votes |
def delayed_dask_stack():
    """A 4D (20, 10, 10, 10) delayed dask array, simulates disk io."""
    # we will return a dict with a 'calls' variable that tracks call count
    output = {'calls': 0}

    # create a delayed version of function that simply generates np.arrays
    # but also counts when it has been called
    @dask.delayed
    def get_array():
        nonlocal output
        output['calls'] += 1
        return np.random.rand(10, 10, 10)

    # then make a mock "timelapse" of 3D stacks
    # see https://napari.org/tutorials/applications/dask.html for details
    _list = [get_array() for _ in range(20)]
    output['stack'] = da.stack(
        # np.float was removed in NumPy 1.24; use the builtin float instead
        [da.from_delayed(i, shape=(10, 10, 10), dtype=float) for i in _list]
    )
    assert output['stack'].shape == (20, 10, 10, 10)
    return output
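The core trick in this fixture is da.from_delayed, which wraps a dask.delayed object with a declared shape and dtype so it can participate in array operations without being executed. A stripped-down sketch of the same pattern:

import dask
import dask.array as da
import numpy as np

@dask.delayed
def load_plane():
    # Stands in for an expensive read from disk.
    return np.random.rand(10, 10)

# Declare shape/dtype up front; nothing runs until .compute().
stack = da.stack([
    da.from_delayed(load_plane(), shape=(10, 10), dtype=float)
    for _ in range(5)
])
print(stack.shape)  # -> (5, 10, 10)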
Example #8
Source File: data.py From psyplot with GNU General Public License v2.0 | 6 votes |
def remove(self, arr):
    """Removes an array from the list

    Parameters
    ----------
    arr: str or :class:`InteractiveBase`
        The array name or the data object in this list to remove

    Raises
    ------
    ValueError
        If no array with the specified array name is in the list"""
    name = arr if isinstance(arr, six.string_types) else arr.psy.arr_name
    if arr not in self:
        raise ValueError(
            "Array {0} not in the list".format(name))
    for i, arr in enumerate(self):
        if arr.psy.arr_name == name:
            del self[i]
            return
    raise ValueError(
        "No array found with name {0}".format(name))
Example #9
Source File: __init__.py From pyresample with GNU Lesser General Public License v3.0 | 6 votes |
def _concatenate_chunks(chunks):
    """Concatenate chunks to full output array."""
    # Form the full array
    col, res = [], []
    prev_y = 0
    for y, x in sorted(chunks):
        if len(chunks[(y, x)]) > 1:
            chunk = da.nanmax(da.stack(chunks[(y, x)], axis=-1), axis=-1)
        else:
            chunk = chunks[(y, x)][0]
        if y == prev_y:
            col.append(chunk)
            continue
        res.append(da.concatenate(col, axis=1))
        col = [chunk]
        prev_y = y
    res.append(da.concatenate(col, axis=1))
    res = da.concatenate(res, axis=2).squeeze()

    return res
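Where two chunks overlap, the function merges them by stacking along a new axis and taking the NaN-aware maximum. The same idea in isolation:

import dask.array as da
import numpy as np

a = da.from_array(np.array([[1., np.nan]]), chunks=1)
b = da.from_array(np.array([[np.nan, 2.]]), chunks=1)
# nanmax over the stacked axis keeps whichever chunk holds valid data.
merged = da.nanmax(da.stack([a, b], axis=-1), axis=-1)
print(merged.compute())  # -> [[1. 2.]]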
Example #10
Source File: data.py From psyplot with GNU General Public License v2.0 | 6 votes |
def append(self, value, new_name=False):
    """
    Append a new array to the list

    Parameters
    ----------
    value: InteractiveBase
        The data object to append to this list
    %(ArrayList.rename.parameters.new_name)s

    Raises
    ------
    %(ArrayList.rename.raises)s

    See Also
    --------
    list.append, extend, rename"""
    arr, renamed = self.rename(value, new_name)
    if renamed is not None:
        super(ArrayList, self).append(value)
Example #11
Source File: test_bucket.py From pyresample with GNU Lesser General Public License v3.0 | 6 votes |
def test_round_to_resolution(self):
    """Test rounding to given resolution"""
    # Scalar, integer resolution
    self.assertEqual(bucket.round_to_resolution(5.5, 2.), 6)
    # Scalar, non-integer resolution
    self.assertEqual(bucket.round_to_resolution(5.5, 1.7), 5.1)
    # List
    self.assertTrue(np.all(bucket.round_to_resolution([4.2, 5.6], 2) ==
                           np.array([4., 6.])))
    # Numpy array
    self.assertTrue(np.all(bucket.round_to_resolution(np.array([4.2, 5.6]), 2) ==
                           np.array([4., 6.])))
    # Dask array
    self.assertTrue(
        np.all(bucket.round_to_resolution(da.array([4.2, 5.6]), 2) ==
               np.array([4., 6.])))
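round_to_resolution itself is not shown on this page; an implementation consistent with these assertions — assuming the function simply snaps values to the nearest multiple of the resolution — might look like the following sketch (not the actual pyresample code):

import numpy as np

def round_to_resolution(data, resolution):
    # Sketch: snap each value to the nearest multiple of `resolution`.
    # Lists are converted; NumPy and dask arrays pass through np.round.
    if isinstance(data, list):
        data = np.array(data)
    return resolution * np.round(data / resolution)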
Example #12
Source File: data.py From psyplot with GNU General Public License v2.0 | 6 votes |
def next_available_name(self, fmt_str='arr{0}', counter=None):
    """Create a new array name out of the given format string

    Parameters
    ----------
    fmt_str: str
        The base string to use. ``'{0}'`` will be replaced by a counter
    counter: iterable
        An iterable where the numbers should be drawn from. If None,
        ``range(1000)`` is used

    Returns
    -------
    str
        A possible name that is not in the current project"""
    names = self.arr_names
    counter = counter or iter(range(1000))
    try:
        new_name = next(
            filter(lambda n: n not in names, map(fmt_str.format, counter)))
    except StopIteration:
        raise ValueError(
            "{0} already in the list".format(fmt_str))
    return new_name
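The filter/map chain above lazily generates candidate names and stops at the first one not already taken. The same pattern in isolation:

names = {'arr0', 'arr1'}
print(next(filter(lambda n: n not in names,
                  map('arr{0}'.format, range(1000)))))  # -> 'arr2'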
Example #13
Source File: data.py From psyplot with GNU General Public License v2.0 | 6 votes |
def _contains_array(self, val):
    """Checks whether exactly this array is in the list"""
    arr = self(arr_name=val.psy.arr_name)[0]
    is_not_list = any(
        map(lambda a: not isinstance(a, InteractiveList),
            [arr, val]))
    is_list = any(map(lambda a: isinstance(a, InteractiveList),
                      [arr, val]))
    # if one is an InteractiveList and the other not, they differ
    if is_list and is_not_list:
        return False
    # if both are interactive lists, check the lists
    if is_list:
        return all(a in arr for a in val) and all(a in val for a in arr)
    # else we check the shapes and values
    return arr is val
Example #14
Source File: data.py From psyplot with GNU General Public License v2.0 | 6 votes |
def __init__(self, plotter=None, arr_name='arr0', auto_update=None):
    """
    Parameters
    ----------
    plotter: Plotter
        Default: None. Interactive plotter that makes the plot via
        formatoption keywords.
    arr_name: str
        Default: ``'arr0'``. unique string of the array
    auto_update: bool
        Default: None. A boolean indicating whether this list shall
        automatically update the contained arrays when calling the
        :meth:`update` method or not. See also the :attr:`no_auto_update`
        attribute. If None, the value from the ``'lists.auto_update'``
        key in the :attr:`psyplot.rcParams` dictionary is used."""
    self.plotter = plotter
    self.arr_name = arr_name
    if auto_update is None:
        auto_update = rcParams['lists.auto_update']
    self.no_auto_update = not bool(auto_update)
    self.replot = False
Example #15
Source File: catalog.py From nbodykit with GNU General Public License v3.0 | 6 votes |
def read(self, columns):
    """
    Return the requested columns as dask arrays.

    Parameters
    ----------
    columns : list of str
        the names of the requested columns

    Returns
    -------
    list of :class:`dask.array.Array` :
        the list of column data, in the form of dask arrays
    """
    missing = set(columns) - set(self.columns)
    if len(missing) > 0:
        msg = "source does not contain columns: %s; " % str(missing)
        msg += "try adding columns via `source[column] = data`"
        raise ValueError(msg)

    return [self[col] for col in columns]
Example #16
Source File: tree.py From uproot with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _normalize_dtype(self, interpretation, awkward):
    if inspect.isclass(interpretation) and issubclass(interpretation, awkward.numpy.generic):
        return self._normalize_dtype(awkward.numpy.dtype(interpretation), awkward)

    elif isinstance(interpretation, awkward.numpy.dtype):      # user specified a Numpy dtype
        default = interpret(self, awkward)
        if isinstance(default, (asdtype, asjagged)):
            return default.to(interpretation)
        else:
            raise ValueError("cannot cast branch {0} (default interpretation {1}) as dtype {2}".format(repr(self.name), default, interpretation))

    elif isinstance(interpretation, awkward.numpy.ndarray):    # user specified a Numpy array
        default = interpret(self, awkward)
        if isinstance(default, asdtype):
            return default.toarray(interpretation)
        else:
            raise ValueError("cannot cast branch {0} (default interpretation {1}) as dtype {2}".format(repr(self.name), default, interpretation))

    elif not isinstance(interpretation, uproot.interp.interp.Interpretation):
        raise TypeError("branch interpretation must be an Interpretation, not {0} (type {1})".format(interpretation, type(interpretation)))

    else:
        return interpretation
Example #17
Source File: data.py From psyplot with GNU General Public License v2.0 | 6 votes |
def _insert_fldmean_bounds(self, da, keepdims=False):
    xcoord = self.get_coord('x')
    ycoord = self.get_coord('y')
    sdims = (self.get_dim('y'), self.get_dim('x'))
    xbounds = np.array([[xcoord.min(), xcoord.max()]])
    ybounds = np.array([[ycoord.min(), ycoord.max()]])
    xdims = (sdims[-1], 'bnds') if keepdims else ('bnds', )
    ydims = (sdims[0], 'bnds') if keepdims else ('bnds', )
    xattrs = xcoord.attrs.copy()
    xattrs.pop('bounds', None)
    yattrs = ycoord.attrs.copy()
    yattrs.pop('bounds', None)
    da.psy.base.coords[xcoord.name + '_bnds'] = xr.Variable(
        xdims, xbounds if keepdims else xbounds[0], attrs=xattrs)
    da.psy.base.coords[ycoord.name + '_bnds'] = xr.Variable(
        ydims, ybounds if keepdims else ybounds[0], attrs=yattrs)
Example #18
Source File: test_bucket.py From pyresample with GNU Lesser General Public License v3.0 | 5 votes |
def test_get_sum(self):
    """Test drop-in-a-bucket sum."""
    data = da.from_array(np.array([[2., 2.], [2., 2.]]),
                         chunks=self.chunks)
    with dask.config.set(scheduler=CustomScheduler(max_computes=0)):
        result = self.resampler.get_sum(data)
    result = result.compute()
    # One bin with two hits of value 2.0, so the max value is 4.0
    self.assertTrue(np.max(result) == 4.)
    # Two bins with the same value
    self.assertEqual(np.sum(result == 2.), 2)
    # One bin with double the value
    self.assertEqual(np.sum(result == 4.), 1)
    self.assertEqual(result.shape, self.adef.shape)

    # Test that also Xarray.DataArrays work
    data = xr.DataArray(data)
    with dask.config.set(scheduler=CustomScheduler(max_computes=0)):
        result = self.resampler.get_sum(data)
    # One bin with two hits of value 2.0, so the max value is 4.0
    self.assertTrue(np.max(result) == 4.)
    # Two bins with the same value
    self.assertEqual(np.sum(result == 2.), 2)
    # One bin with double the value
    self.assertEqual(np.sum(result == 4.), 1)
    self.assertEqual(result.shape, self.adef.shape)

    # Test masking all-NaN bins
    data = da.from_array(np.array([[np.nan, np.nan], [np.nan, np.nan]]),
                         chunks=self.chunks)
    with dask.config.set(scheduler=CustomScheduler(max_computes=0)):
        result = self.resampler.get_sum(data, mask_all_nan=True)
    self.assertTrue(np.all(np.isnan(result)))
    # By default all-NaN bins have a value of 0.0
    with dask.config.set(scheduler=CustomScheduler(max_computes=0)):
        result = self.resampler.get_sum(data)
    self.assertEqual(np.nanmax(result), 0.0)
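CustomScheduler is defined elsewhere in the test module; it is passed to dask.config.set(scheduler=...) so the test fails if anything triggers computation prematurely. A plausible sketch of such a scheduler (the real pyresample helper may differ in detail):

import dask

class CustomScheduler:
    """Scheduler callable that limits how often dask may compute."""

    def __init__(self, max_computes=1):
        self.max_computes = max_computes
        self.total_computes = 0

    def __call__(self, dsk, keys, **kwargs):
        self.total_computes += 1
        if self.total_computes > self.max_computes:
            raise RuntimeError("Too many dask computations: %d" %
                               self.total_computes)
        # Delegate to the synchronous scheduler for the actual work.
        return dask.get(dsk, keys, **kwargs)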
Example #19
Source File: tree.py From uproot with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _basket(self, i, interpretation, local_entrystart, local_entrystop, awkward, basketcache, keycache):
    basketdata = None
    if basketcache is not None:
        basketcachekey = self._basketcachekey(i)
        basketdata = basketcache.get(basketcachekey, None)

    key = self._threadsafe_key(i, keycache, True)

    if basketdata is None:
        basketdata = key.basketdata()

    if basketcache is not None:
        basketcache[basketcachekey] = basketdata

    if key._fObjlen == key.border:
        data, byteoffsets = basketdata, None

        if self._countbranch is not None and awkward.numpy.uint8(self._tree_iofeatures) & awkward.numpy.uint8(uproot.const.kGenerateOffsetMap) != 0:
            counts = self._countbranch.array(entrystart=(local_entrystart + self.basket_entrystart(i)),
                                             entrystop=(local_entrystop + self.basket_entrystart(i)))
            itemsize = 1
            if isinstance(interpretation, asjagged):
                itemsize = interpretation.content.fromdtype.itemsize
            awkward.numpy.multiply(counts, itemsize, counts)
            byteoffsets = awkward.numpy.empty(len(counts) + 1, dtype=awkward.numpy.int32)
            byteoffsets[0] = 0
            awkward.numpy.cumsum(counts, out=byteoffsets[1:])

    else:
        data = basketdata[:key.border]
        byteoffsets = awkward.numpy.empty((key._fObjlen - key.border - 4) // 4, dtype=awkward.numpy.int32)  # native endian
        byteoffsets[:-1] = basketdata[key.border + 4 : -4].view(">i4")  # read as big-endian and convert
        byteoffsets[-1] = key._fLast
        awkward.numpy.subtract(byteoffsets, key._fKeylen, byteoffsets)

    return interpretation.fromroot(data, byteoffsets, local_entrystart, local_entrystop, key._fKeylen)
Example #20
Source File: test_bucket.py From pyresample with GNU Lesser General Public License v3.0 | 5 votes |
def test_resample_bucket_fractions(self):
    """Test fraction calculations for categorical data."""
    data = da.from_array(np.array([[2, 4], [2, 2]]),
                         chunks=self.chunks)
    categories = [1, 2, 3, 4]
    with dask.config.set(scheduler=CustomScheduler(max_computes=0)):
        result = self.resampler.get_fractions(data, categories=categories)
    self.assertEqual(set(categories), set(result.keys()))

    res = result[1].compute()
    self.assertTrue(np.nanmax(res) == 0.)
    res = result[2].compute()
    self.assertTrue(np.nanmax(res) == 1.)
    self.assertTrue(np.nanmin(res) == 0.5)
    res = result[3].compute()
    self.assertTrue(np.nanmax(res) == 0.)
    res = result[4].compute()
    self.assertTrue(np.nanmax(res) == 0.5)
    self.assertTrue(np.nanmin(res) == 0.)
    # There should be NaN values
    self.assertTrue(np.any(np.isnan(res)))

    # Use a fill value
    with dask.config.set(scheduler=CustomScheduler(max_computes=0)):
        result = self.resampler.get_fractions(data, categories=categories,
                                              fill_value=-1)
    # There should not be any NaN values
    for i in categories:
        res = result[i].compute()
        self.assertFalse(np.any(np.isnan(res)))
        self.assertTrue(np.min(res) == -1)

    # No categories given, need to compute the data once to get
    # the categories
    with dask.config.set(scheduler=CustomScheduler(max_computes=1)):
        result = self.resampler.get_fractions(data, categories=None)
Example #21
Source File: tree.py From uproot with BSD 3-Clause "New" or "Revised" License | 5 votes |
def iterate(path, treepath, branches=None, entrysteps=float("inf"), outputtype=dict, namedecode=None,
            reportpath=False, reportfile=False, reportentries=False, flatten=False, flatname=None,
            awkwardlib=None, cache=None, basketcache=None, keycache=None, executor=None, blocking=True,
            localsource=MemmapSource.defaults, xrootdsource=XRootDSource.defaults,
            httpsource=HTTPSource.defaults, **options):
    awkward = _normalize_awkwardlib(awkwardlib)
    for tree, branchesinterp, globalentrystart, thispath, thisfile in _iterate(path, treepath, branches, awkward, localsource, xrootdsource, httpsource, **options):
        for start, stop, arrays in tree.iterate(branches=branchesinterp, entrysteps=entrysteps, outputtype=outputtype, namedecode=namedecode, reportentries=True, entrystart=0, entrystop=tree.numentries, flatten=flatten, flatname=flatname, awkwardlib=awkward, cache=cache, basketcache=basketcache, keycache=keycache, executor=executor, blocking=blocking):
            if getattr(outputtype, "__name__", None) == "DataFrame" and getattr(outputtype, "__module__", None) == "pandas.core.frame":
                if type(arrays.index).__name__ == "MultiIndex":
                    if hasattr(arrays.index.levels[0], "array"):
                        index = arrays.index.levels[0].array   # pandas>=0.24.0
                    else:
                        index = arrays.index.levels[0].values  # pandas<0.24.0
                    awkward.numpy.add(index, globalentrystart, out=index)

                elif type(arrays.index).__name__ == "RangeIndex":
                    if hasattr(arrays.index, "start") and hasattr(arrays.index, "stop"):
                        indexstart = arrays.index.start        # pandas>=0.25.0
                        indexstop = arrays.index.stop
                    else:
                        indexstart = arrays.index._start       # pandas<0.25.0
                        indexstop = arrays.index._stop
                    arrays.index = type(arrays.index)(indexstart + globalentrystart, indexstop + globalentrystart)

                else:
                    if hasattr(arrays.index, "array"):
                        index = arrays.index.array             # pandas>=0.24.0
                    else:
                        index = arrays.index.values            # pandas<0.24.0
                    awkward.numpy.add(index, globalentrystart, out=index)

            out = (arrays,)
            if reportentries:
                out = (globalentrystart + start, globalentrystart + stop) + out
            if reportfile:
                out = (thisfile,) + out
            if reportpath:
                out = (thispath,) + out
            if len(out) == 1:
                yield out[0]
            else:
                yield out
Example #22
Source File: tree.py From uproot with BSD 3-Clause "New" or "Revised" License | 5 votes |
def array(self, branch, interpretation=None, entrystart=None, entrystop=None, flatten=False,
          awkwardlib=None, cache=None, basketcache=None, keycache=None, executor=None, blocking=True):
    awkward = _normalize_awkwardlib(awkwardlib)
    branches = list(self._normalize_branches(branch, awkward))
    if len(branches) == 1:
        if interpretation is None:
            tbranch, interpretation = branches[0]
        else:
            tbranch, _ = branches[0]
    else:
        raise ValueError("list of branch names or glob/regex matches more than one branch; use TTree.arrays (plural)")
    return tbranch.array(interpretation=interpretation, entrystart=entrystart, entrystop=entrystop,
                         flatten=flatten, awkwardlib=awkwardlib, cache=cache, basketcache=basketcache,
                         keycache=keycache, executor=executor, blocking=blocking)
Example #23
Source File: xarray_container.py From intake-xarray with BSD 2-Clause "Simplified" License | 5 votes |
def _get_partition(self, i):
    """
    The partition should look like ("var_name", int, int...), where the
    number of ints matches the number of coordinate axes in the named
    variable, and is between 0 and the number of chunks in each axis.
    For an array, as opposed to a dataset, omit the variable name.
    """
    return get_partition(self.url, self.headers, self._source_id,
                         self.container, i)
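The integer parts of such a partition key correspond to dask chunk indices. A small self-contained illustration of that indexing (the variable name "temp" mentioned in the comment is hypothetical):

import dask.array as da

x = da.ones((10, 12), chunks=(5, 4))  # a 2 x 3 grid of chunks
print(x.numblocks)                    # -> (2, 3)
# A key like ("temp", 1, 2) would address the chunk x.blocks[1, 2]
print(x.blocks[1, 2].shape)           # -> (5, 4)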
Example #24
Source File: data.py From psyplot with GNU General Public License v2.0 | 5 votes |
def __getitem__(self, key):
    return decode_absolute_time(self.array[key])
Example #25
Source File: data.py From psyplot with GNU General Public License v2.0 | 5 votes |
def copy(self, deep=False):
    """Copy the array

    This method returns a copy of the underlying array in the :attr:`arr`
    attribute. It is more stable because it creates a new `psy`
    accessor"""
    ds = self.ds.copy(deep)
    ds.psy = DatasetAccessor(ds)
    return ds
Example #26
Source File: data.py From psyplot with GNU General Public License v2.0 | 5 votes |
def is_unstructured(self):
    """A boolean for each array whether it is unstructured or not"""
    return [
        arr.psy.decoder.is_unstructured(arr)
        if not isinstance(arr, ArrayList)
        else arr.is_unstructured
        for arr in self]
Example #27
Source File: data.py From psyplot with GNU General Public License v2.0 | 5 votes |
def arr_names(self, value):
    value = list(islice(value, 0, len(self)))
    if not len(set(value)) == len(self):
        raise ValueError(
            "Got %i unique array names for %i data objects!" % (
                len(set(value)), len(self)))
    for arr, n in zip(self, value):
        arr.psy.arr_name = n
Example #28
Source File: data.py From psyplot with GNU General Public License v2.0 | 5 votes |
def arr_names(self):
    """Names of the arrays (!not of the variables!) in this list

    This attribute can be set with an iterable of unique names to change
    the array names of the data objects in this list."""
    return list(arr.psy.arr_name for arr in self)
Example #29
Source File: data.py From psyplot with GNU General Public License v2.0 | 5 votes |
def _fldaverage_args(self):
    """Masked array, xname, yname and axis for calculating the average"""
    arr = self.arr
    sdims = (self.get_dim('y'), self.get_dim('x'))
    if sdims[0] == sdims[1]:
        sdims = sdims[:1]
    axis = tuple(map(arr.dims.index, sdims))
    return arr, sdims, axis
Example #30
Source File: data.py From psyplot with GNU General Public License v2.0 | 5 votes |
def gridweights(self, keepdims=False, keepshape=False, use_cdo=None):
    """Calculate the cell weights for each grid cell

    Parameters
    ----------
    keepdims: bool
        If True, keep the number of dimensions
    keepshape: bool
        If True, keep the exact shape as the source array and the
        missing values in the array are masked
    use_cdo: bool or None
        If True, use Climate Data Operators (CDOs) to calculate the
        weights. Note that this is used automatically for unstructured
        grids. If None, it depends on the ``'gridweights.use_cdo'``
        item in the :attr:`psyplot.rcParams`.

    Returns
    -------
    xarray.DataArray
        The 2D-DataArray with the grid weights"""
    if use_cdo is None:
        use_cdo = rcParams['gridweights.use_cdo']
    if not use_cdo and self.decoder.is_unstructured(self.arr):
        use_cdo = True
    if use_cdo is None or use_cdo:
        try:
            weights = self._gridweights_cdo()
        except Exception:
            if use_cdo:
                raise
            else:
                weights = self._gridweights()
    else:
        weights = self._gridweights()
    return self._weights_to_da(weights, keepdims=keepdims,
                               keepshape=keepshape)
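Once computed, such weights are typically multiplied into the field and summed to form an area-weighted mean. A generic xarray sketch of that use (uniform weights chosen only for illustration, not psyplot's actual output):

import numpy as np
import xarray as xr

field = xr.DataArray(np.random.rand(3, 4), dims=('y', 'x'))
weights = xr.DataArray(np.full((3, 4), 1.0 / 12), dims=('y', 'x'))
# With weights summing to 1, this reduces to a weighted spatial mean.
print(float((field * weights).sum(dim=('y', 'x'))))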