Python dask.delayed() Examples
The following are 30 code examples of dask.delayed(), drawn from open source projects. The original project and source file are noted above each example.
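Before diving into the project examples, here is a minimal, self-contained sketch of how dask.delayed() is typically used (the add function below is illustrative only, not taken from any of the projects):

import dask

# dask.delayed() wraps a plain function so that calling it builds a task
# graph instead of running immediately; dask.compute() executes the graph.
@dask.delayed
def add(x, y):
    return x + y

# No work happens here; only a graph of three tasks is built.
total = add(add(1, 2), 3)

# compute() runs the graph (in parallel where possible) and returns 6.
result = dask.compute(total)[0]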
Example #1
Source File: utils.py From verde with BSD 3-Clause "New" or "Revised" License
def dispatch(function, delayed=False, client=None):
    """
    Decide how to wrap a function for Dask depending on the options given.

    Parameters
    ----------
    function : callable
        The function that will be called.
    delayed : bool
        If True, will wrap the function in :func:`dask.delayed`.
    client : None or dask.distributed Client
        If *delayed* is False and *client* is not None, will return a partial
        execution of the ``client.submit`` with the function as first argument.

    Returns
    -------
    function : callable
        The function wrapped in Dask.

    """
    if delayed:
        return dask.delayed(function)
    if client is not None:
        return functools.partial(client.submit, function)
    return function
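As a rough illustration of how a dispatcher like this might be used (a hypothetical sketch, assuming dispatch and its dask/functools imports are in scope; it is not part of verde):

import dask
import numpy as np

# delayed=True returns dask.delayed(np.mean); calling it builds a lazy task.
lazy_mean = dispatch(np.mean, delayed=True)
result = dask.compute(lazy_mean(np.arange(10)))[0]  # 4.5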
Example #2
Source File: test_model_selection_sklearn.py From dask-ml with BSD 3-Clause "New" or "Revised" License
def test_hyperparameter_searcher_with_fit_params(cls, kwargs):
    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)
    clf = CheckingClassifier(expected_fit_params=["spam", "eggs"])
    pipe = Pipeline([("clf", clf)])
    searcher = cls(pipe, {"clf__foo_param": [1, 2, 3]}, cv=2, **kwargs)

    # The CheckingClassifer generates an assertion error if
    # a parameter is missing or has length != len(X).
    with pytest.raises(AssertionError) as exc:
        searcher.fit(X, y, clf__spam=np.ones(10))
    assert "Expected fit parameter(s) ['eggs'] not seen." in str(exc.value)

    searcher.fit(X, y, clf__spam=np.ones(10), clf__eggs=np.zeros(10))
    # Test with dask objects as parameters
    searcher.fit(
        X, y, clf__spam=da.ones(10, chunks=2), clf__eggs=dask.delayed(np.zeros(10))
    )
Example #3
Source File: views.py From AutoOut with MIT License
def detect_outliers(request):
    """
    Detect outliers end point
    """
    dataset_id = int(request.GET.get("dataset_id"))

    if dataset_id is None:
        return JsonResponse({"status": "failure", "message": "Dataset id is not provided"})

    dataset = Dataset.objects.get(pk=dataset_id)
    file_path = dataset.path
    delete_features = json.loads(dataset.deleted_features)

    # Create a detection experiment and start outlier detection
    process = Process.objects.get(name='Detection')
    process_status = ProcessStatus.objects.get(name='Running')
    experiment = Experiment(dataset=dataset, process=process, process_status=process_status)
    experiment.save()

    results = delayed(detect_all)(os.path.join(settings.MEDIA_ROOT, file_path),
                                  experiment.id, settings.RESULTS_ROOT, delete_features)
    dask.compute(results)

    return JsonResponse(
        {'status': 'success', 'message': 'Detection started successfully',
         'experiment_id': experiment.id})
Example #4
Source File: cache.py From intake with BSD 2-Clause "Simplified" License
def _load(self, files_in, files_out, urlpath, meta=True):
    """Download a set of files"""
    import dask
    out = []
    outnames = []
    for file_in, file_out in zip(files_in, files_out):
        cache_path = file_out.path
        outnames.append(cache_path)

        # If `_munge_path` did not find a match we want to avoid
        # writing to the urlpath.
        if cache_path == urlpath:
            continue

        if not os.path.isfile(cache_path):
            logger.debug("Caching file: {}".format(file_in.path))
            logger.debug("Original path: {}".format(urlpath))
            logger.debug("Cached at: {}".format(cache_path))
            if meta:
                self._log_metadata(urlpath, file_in.path, cache_path)
            ddown = dask.delayed(_download)
            out.append(ddown(file_in, file_out, self.blocksize, self.output))
    dask.compute(*out)
    return outnames
Example #5
Source File: test_dask_layers.py From napari with BSD 3-Clause "New" or "Revised" License
def delayed_dask_stack():
    """A 4D (20, 10, 10, 10) delayed dask array, simulates disk io."""
    # we will return a dict with a 'calls' variable that tracks call count
    output = {'calls': 0}

    # create a delayed version of function that simply generates np.arrays
    # but also counts when it has been called
    @dask.delayed
    def get_array():
        nonlocal output
        output['calls'] += 1
        return np.random.rand(10, 10, 10)

    # then make a mock "timelapse" of 3D stacks
    # see https://napari.org/tutorials/applications/dask.html for details
    _list = [get_array() for fn in range(20)]
    output['stack'] = da.stack(
        [da.from_delayed(i, shape=(10, 10, 10), dtype=np.float) for i in _list]
    )
    assert output['stack'].shape == (20, 10, 10, 10)
    return output
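The pattern above (wrapping a chunk-producing function with dask.delayed and assembling the pieces with da.from_delayed and da.stack) recurs in several of the later examples, e.g. #20 and #30. A minimal sketch of that pattern, with purely illustrative names:

import dask
import dask.array as da
import numpy as np

@dask.delayed
def load_chunk(i):
    # Stand-in for an expensive per-frame read (e.g. from disk).
    return np.full((10, 10), i, dtype=np.float64)

# Build a lazy (5, 10, 10) stack; nothing is loaded until compute().
stack = da.stack(
    [da.from_delayed(load_chunk(i), shape=(10, 10), dtype=np.float64)
     for i in range(5)]
)
assert stack.shape == (5, 10, 10)
first_frame = stack[0].compute()  # triggers only the first chunk's load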
Example #6
Source File: cross_registration.py From minian with GNU General Public License v3.0
def calculate_centroids_old(cnmds, window, grp_dim=['animal', 'session']):
    print("computing centroids")
    cnt_list = []
    for anm, cur_anm in cnmds.groupby('animal'):
        for ss, cur_ss in cur_anm.groupby('session'):
            # cnt = centroids(cur_ss['A_shifted'], window.sel(animal=anm))
            cnt = da.delayed(centroids)(
                cur_ss['A_shifted'], window.sel(animal=anm))
            cnt_list.append(cnt)
    with ProgressBar():
        cnt_list, = da.compute(cnt_list)
    cnts_ds = pd.concat(cnt_list, ignore_index=True)
    cnts_ds.height = cnts_ds.height.astype(float)
    cnts_ds.width = cnts_ds.width.astype(float)
    cnts_ds.unit_id = cnts_ds.unit_id.astype(int)
    cnts_ds.animal = cnts_ds.animal.astype(str)
    cnts_ds.session = cnts_ds.session.astype(str)
    cnts_ds.session_id = cnts_ds.session_id.astype(str)
    return cnts_ds
Example #7
Source File: cross_registration.py From minian with GNU General Public License v3.0
def calculate_centroid_distance_old(cents, A, window, grp_dim=['animal'],
                                    tile=(50, 50), shift=True, hamming=True,
                                    corr=False):
    dist_list = []
    A = da.delayed(A)
    for cur_anm, cur_grp in cents.groupby('animal'):
        print("processing animal: {}".format(cur_anm))
        cur_A = A.sel(animal=cur_anm)
        cur_wnd = window.sel(animal=cur_anm)
        dist = centroids_distance(cur_grp, cur_A, cur_wnd, shift, hamming,
                                  corr, tile)
        dist['meta', 'animal'] = cur_anm
        dist_list.append(dist)
    dist = pd.concat(dist_list, ignore_index=True)
    return dist
Example #8
Source File: cross_registration.py From minian with GNU General Public License v3.0
def centroids_distance_old(cents, A, window, shift, hamming, corr,
                           tile=(50, 50)):
    sessions = cents['session'].unique()
    dim_h = (np.min(cents['height']), np.max(cents['height']))
    dim_w = (np.min(cents['width']), np.max(cents['width']))
    dist_list = []
    for ssA, ssB in itt.combinations(sessions, 2):
        # dist = _calc_cent_dist(ssA, ssB, cents, cnmds, window, tile, dim_h, dim_w)
        dist = da.delayed(_calc_cent_dist)(ssA, ssB, cents, A, window, tile,
                                           dim_h, dim_w, shift, hamming, corr)
        dist_list.append(dist)
    with ProgressBar():
        dist_list, = da.compute(dist_list)
    dists = pd.concat(dist_list, ignore_index=True)
    return dists
Example #9
Source File: __init__.py From dask-image with BSD 3-Clause "New" or "Revised" License
def _labeled_comprehension_delayed(func,
                                   out_dtype,
                                   default,
                                   a,
                                   positions=None):
    """
    Wrapped delayed labeled comprehension function

    Included in the module for pickling purposes. Also handle cases where
    computation should not occur.
    """
    result = numpy.empty((1,), dtype=out_dtype)

    if a.size:
        if positions is None:
            result[0] = func(a)
        else:
            result[0] = func(a, positions)
    else:
        result[0] = default[0]

    return result
Example #10
Source File: __init__.py From dask-image with BSD 3-Clause "New" or "Revised" License
def _labeled_comprehension_func(func,
                                out_dtype,
                                default,
                                a,
                                positions=None):
    """
    Wrapped labeled comprehension function

    Ensures the result is a proper Dask Array and the computation delayed.
    """
    return dask.array.from_delayed(
        _labeled_comprehension_delayed(func, out_dtype, default, a, positions),
        (1,),
        out_dtype
    )
Example #11
Source File: __init__.py From satpy with GNU General Public License v3.0
def three_d_effect(img, **kwargs):
    """Create 3D effect using convolution."""
    w = kwargs.get('weight', 1)
    LOG.debug("Applying 3D effect with weight %.2f", w)
    kernel = np.array([[-w, 0, w],
                       [-w, 1, w],
                       [-w, 0, w]])
    mode = kwargs.get('convolve_mode', 'same')

    def func(band_data, kernel=kernel, mode=mode, index=None):
        del index

        delay = dask.delayed(_three_d_effect_delayed)(band_data, kernel, mode)
        new_data = da.from_delayed(delay, shape=band_data.shape,
                                   dtype=band_data.dtype)
        return new_data

    return apply_enhancement(img.data, func, separate=True, pass_dask=True)
Example #12
Source File: resample.py From satpy with GNU General Public License v3.0
def _call_ll2cr(self, lons, lats, target_geo_def, swath_usage=0):
    """Wrap ll2cr() for handling dask delayed calls better."""
    new_src = SwathDefinition(lons, lats)
    swath_points_in_grid, cols, rows = ll2cr(new_src, target_geo_def)
    # FIXME: How do we check swath usage/coverage if we only do this
    #        per-block
    # # Determine if enough of the input swath was used
    # grid_name = getattr(self.target_geo_def, "name", "N/A")
    # fraction_in = swath_points_in_grid / float(lons.size)
    # swath_used = fraction_in > swath_usage
    # if not swath_used:
    #     LOG.info("Data does not fit in grid %s because it only %f%% of "
    #              "the swath is used" %
    #              (grid_name, fraction_in * 100))
    #     raise RuntimeError("Data does not fit in grid %s" % (grid_name,))
    # else:
    #     LOG.debug("Data fits in grid %s and uses %f%% of the swath",
    #               grid_name, fraction_in * 100)

    return np.stack([cols, rows], axis=0)
Example #13
Source File: resample.py From satpy with GNU General Public License v3.0
def _call_fornav(self, cols, rows, target_geo_def, data,
                 grid_coverage=0, **kwargs):
    """Wrap fornav() to run as a dask delayed."""
    num_valid_points, res = fornav(cols, rows, target_geo_def,
                                   data, **kwargs)

    if isinstance(data, tuple):
        # convert 'res' from tuple of arrays to one array
        res = np.stack(res)
        num_valid_points = sum(num_valid_points)

    grid_covered_ratio = num_valid_points / float(res.size)
    grid_covered = grid_covered_ratio > grid_coverage
    if not grid_covered:
        msg = "EWA resampling only found %f%% of the grid covered " \
              "(need %f%%)" % (grid_covered_ratio * 100,
                               grid_coverage * 100)
        raise RuntimeError(msg)
    LOG.debug("EWA resampling found %f%% of the grid covered" %
              (grid_covered_ratio * 100))

    return res
Example #14
Source File: benchmark.py From SDV with MIT License
def benchmark(datasets=None, datasets_path=None, distributed=True, timeout=None):
    if datasets is None:
        if datasets_path is None:
            datasets = get_available_demos().name
        else:
            datasets = os.listdir(datasets_path)

    if distributed:
        import dask

        global score_dataset
        score_dataset = dask.delayed(score_dataset)

    scores = list()
    for dataset in datasets:
        scores.append(score_dataset(dataset, datasets_path, timeout))

    if distributed:
        scores = dask.compute(*scores)

    return pd.DataFrame(scores)
Example #15
Source File: _blockwise.py From dask-ml with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y, **kwargs):
    X = self._check_array(X)
    estimatord = dask.delayed(self.estimator)

    Xs = X.to_delayed()
    ys = y.to_delayed()
    if isinstance(X, da.Array):
        Xs = Xs.flatten()
    if isinstance(y, da.Array):
        ys = ys.flatten()

    if len(Xs) != len(ys):
        raise ValueError(
            f"The number of blocks in X and y must match. {len(Xs)} != {len(ys)}"
        )

    estimators = [
        dask.delayed(sklearn.base.clone)(estimatord) for _ in range(len(Xs))
    ]
    results = [
        estimator_.fit(X_, y_, **kwargs)
        for estimator_, X_, y_, in zip(estimators, Xs, ys)
    ]
    results = list(dask.compute(*results))
    self.estimators_ = results
Example #16
Source File: utils.py From dask-ml with BSD 3-Clause "New" or "Revised" License
def to_keys(dsk, *args):
    for x in args:
        if x is None:
            yield None
        elif isinstance(x, (da.Array, dd.DataFrame)):
            x = delayed(x)
            dsk.update(x.dask)
            yield x.key
        elif isinstance(x, Delayed):
            dsk.update(x.dask)
            yield x.key
        else:
            assert not is_dask_collection(x)
            key = type(x).__name__ + "-" + tokenize(x)
            dsk[key] = x
            yield key
Example #17
Source File: viirs.py From satpy with GNU General Public License v3.0
def __call__(self, datasets, **info):
    """Create the composite by scaling the DNB data using a histogram equalization method.

    :param datasets: 2-element tuple (Day/Night Band data, Solar Zenith Angle data)
    :param **info: Miscellaneous metadata for the newly produced composite
    """
    if len(datasets) != 2:
        raise ValueError("Expected 2 datasets, got %d" % (len(datasets), ))

    dnb_data = datasets[0]
    sza_data = datasets[1]
    delayed = dask.delayed(self._run_dnb_normalization)(dnb_data.data, sza_data.data)
    output_dataset = dnb_data.copy()
    output_data = da.from_delayed(delayed, dnb_data.shape, dnb_data.dtype)
    output_dataset.data = output_data.rechunk(dnb_data.data.chunks)

    info = dnb_data.attrs.copy()
    info.update(self.attrs)
    info["standard_name"] = "equalized_radiance"
    info["mode"] = "L"
    output_dataset.attrs = info
    return output_dataset
Example #18
Source File: _search.py From dask-ml with BSD 3-Clause "New" or "Revised" License
def check_cv(cv=3, y=None, classifier=False):
    """Dask aware version of ``sklearn.model_selection.check_cv``

    Same as the scikit-learn version, but works if ``y`` is a dask object.
    """
    if cv is None:
        cv = 3

    # If ``cv`` is not an integer, the scikit-learn implementation doesn't
    # touch the ``y`` object, so passing on a dask object is fine
    if not is_dask_collection(y) or not isinstance(cv, numbers.Integral):
        return model_selection.check_cv(cv, y, classifier=classifier)

    if classifier:
        # ``y`` is a dask object. We need to compute the target type
        target_type = delayed(type_of_target, pure=True)(y).compute()
        if target_type in ("binary", "multiclass"):
            return StratifiedKFold(cv)
    return KFold(cv)
Example #19
Source File: test_update.py From kartothek with MIT License
def _update_dataset(partitions, *args, **kwargs):
    # TODO: fix the parsing below to adapt for all supported formats
    #       (see: parse_input_to_metapartition)
    if any(partitions):
        table_name = next(iter(dict(partitions[0]["data"]).keys()))
        delayed_partitions = [
            dask.delayed(_unwrap_partition)(part) for part in partitions
        ]
        partitions = dd.from_delayed(delayed_partitions)
    else:
        table_name = "core"
        partitions = None

    ddf = update_dataset_from_ddf(partitions, *args, table=table_name, **kwargs)

    s = pickle.dumps(ddf, pickle.HIGHEST_PROTOCOL)
    ddf = pickle.loads(s)

    return ddf.compute()
Example #20
Source File: utilities.py From minian with GNU General Public License v3.0
def load_avi_lazy(fname):
    cap = cv2.VideoCapture(fname)
    f = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fmread = da.delayed(load_avi_perframe)
    flist = [fmread(fname, i) for i in range(f)]
    sample = flist[0].compute()
    arr = [da.array.from_delayed(
        fm, dtype=sample.dtype, shape=sample.shape) for fm in flist]
    return da.array.stack(arr, axis=0)
Example #21
Source File: visualization.py From minian with GNU General Public License v3.0
def write_video(arr, vname=None, vpath='.'):
    if not vname:
        vname = "{}.mp4".format(uuid4())
    fname = os.path.join(vpath, vname)
    paths = [dask.delayed(write_vid_blk)(np.asscalar(a), vpath)
             for a in arr.data.to_delayed()]
    paths = dask.compute(paths)[0]
    streams = [ffmpeg.input(p) for p in paths]
    (ffmpeg.concat(*streams)
     .output(fname)
     .run())
    for vp in paths:
        os.remove(vp)
    return fname
Example #22
Source File: preprocessing.py From minian with GNU General Public License v3.0
def detect_brightspot_perframe(varray, thres=0.95):
    print("creating parallel schedule")
    spots = []
    for fid, fm in varray.rolling(frame=1):
        sp = delayed(lambda f: f > f.quantile(thres, interpolation='lower'))(
            fm)
        spots.append(sp)
    with ProgressBar():
        print("detecting bright spots by frame")
        spots, = compute(spots)
    print("concatenating results")
    spots = xr.concat(spots, dim='frame')
    return spots


# def correct_dust(varray, dust):
#     mov_corr = varray.values
#     nz = np.nonzero(dust)
#     nz_tp = [(d0, d1) for d0, d1 in zip(nz[0], nz[1])]
#     for i in range(np.count_nonzero(dust)):
#         cur_dust = (nz[0][i], nz[1][i])
#         cur_sur = set(
#             itt.product(
#                 range(cur_dust[0] - 1, cur_dust[0] + 2),
#                 range(cur_dust[1] - 1, cur_dust[1] + 2))) - set(
#                     cur_dust) - set(nz_tp)
#         cur_sur = list(
#             filter(
#                 lambda d: 0 < d[0] < mov.shape[1] and 0 < d[1] < mov.shape[2],
#                 cur_sur))
#         if len(cur_sur) > 0:
#             sur_arr = np.empty((mov.shape[0], len(cur_sur)))
#             for si, sur in enumerate(cur_sur):
#                 sur_arr[:, si] = mov[:, sur[0], sur[1]]
#             mov_corr[:, cur_dust[0], cur_dust[1]] = np.mean(sur_arr, axis=1)
#         else:
#             print("unable to correct for point ({}, {})".format(
#                 cur_dust[0], cur_dust[1]))
#     return mov_corr
Example #23
Source File: test_tasks.py From qmt with MIT License
def test_run_dask(fix_task_env):
    import numpy as np
    from dask import delayed as dl
    from dask.distributed import Client

    dc = Client(processes=False)

    input_task_example, gathered_task_example, post_processing_task_example = (
        fix_task_env
    )
    parts = {"a": [0.0, 1.0, 2.0], "b": [-3.0, 10.0, 2.0], "c": [20.0]}
    numpoints = 20
    prefactor = 0.1

    input_delayed = dl(input_task_example)(parts)
    gathered_delayed = dl(gathered_task_example, nout=1)([input_delayed], [numpoints])[
        0
    ]
    post_proc_delayed = dl(post_processing_task_example)(
        input_delayed, gathered_delayed, prefactor
    )
    input_future = dc.compute(input_delayed)
    gathered_future = dc.compute(gathered_delayed)
    post_proc_future = dc.compute(post_proc_delayed)
    input_data = input_future.result()
    gathered_data = gathered_future.result()
    post_proc_data = post_proc_future.result()

    assert input_data == parts
    gather_results = {}
    for part in parts:
        gather_results[part] = np.linspace(0.0, 1.0, numpoints)
    for part in gather_results:
        assert np.all(gathered_data[part] == gather_results[part])
    post_proc_results = 0.0
    for part in parts:
        post_proc_results += (
            prefactor * np.sum(input_data[part]) * np.sum(gather_results[part])
        )
    assert post_proc_data == post_proc_results
Example #24
Source File: textfiles.py From intake with BSD 2-Clause "Simplified" License
def to_dask(self):
    import dask.bag as db
    from dask import delayed
    self._get_schema()
    dfile = delayed(get_file)
    return db.from_delayed([dfile(f, self.decoder, self._read)
                            for f in self._files])
Example #25
Source File: czi_reader.py From aicsimageio with BSD 3-Clause "New" or "Revised" License
def _read_delayed(self) -> da.core.Array:
    """
    Returns
    -------
    Constructed dask array where each chunk is a delayed read from the CZI file.
    Places dimensions in the native order (i.e. "TZCYX")
    """
    dask_array, _ = CziReader._daread_safe(
        self._file, chunk_by_dims=self.chunk_by_dims, S=self.specific_s_index
    )
    return dask_array
Example #26
Source File: _label.py From dask-image with BSD 3-Clause "New" or "Revised" License
def _across_block_label_grouping_delayed(face, structure):
    """Delayed version of :func:`_across_block_label_grouping`."""
    _across_block_label_grouping_ = dask.delayed(_across_block_label_grouping)
    grouped = _across_block_label_grouping_(face, structure)
    return dask.array.from_delayed(grouped, shape=(2, numpy.nan),
                                   dtype=LABEL_DTYPE)
Example #27
Source File: _label.py From dask-image with BSD 3-Clause "New" or "Revised" License
def label_adjacency_graph(labels, structure, nlabels):
    """
    Adjacency graph of labels between chunks of ``labels``.

    Each chunk in ``labels`` has been labeled independently, and the labels
    in different chunks are guaranteed to be unique.

    Here we construct a graph connecting labels in different chunks that
    correspond to the same logical label in the global volume. This is true
    if the two labels "touch" across the block face as defined by the input
    ``structure``.

    Parameters
    ----------
    labels : dask array of int
        The input labeled array, where each chunk is independently labeled.
    structure : array of bool
        Structuring element, shape (3,) * labels.ndim.
    nlabels : delayed int
        The total number of labels in ``labels`` *before* correcting for
        global consistency.

    Returns
    -------
    mat : delayed scipy.sparse.csr_matrix
        This matrix has value 1 at (i, j) if label i is connected to
        label j in the global volume, 0 everywhere else.
    """
    faces = _chunk_faces(labels.chunks, labels.shape)
    all_mappings = [dask.array.empty((2, 0), dtype=LABEL_DTYPE, chunks=1)]
    for face_slice in faces:
        face = labels[face_slice]
        mapped = _across_block_label_grouping_delayed(face, structure)
        all_mappings.append(mapped)
    all_mappings = dask.array.concatenate(all_mappings, axis=1)
    i, j = all_mappings
    mat = _to_csr_matrix(i, j, nlabels + 1)
    return mat
Example #28
Source File: _label.py From dask-image with BSD 3-Clause "New" or "Revised" License
def connected_components_delayed(csr_matrix):
    """
    Delayed version of scipy.sparse.csgraph.connected_components.

    This version only returns the (delayed) connected component labelling, not
    the number of components.
    """
    conn_comp = dask.delayed(scipy.sparse.csgraph.connected_components, nout=2)
    return dask.array.from_delayed(conn_comp(csr_matrix, directed=False)[1],
                                   shape=(numpy.nan,), dtype=CONN_COMP_DTYPE)
Example #29
Source File: aapp_l1b.py From satpy with GNU General Public License v3.0
def get_angles(self, angle_id):
    """Get sun-satellite viewing angles."""
    sunz40km = self._data["ang"][:, :, 0] * 1e-2
    satz40km = self._data["ang"][:, :, 1] * 1e-2
    azidiff40km = self._data["ang"][:, :, 2] * 1e-2

    try:
        from geotiepoints.interpolator import Interpolator
    except ImportError:
        logger.warning("Could not interpolate sun-sat angles, "
                       "python-geotiepoints missing.")
        self.sunz, self.satz, self.azidiff = sunz40km, satz40km, azidiff40km
    else:
        cols40km = np.arange(24, 2048, 40)
        cols1km = np.arange(2048)
        lines = sunz40km.shape[0]
        rows40km = np.arange(lines)
        rows1km = np.arange(lines)

        along_track_order = 1
        cross_track_order = 3

        satint = Interpolator(
            [sunz40km, satz40km, azidiff40km], (rows40km, cols40km),
            (rows1km, cols1km), along_track_order, cross_track_order)
        self.sunz, self.satz, self.azidiff = delayed(satint.interpolate,
                                                     nout=3)()
        self.sunz = da.from_delayed(self.sunz, (lines, 2048), sunz40km.dtype)
        self.satz = da.from_delayed(self.satz, (lines, 2048), satz40km.dtype)
        self.azidiff = da.from_delayed(self.azidiff, (lines, 2048),
                                       azidiff40km.dtype)

    return create_xarray(getattr(self, ANGLES[angle_id]))
Example #30
Source File: utilities.py From minian with GNU General Public License v3.0
def load_tif_lazy(fname):
    with TiffFile(fname) as tif:
        data = tif.asarray()
    f = int(data.shape[0])
    fmread = da.delayed(load_tif_perframe)
    flist = [fmread(fname, i) for i in range(f)]
    sample = flist[0].compute()
    arr = [da.array.from_delayed(
        fm, dtype=sample.dtype, shape=sample.shape) for fm in flist]
    return da.array.stack(arr, axis=0)