Python dask.compute() Examples
The following are 30 code examples of dask.compute().
You can go to the original project or source file by following the reference above each example.
You may also want to check out all available functions and classes of the dask module.
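Before the project-specific examples, here is a minimal, self-contained sketch of the pattern most of them share: wrap ordinary functions with dask.delayed to build lazy tasks, then evaluate them all with a single dask.compute() call. The double function and its inputs are illustrative placeholders, not taken from any of the projects below.

import dask

@dask.delayed
def double(x):
    # Nothing runs here; calling a delayed function only records a task.
    return 2 * x

tasks = [double(i) for i in range(4)]
# dask.compute() evaluates every delayed object passed to it and
# returns a tuple of concrete results, one per argument.
results = dask.compute(*tasks)
print(results)  # (0, 2, 4, 6)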
Example #1
Source File: k_means.py From dask-ml with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y=None):
    X = self._check_array(X)
    labels, centroids, inertia, n_iter = k_means(
        X,
        self.n_clusters,
        oversampling_factor=self.oversampling_factor,
        random_state=self.random_state,
        init=self.init,
        return_n_iter=True,
        max_iter=self.max_iter,
        init_max_iter=self.init_max_iter,
        tol=self.tol,
    )
    self.cluster_centers_ = centroids
    self.labels_ = labels
    self.inertia_ = inertia.compute().item()
    self.n_iter_ = n_iter
    self.n_features_in_ = X.shape[1]
    return self
Example #2
Source File: cache.py From intake with BSD 2-Clause "Simplified" License
def _load(self, files_in, files_out, urlpath, meta=True):
    """Download a set of files"""
    import dask
    out = []
    outnames = []
    for file_in, file_out in zip(files_in, files_out):
        cache_path = file_out.path
        outnames.append(cache_path)

        # If `_munge_path` did not find a match we want to avoid
        # writing to the urlpath.
        if cache_path == urlpath:
            continue
        if not os.path.isfile(cache_path):
            logger.debug("Caching file: {}".format(file_in.path))
            logger.debug("Original path: {}".format(urlpath))
            logger.debug("Cached at: {}".format(cache_path))
            if meta:
                self._log_metadata(urlpath, file_in.path, cache_path)
            ddown = dask.delayed(_download)
            out.append(ddown(file_in, file_out, self.blocksize, self.output))
    dask.compute(*out)
    return outnames
Example #3
Source File: semistructured.py From intake with BSD 2-Clause "Simplified" License
def _data_to_source(b, path, encoder=None, storage_options=None, **kwargs):
    import dask.bag as db
    import posixpath
    from fsspec import open_files
    import dask
    import pickle
    import json
    from intake.source.textfiles import TextFilesSource

    encoder = {None: str, 'str': str, 'json': json.dumps,
               'pickle': pickle.dumps}.get(encoder, encoder)
    if not hasattr(b, 'to_textfiles'):
        try:
            b = db.from_sequence(b, npartitions=1)
        except TypeError:
            raise NotImplementedError

    files = open_files(posixpath.join(path, 'part.*'), mode='wt',
                       num=b.npartitions, **(storage_options or {}))
    dwrite = dask.delayed(write_file)
    out = [dwrite(part, f, encoder)
           for part, f in zip(b.to_delayed(), files)]
    dask.compute(out)
    s = TextFilesSource(posixpath.join(path, 'part.*'),
                        storage_options=storage_options)
    return s
Example #4
Source File: views.py From AutoOut with MIT License
def detect_outliers(request):
    """
    Detect outliers end point
    """
    dataset_id = int(request.GET.get("dataset_id"))

    if dataset_id is None:
        return JsonResponse({"status": "failure",
                             "message": "Dataset id is not provided"})

    dataset = Dataset.objects.get(pk=dataset_id)
    file_path = dataset.path
    delete_features = json.loads(dataset.deleted_features)

    # Create a detection experiment and start outlier detection
    process = Process.objects.get(name='Detection')
    process_status = ProcessStatus.objects.get(name='Running')
    experiment = Experiment(dataset=dataset, process=process,
                            process_status=process_status)
    experiment.save()

    results = delayed(detect_all)(os.path.join(settings.MEDIA_ROOT, file_path),
                                  experiment.id, settings.RESULTS_ROOT,
                                  delete_features)
    dask.compute(results)

    return JsonResponse(
        {'status': 'success',
         'message': 'Detection started successfully',
         'experiment_id': experiment.id})
Example #5
Source File: catalog.py From nbodykit with GNU General Public License v3.0
def persist(self, columns=None):
    """
    Return a CatalogSource, where the selected columns are
    computed and persist in memory.
    """
    import dask.array as da
    if columns is None:
        columns = self.columns

    r = {}
    for key in columns:
        r[key] = self[key]

    r = da.compute(r)[0]  # particularity of dask

    from nbodykit.source.catalog.array import ArrayCatalog
    c = ArrayCatalog(r, comm=self.comm)
    c.attrs.update(self.attrs)
    return c
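The da.compute(r)[0] line above (the "particularity of dask" noted in the source) works because compute traverses built-in containers such as dicts and returns a one-element tuple holding the computed container. A minimal sketch of that behavior using the top-level dask.compute and made-up array shapes, not taken from nbodykit:

import dask
import dask.array as da

lazy = {"x": da.ones((4,), chunks=2), "y": da.zeros((4,), chunks=2)}
# Passing a single dict yields a one-element tuple whose only item is
# the same dict with every dask array replaced by a NumPy array.
computed = dask.compute(lazy)[0]
print(computed["x"])  # [1. 1. 1. 1.]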
Example #6
Source File: input_pipeline_dask.py From professional-services with Apache License 2.0
def calculate_stats(cls, df, target_var):
    """Calculates descriptive stats of the dataframe required for cleaning.

    Arguments:
        df : dask dataframe, The dataframe at hand
        target_var : string, Dependent variable for the analysis

    Returns:
        mean : dask series, mean of each column
        median : dask series, median of each column
        dict(zip(categorical_cols, mode)) : dict, Dictionary containing
            categorical column as keys and their modes as values
        std : dask series, standard deviation of each column
    """
    categorical_columns = [
        col for col in df.columns
        if col != target_var and df[col].dtype == 'object']
    mean_op = df.mean()
    std_op = df.std()
    median_op = df.quantile(0.5)
    mode_op = [df[col].value_counts().idxmax()
               for col in categorical_columns]
    mean, median, mode, std = dask.compute(
        mean_op, median_op, mode_op, std_op)
    return mean, median, dict(zip(categorical_columns, mode)), std
Example #7
Source File: input_pipeline_dask.py From professional-services with Apache License 2.0
def impute(cls, df, target_var, median, mode):
    """Imputing missing values using median for continuous columns
    and mode for categorical columns.

    Arguments:
        df : dask dataframe, The dataframe at hand
        target_var : string, Dependent variable for the analysis
        median : list, median of all columns in data
        mode : list, mode of all columns in data

    Returns:
        df : dask dataframe, Dataframe without missing values
    """
    missing_stats = df.isna().sum().compute()
    cols = [col for col in df.columns if col != target_var]
    for col in cols:
        if missing_stats[col] > 0 and df[col].dtype == 'object':
            df[col] = df[col].fillna(mode[col])
        elif missing_stats[col] > 0:
            df[col] = df[col].fillna(median[col])
    return df
Example #8
Source File: input_pipeline_dask.py From professional-services with Apache License 2.0
def kmeans_input_fn(self, name, csv_path=None):
    """Input function for kmeans

    Arguments:
        name : string, Name of the data [Train or Eval]
        csv_path : The path of the csv on any storage system

    Returns:
        A batch of features
    """
    pattern = self._get_pattern(name, csv_path)
    tf.logging.info('The Pattern of files is : %s', pattern)
    df = dd.read_csv(pattern)
    vectors = dask.compute(df.values)
    return tf.train.limit_epochs(
        tf.convert_to_tensor(vectors[0], dtype=tf.float32), num_epochs=1)
Example #9
Source File: tests_input_dask.py From professional-services with Apache License 2.0
def test_clean_data(self):
    """Testing function clean_csv"""
    copyfile(CSV_PATH, '/tmp/data.csv')
    iread = self.init_inputreader()
    stats = self.init_basicstats()
    ddf, _ = iread._parse_csv()
    data, mean, std_dev, csv_defaults = stats.clean_data(
        df=ddf,
        task_type=TASK_TYPE,
        target_var=TARGET_VAR,
        name=NAME
    )
    self_computed_mean = dask.compute(ddf.mean())
    self.assertListEqual(list(mean), list(self_computed_mean[0]))
    self_computed_std_dev = dask.compute(ddf.std(axis=0, skipna=True))
    self.assertListEqual(list(std_dev), list(self_computed_std_dev[0]))
    self.assertIsInstance(data, dask.dataframe.core.DataFrame)
    self.assertIsInstance(mean, pd.core.series.Series)
    self.assertIsInstance(std_dev, pd.core.series.Series)
    self.assertIsInstance(csv_defaults, list)
Example #10
Source File: tests_input_dask.py From professional-services with Apache License 2.0
def test_calculate_stats(self):
    """Testing function calculate_stats"""
    iread = self.init_inputreader()
    stats = self.init_basicstats()
    ddf, _ = iread._parse_csv()
    mean, median, mode_dict, std_dev = stats.calculate_stats(
        df=ddf,
        target_var=TARGET_VAR
    )
    self_computed_mean = dask.compute(ddf.mean())
    self.assertListEqual(list(mean), list(self_computed_mean[0]))
    self_computed_std_dev = dask.compute(ddf.std(axis=0, skipna=True))
    self.assertListEqual(list(std_dev), list(self_computed_std_dev[0]))
    self_computed_median = dask.compute(ddf.quantile(0.5))
    self.assertListEqual(list(median), list(self_computed_median[0]))
    self.assertIsInstance(mean, pd.core.series.Series)
    self.assertIsInstance(std_dev, pd.core.series.Series)
    self.assertIsInstance(median, pd.core.series.Series)
    self.assertIsInstance(mode_dict, dict)
Example #11
Source File: tests_input_dask.py From professional-services with Apache License 2.0
def test_impute(self):
    """Testing function impute"""
    iread = self.init_inputreader()
    stats = self.init_basicstats()
    ddf, _ = iread._parse_csv()
    _, median, _, _ = stats.calculate_stats(
        df=ddf,
        target_var=TARGET_VAR
    )
    data = stats.impute(
        df=ddf,
        target_var=TARGET_VAR,
        median=median,
        mode=MODE
    )
    imputed_data = dask.compute(data.isnull().sum())
    rows = ddf.columns
    for row in rows:
        col = imputed_data[0][row]
        self.assertEqual(col, 0)
    self.assertIsInstance(data, dask.dataframe.core.DataFrame)
Example #12
Source File: cross_registration.py From minian with GNU General Public License v3.0
def calculate_centroids_old(cnmds, window, grp_dim=['animal', 'session']):
    print("computing centroids")
    cnt_list = []
    for anm, cur_anm in cnmds.groupby('animal'):
        for ss, cur_ss in cur_anm.groupby('session'):
            # cnt = centroids(cur_ss['A_shifted'], window.sel(animal=anm))
            cnt = da.delayed(centroids)(
                cur_ss['A_shifted'], window.sel(animal=anm))
            cnt_list.append(cnt)
    with ProgressBar():
        cnt_list, = da.compute(cnt_list)
    cnts_ds = pd.concat(cnt_list, ignore_index=True)
    cnts_ds.height = cnts_ds.height.astype(float)
    cnts_ds.width = cnts_ds.width.astype(float)
    cnts_ds.unit_id = cnts_ds.unit_id.astype(int)
    cnts_ds.animal = cnts_ds.animal.astype(str)
    cnts_ds.session = cnts_ds.session.astype(str)
    cnts_ds.session_id = cnts_ds.session_id.astype(str)
    return cnts_ds
Example #13
Source File: cross_registration.py From minian with GNU General Public License v3.0
def centroids_distance_old(cents, A, window, shift, hamming, corr,
                           tile=(50, 50)):
    sessions = cents['session'].unique()
    dim_h = (np.min(cents['height']), np.max(cents['height']))
    dim_w = (np.min(cents['width']), np.max(cents['width']))
    dist_list = []
    for ssA, ssB in itt.combinations(sessions, 2):
        # dist = _calc_cent_dist(ssA, ssB, cents, cnmds, window, tile, dim_h, dim_w)
        dist = da.delayed(_calc_cent_dist)(ssA, ssB, cents, A, window, tile,
                                           dim_h, dim_w, shift, hamming, corr)
        dist_list.append(dist)
    with ProgressBar():
        dist_list, = da.compute(dist_list)
    dists = pd.concat(dist_list, ignore_index=True)
    return dists
Example #14
Source File: cnmf.py From minian with GNU General Public License v3.0
def get_noise_welch(varr, noise_range=(0.25, 0.5),
                    noise_method='logmexp', compute=True):
    print("estimating noise")
    sn = xr.apply_ufunc(
        noise_welch,
        varr.chunk(dict(frame=-1)),
        input_core_dims=[['frame']],
        dask='parallelized',
        vectorize=True,
        kwargs=dict(noise_range=noise_range, noise_method=noise_method),
        output_dtypes=[varr.dtype])
    if compute:
        sn = sn.compute()
    return sn
Example #15
Source File: metsim.py From MetSim with GNU General Public License v3.0
def run(self):
    self._validate_setup()
    write_locks = {}
    for times in self._times:
        filename = self._get_output_filename(times)
        self.setup_netcdf_output(filename, times)
        write_locks[filename] = combine_locks(
            [NETCDFC_LOCK, get_write_lock(filename)])
    self.logger.info('Starting {} chunks...'.format(len(self.slices)))

    delayed_objs = [wrap_run_slice(self.params, write_locks, dslice)
                    for dslice in self.slices]
    persisted = dask.persist(delayed_objs,
                             num_workers=self.params['num_workers'])
    self.progress_bar(persisted)
    dask.compute(persisted)
    self.logger.info('Cleaning up...')
    try:
        self._client.cluster.close()
        self._client.close()
        if self.params['verbose'] == logging.DEBUG:
            print()
            print('closed dask cluster/client')
    except Exception:
        pass
Example #16
Source File: benchmark.py From SDV with MIT License
def benchmark(datasets=None, datasets_path=None, distributed=True,
              timeout=None):
    if datasets is None:
        if datasets_path is None:
            datasets = get_available_demos().name
        else:
            datasets = os.listdir(datasets_path)

    if distributed:
        import dask
        global score_dataset
        score_dataset = dask.delayed(score_dataset)

    scores = list()
    for dataset in datasets:
        scores.append(score_dataset(dataset, datasets_path, timeout))

    if distributed:
        scores = dask.compute(*scores)

    return pd.DataFrame(scores)
Example #17
Source File: _blockwise.py From dask-ml with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y, **kwargs):
    X = self._check_array(X)
    estimatord = dask.delayed(self.estimator)

    Xs = X.to_delayed()
    ys = y.to_delayed()
    if isinstance(X, da.Array):
        Xs = Xs.flatten()
    if isinstance(y, da.Array):
        ys = ys.flatten()

    if len(Xs) != len(ys):
        raise ValueError(
            f"The number of blocks in X and y must match. {len(Xs)} != {len(ys)}"
        )

    estimators = [
        dask.delayed(sklearn.base.clone)(estimatord) for _ in range(len(Xs))
    ]
    results = [
        estimator_.fit(X_, y_, **kwargs)
        for estimator_, X_, y_, in zip(estimators, Xs, ys)
    ]
    results = list(dask.compute(*results))
    self.estimators_ = results
Example #18
Source File: semistructured.py From intake with BSD 2-Clause "Simplified" License
def _get_partition(self, i):
    self._load_metadata()
    return self.parts[i].compute()
Example #19
Source File: preprocessing.py From minian with GNU General Public License v3.0
def remove_background_old(varray, window=51):
    print("creating parallel schedule")
    varr_ft = varray.astype(np.float32)
    compute_list = []
    for fid in varr_ft.coords['frame'].values:
        fm = varr_ft.loc[dict(frame=fid)]
        _ = delayed(remove_background_perframe_old)(fid, fm, varr_ft, window)
        compute_list.append(_)
    with ProgressBar():
        print("removing background")
        compute(compute_list)
    print("normalizing result")
    varr_ft = scale_varr(varr_ft, (0, 255)).astype(varray.dtype, copy=False)
    print("background removal done")
    return varr_ft.rename(varray.name + "_Filtered")
Example #20
Source File: test_merge.py From kartothek with MIT License
def _merge_datasets(*args, **kwargs):
    df_list = merge_datasets_as_delayed(*args, **kwargs)
    s = pickle.dumps(df_list, pickle.HIGHEST_PROTOCOL)
    df_list = pickle.loads(s)
    return dask.compute(df_list)[0]
Example #21
Source File: initialization.py From minian with GNU General Public License v3.0
def seeds_init(varr, wnd_size=500, method='rolling', stp_size=200,
               nchunk=100, max_wnd=10, diff_thres=2):
    print("constructing chunks")
    idx_fm = varr.coords['frame']
    nfm = len(idx_fm)
    if method == 'rolling':
        nstp = np.ceil(nfm / stp_size) + 1
        centers = np.linspace(0, nfm - 1, nstp)
        hwnd = np.ceil(wnd_size / 2)
        max_idx = list(
            map(lambda c: slice(int(np.floor(c - hwnd).clip(0)),
                                int(np.ceil(c + hwnd))),
                centers))
    elif method == 'random':
        max_idx = [
            np.random.randint(0, nfm - 1, wnd_size) for _ in range(nchunk)
        ]
    res = []
    print("creating parallel scheme")
    res = [max_proj_frame(varr, cur_idx) for cur_idx in max_idx]
    max_res = xr.concat(res, 'sample').chunk(dict(sample=10))
    print("computing max projections")
    max_res = max_res.persist()
    print("calculating local maximum")
    loc_max = xr.apply_ufunc(
        local_max_roll,
        max_res.chunk(dict(height=-1, width=-1)),
        input_core_dims=[['height', 'width']],
        output_core_dims=[['height', 'width']],
        vectorize=True,
        dask='parallelized',
        output_dtypes=[np.uint8],
        kwargs=dict(k0=2, k1=max_wnd, diff=diff_thres)).sum('sample')
    loc_max = loc_max.compute()
    loc_max_flt = loc_max.stack(spatial=['height', 'width'])
    seeds = (loc_max_flt.where(loc_max_flt > 0, drop=True)
             .rename('seeds').to_dataframe().reset_index())
    return seeds[['height', 'width', 'seeds']].reset_index()
Example #22
Source File: test_gc.py From kartothek with MIT License
def _run_garbage_collect(*args, **kwargs):
    tasks = garbage_collect_dataset__delayed(*args, **kwargs)
    s = pickle.dumps(tasks, pickle.HIGHEST_PROTOCOL)
    tasks = pickle.loads(s)
    dask.compute(tasks)
Example #23
Source File: test_delete.py From kartothek with MIT License
def _delete(*args, **kwargs):
    tasks = delete_dataset__delayed(*args, **kwargs)
    s = pickle.dumps(tasks, pickle.HIGHEST_PROTOCOL)
    tasks = pickle.loads(s)
    dask.compute(tasks)
Example #24
Source File: semistructured.py From intake with BSD 2-Clause "Simplified" License
def read(self):
    self._load_metadata()
    return self.bag.compute()
Example #25
Source File: visualization.py From minian with GNU General Public License v3.0
def compute_subs(self, clicks=None):
    self.A_sub = self.A_sub.compute()
    self.C_sub = self.C_sub.compute()
    self.S_sub = self.S_sub.compute()
    self.org_sub = self.org_sub.compute()
    self.C_norm_sub = self.C_norm_sub.compute()
    self.S_norm_sub = self.S_norm_sub.compute()
Example #26
Source File: visualization.py From minian with GNU General Public License v3.0
def _temp_comp_sub(self, usub=None):
    if usub is None:
        usub = self.strm_usub.usub
    if self._normalize:
        C, S = self.C_norm_sub, self.S_norm_sub
    else:
        C, S = self.C_sub, self.S_sub
    cur_temp = dict()
    if self._showC:
        cur_temp['C'] = (
            hv.Dataset(C.sel(unit_id=usub)
                       .compute().rename("Intensity (A. U.)")
                       .dropna('frame', how='all')).to(hv.Curve, 'frame'))
    if self._showS:
        cur_temp['S'] = (
            hv.Dataset(S.sel(unit_id=usub)
                       .compute().rename("Intensity (A. U.)")
                       .dropna('frame', how='all')).to(hv.Curve, 'frame'))
    cur_vl = (hv.DynamicMap(
        lambda f, y: hv.VLine(f) if f else hv.VLine(0),
        streams=[self.strm_f])
        .opts(style=dict(color='red')))
    cur_cv = hv.Curve([], kdims=['frame'], vdims=['Internsity (A.U.)'])
    self.strm_f.source = cur_cv
    h_cv = len(self._w) // 8
    w_cv = len(self._w) * 2
    temp_comp = (cur_cv
                 * datashade_ndcurve(hv.HoloMap(cur_temp, 'trace')
                                     .collate().overlay('trace')
                                     .grid('unit_id')
                                     .add_dimension('time', 0, 0),
                                     'trace')
                 .opts(plot=dict(shared_xaxis=True))
                 .map(lambda p: p.opts(
                     plot=dict(frame_height=h_cv, frame_width=w_cv)),
                     hv.RGB)
                 * cur_vl)
    temp_comp[temp_comp.keys()[0]] = (temp_comp[temp_comp.keys()[0]]
                                      .opts(plot=dict(height=h_cv + 75)))
    return pn.panel(temp_comp)
Example #27
Source File: visualization.py From minian with GNU General Public License v3.0
def update_AC(self, usub=None):
    if usub is None:
        usub = self.strm_usub.usub
    if usub:
        if self._useAC:
            umask = ((self.A_sub.sel(unit_id=usub) > 0)
                     .any('unit_id'))
            A_sub = (self.A_sub.sel(unit_id=usub)
                     .where(umask, drop=True).fillna(0))
            C_sub = self.C_sub.sel(unit_id=usub)
            AC = xr.apply_ufunc(
                da.dot,
                A_sub,
                C_sub,
                input_core_dims=[['height', 'width', 'unit_id'],
                                 ['unit_id', 'frame']],
                output_core_dims=[['height', 'width', 'frame']],
                dask='allowed')
            self._AC = AC.compute()
            wndh, wndw = AC.coords['height'].values, AC.coords['width'].values
            window = self.A_sub.sel(
                height=slice(wndh.min(), wndh.max()),
                width=slice(wndw.min(), wndw.max()))
            self._AC = self._AC.reindex_like(window).fillna(0)
            self._mov = (self.org_sub.reindex_like(window)).compute()
        else:
            self._AC = self.A_sub.sel(unit_id=usub).sum('unit_id')
            self._mov = self.org_sub
        self.strm_f.event(x=0)
    else:
        self._AC = xr.DataArray([])
        self._mov = xr.DataArray([])
        self.strm_f.event(x=0)
Example #28
Source File: visualization.py From minian with GNU General Public License v3.0
def centroid(A, verbose=False):
    def rel_cent(im):
        im_nan = np.isnan(im)
        if im_nan.all():
            return np.array([np.nan, np.nan])
        if im_nan.any():
            im = np.nan_to_num(im)
        cent = np.array(center_of_mass(im))
        return cent / im.shape

    gu_rel_cent = da.gufunc(
        rel_cent,
        signature='(h,w)->(d)',
        output_dtypes=float,
        output_sizes=dict(d=2),
        vectorize=True
    )
    cents = (xr.apply_ufunc(
        gu_rel_cent,
        A.chunk(dict(height=-1, width=-1)),
        input_core_dims=[['height', 'width']],
        output_core_dims=[['dim']],
        dask='allowed')
        .assign_coords(dim=['height', 'width']))
    if verbose:
        print("computing centroids")
        with ProgressBar():
            cents = cents.compute()
    cents_df = (cents.rename('cents').to_series().dropna()
                .unstack('dim').rename_axis(None, axis='columns')
                .reset_index())
    h_rg = (A.coords['height'].min().values, A.coords['height'].max().values)
    w_rg = (A.coords['width'].min().values, A.coords['width'].max().values)
    cents_df['height'] = cents_df['height'] * (h_rg[1] - h_rg[0]) + h_rg[0]
    cents_df['width'] = cents_df['width'] * (w_rg[1] - w_rg[0]) + w_rg[0]
    return cents_df
Example #29
Source File: visualization_ply.py From minian with GNU General Public License v3.0
def _calculate_contours_centroids(self):
    cnts_df_list = []
    cts_df_list = []
    A = self.cnmf['A'].load()
    for uid in range(self._u):
        cur_A = A.sel(unit_id=uid)
        cur_idxs = cur_A.squeeze().dims
        cur_thres = dask.delayed(cur_A.max)()
        cur_thres = dask.delayed(float)(cur_thres * .3)
        cur_cnts = dask.delayed(find_contours)(cur_A, cur_thres)
        cur_cnts = dask.delayed(np.concatenate)(cur_cnts)
        cur_cnts = dask.delayed(pd.DataFrame)(cur_cnts, columns=cur_idxs)
        cur_cnts = cur_cnts.assign(unit_id=uid)
        cur_cts = dask.delayed(center_of_mass)(cur_A.values)
        cur_cts = dask.delayed(pd.Series)(cur_cts, index=cur_idxs)
        cur_cts = cur_cts.append(pd.Series(dict(unit_id=uid)))
        cnts_df_list.append(cur_cnts)
        cts_df_list.append(cur_cts)
    cnts_df_list = dask.compute(*cnts_df_list)
    cts_df_list = dask.compute(*cts_df_list)
    cnts_df = pd.concat(cnts_df_list)
    cts_df = pd.concat(cts_df_list, axis=1).T
    for dim in cur_idxs:
        cnts_df[dim].update(cnts_df[dim] / A.sizes[dim] * self._dims[dim])
        cts_df[dim].update(cts_df[dim] / A.sizes[dim] * self._dims[dim])
    return cnts_df, cts_df
Example #30
Source File: preprocessing.py From minian with GNU General Public License v3.0
def detect_brightspot_perframe(varray, thres=0.95):
    print("creating parallel schedule")
    spots = []
    for fid, fm in varray.rolling(frame=1):
        sp = delayed(lambda f: f > f.quantile(thres, interpolation='lower'))(
            fm)
        spots.append(sp)
    with ProgressBar():
        print("detecting bright spots by frame")
        spots, = compute(spots)
    print("concatenating results")
    spots = xr.concat(spots, dim='frame')
    return spots

# def correct_dust(varray, dust):
#     mov_corr = varray.values
#     nz = np.nonzero(dust)
#     nz_tp = [(d0, d1) for d0, d1 in zip(nz[0], nz[1])]
#     for i in range(np.count_nonzero(dust)):
#         cur_dust = (nz[0][i], nz[1][i])
#         cur_sur = set(
#             itt.product(
#                 range(cur_dust[0] - 1, cur_dust[0] + 2),
#                 range(cur_dust[1] - 1, cur_dust[1] + 2))) - set(
#                     cur_dust) - set(nz_tp)
#         cur_sur = list(
#             filter(
#                 lambda d: 0 < d[0] < mov.shape[1] and 0 < d[1] < mov.shape[2],
#                 cur_sur))
#         if len(cur_sur) > 0:
#             sur_arr = np.empty((mov.shape[0], len(cur_sur)))
#             for si, sur in enumerate(cur_sur):
#                 sur_arr[:, si] = mov[:, sur[0], sur[1]]
#             mov_corr[:, cur_dust[0], cur_dust[1]] = np.mean(sur_arr, axis=1)
#         else:
#             print("unable to correct for point ({}, {})".format(
#                 cur_dust[0], cur_dust[1]))
#     return mov_corr