Python dask.compute() Examples

The following are 30 code examples of dask.compute(), drawn from open-source projects. Each example notes its source file, project, and license. You may also want to check out the other available functions and classes of the dask module.
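For reference, dask.compute() evaluates one or more lazy dask collections or delayed objects in a single pass and returns their concrete results as a tuple. Below is a minimal sketch of that behavior; the array shape, chunk size, and variable names are arbitrary and chosen only for illustration.

import dask
import dask.array as da

x = da.ones((1000, 1000), chunks=(100, 100))   # lazy dask array
total = x.sum()                                # lazy reduction, nothing computed yet
doubled_mean = (x * 2).mean()                  # a second lazy graph

# One call evaluates both graphs together and returns concrete values
total_val, mean_val = dask.compute(total, doubled_mean)
print(total_val, mean_val)  # 1000000.0 2.0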
Example #1
Source File: k_means.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y=None):
        X = self._check_array(X)
        labels, centroids, inertia, n_iter = k_means(
            X,
            self.n_clusters,
            oversampling_factor=self.oversampling_factor,
            random_state=self.random_state,
            init=self.init,
            return_n_iter=True,
            max_iter=self.max_iter,
            init_max_iter=self.init_max_iter,
            tol=self.tol,
        )
        self.cluster_centers_ = centroids
        self.labels_ = labels
        self.inertia_ = inertia.compute().item()
        self.n_iter_ = n_iter
        self.n_features_in_ = X.shape[1]
        return self 
Example #2
Source File: cache.py    From intake with BSD 2-Clause "Simplified" License
def _load(self, files_in, files_out, urlpath, meta=True):
        """Download a set of files"""
        import dask
        out = []
        outnames = []
        for file_in, file_out in zip(files_in, files_out):
            cache_path = file_out.path
            outnames.append(cache_path)

            # If `_munge_path` did not find a match we want to avoid
            # writing to the urlpath.
            if cache_path == urlpath:
                continue

            if not os.path.isfile(cache_path):
                logger.debug("Caching file: {}".format(file_in.path))
                logger.debug("Original path: {}".format(urlpath))
                logger.debug("Cached at: {}".format(cache_path))
                if meta:
                    self._log_metadata(urlpath, file_in.path, cache_path)
                ddown = dask.delayed(_download)
                out.append(ddown(file_in, file_out, self.blocksize,
                                 self.output))
        dask.compute(*out)
        return outnames 
Example #3
Source File: semistructured.py    From intake with BSD 2-Clause "Simplified" License
def _data_to_source(b, path, encoder=None, storage_options=None, **kwargs):
        import dask.bag as db
        import posixpath
        from fsspec import open_files
        import dask
        import pickle
        import json
        from intake.source.textfiles import TextFilesSource
        encoder = {None: str, 'str': str, 'json': json.dumps,
                   'pickle': pickle.dumps}.get(encoder, encoder)

        if not hasattr(b, 'to_textfiles'):
            try:
                b = db.from_sequence(b, npartitions=1)
            except TypeError:
                raise NotImplementedError

        files = open_files(posixpath.join(path, 'part.*'), mode='wt',
                           num=b.npartitions, **(storage_options or {}))
        dwrite = dask.delayed(write_file)
        out = [dwrite(part, f, encoder)
               for part, f in zip(b.to_delayed(), files)]
        dask.compute(out)
        s = TextFilesSource(posixpath.join(path, 'part.*'), storage_options=storage_options)
        return s 
Example #4
Source File: views.py    From AutoOut with MIT License
def detect_outliers(request):
    """
    Detect outliers end point
    """
    dataset_id = request.GET.get("dataset_id")

    # Validate before converting; int(None) would raise a TypeError
    if dataset_id is None:
        return JsonResponse({"status": "failure", "message": "Dataset id is not provided"})
    dataset_id = int(dataset_id)

    dataset = Dataset.objects.get(pk=dataset_id)
    file_path = dataset.path
    delete_features = json.loads(dataset.deleted_features)

    # Create a detection experiment and start outlier detection
    process = Process.objects.get(name='Detection')
    process_status = ProcessStatus.objects.get(name='Running')
    experiment = Experiment(dataset=dataset, process=process, process_status=process_status)
    experiment.save()
    results = delayed(detect_all)(os.path.join(settings.MEDIA_ROOT, file_path), experiment.id, settings.RESULTS_ROOT,
                                  delete_features)
    dask.compute(results)

    return JsonResponse(
        {'status': 'success', 'message': 'Detection started successfully', 'experiment_id': experiment.id}) 
Example #5
Source File: catalog.py    From nbodykit with GNU General Public License v3.0
def persist(self, columns=None):
        """
        Return a CatalogSource, where the selected columns are
        computed and persisted in memory.
        """

        import dask.array as da
        if columns is None:
            columns = self.columns

        r = {}
        for key in columns:
            r[key] = self[key]

        r = da.compute(r)[0] # particularity of dask

        from nbodykit.source.catalog.array import ArrayCatalog
        c = ArrayCatalog(r, comm=self.comm)
        c.attrs.update(self.attrs)

        return c 
Example #6
Source File: input_pipeline_dask.py    From professional-services with Apache License 2.0
def calculate_stats(cls, df, target_var):
        """Calculates descriptive stats of the dataframe required for cleaning.

        Arguments:
                df : dask dataframe, The dataframe at hand
                target_var : string, Dependent variable for the analysis

        Returns:
                mean : pandas series, mean of each column
                median : pandas series, median of each column
                dict(zip(categorical_columns, mode)) : dict, Dictionary containing
                        categorical columns as keys and their modes as values
                std : pandas series, standard deviation of each column
        """
        categorical_columns = [
            col for col in df.columns if col != target_var and df[col].dtype == 'object']
        mean_op = df.mean()
        std_op = df.std()
        median_op = df.quantile(0.5)
        mode_op = [df[col].value_counts().idxmax()
                   for col in categorical_columns]
        mean, median, mode, std = dask.compute(
            mean_op, median_op, mode_op, std_op)
        return mean, median, dict(zip(categorical_columns, mode)), std 
Example #7
Source File: input_pipeline_dask.py    From professional-services with Apache License 2.0
def impute(cls, df, target_var, median, mode):
        """Imputing missing values using median for continuous columns and mode
        for categorical columns.

        Arguments:
                df : dask dataframe, The dataframe at hand
                target_var : string, Dependent variable for the analysis
                median : pandas series, median of all columns in data
                mode : dict, modes of the categorical columns keyed by column name
        Returns:
                df : dask dataframe, Dataframe without missing values
        """
        missing_stats = df.isna().sum().compute()
        cols = [col for col in df.columns if col != target_var]
        for col in cols:
            if missing_stats[col] > 0 and df[col].dtype == 'object':
                df[col] = df[col].fillna(mode[col])
            elif missing_stats[col] > 0:
                df[col] = df[col].fillna(median[col])
        return df 
Example #8
Source File: input_pipeline_dask.py    From professional-services with Apache License 2.0
def kmeans_input_fn(self, name, csv_path=None):
        """Input function for kmeans

        Arguments:
                name : string, Name of the data [Train or Eval]
                csv_path : The path of the csv on any storage system

        Returns:
                A batch of features
        """
        pattern = self._get_pattern(name, csv_path)
        tf.logging.info('The Pattern of files is : %s', pattern)
        df = dd.read_csv(pattern)
        vectors = dask.compute(df.values)
        return tf.train.limit_epochs(
            tf.convert_to_tensor(vectors[0], dtype=tf.float32), num_epochs=1) 
Example #9
Source File: tests_input_dask.py    From professional-services with Apache License 2.0
def test_clean_data(self):
        """
        Testing function clean_csv
        """
        copyfile(CSV_PATH, '/tmp/data.csv')
        iread = self.init_inputreader()
        stats = self.init_basicstats()
        ddf, _ = iread._parse_csv()
        data, mean, std_dev, csv_defaults = stats.clean_data(
            df=ddf,
            task_type=TASK_TYPE,
            target_var=TARGET_VAR,
            name=NAME
        )

        self_computed_mean = dask.compute(ddf.mean())
        self.assertListEqual(list(mean), list(self_computed_mean[0]))
        self_computed_std_dev = dask.compute(ddf.std(axis=0, skipna=True))
        self.assertListEqual(list(std_dev), list(self_computed_std_dev[0]))
        self.assertIsInstance(data, dask.dataframe.core.DataFrame)
        self.assertIsInstance(mean, pd.core.series.Series)
        self.assertIsInstance(std_dev, pd.core.series.Series)
        self.assertIsInstance(csv_defaults, list) 
Example #10
Source File: tests_input_dask.py    From professional-services with Apache License 2.0
def test_calculate_stats(self):
        """
        Testing function calculate_stats
        """
        iread = self.init_inputreader()
        stats = self.init_basicstats()
        ddf, _ = iread._parse_csv()
        mean, median, mode_dict, std_dev = stats.calculate_stats(
            df=ddf,
            target_var=TARGET_VAR
        )
        self_computed_mean = dask.compute(ddf.mean())
        self.assertListEqual(list(mean), list(self_computed_mean[0]))
        self_computed_std_dev = dask.compute(ddf.std(axis=0, skipna=True))
        self.assertListEqual(list(std_dev), list(self_computed_std_dev[0]))
        self_computed_median = dask.compute(ddf.quantile(0.5))
        self.assertListEqual(list(median), list(self_computed_median[0]))
        self.assertIsInstance(mean, pd.core.series.Series)
        self.assertIsInstance(std_dev, pd.core.series.Series)
        self.assertIsInstance(median, pd.core.series.Series)
        self.assertIsInstance(mode_dict, dict) 
Example #11
Source File: tests_input_dask.py    From professional-services with Apache License 2.0
def test_impute(self):
        """
        Testing function impute
        """
        iread = self.init_inputreader()
        stats = self.init_basicstats()
        ddf, _ = iread._parse_csv()
        _, median, _, _ = stats.calculate_stats(
            df=ddf,
            target_var=TARGET_VAR
        )
        data = stats.impute(
            df=ddf,
            target_var=TARGET_VAR,
            median=median,
            mode=MODE
        )
        imputed_data = dask.compute(data.isnull().sum())
        rows = ddf.columns
        for row in rows:
            col = imputed_data[0][row]
            self.assertEqual(col, 0)
        self.assertIsInstance(data, dask.dataframe.core.DataFrame) 
Example #12
Source File: cross_registration.py    From minian with GNU General Public License v3.0
def calculate_centroids_old(cnmds, window, grp_dim=['animal', 'session']):
    print("computing centroids")
    cnt_list = []
    for anm, cur_anm in cnmds.groupby('animal'):
        for ss, cur_ss in cur_anm.groupby('session'):
            # cnt = centroids(cur_ss['A_shifted'], window.sel(animal=anm))
            cnt = da.delayed(centroids)(
                cur_ss['A_shifted'], window.sel(animal=anm))
            cnt_list.append(cnt)
    with ProgressBar():
        cnt_list, = da.compute(cnt_list)
    cnts_ds = pd.concat(cnt_list, ignore_index=True)
    cnts_ds.height = cnts_ds.height.astype(float)
    cnts_ds.width = cnts_ds.width.astype(float)
    cnts_ds.unit_id = cnts_ds.unit_id.astype(int)
    cnts_ds.animal = cnts_ds.animal.astype(str)
    cnts_ds.session = cnts_ds.session.astype(str)
    cnts_ds.session_id = cnts_ds.session_id.astype(str)
    return cnts_ds 
Example #13
Source File: cross_registration.py    From minian with GNU General Public License v3.0
def centroids_distance_old(cents,
                           A,
                           window,
                           shift,
                           hamming,
                           corr,
                           tile=(50, 50)):
    sessions = cents['session'].unique()
    dim_h = (np.min(cents['height']), np.max(cents['height']))
    dim_w = (np.min(cents['width']), np.max(cents['width']))
    dist_list = []
    for ssA, ssB in itt.combinations(sessions, 2):
        # dist = _calc_cent_dist(ssA, ssB, cents, cnmds, window, tile, dim_h, dim_w)
        dist = da.delayed(_calc_cent_dist)(ssA, ssB, cents, A, window,
                                           tile, dim_h, dim_w, shift, hamming,
                                           corr)
        dist_list.append(dist)
    with ProgressBar():
        dist_list, = da.compute(dist_list)
    dists = pd.concat(dist_list, ignore_index=True)
    return dists 
Example #14
Source File: cnmf.py    From minian with GNU General Public License v3.0
def get_noise_welch(varr,
                    noise_range=(0.25, 0.5),
                    noise_method='logmexp',
                    compute=True):
    print("estimating noise")
    sn = xr.apply_ufunc(
        noise_welch,
        varr.chunk(dict(frame=-1)),
        input_core_dims=[['frame']],
        dask='parallelized',
        vectorize=True,
        kwargs=dict(noise_range=noise_range, noise_method=noise_method),
        output_dtypes=[varr.dtype])
    if compute:
        sn = sn.compute()
    return sn 
Example #15
Source File: metsim.py    From MetSim with GNU General Public License v3.0
def run(self):
        self._validate_setup()
        write_locks = {}
        for times in self._times:
            filename = self._get_output_filename(times)
            self.setup_netcdf_output(filename, times)
            write_locks[filename] = combine_locks([NETCDFC_LOCK, get_write_lock(filename)])
        self.logger.info('Starting {} chunks...'.format(len(self.slices)))

        delayed_objs = [wrap_run_slice(self.params, write_locks, dslice)
                        for dslice in self.slices]
        persisted = dask.persist(delayed_objs, num_workers=self.params['num_workers'])
        self.progress_bar(persisted)
        dask.compute(persisted)
        self.logger.info('Cleaning up...')
        try:
            self._client.cluster.close()
            self._client.close()
            if self.params['verbose'] == logging.DEBUG:
                print()
                print('closed dask cluster/client')
        except Exception:
            pass 
Example #16
Source File: benchmark.py    From SDV with MIT License
def benchmark(datasets=None, datasets_path=None, distributed=True, timeout=None):
    if datasets is None:
        if datasets_path is None:
            datasets = get_available_demos().name
        else:
            datasets = os.listdir(datasets_path)

    if distributed:
        import dask

        global score_dataset
        score_dataset = dask.delayed(score_dataset)

    scores = list()
    for dataset in datasets:
        scores.append(score_dataset(dataset, datasets_path, timeout))

    if distributed:
        scores = dask.compute(*scores)

    return pd.DataFrame(scores) 
Example #17
Source File: _blockwise.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y, **kwargs):
        X = self._check_array(X)
        estimatord = dask.delayed(self.estimator)

        Xs = X.to_delayed()
        ys = y.to_delayed()
        if isinstance(X, da.Array):
            Xs = Xs.flatten()
        if isinstance(y, da.Array):
            ys = ys.flatten()

        if len(Xs) != len(ys):
            raise ValueError(
                f"The number of blocks in X and y must match. {len(Xs)} != {len(ys)}"
            )

        estimators = [
            dask.delayed(sklearn.base.clone)(estimatord) for _ in range(len(Xs))
        ]
        results = [
            estimator_.fit(X_, y_, **kwargs)
            for estimator_, X_, y_ in zip(estimators, Xs, ys)
        ]
        results = list(dask.compute(*results))
        self.estimators_ = results 
Example #18
Source File: semistructured.py    From intake with BSD 2-Clause "Simplified" License
def _get_partition(self, i):
        self._load_metadata()
        return self.parts[i].compute() 
Example #19
Source File: preprocessing.py    From minian with GNU General Public License v3.0
def remove_background_old(varray, window=51):
    print("creating parallel schedule")
    varr_ft = varray.astype(np.float32)
    compute_list = []
    for fid in varr_ft.coords['frame'].values:
        fm = varr_ft.loc[dict(frame=fid)]
        _ = delayed(remove_background_perframe_old)(fid, fm, varr_ft, window)
        compute_list.append(_)
    with ProgressBar():
        print("removing background")
        compute(compute_list)
    print("normalizing result")
    varr_ft = scale_varr(varr_ft, (0, 255)).astype(varray.dtype, copy=False)
    print("background removal done")
    return varr_ft.rename(varray.name + "_Filtered") 
Example #20
Source File: test_merge.py    From kartothek with MIT License
def _merge_datasets(*args, **kwargs):
    df_list = merge_datasets_as_delayed(*args, **kwargs)
    s = pickle.dumps(df_list, pickle.HIGHEST_PROTOCOL)
    df_list = pickle.loads(s)
    return dask.compute(df_list)[0] 
Example #21
Source File: initialization.py    From minian with GNU General Public License v3.0
def seeds_init(varr, wnd_size=500, method='rolling', stp_size=200, nchunk=100, max_wnd=10, diff_thres=2):
    print("constructing chunks")
    idx_fm = varr.coords['frame']
    nfm = len(idx_fm)
    if method == 'rolling':
        nstp = np.ceil(nfm / stp_size) + 1
        centers = np.linspace(0, nfm - 1, nstp)
        hwnd = np.ceil(wnd_size / 2)
        max_idx = list(
            map(lambda c: slice(int(np.floor(c - hwnd).clip(0)), int(np.ceil(c + hwnd))),
                centers))
    elif method == 'random':
        max_idx = [
            np.random.randint(0, nfm - 1, wnd_size) for _ in range(nchunk)
        ]
    print("creating parallel scheme")
    res = [max_proj_frame(varr, cur_idx) for cur_idx in max_idx]
    max_res = xr.concat(res, 'sample').chunk(dict(sample=10))
    print("computing max projections")
    max_res = max_res.persist()
    print("calculating local maximum")
    loc_max = xr.apply_ufunc(
        local_max_roll,
        max_res.chunk(dict(height=-1, width=-1)),
        input_core_dims=[['height', 'width']],
        output_core_dims=[['height', 'width']],
        vectorize=True,
        dask='parallelized',
        output_dtypes=[np.uint8],
        kwargs=dict(k0=2, k1=max_wnd, diff=diff_thres)).sum('sample')
    loc_max = loc_max.compute()
    loc_max_flt = loc_max.stack(spatial=['height', 'width'])
    seeds = (loc_max_flt.where(loc_max_flt > 0, drop=True)
             .rename('seeds').to_dataframe().reset_index())
    return seeds[['height', 'width', 'seeds']].reset_index() 
Example #22
Source File: test_gc.py    From kartothek with MIT License
def _run_garbage_collect(*args, **kwargs):
    tasks = garbage_collect_dataset__delayed(*args, **kwargs)
    s = pickle.dumps(tasks, pickle.HIGHEST_PROTOCOL)
    tasks = pickle.loads(s)
    dask.compute(tasks) 
Example #23
Source File: test_delete.py    From kartothek with MIT License
def _delete(*args, **kwargs):
    tasks = delete_dataset__delayed(*args, **kwargs)
    s = pickle.dumps(tasks, pickle.HIGHEST_PROTOCOL)
    tasks = pickle.loads(s)
    dask.compute(tasks) 
Example #24
Source File: semistructured.py    From intake with BSD 2-Clause "Simplified" License
def read(self):
        self._load_metadata()
        return self.bag.compute() 
Example #25
Source File: visualization.py    From minian with GNU General Public License v3.0
def compute_subs(self, clicks=None):
        self.A_sub = self.A_sub.compute()
        self.C_sub = self.C_sub.compute()
        self.S_sub = self.S_sub.compute()
        self.org_sub = self.org_sub.compute()
        self.C_norm_sub = self.C_norm_sub.compute()
        self.S_norm_sub = self.S_norm_sub.compute() 
Example #26
Source File: visualization.py    From minian with GNU General Public License v3.0
def _temp_comp_sub(self, usub=None):
        if usub is None:
            usub = self.strm_usub.usub
        if self._normalize:
            C, S = self.C_norm_sub, self.S_norm_sub
        else:
            C, S = self.C_sub, self.S_sub
        cur_temp = dict()
        if self._showC:
            cur_temp['C'] = (
                hv.Dataset(C.sel(unit_id=usub)
                           .compute().rename("Intensity (A. U.)")
                           .dropna('frame', how='all')).to(hv.Curve, 'frame'))
        if self._showS:
            cur_temp['S'] = (
                hv.Dataset(S.sel(unit_id=usub)
                           .compute().rename("Intensity (A. U.)")
                           .dropna('frame', how='all')).to(hv.Curve, 'frame'))
        cur_vl = (hv.DynamicMap(
            lambda f, y: hv.VLine(f) if f else hv.VLine(0),
            streams=[self.strm_f])
                  .opts(style=dict(color='red')))
        cur_cv = hv.Curve([], kdims=['frame'], vdims=['Intensity (A.U.)'])
        self.strm_f.source = cur_cv
        h_cv = len(self._w) // 8
        w_cv = len(self._w) * 2
        temp_comp = (cur_cv
                     * datashade_ndcurve(hv.HoloMap(cur_temp, 'trace')
                                         .collate().overlay('trace')
                                         .grid('unit_id')
                                         .add_dimension('time', 0, 0),
                                         'trace')                     
                     .opts(plot=dict(shared_xaxis=True))
                     .map(lambda p: p.opts(
                         plot=dict(frame_height=h_cv,
                                   frame_width=w_cv)),
                          hv.RGB)
                     * cur_vl)
        temp_comp[temp_comp.keys()[0]] = (temp_comp[temp_comp.keys()[0]]
                                           .opts(plot=dict(height=h_cv + 75)))
        return pn.panel(temp_comp) 
Example #27
Source File: visualization.py    From minian with GNU General Public License v3.0
def update_AC(self, usub=None):
        if usub is None:
            usub = self.strm_usub.usub
        if usub:
            if self._useAC:
                umask = ((self.A_sub.sel(unit_id=usub) > 0)
                         .any('unit_id'))
                A_sub = (self.A_sub.sel(unit_id=usub)
                         .where(umask, drop=True).fillna(0))
                C_sub = self.C_sub.sel(unit_id=usub)
                AC = xr.apply_ufunc(
                    da.dot,
                    A_sub, C_sub,
                    input_core_dims=[['height', 'width', 'unit_id'], ['unit_id', 'frame']],
                    output_core_dims=[['height', 'width', 'frame']],
                    dask='allowed')
                self._AC = AC.compute()
                wndh, wndw = AC.coords['height'].values, AC.coords['width'].values
                window = self.A_sub.sel(
                    height=slice(wndh.min(), wndh.max()),
                    width=slice(wndw.min(), wndw.max()))
                self._AC = self._AC.reindex_like(window).fillna(0)
                self._mov = (self.org_sub.reindex_like(window)).compute()
            else:
                self._AC = self.A_sub.sel(unit_id=usub).sum('unit_id')
                self._mov = self.org_sub
            self.strm_f.event(x=0)
        else:
            self._AC = xr.DataArray([])
            self._mov = xr.DataArray([])
            self.strm_f.event(x=0) 
Example #28
Source File: visualization.py    From minian with GNU General Public License v3.0
def centroid(A, verbose=False):
    def rel_cent(im):
        im_nan = np.isnan(im)
        if im_nan.all():
            return np.array([np.nan, np.nan])
        if im_nan.any():
            im = np.nan_to_num(im)
        cent = np.array(center_of_mass(im))
        return cent / im.shape
    gu_rel_cent = da.gufunc(
        rel_cent,
        signature='(h,w)->(d)',
        output_dtypes=float,
        output_sizes=dict(d=2),
        vectorize=True
    )
    cents = (xr.apply_ufunc(
        gu_rel_cent, A.chunk(dict(height=-1, width=-1)),
        input_core_dims=[['height', 'width']],
        output_core_dims=[['dim']],
        dask='allowed')
             .assign_coords(dim=['height', 'width']))
    if verbose:
        print("computing centroids")
        with ProgressBar():
            cents = cents.compute()
    cents_df = (cents.rename('cents').to_series().dropna()
                .unstack('dim').rename_axis(None, axis='columns')
                .reset_index())
    h_rg = (A.coords['height'].min().values, A.coords['height'].max().values)
    w_rg = (A.coords['width'].min().values, A.coords['width'].max().values)
    cents_df['height'] = cents_df['height'] * (h_rg[1] - h_rg[0]) + h_rg[0]
    cents_df['width'] = cents_df['width'] * (w_rg[1] - w_rg[0]) + w_rg[0]
    return cents_df 
Example #29
Source File: visualization_ply.py    From minian with GNU General Public License v3.0
def _calculate_contours_centroids(self):
        cnts_df_list = []
        cts_df_list = []
        A = self.cnmf['A'].load()
        for uid in range(self._u):
            cur_A = A.sel(unit_id=uid)
            cur_idxs = cur_A.squeeze().dims
            cur_thres = dask.delayed(cur_A.max)()
            cur_thres = dask.delayed(float)(cur_thres * .3)
            cur_cnts = dask.delayed(find_contours)(cur_A, cur_thres)
            cur_cnts = dask.delayed(np.concatenate)(cur_cnts)
            cur_cnts = dask.delayed(pd.DataFrame)(cur_cnts, columns=cur_idxs)
            cur_cnts = cur_cnts.assign(unit_id=uid)
            cur_cts = dask.delayed(center_of_mass)(cur_A.values)
            cur_cts = dask.delayed(pd.Series)(cur_cts, index=cur_idxs)
            cur_cts = cur_cts.append(pd.Series(dict(unit_id=uid)))
            cnts_df_list.append(cur_cnts)
            cts_df_list.append(cur_cts)
        cnts_df_list = dask.compute(*cnts_df_list)
        cts_df_list = dask.compute(*cts_df_list)
        cnts_df = pd.concat(cnts_df_list)
        cts_df = pd.concat(cts_df_list, axis=1).T
        for dim in cur_idxs:
            cnts_df[dim].update(cnts_df[dim] / A.sizes[dim] * self._dims[dim])
            cts_df[dim].update(cts_df[dim] / A.sizes[dim] * self._dims[dim])
        return cnts_df, cts_df 
Example #30
Source File: preprocessing.py    From minian with GNU General Public License v3.0
def detect_brightspot_perframe(varray, thres=0.95):
    print("creating parallel schedule")
    spots = []
    for fid, fm in varray.rolling(frame=1):
        sp = delayed(lambda f: f > f.quantile(thres, interpolation='lower'))(
            fm)
        spots.append(sp)
    with ProgressBar():
        print("detecting bright spots by frame")
        spots, = compute(spots)
    print("concatenating results")
    spots = xr.concat(spots, dim='frame')
    return spots


# def correct_dust(varray, dust):
#     mov_corr = varray.values
#     nz = np.nonzero(dust)
#     nz_tp = [(d0, d1) for d0, d1 in zip(nz[0], nz[1])]
#     for i in range(np.count_nonzero(dust)):
#         cur_dust = (nz[0][i], nz[1][i])
#         cur_sur = set(
#             itt.product(
#                 range(cur_dust[0] - 1, cur_dust[0] + 2),
#                 range(cur_dust[1] - 1, cur_dust[1] + 2))) - set(
#                     cur_dust) - set(nz_tp)
#         cur_sur = list(
#             filter(
#                 lambda d: 0 < d[0] < mov.shape[1] and 0 < d[1] < mov.shape[2],
#                 cur_sur))
#         if len(cur_sur) > 0:
#             sur_arr = np.empty((mov.shape[0], len(cur_sur)))
#             for si, sur in enumerate(cur_sur):
#                 sur_arr[:, si] = mov[:, sur[0], sur[1]]
#             mov_corr[:, cur_dust[0], cur_dust[1]] = np.mean(sur_arr, axis=1)
#         else:
#             print("unable to correct for point ({}, {})".format(
#                 cur_dust[0], cur_dust[1]))
#     return mov_corr