Python dask.array.from_delayed() Examples

The following are 24 code examples of dask.array.from_delayed(), taken from open-source projects. The original project and source file are noted above each example. You may also want to check out all available functions and classes of the module dask.array.
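Before diving into the project code, here is a minimal sketch of the basic pattern every example below builds on (the function name make_block is a hypothetical stand-in): wrap a function in dask.delayed, then hand its delayed result to da.from_delayed along with the shape and dtype the function will eventually return.

import numpy as np
import dask
import dask.array as da

@dask.delayed
def make_block():
    # stands in for an expensive computation or disk read
    return np.ones((100, 100), dtype=np.float32)

# shape and dtype must be declared up front, because the delayed
# function has not actually run yet
arr = da.from_delayed(make_block(), shape=(100, 100), dtype=np.float32)
result = arr.compute()  # only now does make_block() execute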
Example #1
Source File: test_dask_layers.py    From napari with BSD 3-Clause "New" or "Revised" License
def delayed_dask_stack():
    """A 4D (20, 10, 10, 10) delayed dask array, simulates disk io."""
    # we will return a dict with a 'calls' variable that tracks call count
    output = {'calls': 0}

    # create a delayed version of a function that simply generates
    # np.arrays but also counts how many times it has been called
    @dask.delayed
    def get_array():
        nonlocal output
        output['calls'] += 1
        return np.random.rand(10, 10, 10)

    # then make a mock "timelapse" of 3D stacks
    # see https://napari.org/tutorials/applications/dask.html for details
    _list = [get_array() for _ in range(20)]
    output['stack'] = da.stack(
        [da.from_delayed(i, shape=(10, 10, 10), dtype=float) for i in _list]
    )
    assert output['stack'].shape == (20, 10, 10, 10)
    return output 
Example #2
Source File: viirs.py    From satpy with GNU General Public License v3.0
def __call__(self, datasets, **info):
        """Create the composite by scaling the DNB data using a histogram equalization method.

        :param datasets: 2-element tuple (Day/Night Band data, Solar Zenith Angle data)
        :param **info: Miscellaneous metadata for the newly produced composite
        """
        if len(datasets) != 2:
            raise ValueError("Expected 2 datasets, got %d" % (len(datasets), ))

        dnb_data = datasets[0]
        sza_data = datasets[1]
        delayed = dask.delayed(self._run_dnb_normalization)(dnb_data.data, sza_data.data)
        output_dataset = dnb_data.copy()
        output_data = da.from_delayed(delayed, dnb_data.shape, dnb_data.dtype)
        output_dataset.data = output_data.rechunk(dnb_data.data.chunks)

        info = dnb_data.attrs.copy()
        info.update(self.attrs)
        info["standard_name"] = "equalized_radiance"
        info["mode"] = "L"
        output_dataset.attrs = info
        return output_dataset 
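One detail worth flagging here: da.from_delayed always yields a single-chunk array, which is why the result is rechunked back to the input's chunking. The same move appears in Examples #16 and #23 below. A minimal sketch of the pattern, assuming only numpy and dask (whole_array_op is hypothetical):

import numpy as np
import dask
import dask.array as da

data = da.random.random((1000, 1000), chunks=250)

@dask.delayed
def whole_array_op(arr):
    # receives the computed numpy array, returns one of the same shape
    return np.asarray(arr) * 2

out = da.from_delayed(whole_array_op(data), data.shape, data.dtype)
out = out.rechunk(data.chunks)  # restore the original chunking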
Example #3
Source File: eps_l1b.py    From satpy with GNU General Public License v3.0
def get_full_lonlats(self):
        """Get the interpolated lons/lats."""
        if self.lons is not None and self.lats is not None:
            return self.lons, self.lats

        raw_lats = np.hstack((self["EARTH_LOCATION_FIRST"][:, [0]],
                              self["EARTH_LOCATIONS"][:, :, 0],
                              self["EARTH_LOCATION_LAST"][:, [0]]))

        raw_lons = np.hstack((self["EARTH_LOCATION_FIRST"][:, [1]],
                              self["EARTH_LOCATIONS"][:, :, 1],
                              self["EARTH_LOCATION_LAST"][:, [1]]))
        self.lons, self.lats = self._get_full_lonlats(raw_lons, raw_lats)
        self.lons = da.from_delayed(self.lons, dtype=self["EARTH_LOCATIONS"].dtype,
                                    shape=(self.scanlines, self.pixels))
        self.lats = da.from_delayed(self.lats, dtype=self["EARTH_LOCATIONS"].dtype,
                                    shape=(self.scanlines, self.pixels))
        return self.lons, self.lats 
Example #4
Source File: xrft.py    From xrft with MIT License
def _apply_window(da, dims, window_type='hanning'):
    """Creating windows in dimensions dims."""

    if window_type not in ['hanning']:
        raise NotImplementedError("Only hanning window is supported for now.")

    numpy_win_func = getattr(np, window_type)

    if da.chunks:
        def dask_win_func(n):
            return dsar.from_delayed(
                delayed(numpy_win_func, pure=True)(n),
                (n,), float)
        win_func = dask_win_func
    else:
        win_func = numpy_win_func

    windows = [xr.DataArray(win_func(len(da[d])),
               dims=da[d].dims, coords=da[d].coords) for d in dims]

    return da * reduce(operator.mul, windows[::-1]) 
Example #5
Source File: __init__.py    From satpy with GNU General Public License v3.0
def three_d_effect(img, **kwargs):
    """Create 3D effect using convolution."""
    w = kwargs.get('weight', 1)
    LOG.debug("Applying 3D effect with weight %.2f", w)
    kernel = np.array([[-w, 0, w],
                       [-w, 1, w],
                       [-w, 0, w]])
    mode = kwargs.get('convolve_mode', 'same')

    def func(band_data, kernel=kernel, mode=mode, index=None):
        del index

        delay = dask.delayed(_three_d_effect_delayed)(band_data, kernel, mode)
        new_data = da.from_delayed(delay, shape=band_data.shape, dtype=band_data.dtype)
        return new_data

    return apply_enhancement(img.data, func, separate=True, pass_dask=True) 
Example #6
Source File: wrappers.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _first_block(dask_object):
    """Extract the first block / partition from a dask object
    """
    if isinstance(dask_object, da.Array):
        if dask_object.ndim > 1 and dask_object.numblocks[-1] != 1:
            raise NotImplementedError(
                "IID estimators require that the array "
                "blocked only along the first axis. "
                "Rechunk your array before fitting."
            )
        shape = (dask_object.chunks[0][0],)
        if dask_object.ndim > 1:
            shape = shape + (dask_object.chunks[1][0],)

        return da.from_delayed(
            dask_object.to_delayed().flatten()[0], shape, dask_object.dtype
        )

    if isinstance(dask_object, dd._Frame):
        return dask_object.get_partition(0)

    else:
        return dask_object 
Example #7
Source File: __init__.py    From satpy with GNU General Public License v3.0
def lookup(img, **kwargs):
    """Assign values to channels based on a table."""
    luts = np.array(kwargs['luts'], dtype=np.float32) / 255.0

    def func(band_data, luts=luts, index=-1):
        # NaN/null values will become 0
        lut = luts[:, index] if len(luts.shape) == 2 else luts
        band_data = band_data.clip(0, lut.size - 1).astype(np.uint8)

        new_delay = dask.delayed(_lookup_delayed)(lut, band_data)
        new_data = da.from_delayed(new_delay, shape=band_data.shape,
                                   dtype=luts.dtype)
        return new_data

    return apply_enhancement(img.data, func, separate=True, pass_dask=True) 
Example #8
Source File: naive_bayes.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y=None):
        if self.classes is None:
            # TODO: delayed
            (self.classes_,) = dask.compute(np.unique(y))

        thetas = []
        sigmas = []
        counts = []
        N, P = X.shape
        K = len(self.classes_)

        for i, c in enumerate(self.classes_):
            X_c = X[y == c]
            thetas.append(X_c.mean(axis=0))
            sigmas.append(X_c.var(axis=0))
            counts.append(delayed(len)(X_c))

        thetas = da.from_delayed(delayed(np.array)(thetas), (K, P), np.float64)
        sigmas = da.from_delayed(delayed(np.array)(sigmas), (K, P), np.float64)
        counts = da.from_delayed(
            delayed(np.array)(counts, np.float64), (K,), np.float64
        )
        priors = counts / N

        # Should these be explicitly cached on self?
        self.theta_ = thetas
        self.sigma_ = sigmas
        self.class_count_ = counts
        self.class_prior_ = priors

        return self 
Example #9
Source File: data.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def fit(
        self,
        X: Union[ArrayLike, DataFrameType],
        y: Optional[Union[ArrayLike, SeriesType]] = None,
    ) -> "RobustScaler":
        q_min, q_max = self.quantile_range
        if not 0 <= q_min <= q_max <= 100:
            raise ValueError("Invalid quantile range: %s" % str(self.quantile_range))

        if isinstance(X, dd.DataFrame):
            n_columns = len(X.columns)
            partition_lengths = X.map_partitions(len).compute()
            dtype = np.result_type(*X.dtypes)
            blocks = X.to_delayed()
            X = da.vstack(
                [
                    da.from_delayed(
                        block.values, shape=(length, n_columns), dtype=dtype
                    )
                    for block, length in zip(blocks, partition_lengths)
                ]
            )

        quantiles: Any = [da.percentile(col, [q_min, 50.0, q_max]) for col in X.T]
        quantiles = da.vstack(quantiles).compute()
        self.center_: List[float] = quantiles[:, 1]
        self.scale_: List[float] = quantiles[:, 2] - quantiles[:, 0]
        self.scale_ = _handle_zeros_in_scale(self.scale_, copy=False)
        self.n_features_in_ = X.shape[1]
        return self 
Example #10
Source File: incremental_pca.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def svd_flip(u, v):
    """
    This is a replicate of svd_flip() which calls svd_flip_fixed()
    instead of skm.svd_flip()
    """
    u2, v2 = delayed(_svd_flip_copy, nout=2)(u, v, False)
    u = da.from_delayed(u2, shape=u.shape, dtype=u.dtype)
    v = da.from_delayed(v2, shape=v.shape, dtype=v.dtype)
    return u, v 
Example #11
Source File: _split.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _blockwise_slice(arr, idx):
    """Slice an array that is blockwise-aligned with idx.

    Parameters
    ----------
    arr : Dask array
    idx : Dask array
        Should have the following properties

        * Same blocks as `arr` along the first dimension
        * Contains only integers
        * Each block's values should be between ``[0, len(block))``

    Returns
    -------
    sliced : dask.Array
    """
    objs = []
    offsets = np.hstack([0, np.cumsum(arr.chunks[0])[:-1]])

    for i, (x, idx2) in enumerate(
        zip(arr.to_delayed().ravel(), idx.to_delayed().ravel())
    ):
        idx3 = idx2 - offsets[i]
        objs.append(x[idx3])

    shapes = idx.chunks[0]
    if arr.ndim == 2:
        P = arr.shape[1]
        shapes = [(x, P) for x in shapes]
    else:
        shapes = [(x,) for x in shapes]

    sliced = da.concatenate(
        [
            da.from_delayed(x, shape=shape, dtype=arr.dtype)
            for x, shape in zip(objs, shapes)
        ]
    )
    return sliced 
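The shape of this example — fan out with to_delayed(), apply a per-block operation, then stitch the pieces back together with from_delayed() and concatenate() — is the standard recipe for running an arbitrary function over each block of an array. A stripped-down sketch of the same recipe (np.sort is just a placeholder operation):

import numpy as np
import dask.array as da
from dask import delayed

x = da.arange(10, chunks=5)

blocks = [
    da.from_delayed(delayed(np.sort)(block), shape=(n,), dtype=x.dtype)
    for block, n in zip(x.to_delayed().ravel(), x.chunks[0])
]
result = da.concatenate(blocks)  # sorted within each block, not globally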
Example #12
Source File: _split.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def _split_blockwise(self, X, seeds):
        chunks = X.chunks[0]

        train_pct, test_pct = _maybe_normalize_split_sizes(
            self.train_size, self.test_size
        )
        sizes = [_validate_shuffle_split(c, test_pct, train_pct) for c in chunks]

        objs = [
            dask.delayed(_generate_idx, nout=2)(chunksize, seed, n_train, n_test)
            for chunksize, seed, (n_train, n_test) in zip(chunks, seeds, sizes)
        ]

        train_objs, test_objs = zip(*objs)
        offsets = np.hstack([0, np.cumsum(chunks)])
        train_idx = da.concatenate(
            [
                da.from_delayed(x + offset, (train_size,), np.dtype("int"))
                for x, chunksize, (train_size, _), offset in zip(
                    train_objs, chunks, sizes, offsets
                )
            ]
        )
        test_idx = da.concatenate(
            [
                da.from_delayed(x + offset, (test_size,), np.dtype("int"))
                for x, chunksize, (_, test_size), offset in zip(
                    test_objs, chunks, sizes, offsets
                )
            ]
        )

        return train_idx, test_idx 
Example #13
Source File: utils.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def svd_flip(u, v):
    u2, v2 = delayed(_svd_flip_copy, nout=2)(u, v)
    u = da.from_delayed(u2, shape=u.shape, dtype=u.dtype)
    v = da.from_delayed(v2, shape=v.shape, dtype=v.dtype)
    return u, v 
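Examples #10 and #13 both lean on delayed(..., nout=2), which lets a single delayed call that returns a tuple be unpacked into separate delayed objects, each wrapped independently by from_delayed. A minimal sketch of that mechanism (two_outputs is a hypothetical function):

import numpy as np
import dask.array as da
from dask import delayed

def two_outputs(x):
    return x + 1, x - 1

x = np.arange(4)
# nout=2 makes the delayed call unpackable into two delayed objects
d_plus, d_minus = delayed(two_outputs, nout=2)(x)
plus = da.from_delayed(d_plus, shape=x.shape, dtype=x.dtype)
minus = da.from_delayed(d_minus, shape=x.shape, dtype=x.dtype)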
Example #14
Source File: text.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def transform(self, raw_X):
        msg = "'X' should be a 1-dimensional array with length 'num_samples'."

        if not dask.is_dask_collection(raw_X):
            return self._hasher(**self.get_params()).transform(raw_X)

        if isinstance(raw_X, db.Bag):
            bag2 = raw_X.map_partitions(self._transformer)
            objs = bag2.to_delayed()
            arrs = [
                da.from_delayed(obj, (np.nan, self.n_features), self.dtype)
                for obj in objs
            ]
            result = da.concatenate(arrs, axis=0)
        elif isinstance(raw_X, dd.Series):
            result = raw_X.map_partitions(self._transformer)
        elif isinstance(raw_X, da.Array):
            # dask.Array
            chunks = ((np.nan,) * raw_X.numblocks[0], (self.n_features,))
            if raw_X.ndim == 1:
                result = raw_X.map_blocks(
                    self._transformer, dtype="f8", chunks=chunks, new_axis=1
                )
            else:
                raise ValueError(msg)
        else:
            raise ValueError(msg)

        meta = scipy.sparse.eye(0, format="csr")
        result._meta = meta
        return result 
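Note the shape (np.nan, self.n_features) passed to from_delayed above: dask accepts np.nan for axis lengths that are unknown until the delayed computation actually runs. A small sketch of that idea (variable_rows is a hypothetical stand-in):

import numpy as np
import dask
import dask.array as da

@dask.delayed
def variable_rows():
    # the row count is only known once this actually runs
    return np.ones((np.random.randint(1, 5), 3))

# np.nan marks the axis whose length is unknown ahead of time
arr = da.from_delayed(variable_rows(), shape=(np.nan, 3), dtype=float)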
Example #15
Source File: pairwise.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def pairwise_distances_argmin_min(
    X: ArrayLike,
    Y: ArrayLike,
    axis: int = 1,
    metric: Union[str, Callable[[ArrayLike, ArrayLike], float]] = "euclidean",
    batch_size: Optional[int] = None,
    metric_kwargs: Optional[Dict[str, Any]] = None,
):
    if batch_size is not None:
        msg = "'batch_size' is deprecated. Use sklearn.config_context instead.'"
        warnings.warn(msg, FutureWarning)

    XD = X.to_delayed().flatten().tolist()
    func = delayed(metrics.pairwise_distances_argmin_min, pure=True, nout=2)
    blocks = [func(x, Y, metric=metric, metric_kwargs=metric_kwargs) for x in XD]
    argmins, mins = zip(*blocks)

    argmins = [
        da.from_delayed(block, (chunksize,), np.int64)
        for block, chunksize in zip(argmins, X.chunks[0])
    ]
    # Scikit-learn seems to always use float64
    mins = [
        da.from_delayed(block, (chunksize,), "f8")
        for block, chunksize in zip(mins, X.chunks[0])
    ]
    argmins = da.concatenate(argmins)
    mins = da.concatenate(mins)
    return argmins, mins 
Example #16
Source File: msi_safe.py    From satpy with GNU General Public License v3.0
def get_dataset(self, key, info):
        """Load a dataset."""
        if self._channel != key.name:
            return

        logger.debug('Reading %s.', key.name)
        # FIXME: get this from MTD_MSIL1C.xml
        quantification_value = 10000.
        jp2 = glymur.Jp2k(self.filename)
        bitdepth = 0
        for seg in jp2.codestream.segment:
            try:
                bitdepth = max(bitdepth, seg.bitdepth[0])
            except AttributeError:
                pass

        jp2.dtype = (np.uint8 if bitdepth <= 8 else np.uint16)

        # Initialize the jp2 reader / doesn't work in a multi-threaded context.
        # jp2[0, 0]
        # data = da.from_array(jp2, chunks=CHUNK_SIZE) / quantification_value * 100

        data = da.from_delayed(delayed(jp2.read)(), jp2.shape, jp2.dtype)
        data = data.rechunk(CHUNK_SIZE) / quantification_value * 100

        proj = DataArray(data, dims=['y', 'x'])
        proj.attrs = info.copy()
        proj.attrs['units'] = '%'
        proj.attrs['platform_name'] = self.platform_name
        return proj 
Example #17
Source File: aapp_l1b.py    From satpy with GNU General Public License v3.0
def navigate(self):
        """Get the longitudes and latitudes of the scene."""
        lons40km = self._data["pos"][:, :, 1] * 1e-4
        lats40km = self._data["pos"][:, :, 0] * 1e-4

        try:
            from geotiepoints import SatelliteInterpolator
        except ImportError:
            logger.warning("Could not interpolate lon/lats, "
                           "python-geotiepoints missing.")
            self.lons, self.lats = lons40km, lats40km
        else:
            cols40km = np.arange(24, 2048, 40)
            cols1km = np.arange(2048)
            lines = lons40km.shape[0]
            rows40km = np.arange(lines)
            rows1km = np.arange(lines)

            along_track_order = 1
            cross_track_order = 3

            satint = SatelliteInterpolator(
                (lons40km, lats40km), (rows40km, cols40km), (rows1km, cols1km),
                along_track_order, cross_track_order)
            self.lons, self.lats = delayed(satint.interpolate, nout=2)()
            self.lons = da.from_delayed(self.lons, (lines, 2048), lons40km.dtype)
            self.lats = da.from_delayed(self.lats, (lines, 2048), lats40km.dtype) 
Example #18
Source File: aapp_l1b.py    From satpy with GNU General Public License v3.0
def get_angles(self, angle_id):
        """Get sun-satellite viewing angles."""
        sunz40km = self._data["ang"][:, :, 0] * 1e-2
        satz40km = self._data["ang"][:, :, 1] * 1e-2
        azidiff40km = self._data["ang"][:, :, 2] * 1e-2

        try:
            from geotiepoints.interpolator import Interpolator
        except ImportError:
            logger.warning("Could not interpolate sun-sat angles, "
                           "python-geotiepoints missing.")
            self.sunz, self.satz, self.azidiff = sunz40km, satz40km, azidiff40km
        else:
            cols40km = np.arange(24, 2048, 40)
            cols1km = np.arange(2048)
            lines = sunz40km.shape[0]
            rows40km = np.arange(lines)
            rows1km = np.arange(lines)

            along_track_order = 1
            cross_track_order = 3

            satint = Interpolator(
                [sunz40km, satz40km, azidiff40km], (rows40km, cols40km),
                (rows1km, cols1km), along_track_order, cross_track_order)
            self.sunz, self.satz, self.azidiff = delayed(satint.interpolate, nout=3)()
            self.sunz = da.from_delayed(self.sunz, (lines, 2048), sunz40km.dtype)
            self.satz = da.from_delayed(self.satz, (lines, 2048), satz40km.dtype)
            self.azidiff = da.from_delayed(self.azidiff, (lines, 2048), azidiff40km.dtype)

        return create_xarray(getattr(self, ANGLES[angle_id])) 
Example #19
Source File: __init__.py    From pyresample with GNU Lesser General Public License v3.0
def parallel_gradient_search(data, src_x, src_y, dst_x, dst_y,
                             src_gradient_xl, src_gradient_xp,
                             src_gradient_yl, src_gradient_yp,
                             dst_mosaic_locations, dst_slices,
                             **kwargs):
    """Run gradient search in parallel in input area coordinates."""
    method = kwargs.get('method', 'bilinear')
    # Determine the number of bands
    bands = np.array([arr.shape[0] for arr in data if arr is not None])
    num_bands = np.max(bands)
    if np.any(bands != num_bands):
        raise ValueError("All source data chunks have to have the same number of bands")
    chunks = {}
    is_pad = False
    # Collect co-located target chunks
    for i, arr in enumerate(data):
        if arr is None:
            is_pad = True
            res = da.full((num_bands, dst_slices[i][1] - dst_slices[i][0],
                           dst_slices[i][3] - dst_slices[i][2]), np.nan)
        else:
            is_pad = False
            res = dask.delayed(_gradient_resample_data)(
                arr.astype(np.float64),
                src_x[i], src_y[i],
                src_gradient_xl[i], src_gradient_xp[i],
                src_gradient_yl[i], src_gradient_yp[i],
                dst_x[i], dst_y[i],
                method=method)
            res = da.from_delayed(res, (num_bands, ) + dst_x[i].shape,
                                  dtype=np.float64)
        if dst_mosaic_locations[i] in chunks:
            if not is_pad:
                chunks[dst_mosaic_locations[i]].append(res)
        else:
            chunks[dst_mosaic_locations[i]] = [res, ]

    return _concatenate_chunks(chunks) 
Example #20
Source File: _bed_read.py    From pandas-plink with MIT License
def read_bed(filepath, nrows, ncols):
    from dask.array import concatenate, from_delayed
    from dask.delayed import delayed
    from numpy import float64

    chunk_size = 1024

    row_start = 0
    col_xs = []
    while row_start < nrows:
        row_end = min(row_start + chunk_size, nrows)
        col_start = 0
        row_xs = []
        while col_start < ncols:
            col_end = min(col_start + chunk_size, ncols)

            x = delayed(_read_bed_chunk)(
                filepath, nrows, ncols, row_start, row_end, col_start, col_end
            )

            shape = (row_end - row_start, col_end - col_start)
            row_xs += [from_delayed(x, shape, float64)]
            col_start = col_end
        col_xs += [concatenate(row_xs, axis=1)]
        row_start = row_end
    X = concatenate(col_xs, axis=0)
    return X 
Example #21
Source File: fixtures.py    From xhistogram with MIT License
def empty_dask_array(shape, dtype=float, chunks=None):
    # a dask array that raises if you try to compute it
    def raise_if_computed():
        raise ValueError('Triggered forbidden computation')

    a = dsa.from_delayed(dask.delayed(raise_if_computed)(), shape, dtype)
    if chunks is not None:
        a = a.rechunk(chunks)

    return a 
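A brief usage sketch for this fixture (hedged; it reuses empty_dask_array as defined above): because from_delayed only builds a task graph, operations can be chained freely, and the ValueError fires only if something actually computes the array.

arr = empty_dask_array((10, 10))
lazy = arr.sum(axis=0)  # fine: this only extends the task graph
# lazy.compute()        # would raise ValueError('Triggered forbidden computation')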
Example #22
Source File: default_reader.py    From aicsimageio with BSD 3-Clause "New" or "Revised" License
def _read_delayed(self) -> da.core.Array:
        with imageio.get_reader(self._file) as reader:
            # Store length as it is used a bunch
            image_length = reader.get_length()

            # Handle single image formats like png, jpeg, etc
            if image_length == 1:
                return da.from_array(self._get_data(self._file, 0))

            # Handle many image formats like gif, mp4, etc
            elif image_length > 1:
                # Get a sample image
                sample = self._get_data(self._file, 0)

                # Create operating shape for the final dask array by prepending
                # image length to a tuple of ones that is the same length as
                # the sample shape
                operating_shape = (image_length,) + ((1,) * len(sample.shape))
                # Create numpy array of empty arrays for delayed get data
                # functions
                lazy_arrays = np.ndarray(operating_shape, dtype=object)
                for indices, _ in np.ndenumerate(lazy_arrays):
                    lazy_arrays[indices] = da.from_delayed(
                        delayed(self._get_data)(self._file, indices[0]),
                        shape=sample.shape,
                        dtype=sample.dtype,
                    )

                # Block them into a single dask array
                return da.block(lazy_arrays.tolist())

            # Catch all other image types as unsupported
            # https://imageio.readthedocs.io/en/stable/userapi.html#imageio.core.format.Reader.get_length
            else:
                raise exceptions.UnsupportedFileFormatError(self._file) 
Example #23
Source File: resample.py    From satpy with GNU General Public License v3.0
def compute(self, data, cache_id=None, fill_value=0, weight_count=10000,
                weight_min=0.01, weight_distance_max=1.0,
                weight_delta_max=1.0, weight_sum_min=-1.0,
                maximum_weight_mode=False, grid_coverage=0, **kwargs):
        """Resample the data according to the precomputed X/Y coordinates."""
        rows = self.cache["rows"]
        cols = self.cache["cols"]

        # If the data is scan-based, check its metadata or the passed
        # kwargs; otherwise assume the entire input swath is one large
        # "scanline"
        rows_per_scan = kwargs.get('rows_per_scan',
                                   data.attrs.get("rows_per_scan",
                                                  data.shape[0]))

        if data.ndim == 3 and 'bands' in data.dims:
            data_in = tuple(data.sel(bands=band).data
                            for band in data['bands'])
        elif data.ndim == 2:
            data_in = data.data
        else:
            raise ValueError("Unsupported data shape for EWA resampling.")

        res = dask.delayed(self._call_fornav)(
            cols, rows, self.target_geo_def, data_in,
            grid_coverage=grid_coverage,
            rows_per_scan=rows_per_scan, weight_count=weight_count,
            weight_min=weight_min, weight_distance_max=weight_distance_max,
            weight_delta_max=weight_delta_max, weight_sum_min=weight_sum_min,
            maximum_weight_mode=maximum_weight_mode)
        if isinstance(data_in, tuple):
            new_shape = (len(data_in),) + self.target_geo_def.shape
        else:
            new_shape = self.target_geo_def.shape
        data_arr = da.from_delayed(res, new_shape, data.dtype)
        # from_delayed creates one large chunk; break it up a bit if we can
        data_arr = data_arr.rechunk([CHUNK_SIZE] * data_arr.ndim)
        if data.ndim == 3 and data.dims[0] == 'bands':
            dims = ('bands', 'y', 'x')
        elif data.ndim == 2:
            dims = ('y', 'x')
        else:
            dims = data.dims

        res = xr.DataArray(data_arr, dims=dims, attrs=data.attrs.copy())
        return update_resampled_coords(data, res, self.target_geo_def) 
Example #24
Source File: tiff_reader.py    From aicsimageio with BSD 3-Clause "New" or "Revised" License
def _read_delayed(self) -> da.core.Array:
        # Load Tiff
        with TiffFile(self._file) as tiff:
            # Check each scene has the same shape
            # If scene shape checking fails, use the specified scene and update
            # operating shape
            scenes = tiff.series
            operating_shape = scenes[0].shape
            if not self._scene_shape_is_consistent(tiff, S=self.specific_s_index):
                operating_shape = scenes[self.specific_s_index].shape
                scenes = [scenes[self.specific_s_index]]

            # Get sample yx plane
            sample = scenes[0].pages[0].asarray()

            # Combine length of scenes and operating shape
            # Replace YX dims with empty dimensions
            operating_shape = (len(scenes), *operating_shape)
            operating_shape = operating_shape[:-2] + (1, 1)

            # Make ndarray for lazy arrays to fill
            lazy_arrays = np.ndarray(operating_shape, dtype=object)
            for all_page_index, (np_index, _) in enumerate(np.ndenumerate(lazy_arrays)):
                # Scene index is the first index in np_index
                scene_index = np_index[0]

                # The page index is the overall enumeration index divided
                # by (scene index + 1). For example, if the image has 10 Z
                # slices and 5 scenes, there are 50 total pages
                this_page_index = all_page_index // (scene_index + 1)

                # Fill the numpy array with the delayed arrays
                lazy_arrays[np_index] = da.from_delayed(
                    delayed(TiffReader._imread)(
                        self._file, scene_index, this_page_index
                    ),
                    shape=sample.shape,
                    dtype=sample.dtype,
                )

            # Convert the numpy array of lazy readers into a dask array
            data = da.block(lazy_arrays.tolist())

            # Only return the scene dimension if multiple scenes are present
            if len(scenes) == 1:
                data = data[0, :]

            return data