Python xarray.decode_cf() Examples
The following are 22 code examples of xarray.decode_cf(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module xarray, or try the search function.
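Before the examples, a quick orientation: xarray.decode_cf() takes a Dataset whose variables still carry raw CF metadata (time units, calendar, _FillValue, scale_factor) and returns a new Dataset with datetimes decoded and masking/scaling applied. The sketch below is illustrative only and is not taken from any of the projects listed; the variable names and attribute values are made up.

import numpy as np
import xarray as xr

# Raw values plus CF attributes, as they would appear in a NetCDF file.
ds = xr.Dataset(
    {
        "t2m": (
            ("time",),
            np.array([2501, -999, 2503], dtype="int16"),
            {"scale_factor": 0.1, "add_offset": 0.0, "_FillValue": -999},
        )
    },
    coords={
        "time": ("time", np.array([0, 1, 2]),
                 {"units": "days since 2000-01-01", "calendar": "standard"})
    },
)

decoded = xr.decode_cf(ds)
print(decoded.time.values)  # numpy datetime64 values
print(decoded.t2m.values)   # floats, with the fill value replaced by NaN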
Example #1
Source File: test_ensembles.py From xclim with Apache License 2.0 | 6 votes |
def test_create_ensemble(self):
    ens = ensembles.create_ensemble(self.nc_files_simple)
    assert len(ens.realization) == len(self.nc_files_simple)
    assert len(ens.time) == 151

    # create again using xr.Dataset objects
    ds_all = []
    for n in self.nc_files_simple:
        ds = xr.open_dataset(n, decode_times=False)
        ds["time"] = xr.decode_cf(ds).time
        ds_all.append(ds)

    ens1 = ensembles.create_ensemble(ds_all)
    coords = list(ens1.coords)
    coords.extend(list(ens1.data_vars))
    for c in coords:
        np.testing.assert_array_equal(ens[c], ens1[c])
    for i in np.arange(0, len(ens1.realization)):
        np.testing.assert_array_equal(
            ens1.isel(realization=i).tg_mean.values, ds_all[i].tg_mean.values
        )
Example #2
Source File: llcmodel.py From xmitgcm with MIT License | 6 votes |
def _make_coords_faces(self, all_iters):
    time = self.delta_t * all_iters
    time_attrs = {'units': self.time_units, 'calendar': self.calendar}
    coords = {'face': ('face', np.arange(self.nface)),
              'i': ('i', np.arange(self.nx)),
              'i_g': ('i_g', np.arange(self.nx)),
              'j': ('j', np.arange(self.nx)),
              'j_g': ('j_g', np.arange(self.nx)),
              'k': ('k', np.arange(self.nz)),
              'k_u': ('k_u', np.arange(self.nz)),
              'k_l': ('k_l', np.arange(self.nz)),
              'k_p1': ('k_p1', np.arange(self.nz + 1)),
              'niter': ('time', all_iters),
              'time': ('time', time, time_attrs)
              }
    return xr.decode_cf(xr.Dataset(coords=coords))
Example #3
Source File: test_data_loader.py From aospy with Apache License 2.0 | 6 votes |
def test_maybe_apply_time_shift_inst(gfdl_data_loader, ds_inst, var_name,
                                     generate_file_set_args):
    ds_inst = xr.decode_cf(ds_inst)
    generate_file_set_args['dtype_in_time'] = 'inst'
    generate_file_set_args['intvl_in'] = '3hr'
    da = ds_inst[var_name]
    result = gfdl_data_loader._maybe_apply_time_shift(
        da.copy(), **generate_file_set_args)[TIME_STR]

    expected = da[TIME_STR] + np.timedelta64(-3, 'h')
    expected[TIME_STR] = expected
    assert result.identical(expected)

    generate_file_set_args['intvl_in'] = 'daily'
    da = ds_inst[var_name]
    result = gfdl_data_loader._maybe_apply_time_shift(
        da.copy(), **generate_file_set_args)[TIME_STR]

    expected = da[TIME_STR]
    expected[TIME_STR] = expected
    assert result.identical(expected)
Example #4
Source File: test_io_odim.py From wradlib with MIT License | 6 votes |
def create_range(i, decode=False):
    where = create_dset_where(i)
    ngates = where["nbins"]
    range_start = where["rstart"] * 1000.0
    bin_range = where["rscale"]
    cent_first = range_start + bin_range / 2.0
    range_data = np.arange(
        cent_first, range_start + bin_range * ngates, bin_range, dtype="float32"
    )
    range_attrs = io.xarray.range_attrs
    range_attrs["meters_to_center_of_first_gate"] = cent_first[0]
    range_attrs["meters_between_gates"] = bin_range[0]
    da = xr.DataArray(range_data, dims=["range"], attrs=range_attrs)
    if decode:
        da = xr.decode_cf(xr.Dataset({"arr": da})).arr
    return da
Example #5
Source File: test_utils_times.py From aospy with Apache License 2.0 | 5 votes |
def test_assert_has_data_for_time():
    time_bounds = np.array([[0, 31], [31, 59], [59, 90]])
    nv = np.array([0, 1])
    time = np.array([15, 46, 74])
    data = np.zeros((3))
    var_name = 'a'
    ds = xr.DataArray(data,
                      coords=[time],
                      dims=[TIME_STR],
                      name=var_name).to_dataset()
    ds[TIME_BOUNDS_STR] = xr.DataArray(time_bounds,
                                       coords=[time, nv],
                                       dims=[TIME_STR, BOUNDS_STR],
                                       name=TIME_BOUNDS_STR)
    units_str = 'days since 2000-01-01 00:00:00'
    ds[TIME_STR].attrs['units'] = units_str
    ds = ensure_time_avg_has_cf_metadata(ds)
    ds = set_grid_attrs_as_coords(ds)
    ds = xr.decode_cf(ds)
    da = ds[var_name]

    start_date = np.datetime64('2000-01-01')
    end_date = np.datetime64('2000-03-31')
    _assert_has_data_for_time(da, start_date, end_date)

    start_date_bad = np.datetime64('1999-12-31')
    end_date_bad = np.datetime64('2000-04-01')
    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date_bad, end_date)
    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date, end_date_bad)
    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date_bad, end_date_bad)
Example #6
Source File: date_utils.py From cosima-cookbook with Apache License 2.0 | 5 votes |
def rebase_dataset(ds, target_units=None, timevar='time', offset=None):
    """
    Rebase the time dimension variable in a dataset to a different start date.
    This is useful to overcome limitations in pandas datetime indices used in
    xarray, and to place two datasets with different date indices onto a
    common date index.
    """
    # The units are defined as the units used by timevar
    units = ds[timevar].attrs['units']
    calendar = ds[timevar].attrs['calendar']

    newds = ds.copy()

    # Cycle through all variables, setting a flag if they are a bounds variable
    flag_bounds(newds)

    for name in newds.variables:
        if is_bounds(newds[name]):
            # This is a bounds variable and has been flagged as such so ignore
            # as it will be processed by the variable for which it is the bounds
            continue
        # Use .get() so variables without a units attribute are simply skipped
        if newds[name].attrs.get('units') == units:
            newds[name] = rebase_variable(newds[name], calendar, target_units,
                                          offset=offset)
            if 'bounds' in newds[name].attrs:
                # Must make the same adjustment to the bounds variable
                bvarname = newds[name].attrs['bounds']
                try:
                    newds[bvarname] = rebase_variable(newds[bvarname], calendar,
                                                      target_units,
                                                      src_units=units,
                                                      offset=offset)
                except KeyError:
                    # Ignore if bounds_var missing
                    pass

    # Unset bounds flags
    unflag_bounds(newds)

    # newds = xr.decode_cf(newds, decode_coords=False, decode_times=True)

    return newds
Example #7
Source File: test_ensembles.py From xclim with Apache License 2.0 | 5 votes |
def test_no_time(self):
    # create again using xr.Dataset objects
    ds_all = []
    for n in self.nc_files_simple:
        ds = xr.open_dataset(n, decode_times=False)
        ds["time"] = xr.decode_cf(ds).time
        ds_all.append(ds.groupby(ds.time.dt.month).mean("time", keep_attrs=True))

    ens = ensembles.create_ensemble(ds_all)
    assert len(ens.realization) == len(self.nc_files_simple)
Example #8
Source File: radolan.py From wradlib with MIT License | 5 votes |
def radolan_to_xarray(data, attrs):
    """Converts RADOLAN data to xarray Dataset

    Parameters
    ----------
    data : :func:`numpy:numpy.array`
        array of shape (number of rows, number of columns)
    attrs : dict
        dictionary of metadata information from the file header

    Returns
    -------
    dset : xarray.Dataset
        RADOLAN data and coordinates
    """
    product = attrs["producttype"]
    pattrs = _get_radolan_product_attributes(attrs)
    radolan_grid_xy = rect.get_radolan_grid(attrs["nrow"], attrs["ncol"])
    x0 = radolan_grid_xy[0, :, 0]
    y0 = radolan_grid_xy[:, 0, 1]
    if pattrs:
        if "nodatamask" in attrs:
            data.flat[attrs["nodatamask"]] = pattrs["_FillValue"]
        if "cluttermask" in attrs:
            data.flat[attrs["cluttermask"]] = pattrs["_FillValue"]
    darr = xr.DataArray(
        data,
        attrs=pattrs,
        dims=["y", "x"],
        coords={"time": attrs["datetime"], "x": x0, "y": y0},
    )
    dset = xr.Dataset({product: darr})
    dset = dset.pipe(xr.decode_cf)

    return dset
Example #9
Source File: xarray.py From wradlib with MIT License | 5 votes |
def _decode_cf(self, obj):
    if isinstance(obj, xr.DataArray):
        out = xr.decode_cf(xr.Dataset({"arr": obj}), self._kwargs).arr
    else:
        out = xr.decode_cf(obj, self._kwargs)
    return out
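This helper, like the wradlib test helpers in Examples #4 and #10-#12, wraps a bare DataArray in a temporary Dataset because xarray.decode_cf() operates on Datasets, then pulls the decoded array back out. A minimal sketch of that trick, using made-up attribute values rather than wradlib's real metadata:

import numpy as np
import xarray as xr

# A DataArray whose attrs carry CF time metadata (values are illustrative).
da = xr.DataArray(
    np.array([0.0, 30.0, 60.0]),
    dims=["azimuth"],
    attrs={"units": "seconds since 2020-01-01T00:00:00"},
)

# decode_cf() works on Datasets, so wrap the array, decode, and unwrap again.
decoded = xr.decode_cf(xr.Dataset({"arr": da})).arr
print(decoded.values)  # datetime64 values instead of raw seconds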
Example #10
Source File: test_io_odim.py From wradlib with MIT License | 5 votes |
def create_elevation(i, decode=False, nrays=360):
    startel = create_startelA(i, nrays=nrays)
    stopel = create_stopelA(i, nrays=nrays)
    elevation_data = (startel + stopel) / 2.0
    da = xr.DataArray(elevation_data, dims=["azimuth"], attrs=io.xarray.el_attrs)
    if decode:
        da = xr.decode_cf(xr.Dataset({"arr": da})).arr
    return da
Example #11
Source File: test_io_odim.py From wradlib with MIT License | 5 votes |
def create_azimuth(decode=False, nrays=360):
    startaz = create_startazA(nrays=nrays)
    stopaz = create_stopazA(nrays=nrays)
    zero_index = np.where(stopaz < startaz)
    stopaz[zero_index[0]] += 360
    azimuth_data = (startaz + stopaz) / 2.0
    da = xr.DataArray(azimuth_data, dims=["azimuth"], attrs=io.xarray.az_attrs)
    if decode:
        da = xr.decode_cf(xr.Dataset({"arr": da})).arr
    return da
Example #12
Source File: test_io_odim.py From wradlib with MIT License | 5 votes |
def create_ray_time(i, decode=False, nrays=360):
    time_data = (create_startazT(i, nrays=nrays) +
                 create_stopazT(i, nrays=nrays)) / 2.0
    da = xr.DataArray(time_data, dims=["azimuth"], attrs=io.xarray.time_attrs)
    if decode:
        da = xr.decode_cf(xr.Dataset({"arr": da})).arr
    return da
Example #13
Source File: hoaps.py From typhon with MIT License | 5 votes |
def read(self, filename, **kwargs):
    """Read and parse a NetCDF file and load it to a xarray.Dataset

    Args:
        filename: Path and name of the file as string or FileInfo object.
        **kwargs: Additional keyword arguments that are allowed for the
            :class:`~typhon.files.handlers.common.NetCDF4` class.

    Returns:
        A xarray.Dataset object.
    """
    # Make sure that the standard fields are always going to be imported:
    fields = kwargs.pop("fields", None)
    if fields is not None:
        fields = {"time", "lat", "lon"} | set(fields)

    # xarray has problems with decoding the time variable correctly. Hence,
    # we disable it here:
    decode_cf = kwargs.pop("decode_cf", True)
    data = super().read(filename, fields=fields, decode_cf=False, **kwargs)

    # Then we fix the problem (we need int64 instead of int32):
    attrs = data["time"].attrs.copy()
    data["time"] = data["time"].astype(int)
    data["time"].attrs = attrs

    # Do decoding now (only if the user wanted it!)
    if decode_cf:
        return xr.decode_cf(data)
    return data
Example #14
Source File: test_data_loader.py From aospy with Apache License 2.0 | 5 votes |
def test_maybe_apply_time_shift_ts(gfdl_data_loader, ds_with_time_bounds,
                                   var_name, generate_file_set_args):
    ds = xr.decode_cf(ds_with_time_bounds)
    da = ds[var_name]

    result = gfdl_data_loader._maybe_apply_time_shift(
        da.copy(), **generate_file_set_args)[TIME_STR]
    assert result.identical(da[TIME_STR])
Example #15
Source File: test_utils_times.py From aospy with Apache License 2.0 | 5 votes |
def test_sel_time():
    time_bounds = np.array([[0, 31], [31, 59], [59, 90]])
    nv = np.array([0, 1])
    time = np.array([15, 46, 74])
    data = np.zeros((3))
    var_name = 'a'
    ds = xr.DataArray(data,
                      coords=[time],
                      dims=[TIME_STR],
                      name=var_name).to_dataset()
    ds[TIME_BOUNDS_STR] = xr.DataArray(time_bounds,
                                       coords=[time, nv],
                                       dims=[TIME_STR, BOUNDS_STR],
                                       name=TIME_BOUNDS_STR)
    units_str = 'days since 2000-01-01 00:00:00'
    ds[TIME_STR].attrs['units'] = units_str
    ds = ensure_time_avg_has_cf_metadata(ds)
    ds = set_grid_attrs_as_coords(ds)
    ds = xr.decode_cf(ds)
    da = ds[var_name]

    start_date = np.datetime64('2000-02-01')
    end_date = np.datetime64('2000-03-31')
    result = sel_time(da, start_date, end_date)
    assert result[SUBSET_START_DATE_STR].values == start_date
    assert result[SUBSET_END_DATE_STR].values == end_date
Example #16
Source File: test_utils_times.py From aospy with Apache License 2.0 | 5 votes |
def test_assert_has_data_for_time_str_input():
    time_bounds = np.array([[0, 31], [31, 59], [59, 90]])
    nv = np.array([0, 1])
    time = np.array([15, 46, 74])
    data = np.zeros((3))
    var_name = 'a'
    ds = xr.DataArray(data,
                      coords=[time],
                      dims=[TIME_STR],
                      name=var_name).to_dataset()
    ds[TIME_BOUNDS_STR] = xr.DataArray(time_bounds,
                                       coords=[time, nv],
                                       dims=[TIME_STR, BOUNDS_STR],
                                       name=TIME_BOUNDS_STR)
    units_str = 'days since 2000-01-01 00:00:00'
    ds[TIME_STR].attrs['units'] = units_str
    ds = ensure_time_avg_has_cf_metadata(ds)
    ds = set_grid_attrs_as_coords(ds)
    ds = xr.decode_cf(ds)
    da = ds[var_name]

    start_date = '2000-01-01'
    end_date = '2000-03-31'
    _assert_has_data_for_time(da, start_date, end_date)

    start_date_bad = '1999-12-31'
    end_date_bad = '2000-04-01'

    # With strings these checks are disabled
    _assert_has_data_for_time(da, start_date_bad, end_date)
    _assert_has_data_for_time(da, start_date, end_date_bad)
    _assert_has_data_for_time(da, start_date_bad, end_date_bad)
Example #17
Source File: test_utils_times.py From aospy with Apache License 2.0 | 5 votes |
def test_assert_has_data_for_time_cftime_datetimes(calendar, date_type):
    time_bounds = np.array([[0, 2], [2, 4], [4, 6]])
    nv = np.array([0, 1])
    time = np.array([1, 3, 5])
    data = np.zeros((3))
    var_name = 'a'
    ds = xr.DataArray(data,
                      coords=[time],
                      dims=[TIME_STR],
                      name=var_name).to_dataset()
    ds[TIME_BOUNDS_STR] = xr.DataArray(time_bounds,
                                       coords=[time, nv],
                                       dims=[TIME_STR, BOUNDS_STR],
                                       name=TIME_BOUNDS_STR)
    units_str = 'days since 0002-01-02 00:00:00'
    ds[TIME_STR].attrs['units'] = units_str
    ds[TIME_STR].attrs['calendar'] = calendar
    ds = ensure_time_avg_has_cf_metadata(ds)
    ds = set_grid_attrs_as_coords(ds)
    ds = xr.decode_cf(ds)
    da = ds[var_name]

    start_date = date_type(2, 1, 2)
    end_date = date_type(2, 1, 8)
    _assert_has_data_for_time(da, start_date, end_date)

    start_date_bad = date_type(2, 1, 1)
    end_date_bad = date_type(2, 1, 9)
    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date_bad, end_date)
    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date, end_date_bad)
    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date_bad, end_date_bad)
Example #18
Source File: times.py From aospy with Apache License 2.0 | 5 votes |
def prep_time_data(ds):
    """Prepare time coordinate information in Dataset for use in aospy.

    1. If the Dataset contains a time bounds coordinate, add attributes
       representing the true beginning and end dates of the time interval
       used to construct the Dataset
    2. If the Dataset contains a time bounds coordinate, overwrite the time
       coordinate values with the averages of the time bounds at each
       timestep
    3. Decode the times into np.datetime64 objects for time indexing

    Parameters
    ----------
    ds : Dataset
        Pre-processed Dataset with time coordinate renamed to
        internal_names.TIME_STR

    Returns
    -------
    Dataset
        The processed Dataset
    """
    ds = ensure_time_as_index(ds)
    if TIME_BOUNDS_STR in ds:
        ds = ensure_time_avg_has_cf_metadata(ds)
        ds[TIME_STR] = average_time_bounds(ds)
    else:
        logging.warning("dt array not found. Assuming equally spaced "
                        "values in time, even though this may not be "
                        "the case")
        ds = add_uniform_time_weights(ds)
    return xr.decode_cf(ds, decode_times=True, decode_coords=False,
                        mask_and_scale=True)
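prep_time_data above also illustrates that xarray.decode_cf() accepts keyword switches similar to xarray.open_dataset (decode_times, decode_coords, mask_and_scale), so decoding can be applied selectively. A small illustrative sketch, not taken from aospy, with made-up variable names and values:

import numpy as np
import xarray as xr

ds = xr.Dataset(
    {
        "precip": (
            ("time",),
            np.array([1, -9999, 3], dtype="int32"),
            {"_FillValue": -9999},
        )
    },
    coords={"time": ("time", np.array([0, 1, 2]),
                     {"units": "days since 2000-01-01"})},
)

# Decode times and apply the fill value, but skip 'coordinates' handling.
decoded = xr.decode_cf(ds, decode_times=True, decode_coords=False,
                       mask_and_scale=True)
print(decoded.time.dtype)     # datetime64[ns]
print(decoded.precip.values)  # [1., nan, 3.]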
Example #19
Source File: common.py From typhon with MIT License | 4 votes |
def read(self, file_info, fields=None, mapping=None, **kwargs):
    """Read and parse NetCDF files and load them to a xarray.Dataset

    Args:
        file_info: Path and name of the file as string or FileInfo object.
            This can also be a tuple/list of file names or a path with
            asterisk (this is still not implemented!).
        fields: List of field names that should be read. The other fields
            will be ignored. If `mapping` is given, this should contain the
            new field names.
        mapping: A dictionary which is used for renaming the fields. If
            given, `fields` must contain the old field names.
        **kwargs: Additional keyword arguments for
            :func:`xarray.decode_cf` such as `mask_and_scale`, etc.

    Returns:
        A xarray.Dataset object.

    Examples:

    .. code-block:: python

        from typhon.files import NetCDF4

        fh = NetCDF4()
        data = fh.read("filename.nc")

        # OR if you want to load only some fields:
        data = fh.read("filename.nc", fields=["temp", "lat", "lon"])
    """
    # xr.open_dataset still does not support loading all groups from a
    # file, except in a very cumbersome (and expensive) way via the
    # parameter `group`. To avoid this, we load all groups and their
    # variables using the netCDF4 library directly and load them later
    # into a xarray dataset.

    with netCDF4.Dataset(file_info.path, "r") as root:
        # xarray decode_cf scales, don't do it twice!
        root.set_auto_scale(False)

        dataset = xr.Dataset()
        self._load_group(dataset, None, root, fields)

        dataset = xr.decode_cf(dataset, **kwargs)

    return _xarray_rename_fields(dataset, mapping)
Example #20
Source File: test_io_odim.py From wradlib with MIT License | 4 votes |
def test_sweep_data(self, get_loader):
    if isinstance(self, MeasuredDataVolume):
        pytest.skip("requires synthetic data")
    if get_loader == "netcdf4" and self.format == "GAMIC":
        pytest.skip("gamic needs hdf-based loader")
    with self.get_volume_data(
        get_loader,
        decode_coords=False,
        mask_and_scale=False,
        decode_times=False,
        chunks=None,
        parallel=False,
    ) as vol:
        for i, ts in enumerate(vol):
            if "02" in self.name:
                ds = create_dataset(i, nrays=361)
            else:
                ds = create_dataset(i)
            for j, swp in enumerate(ts):
                xr.testing.assert_equal(swp.data, ds)
    with self.get_volume_data(
        get_loader,
        decode_coords=True,
        mask_and_scale=False,
        decode_times=True,
        chunks=None,
        parallel=False,
    ) as vol:
        for i, ts in enumerate(vol):
            for j, swp in enumerate(ts):
                data = create_dataset(i)
                data = data.assign_coords(create_coords(i).coords)
                data = data.assign_coords(
                    create_site(self.data["where"]["attrs"]).coords
                )
                data = data.assign_coords({"sweep_mode": "azimuth_surveillance"})
                data = xr.decode_cf(data, mask_and_scale=False)
                xr.testing.assert_equal(swp.data, data)
    with self.get_volume_data(
        get_loader,
        decode_coords=True,
        mask_and_scale=True,
        decode_times=True,
        chunks=None,
        parallel=False,
    ) as vol:
        for i, ts in enumerate(vol):
            for j, swp in enumerate(ts):
                data = create_dataset(i, type=self.format)
                data = data.assign_coords(create_coords(i).coords)
                data = data.assign_coords(
                    create_site(self.data["where"]["attrs"]).coords
                )
                data = data.assign_coords({"sweep_mode": "azimuth_surveillance"})
                data = xr.decode_cf(data)
                xr.testing.assert_equal(swp.data, data)
    del swp
    del ts
    del vol
    gc.collect()
Example #21
Source File: common.py From typhon with MIT License | 4 votes |
def read(self, file_info, fields=None, mapping=None, **kwargs):
    """Read SEVIRI HDF5 files and load them to a xarray.Dataset

    Args:
        file_info: Path and name of the file as string or FileInfo object.
            This can also be a tuple/list of file names or a path with
            asterisk.
        fields: ...
        **kwargs: Additional keyword arguments that are valid for
            :class:`typhon.files.handlers.common.NetCDF4`.

    Returns:
        A xarray.Dataset object.
    """
    # Here, the user fields overwrite the standard fields:
    if fields is None:
        raise NotImplementedError(
            "Loading complete HDF5 files without giving explicit field "
            "names is not yet implemented!"
        )

    # keys are dimension size, values are dimension names
    dim_dict = {}

    # Load the dataset from the file:
    with h5py.File(file_info.path, 'r') as file:
        dataset = xr.Dataset()

        for field in fields:
            if field not in file:
                raise KeyError(f"No field named '{field}'!")

            dims = []
            for dim_size in file[field].shape:
                dim_name = dim_dict.get(dim_size, None)
                if dim_name is None:
                    dim_name = f"dim_{len(dim_dict)}"
                    dim_dict[dim_size] = dim_name
                dims.append(dim_name)

            dataset[field] = xr.DataArray(
                file[field],
                dims=dims,
                # Currently, some attributes may contain byte-strings that
                # are not nice for further processing
                attrs={},  # dict(file[field].attrs)
            )

        # decode_cf returns a new Dataset, so keep the decoded result
        dataset = xr.decode_cf(dataset, **kwargs)
        dataset.load()

    return _xarray_rename_fields(dataset, mapping)
Example #22
Source File: glm.py From glmtools with BSD 3-Clause "New" or "Revised" License | 4 votes |
def _check_and_fix_missing_unsigned_time(self, filename):
    """ Check for the missing _Unsigned attribute on files created as part
    of the D0.07 build of the operational environment. Correct if present.

    The problem was only present for less than a month, and this function
    does nothing if the data file is outside that time range.

    Modifies self.dataset to have the correct times.
    """
    vars_to_correct = ['event_time_offset', 'group_time_offset',
                       'group_frame_time_offset',
                       'flash_time_offset_of_first_event',
                       'flash_time_offset_of_last_event',
                       'flash_frame_time_offset_of_first_event',
                       'flash_frame_time_offset_of_last_event']
    start_g16_problem_date = np.datetime64('2018-10-15T00:00:00')
    end_g16_problem_date = np.datetime64('2018-11-06T00:00:00')

    prod_tmin, prod_tmax = self.dataset.product_time_bounds.data
    in_time = ((prod_tmin >= start_g16_problem_date) &
               (prod_tmax <= end_g16_problem_date))

    # The date range is approximate, and corresponds to when D0.07 first was
    # in production in the OE, and before the _Unsigned attribute was added
    # to the production data files. We don't know the exact start time, but
    # fortunately the group_frame_time_offset was also added to D0.07, so we
    # look for that attribute within the date range. Later, adding the
    # attribute to a file that already has it is no problem, so we don't
    # worry about the last time on Nov 6 when the fix is needed.
    if (in_time & hasattr(self.dataset, 'group_frame_time_offset')):
        unmod = xr.open_dataset(filename, mask_and_scale=False,
                                decode_cf=False)
        time_dataset = xr.Dataset()
        for var in vars_to_correct:
            # Add the _Unsigned attribute
            da = getattr(unmod, var)
            da.attrs['_Unsigned'] = 'true'
            time_dataset[var] = da
        decoded = xr.decode_cf(time_dataset)
        # Copy corrected time variables over to the new dataset
        for var in vars_to_correct:
            self.dataset[var] = decoded[var]
        unmod.close()
        return True
    else:
        return False