Python xarray.decode_cf() Examples
The following are 22 code examples of xarray.decode_cf(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module xarray, or try the search function.
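Before the examples, a quick orientation: xarray.decode_cf() takes a Dataset whose variables still carry raw CF metadata (time units, calendar, _FillValue, scale_factor) and returns a new Dataset with datetimes decoded and masking/scaling applied. The sketch below is illustrative only and is not taken from any of the projects listed; the variable names and attribute values are made up.

import numpy as np
import xarray as xr

# Raw values plus CF attributes, as they would appear in a NetCDF file.
ds = xr.Dataset(
    {
        "t2m": (
            ("time",),
            np.array([2501, -999, 2503], dtype="int16"),
            {"scale_factor": 0.1, "add_offset": 0.0, "_FillValue": -999},
        )
    },
    coords={
        "time": ("time", np.array([0, 1, 2]),
                 {"units": "days since 2000-01-01", "calendar": "standard"})
    },
)

decoded = xr.decode_cf(ds)
print(decoded.time.values)  # numpy datetime64 values
print(decoded.t2m.values)   # floats, with the fill value replaced by NaN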
Example #1
Source File: test_ensembles.py From xclim with Apache License 2.0 | 6 votes |
def test_create_ensemble(self):
    ens = ensembles.create_ensemble(self.nc_files_simple)
    assert len(ens.realization) == len(self.nc_files_simple)
    assert len(ens.time) == 151

    # create again using xr.Dataset objects
    ds_all = []
    for n in self.nc_files_simple:
        ds = xr.open_dataset(n, decode_times=False)
        ds["time"] = xr.decode_cf(ds).time
        ds_all.append(ds)

    ens1 = ensembles.create_ensemble(ds_all)
    coords = list(ens1.coords)
    coords.extend(list(ens1.data_vars))
    for c in coords:
        np.testing.assert_array_equal(ens[c], ens1[c])
    for i in np.arange(0, len(ens1.realization)):
        np.testing.assert_array_equal(
            ens1.isel(realization=i).tg_mean.values, ds_all[i].tg_mean.values
        )
Example #2
Source File: llcmodel.py From xmitgcm with MIT License | 6 votes |
def _make_coords_faces(self, all_iters):
    time = self.delta_t * all_iters
    time_attrs = {'units': self.time_units, 'calendar': self.calendar}
    coords = {'face': ('face', np.arange(self.nface)),
              'i': ('i', np.arange(self.nx)),
              'i_g': ('i_g', np.arange(self.nx)),
              'j': ('j', np.arange(self.nx)),
              'j_g': ('j_g', np.arange(self.nx)),
              'k': ('k', np.arange(self.nz)),
              'k_u': ('k_u', np.arange(self.nz)),
              'k_l': ('k_l', np.arange(self.nz)),
              'k_p1': ('k_p1', np.arange(self.nz + 1)),
              'niter': ('time', all_iters),
              'time': ('time', time, time_attrs)
              }
    return xr.decode_cf(xr.Dataset(coords=coords))
Example #3
Source File: test_data_loader.py From aospy with Apache License 2.0 | 6 votes |
def test_maybe_apply_time_shift_inst(gfdl_data_loader, ds_inst, var_name,
                                     generate_file_set_args):
    ds_inst = xr.decode_cf(ds_inst)
    generate_file_set_args['dtype_in_time'] = 'inst'
    generate_file_set_args['intvl_in'] = '3hr'
    da = ds_inst[var_name]
    result = gfdl_data_loader._maybe_apply_time_shift(
        da.copy(), **generate_file_set_args)[TIME_STR]

    expected = da[TIME_STR] + np.timedelta64(-3, 'h')
    expected[TIME_STR] = expected
    assert result.identical(expected)

    generate_file_set_args['intvl_in'] = 'daily'
    da = ds_inst[var_name]
    result = gfdl_data_loader._maybe_apply_time_shift(
        da.copy(), **generate_file_set_args)[TIME_STR]

    expected = da[TIME_STR]
    expected[TIME_STR] = expected
    assert result.identical(expected)
Example #4
Source File: test_io_odim.py From wradlib with MIT License | 6 votes |
def create_range(i, decode=False):
    where = create_dset_where(i)
    ngates = where["nbins"]
    range_start = where["rstart"] * 1000.0
    bin_range = where["rscale"]
    cent_first = range_start + bin_range / 2.0
    range_data = np.arange(
        cent_first, range_start + bin_range * ngates, bin_range, dtype="float32"
    )
    range_attrs = io.xarray.range_attrs
    range_attrs["meters_to_center_of_first_gate"] = cent_first[0]
    range_attrs["meters_between_gates"] = bin_range[0]
    da = xr.DataArray(range_data, dims=["range"], attrs=range_attrs)
    if decode:
        da = xr.decode_cf(xr.Dataset({"arr": da})).arr
    return da
Example #5
Source File: test_utils_times.py From aospy with Apache License 2.0 | 5 votes |
def test_assert_has_data_for_time():
    time_bounds = np.array([[0, 31], [31, 59], [59, 90]])
    nv = np.array([0, 1])
    time = np.array([15, 46, 74])
    data = np.zeros((3))
    var_name = 'a'
    ds = xr.DataArray(data,
                      coords=[time],
                      dims=[TIME_STR],
                      name=var_name).to_dataset()
    ds[TIME_BOUNDS_STR] = xr.DataArray(time_bounds,
                                       coords=[time, nv],
                                       dims=[TIME_STR, BOUNDS_STR],
                                       name=TIME_BOUNDS_STR)
    units_str = 'days since 2000-01-01 00:00:00'
    ds[TIME_STR].attrs['units'] = units_str
    ds = ensure_time_avg_has_cf_metadata(ds)
    ds = set_grid_attrs_as_coords(ds)
    ds = xr.decode_cf(ds)
    da = ds[var_name]

    start_date = np.datetime64('2000-01-01')
    end_date = np.datetime64('2000-03-31')
    _assert_has_data_for_time(da, start_date, end_date)

    start_date_bad = np.datetime64('1999-12-31')
    end_date_bad = np.datetime64('2000-04-01')
    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date_bad, end_date)
    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date, end_date_bad)
    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date_bad, end_date_bad)
Example #6
Source File: date_utils.py From cosima-cookbook with Apache License 2.0 | 5 votes |
def rebase_dataset(ds, target_units=None, timevar='time', offset=None):
    """
    Rebase the time dimension variable in a dataset to a different start date.
    This is useful to overcome limitations in pandas datetime indices used in
    xarray, and to place two datasets with different date indices onto a
    common date index.
    """
    # The units are defined as the units used by timevar
    units = ds[timevar].attrs['units']
    calendar = ds[timevar].attrs['calendar']

    newds = ds.copy()

    # Cycle through all variables, setting a flag if they are a bounds variable
    flag_bounds(newds)

    for name in newds.variables:
        if is_bounds(newds[name]):
            # This is a bounds variable and has been flagged as such so ignore
            # as it will be processed by the variable for which it is the bounds
            continue
        # Use .get() so variables without a units attribute are simply skipped
        if newds[name].attrs.get('units') == units:
            newds[name] = rebase_variable(newds[name], calendar, target_units,
                                          offset=offset)
            if 'bounds' in newds[name].attrs:
                # Must make the same adjustment to the bounds variable
                bvarname = newds[name].attrs['bounds']
                try:
                    newds[bvarname] = rebase_variable(newds[bvarname], calendar,
                                                      target_units,
                                                      src_units=units,
                                                      offset=offset)
                except KeyError:
                    # Ignore if bounds_var missing
                    pass

    # Unset bounds flags
    unflag_bounds(newds)

    # newds = xr.decode_cf(newds, decode_coords=False, decode_times=True)

    return newds
Example #7
Source File: test_ensembles.py From xclim with Apache License 2.0 | 5 votes |
def test_no_time(self):
    # create again using xr.Dataset objects
    ds_all = []
    for n in self.nc_files_simple:
        ds = xr.open_dataset(n, decode_times=False)
        ds["time"] = xr.decode_cf(ds).time
        ds_all.append(ds.groupby(ds.time.dt.month).mean("time", keep_attrs=True))

    ens = ensembles.create_ensemble(ds_all)
    assert len(ens.realization) == len(self.nc_files_simple)
Example #8
Source File: radolan.py From wradlib with MIT License | 5 votes |
def radolan_to_xarray(data, attrs):
    """Converts RADOLAN data to xarray Dataset

    Parameters
    ----------
    data : :func:`numpy:numpy.array`
        array of shape (number of rows, number of columns)
    attrs : dict
        dictionary of metadata information from the file header

    Returns
    -------
    dset : xarray.Dataset
        RADOLAN data and coordinates
    """
    product = attrs["producttype"]
    pattrs = _get_radolan_product_attributes(attrs)
    radolan_grid_xy = rect.get_radolan_grid(attrs["nrow"], attrs["ncol"])
    x0 = radolan_grid_xy[0, :, 0]
    y0 = radolan_grid_xy[:, 0, 1]
    if pattrs:
        if "nodatamask" in attrs:
            data.flat[attrs["nodatamask"]] = pattrs["_FillValue"]
        if "cluttermask" in attrs:
            data.flat[attrs["cluttermask"]] = pattrs["_FillValue"]
    darr = xr.DataArray(
        data,
        attrs=pattrs,
        dims=["y", "x"],
        coords={"time": attrs["datetime"], "x": x0, "y": y0},
    )
    dset = xr.Dataset({product: darr})
    dset = dset.pipe(xr.decode_cf)

    return dset
Example #9
Source File: xarray.py From wradlib with MIT License | 5 votes |
def _decode_cf(self, obj):
    if isinstance(obj, xr.DataArray):
        out = xr.decode_cf(xr.Dataset({"arr": obj}), self._kwargs).arr
    else:
        out = xr.decode_cf(obj, self._kwargs)
    return out
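This helper, like the wradlib test helpers in Examples #4 and #10-#12, wraps a bare DataArray in a temporary Dataset because xarray.decode_cf() operates on Datasets, then pulls the decoded array back out. A minimal sketch of that trick, using made-up attribute values rather than wradlib's real metadata:

import numpy as np
import xarray as xr

# A DataArray whose attrs carry CF time metadata (values are illustrative).
da = xr.DataArray(
    np.array([0.0, 30.0, 60.0]),
    dims=["azimuth"],
    attrs={"units": "seconds since 2020-01-01T00:00:00"},
)

# decode_cf() works on Datasets, so wrap the array, decode, and unwrap again.
decoded = xr.decode_cf(xr.Dataset({"arr": da})).arr
print(decoded.values)  # datetime64 values instead of raw seconds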
Example #10
Source File: test_io_odim.py From wradlib with MIT License | 5 votes |
def create_elevation(i, decode=False, nrays=360):
    startel = create_startelA(i, nrays=nrays)
    stopel = create_stopelA(i, nrays=nrays)
    elevation_data = (startel + stopel) / 2.0
    da = xr.DataArray(elevation_data, dims=["azimuth"], attrs=io.xarray.el_attrs)
    if decode:
        da = xr.decode_cf(xr.Dataset({"arr": da})).arr
    return da
Example #11
Source File: test_io_odim.py From wradlib with MIT License | 5 votes |
def create_azimuth(decode=False, nrays=360):
    startaz = create_startazA(nrays=nrays)
    stopaz = create_stopazA(nrays=nrays)
    zero_index = np.where(stopaz < startaz)
    stopaz[zero_index[0]] += 360
    azimuth_data = (startaz + stopaz) / 2.0
    da = xr.DataArray(azimuth_data, dims=["azimuth"], attrs=io.xarray.az_attrs)
    if decode:
        da = xr.decode_cf(xr.Dataset({"arr": da})).arr
    return da
Example #12
Source File: test_io_odim.py From wradlib with MIT License | 5 votes |
def create_ray_time(i, decode=False, nrays=360):
    time_data = (create_startazT(i, nrays=nrays) +
                 create_stopazT(i, nrays=nrays)) / 2.0
    da = xr.DataArray(time_data, dims=["azimuth"], attrs=io.xarray.time_attrs)
    if decode:
        da = xr.decode_cf(xr.Dataset({"arr": da})).arr
    return da
Example #13
Source File: hoaps.py From typhon with MIT License | 5 votes |
def read(self, filename, **kwargs):
    """Read and parse a NetCDF file and load it to a xarray.Dataset

    Args:
        filename: Path and name of the file as string or FileInfo object.
        **kwargs: Additional keyword arguments that are allowed for the
            :class:`~typhon.files.handlers.common.NetCDF4` class.

    Returns:
        A xarray.Dataset object.
    """
    # Make sure that the standard fields are always going to be imported:
    fields = kwargs.pop("fields", None)
    if fields is not None:
        fields = {"time", "lat", "lon"} | set(fields)

    # xarray has problems with decoding the time variable correctly. Hence,
    # we disable it here:
    decode_cf = kwargs.pop("decode_cf", True)
    data = super().read(filename, fields=fields, decode_cf=False, **kwargs)

    # Then we fix the problem (we need int64 instead of int32):
    attrs = data["time"].attrs.copy()
    data["time"] = data["time"].astype(int)
    data["time"].attrs = attrs

    # Do decoding now (only if the user wanted it!)
    if decode_cf:
        return xr.decode_cf(data)
    return data
Example #14
Source File: test_data_loader.py From aospy with Apache License 2.0 | 5 votes |
def test_maybe_apply_time_shift_ts(gfdl_data_loader, ds_with_time_bounds,
                                   var_name, generate_file_set_args):
    ds = xr.decode_cf(ds_with_time_bounds)
    da = ds[var_name]

    result = gfdl_data_loader._maybe_apply_time_shift(
        da.copy(), **generate_file_set_args)[TIME_STR]
    assert result.identical(da[TIME_STR])
Example #15
Source File: test_utils_times.py From aospy with Apache License 2.0 | 5 votes |
def test_sel_time():
    time_bounds = np.array([[0, 31], [31, 59], [59, 90]])
    nv = np.array([0, 1])
    time = np.array([15, 46, 74])
    data = np.zeros((3))
    var_name = 'a'
    ds = xr.DataArray(data,
                      coords=[time],
                      dims=[TIME_STR],
                      name=var_name).to_dataset()
    ds[TIME_BOUNDS_STR] = xr.DataArray(time_bounds,
                                       coords=[time, nv],
                                       dims=[TIME_STR, BOUNDS_STR],
                                       name=TIME_BOUNDS_STR)
    units_str = 'days since 2000-01-01 00:00:00'
    ds[TIME_STR].attrs['units'] = units_str
    ds = ensure_time_avg_has_cf_metadata(ds)
    ds = set_grid_attrs_as_coords(ds)
    ds = xr.decode_cf(ds)
    da = ds[var_name]

    start_date = np.datetime64('2000-02-01')
    end_date = np.datetime64('2000-03-31')
    result = sel_time(da, start_date, end_date)
    assert result[SUBSET_START_DATE_STR].values == start_date
    assert result[SUBSET_END_DATE_STR].values == end_date
Example #16
Source File: test_utils_times.py From aospy with Apache License 2.0 | 5 votes |
def test_assert_has_data_for_time_str_input():
    time_bounds = np.array([[0, 31], [31, 59], [59, 90]])
    nv = np.array([0, 1])
    time = np.array([15, 46, 74])
    data = np.zeros((3))
    var_name = 'a'
    ds = xr.DataArray(data,
                      coords=[time],
                      dims=[TIME_STR],
                      name=var_name).to_dataset()
    ds[TIME_BOUNDS_STR] = xr.DataArray(time_bounds,
                                       coords=[time, nv],
                                       dims=[TIME_STR, BOUNDS_STR],
                                       name=TIME_BOUNDS_STR)
    units_str = 'days since 2000-01-01 00:00:00'
    ds[TIME_STR].attrs['units'] = units_str
    ds = ensure_time_avg_has_cf_metadata(ds)
    ds = set_grid_attrs_as_coords(ds)
    ds = xr.decode_cf(ds)
    da = ds[var_name]

    start_date = '2000-01-01'
    end_date = '2000-03-31'
    _assert_has_data_for_time(da, start_date, end_date)

    start_date_bad = '1999-12-31'
    end_date_bad = '2000-04-01'

    # With strings these checks are disabled
    _assert_has_data_for_time(da, start_date_bad, end_date)
    _assert_has_data_for_time(da, start_date, end_date_bad)
    _assert_has_data_for_time(da, start_date_bad, end_date_bad)
Example #17
Source File: test_utils_times.py From aospy with Apache License 2.0 | 5 votes |
def test_assert_has_data_for_time_cftime_datetimes(calendar, date_type):
    time_bounds = np.array([[0, 2], [2, 4], [4, 6]])
    nv = np.array([0, 1])
    time = np.array([1, 3, 5])
    data = np.zeros((3))
    var_name = 'a'
    ds = xr.DataArray(data,
                      coords=[time],
                      dims=[TIME_STR],
                      name=var_name).to_dataset()
    ds[TIME_BOUNDS_STR] = xr.DataArray(time_bounds,
                                       coords=[time, nv],
                                       dims=[TIME_STR, BOUNDS_STR],
                                       name=TIME_BOUNDS_STR)
    units_str = 'days since 0002-01-02 00:00:00'
    ds[TIME_STR].attrs['units'] = units_str
    ds[TIME_STR].attrs['calendar'] = calendar
    ds = ensure_time_avg_has_cf_metadata(ds)
    ds = set_grid_attrs_as_coords(ds)
    ds = xr.decode_cf(ds)
    da = ds[var_name]

    start_date = date_type(2, 1, 2)
    end_date = date_type(2, 1, 8)
    _assert_has_data_for_time(da, start_date, end_date)

    start_date_bad = date_type(2, 1, 1)
    end_date_bad = date_type(2, 1, 9)
    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date_bad, end_date)
    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date, end_date_bad)
    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date_bad, end_date_bad)
Example #18
Source File: times.py From aospy with Apache License 2.0 | 5 votes |
def prep_time_data(ds):
    """Prepare time coordinate information in Dataset for use in aospy.

    1. If the Dataset contains a time bounds coordinate, add attributes
       representing the true beginning and end dates of the time interval
       used to construct the Dataset
    2. If the Dataset contains a time bounds coordinate, overwrite the time
       coordinate values with the averages of the time bounds at each
       timestep
    3. Decode the times into np.datetime64 objects for time indexing

    Parameters
    ----------
    ds : Dataset
        Pre-processed Dataset with time coordinate renamed to
        internal_names.TIME_STR

    Returns
    -------
    Dataset
        The processed Dataset
    """
    ds = ensure_time_as_index(ds)
    if TIME_BOUNDS_STR in ds:
        ds = ensure_time_avg_has_cf_metadata(ds)
        ds[TIME_STR] = average_time_bounds(ds)
    else:
        logging.warning("dt array not found. Assuming equally spaced "
                        "values in time, even though this may not be "
                        "the case")
        ds = add_uniform_time_weights(ds)
    return xr.decode_cf(ds, decode_times=True, decode_coords=False,
                        mask_and_scale=True)
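prep_time_data above also illustrates that xarray.decode_cf() accepts keyword switches similar to xarray.open_dataset (decode_times, decode_coords, mask_and_scale), so decoding can be applied selectively. A small illustrative sketch, not taken from aospy, with made-up variable names and values:

import numpy as np
import xarray as xr

ds = xr.Dataset(
    {
        "precip": (
            ("time",),
            np.array([1, -9999, 3], dtype="int32"),
            {"_FillValue": -9999},
        )
    },
    coords={"time": ("time", np.array([0, 1, 2]),
                     {"units": "days since 2000-01-01"})},
)

# Decode times and apply the fill value, but skip 'coordinates' handling.
decoded = xr.decode_cf(ds, decode_times=True, decode_coords=False,
                       mask_and_scale=True)
print(decoded.time.dtype)     # datetime64[ns]
print(decoded.precip.values)  # [1., nan, 3.]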
Example #19
Source File: common.py From typhon with MIT License | 4 votes |
def read(self, file_info, fields=None, mapping=None, **kwargs):
    """Read and parse NetCDF files and load them to a xarray.Dataset

    Args:
        file_info: Path and name of the file as string or FileInfo object.
            This can also be a tuple/list of file names or a path with
            asterisk (this is still not implemented!).
        fields: List of field names that should be read. The other fields
            will be ignored. If `mapping` is given, this should contain the
            new field names.
        mapping: A dictionary which is used for renaming the fields. If
            given, `fields` must contain the old field names.
        **kwargs: Additional keyword arguments for
            :func:`xarray.decode_cf` such as `mask_and_scale`, etc.

    Returns:
        A xarray.Dataset object.

    Examples:

    .. code-block:: python

        from typhon.files import NetCDF4

        fh = NetCDF4()
        data = fh.read("filename.nc")

        # OR if you want to load only some fields:
        data = fh.read("filename.nc", fields=["temp", "lat", "lon"])
    """
    # xr.open_dataset still does not support loading all groups from a
    # file, except in a very cumbersome (and expensive) way via the
    # parameter `group`. To avoid this, we load all groups and their
    # variables using the netCDF4 library directly and load them later
    # into a xarray dataset.

    with netCDF4.Dataset(file_info.path, "r") as root:
        # xarray decode_cf scales, don't do it twice!
        root.set_auto_scale(False)

        dataset = xr.Dataset()
        self._load_group(dataset, None, root, fields)

        dataset = xr.decode_cf(dataset, **kwargs)

    return _xarray_rename_fields(dataset, mapping)
Example #20
Source File: test_io_odim.py From wradlib with MIT License | 4 votes |
def test_sweep_data(self, get_loader):
    if isinstance(self, MeasuredDataVolume):
        pytest.skip("requires synthetic data")
    if get_loader == "netcdf4" and self.format == "GAMIC":
        pytest.skip("gamic needs hdf-based loader")
    with self.get_volume_data(
        get_loader,
        decode_coords=False,
        mask_and_scale=False,
        decode_times=False,
        chunks=None,
        parallel=False,
    ) as vol:
        for i, ts in enumerate(vol):
            if "02" in self.name:
                ds = create_dataset(i, nrays=361)
            else:
                ds = create_dataset(i)
            for j, swp in enumerate(ts):
                xr.testing.assert_equal(swp.data, ds)
    with self.get_volume_data(
        get_loader,
        decode_coords=True,
        mask_and_scale=False,
        decode_times=True,
        chunks=None,
        parallel=False,
    ) as vol:
        for i, ts in enumerate(vol):
            for j, swp in enumerate(ts):
                data = create_dataset(i)
                data = data.assign_coords(create_coords(i).coords)
                data = data.assign_coords(
                    create_site(self.data["where"]["attrs"]).coords
                )
                data = data.assign_coords({"sweep_mode": "azimuth_surveillance"})
                data = xr.decode_cf(data, mask_and_scale=False)
                xr.testing.assert_equal(swp.data, data)
    with self.get_volume_data(
        get_loader,
        decode_coords=True,
        mask_and_scale=True,
        decode_times=True,
        chunks=None,
        parallel=False,
    ) as vol:
        for i, ts in enumerate(vol):
            for j, swp in enumerate(ts):
                data = create_dataset(i, type=self.format)
                data = data.assign_coords(create_coords(i).coords)
                data = data.assign_coords(
                    create_site(self.data["where"]["attrs"]).coords
                )
                data = data.assign_coords({"sweep_mode": "azimuth_surveillance"})
                data = xr.decode_cf(data)
                xr.testing.assert_equal(swp.data, data)
    del swp
    del ts
    del vol
    gc.collect()
Example #21
Source File: common.py From typhon with MIT License | 4 votes |
def read(self, file_info, fields=None, mapping=None, **kwargs):
    """Read SEVIRI HDF5 files and load them to a xarray.Dataset

    Args:
        file_info: Path and name of the file as string or FileInfo object.
            This can also be a tuple/list of file names or a path with
            asterisk.
        fields: ...
        **kwargs: Additional keyword arguments that are valid for
            :class:`typhon.files.handlers.common.NetCDF4`.

    Returns:
        A xarray.Dataset object.
    """
    # Here, the user fields overwrite the standard fields:
    if fields is None:
        raise NotImplementedError(
            "Loading complete HDF5 files without giving explicit field "
            "names is not yet implemented!"
        )

    # keys are dimension size, values are dimension names
    dim_dict = {}

    # Load the dataset from the file:
    with h5py.File(file_info.path, 'r') as file:
        dataset = xr.Dataset()

        for field in fields:
            if field not in file:
                raise KeyError(f"No field named '{field}'!")

            dims = []
            for dim_size in file[field].shape:
                dim_name = dim_dict.get(dim_size, None)
                if dim_name is None:
                    dim_name = f"dim_{len(dim_dict)}"
                    dim_dict[dim_size] = dim_name
                dims.append(dim_name)

            dataset[field] = xr.DataArray(
                file[field],
                dims=dims,
                # Currently, some attributes may contain byte-strings that
                # are not nice for further processing
                attrs={},  # dict(file[field].attrs)
            )

        # decode_cf returns a new Dataset, so keep the decoded result
        dataset = xr.decode_cf(dataset, **kwargs)
        dataset.load()

    return _xarray_rename_fields(dataset, mapping)
Example #22
Source File: glm.py From glmtools with BSD 3-Clause "New" or "Revised" License | 4 votes |
def _check_and_fix_missing_unsigned_time(self, filename):
    """ Check for the missing _Unsigned attribute on files created as part
    of the D0.07 build of the operational environment. Correct if present.

    The problem was only present for less than a month, and this function
    does nothing if the data file is outside that time range.

    Modifies self.dataset to have the correct times.
    """
    vars_to_correct = ['event_time_offset', 'group_time_offset',
                       'group_frame_time_offset',
                       'flash_time_offset_of_first_event',
                       'flash_time_offset_of_last_event',
                       'flash_frame_time_offset_of_first_event',
                       'flash_frame_time_offset_of_last_event']
    start_g16_problem_date = np.datetime64('2018-10-15T00:00:00')
    end_g16_problem_date = np.datetime64('2018-11-06T00:00:00')

    prod_tmin, prod_tmax = self.dataset.product_time_bounds.data
    in_time = ((prod_tmin >= start_g16_problem_date) &
               (prod_tmax <= end_g16_problem_date))

    # The date range is approximate, and corresponds to when D0.07 first was
    # in production in the OE, and before the _Unsigned attribute was added
    # to the production data files. We don't know the exact start time, but
    # fortunately the group_frame_time_offset was also added to D0.07, so we
    # look for that attribute within the date range. Later, adding the
    # attribute to a file that already has it is no problem, so we don't
    # worry about the last time on Nov 6 when the fix is needed.
    if (in_time & hasattr(self.dataset, 'group_frame_time_offset')):
        unmod = xr.open_dataset(filename, mask_and_scale=False,
                                decode_cf=False)
        time_dataset = xr.Dataset()
        for var in vars_to_correct:
            # Add the _Unsigned attribute
            da = getattr(unmod, var)
            da.attrs['_Unsigned'] = 'true'
            time_dataset[var] = da
        decoded = xr.decode_cf(time_dataset)
        # Copy corrected time variables over to the new dataset
        for var in vars_to_correct:
            self.dataset[var] = decoded[var]
        unmod.close()
        return True
    else:
        return False