Python xarray.Dataset() Examples
The following are 30 code examples of xarray.Dataset(), collected from open-source projects. The source file, project, and license are noted above each example, so you can trace each snippet back to its original context. You may also want to browse the other functions and classes that the xarray module provides.
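Most of the examples below assume familiarity with the Dataset constructor itself. As a quick, hedged refresher (the variable and coordinate names here are invented for illustration), a Dataset maps variable names to (dimensions, values) pairs, with shared coordinates attached separately:

import numpy as np
import xarray as xr

# Minimal Dataset: one 2-D variable on dimensions ("time", "x"),
# with a coordinate attached to each dimension.
ds = xr.Dataset(
    data_vars={"temperature": (("time", "x"), np.random.rand(3, 4))},
    coords={
        "time": np.array(["2020-01-01", "2020-01-02", "2020-01-03"],
                         dtype="datetime64[ns]"),
        "x": np.arange(4),
    },
)
print(ds["temperature"].dims)   # ('time', 'x')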
Example #1
Source File: test_utils.py From typhon with MIT License

def test_undo_xarray_floatification(self):
    ds = xarray.Dataset(
        {"a": (["x"], numpy.array([1, 2, 3], dtype="f4")),
         "b": (["x"], numpy.array([2.0, 3.0, 4.0])),
         "c": (["x"], numpy.array(
             ["2010-01-01", "2010-01-02", "2010-01-03"], dtype="M8"))})
    ds["a"].encoding = {"dtype": numpy.dtype("i4"), "_FillValue": 1234}
    # c should NOT be converted because it's a time
    ds["c"].encoding = {"dtype": numpy.dtype("i8"), "_FillValue": 12345}
    ds2 = utils.undo_xarray_floatification(ds)
    assert ds is not ds2  # has to be a copy
    assert ds["a"].encoding == ds2["a"].encoding
    assert numpy.allclose(ds["a"], ds2["a"])
    assert ds2["a"].dtype == ds2["a"].encoding["dtype"]
    assert (ds2["c"] == ds["c"]).all()
    assert ds2["c"].dtype == ds["c"].dtype
    assert ds2["b"].dtype == ds["b"].dtype
Example #2
Source File: statistics.py From esmlab with Apache License 2.0

def rmse(x, y, dim):
    """Compute Root Mean Squared Error.

    Parameters
    ----------
    x : Dataset, DataArray, GroupBy, Variable, numpy/dask arrays or scalars
        Mix of labeled and/or unlabeled arrays to which to apply the function.
    y : Dataset, DataArray, GroupBy, Variable, numpy/dask arrays or scalars
        Mix of labeled and/or unlabeled arrays to which to apply the function.
    dim : str
        The dimension to apply the rmse along.

    Returns
    -------
    Root Mean Squared Error
        Single value or tuple of Dataset, DataArray, Variable, dask.array.Array
        or numpy.ndarray, the first type on that list to appear on an input.
    """
    return xs.rmse(x, y, dim)
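A short usage sketch, assuming (as the wrapper above suggests) that `xs` refers to the xskillscore package:

import numpy as np
import xarray as xr
import xskillscore as xs  # assumed to be the `xs` used by the wrapper

x = xr.DataArray(np.random.rand(5, 3), dims=("time", "station"))
y = xr.DataArray(np.random.rand(5, 3), dims=("time", "station"))

# Reduce over "time"; the result keeps the remaining "station" dimension.
err = xs.rmse(x, y, "time")
print(err.dims)   # ('station',)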
Example #3
Source File: data_loader.py From aospy with Apache License 2.0

def set_grid_attrs_as_coords(ds):
    """Set available grid attributes as coordinates in a given Dataset.

    Grid attributes are assumed to have their internal aospy names. Grid
    attributes are set as coordinates, such that they are carried by all
    selected DataArrays with overlapping index dimensions.

    Parameters
    ----------
    ds : Dataset
        Input data

    Returns
    -------
    Dataset
        Dataset with grid attributes set as coordinates
    """
    grid_attrs_in_ds = set(GRID_ATTRS.keys()).intersection(
        set(ds.coords) | set(ds.data_vars))
    ds = ds.set_coords(grid_attrs_in_ds)
    return ds
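The promotion relies on xarray's Dataset.set_coords; a small sketch of what that buys you, using hypothetical variable names rather than aospy's GRID_ATTRS:

import numpy as np
import xarray as xr

# "t" is a data variable; "zsurf" is a grid attribute stored as a data
# variable.  Promoting it with set_coords makes it ride along with any
# DataArray selected from the Dataset.
ds = xr.Dataset({
    "t": (("lat", "lon"), np.random.rand(2, 3)),
    "zsurf": (("lat", "lon"), np.zeros((2, 3))),
})
ds = ds.set_coords(["zsurf"])
print(list(ds.data_vars))         # ['t']
print("zsurf" in ds["t"].coords)  # True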
Example #4
Source File: test_mask.py From verde with BSD 3-Clause "New" or "Revised" License

def test_distance_mask_grid():
    "Check that the mask works for grid input"
    region = (0, 5, -10, -4)
    shape = (7, 6)
    east, north = grid_coordinates(region, shape=shape)
    coords = {"easting": east[0, :], "northing": north[:, 0]}
    data_vars = {"scalars": (["northing", "easting"], np.ones(shape))}
    grid = xr.Dataset(data_vars, coords=coords)
    masked = distance_mask((2.5, -7.5), maxdist=2, grid=grid)
    true = [
        [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
        [np.nan, np.nan, 1, 1, np.nan, np.nan],
        [np.nan, 1, 1, 1, 1, np.nan],
        [np.nan, 1, 1, 1, 1, np.nan],
        [np.nan, np.nan, 1, 1, np.nan, np.nan],
        [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
        [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
    ]
    npt.assert_array_equal(true, masked.scalars.values)
Example #5
Source File: statistics.py From esmlab with Apache License 2.0

def mae(x, y, dim):
    """Compute Mean Absolute Error.

    Parameters
    ----------
    x : Dataset, DataArray, GroupBy, Variable, numpy/dask arrays or scalars
        Mix of labeled and/or unlabeled arrays to which to apply the function.
    y : Dataset, DataArray, GroupBy, Variable, numpy/dask arrays or scalars
        Mix of labeled and/or unlabeled arrays to which to apply the function.
    dim : str
        The dimension to apply the mae along.

    Returns
    -------
    Mean Absolute Error
        Single value or tuple of Dataset, DataArray, Variable, dask.array.Array
        or numpy.ndarray, the first type on that list to appear on an input.
    """
    return xs.mae(x, y, dim)
Example #6
Source File: test_accessor_probabilistic.py From xskillscore with Apache License 2.0

def test_crps_gaussian_accessor(o, f, dask_bool, outer_bool):
    if dask_bool:
        o = o.chunk()
        f = f.chunk()
    mu = f.mean('member')
    sig = f.std('member')
    actual = crps_gaussian(o, mu, sig)

    ds = xr.Dataset()
    ds['o'] = o
    ds['mu'] = mu
    ds['sig'] = sig
    if outer_bool:
        ds = ds.drop_vars('mu')
        expected = ds.xs.crps_gaussian('o', mu, sig)
    else:
        expected = ds.xs.crps_gaussian('o', 'mu', 'sig')
    assert_allclose(actual, expected)
Example #7
Source File: dataset.py From typhon with MIT License

def find_granules_sorted(self, dt_start=None, dt_end=None,
                         include_last_before=False, **extra):
    """Yield all granules, sorted by times.

    For documentation, see :func:`~Dataset.find_granules`.
    """
    allgran = list(self.find_granules(dt_start, dt_end,
                                      include_last_before, **extra))

    # I've been through all granules at least once, so all should be
    # cached now; no need for additional hints when granule timeinfo
    # obtainable only with hints from subdir, which is not included in
    # the re-matching method
    if extra.get("return_time", False):
        yield from sorted(allgran)
    else:
        yield from sorted(allgran, key=self.get_times_for_granule)
Example #8
Source File: dataset.py From typhon with MIT License

def get_additional_field(self, M, fld):
    """Get additional field.

    Get field from other dataset, original objects, or otherwise.
    To be implemented by subclass implementations.

    Exact fields depend on subclass.

    Arguments:

        M (ndarray): ndarray with existing data
            A (masked) array with a dtype such as returned from
            `self.read <Dataset.read>`.

        fld (str): Additional field to read from original data

    Returns:

        ndarray with fields of M + fld.
    """
    raise NotImplementedError("Must be implemented by child-class")
Example #9
Source File: dataset.py From typhon with MIT License

def _add_cont_to_arr(self, arr, N, cont):
    """Changes arr in-situ, does not return"""
    if isinstance(cont, xarray.Dataset):
        # we should already know it's large enough
        # for arr[self.time_field] I start at N
        # for the other time coordinates at the relative "speed" they
        # are behind N
        # but this is not guaranteed to be regular so I would need to
        # keep trac of each individually, or inspect it on-the-fly
        # this approximation may be good enough for pre-allocation
        # (which is approximate anyway), when actually storing we need
        # to do a better job… for each time coordinate, check when it
        # “dies”
        raise NotImplementedError("This is not used for xarrays. "
            "But see comment in source-code for some thoughts.")
    else:
        arr[N:(N+cont.size)] = cont
        #arr = self._finalise_arr(arr, N)
Example #10
Source File: dataset.py From typhon with MIT License

def __init__(self, **kwargs):
    """Initialise a Dataset object.

    All keyword arguments will be translated into attributes.
    Does not take positional arguments.

    Note that if you create a dataset with a name that already exists,
    the existing object is returned, but __init__ is still called
    (Python does this, see
    https://docs.python.org/3.7/reference/datamodel.html#object.__new__).
    """
    self.mandatory_fields = set()
    for (k, v) in kwargs.items():
        setattr(self, k, v)
    self.setlocal()
    if self.my_pseudo_fields is None:
        self.my_pseudo_fields = collections.OrderedDict()
Example #11
Source File: test_netcdf4.py From typhon with MIT License

def test_scalefactor(self):
    """Test if scale factors written/read correctly
    """
    fh = NetCDF4()
    with tempfile.TemporaryDirectory() as tdir:
        tfile = os.path.join(tdir, "testfile.nc")
        before = xr.Dataset(
            {"a": xr.DataArray(
                np.array([0.1, 0.2]))})
        before["a"].encoding = {
            "scale_factor": 0.1,
            "_FillValue": 42,
            "dtype": "int16"}
        fh.write(before, tfile)
        after = fh.read(tfile)
        assert np.allclose(before["a"], after["a"])
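The packing behaviour being tested comes from xarray's encoding handling rather than from typhon itself; a minimal sketch of the same round trip using plain xarray in place of typhon's NetCDF4 file handler:

import os
import tempfile
import numpy as np
import xarray as xr

# The encoding dict controls how values are packed on disk
# (scale_factor, _FillValue, on-disk dtype) and unpacked on read.
with tempfile.TemporaryDirectory() as tdir:
    path = os.path.join(tdir, "packed.nc")
    before = xr.Dataset({"a": xr.DataArray(np.array([0.1, 0.2]))})
    before["a"].encoding = {"scale_factor": 0.1, "_FillValue": 42,
                            "dtype": "int16"}
    before.to_netcdf(path)
    after = xr.open_dataset(path)
    assert np.allclose(before["a"], after["a"])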
Example #12
Source File: test_netcdf4.py From typhon with MIT License

def test_times(self):
    """Test if times are read correctly
    """
    fh = NetCDF4()
    with tempfile.TemporaryDirectory() as tdir:
        tfile = os.path.join(tdir, "testfile.nc")
        before = xr.Dataset(
            {"a": xr.DataArray(
                np.array(
                    ["2019-02-14T09:00:00", "2019-02-14T09:00:01"],
                    dtype="M8[ns]"))})
        before["a"].encoding = {
            "units": "seconds since 2019-02-14 09:00:00",
            "scale_factor": 0.1}
        fh.write(before, tfile)
        after = fh.read(tfile)
        assert np.array_equal(before["a"], after["a"])
Example #13
Source File: common.py From typhon with MIT License

def get_xarray_group(dataset, group):
    """Get pseudo group from xarray.Dataset

    Args:
        dataset: A xarray.Dataset object with pseudo groups.
        group: The name of the group (can also be a subgroup).

    Returns:
        A xarray.Dataset with the pseudo group.
    """
    if not group.endswith("/"):
        group += "/"

    group_vars = [
        var
        for var in dataset.variables
        if var.startswith(group)
    ]

    if not group_vars:
        raise KeyError(f"The group {group} was not found!")

    return dataset[group_vars]
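The "pseudo groups" here are nothing more than a slash convention in variable names; a rough usage sketch (the variable names are made up for illustration):

import numpy as np
import xarray as xr

# Variables are grouped only by a "/" prefix in their names.
ds = xr.Dataset({
    "MHS/bt": (("scanline",), np.random.rand(4)),
    "MHS/lat": (("scanline",), np.linspace(-60, 60, 4)),
    "AVHRR/bt": (("scanline",), np.random.rand(4)),
})
mhs = get_xarray_group(ds, "MHS")   # Dataset with "MHS/bt" and "MHS/lat"
print(list(mhs.variables))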
Example #14
Source File: dataset.py From typhon with MIT License

def _apply_limits_and_filters(self, cont, limits, simple_filters):
    if isinstance(cont, xarray.Dataset):
        if len(limits) > 0:
            raise NotImplementedError(
                "limits not implemented on xarray datasets")
        oldsize = cont[self.time_field].size
        for f in simple_filters:
            cont = f(cont)
        logger.debug("Filters reduced number from "
                     "{:d} to {:d}".format(
                         oldsize, cont[self.time_field].size))
        return cont
    oldsize = cont.size
    cont = tpmath.array.limit_ndarray(cont, limits)
    for f in simple_filters:
        cont = f(cont)
    if cont.size < oldsize:
        logger.debug("Applying limitations, reducing "
                     "{:d} to {:d}".format(oldsize, cont.size))
    return cont
Example #15
Source File: test_collocations.py From typhon with MIT License

def test_collocate_collapse_expand(self):
    """Test whether collocating, collapsing and expanding work"""
    collocator = Collocator()
    test = xr.Dataset({
        "time": ("time", np.arange("2000", "2010", dtype="M8[Y]")),
        "lat": ("time", np.arange(10)),
        "lon": ("time", np.arange(10)),
    })

    collocations = collocator.collocate(
        test, test, max_interval="30 days", max_distance="150 miles"
    )

    collapsed = collapse(collocations)
    expanded = expand(collocations)
Example #16
Source File: common.py From typhon with MIT License

def read(self, file_info, fields=None, **kwargs):
    """Read a CSV file and return an xarray.Dataset with its content

    Args:
        file_info: Path and name of the file as string or FileInfo object.
        fields: Field that you want to extract from the file. If not given,
            all fields are going to be extracted.
        **kwargs: Additional keyword arguments for the pandas function
            `pandas.read_csv`. See for more details:
            https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html

    Returns:
        A xarray.Dataset object.
    """
    data = pd.read_csv(file_info.path, **kwargs).to_xarray()

    if fields is None:
        return data
    else:
        return data[fields]
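The core of this handler is a single chained pandas call; a minimal sketch of that idea without typhon's FileInfo wrapper (the CSV content here is invented):

import io
import pandas as pd

# to_xarray() turns the DataFrame into a Dataset whose dimension is the
# DataFrame index; each column becomes a data variable.
csv = io.StringIO("time,lat,lon\n0,10.0,20.0\n1,11.0,21.0\n")
data = pd.read_csv(csv).to_xarray()
print(list(data.data_vars))   # ['time', 'lat', 'lon']
print(data.dims)              # dimension named after the DataFrame index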
Example #17
Source File: collocator.py From typhon with MIT License

def check_collocation_data(dataset):
    """Check whether the dataset fulfills the standard of collocated data

    Args:
        dataset: A xarray.Dataset object

    Raises:
        An InvalidCollocationData error if the dataset did not pass the test.
    """
    mandatory_fields = ["Collocations/pairs", "Collocations/group"]

    for mandatory_field in mandatory_fields:
        if mandatory_field not in dataset.variables:
            raise InvalidCollocationData(
                f"Could not find the field '{mandatory_field}'!"
            )
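Since the check only tests for the presence of two pseudo-group variables, a sketch of a dataset that would pass it looks roughly like this (shapes and values are made up purely for illustration):

import numpy as np
import xarray as xr

# Minimal dataset carrying the two mandatory pseudo-group fields.
collocations = xr.Dataset({
    "Collocations/pairs": (("group", "collocation"),
                           np.array([[0, 1], [1, 0]])),
    "Collocations/group": (("group",), np.array([0, 1])),
})
check_collocation_data(collocations)   # passes silently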
Example #18
Source File: common.py From typhon with MIT License

def write(self, data, file_info, **kwargs):
    """Write a xarray.Dataset to a CSV file.

    Args:
        data: An DataGroup object that should be saved.
        file_info: Path and name of the file as string or FileInfo object.
        **kwargs: Additional keyword arguments for
            `pandas.Dataframe.to_csv`. See for more details:
            https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_csv.html

    Returns:
        None
    """
    data.to_dataframe().to_csv(file_info.path, **kwargs)
Example #19
Source File: calc.py From aospy with Apache License 2.0

def region_calcs(self, arr, func):
    """Perform a calculation for all regions."""
    # Get pressure values for data output on hybrid vertical coordinates.
    bool_pfull = (self.def_vert and
                  self.dtype_in_vert == internal_names.ETA_STR and
                  self.dtype_out_vert is False)
    if bool_pfull:
        pfull_data = self._get_input_data(_P_VARS[self.dtype_in_vert],
                                          self.start_date, self.end_date)
        pfull = self._full_to_yearly_ts(
            pfull_data, arr[internal_names.TIME_WEIGHTS_STR]
        ).rename('pressure')
    # Loop over the regions, performing the calculation.
    reg_dat = {}
    for reg in self.region:
        # Just pass along the data if averaged already.
        if 'av' in self.dtype_in_time:
            data_out = reg.ts(arr)
        # Otherwise perform the calculation.
        else:
            method = getattr(reg, func)
            data_out = method(arr)
        if bool_pfull:
            # Don't apply e.g. standard deviation to coordinates.
            if func not in ['av', 'ts']:
                method = reg.ts
            # Convert Pa to hPa
            coord = method(pfull) * 1e-2
            data_out = data_out.assign_coords(
                **{reg.name + '_pressure': coord}
            )
        reg_dat.update(**{reg.name: data_out})
    return xr.Dataset(reg_dat)
Example #20
Source File: common.py From typhon with MIT License

def read(self, file_info, fields=None, mapping=None):
    """Read and parse HDF4 files and load them to a xarray.Dataset

    Args:
        file_info: Path and name of the file as string or FileInfo object.
        fields: Field names that you want to extract from this file as a
            list.
        mapping: A dictionary that maps old field names to new field names.
            If given, `fields` must contain the old field names.

    Returns:
        A xarray.Dataset object.
    """
    if fields is None:
        raise NotImplementedError(
            "You have to set field names. Loading the complete file is not"
            " yet implemented!"
        )

    dataset = xr.Dataset()

    # Files in HDF4 format are not very pretty. This code is taken from
    # http://hdfeos.org/zoo/OTHER/2010128055614_21420_CS_2B-GEOPROF_GRANULE_P_R04_E03.hdf.py
    # and adapted by John Mrziglod.
    file = HDF.HDF(file_info.path)

    try:
        vs = file.vstart()

        for field in fields:
            # Add the field data to the dataset.
            dataset[field] = self._get_field(vs, field)
    except Exception as e:
        raise e
    finally:
        file.close()

    return _xarray_rename_fields(dataset, mapping)
Example #21
Source File: probabilistic.py From xskillscore with Apache License 2.0

def xr_crps_gaussian(observations, mu, sig):
    """
    xarray version of properscoring.crps_gaussian: Continuous Ranked
    Probability Score with a Gaussian distribution.

    Parameters
    ----------
    observations : xarray.Dataset or xarray.DataArray
        The observations or set of observations.
    mu : xarray.Dataset or xarray.DataArray
        The mean of the forecast normal distribution.
    sig : xarray.Dataset or xarray.DataArray
        The standard deviation of the forecast distribution.

    Returns
    -------
    xarray.Dataset or xarray.DataArray

    See Also
    --------
    properscoring.crps_gaussian
    xarray.apply_ufunc
    """
    # check if same dimensions
    if isinstance(mu, (int, float)):
        mu = xr.DataArray(mu)
    if isinstance(sig, (int, float)):
        sig = xr.DataArray(sig)
    if mu.dims != observations.dims:
        observations, mu = xr.broadcast(observations, mu)
    if sig.dims != observations.dims:
        observations, sig = xr.broadcast(observations, sig)
    return xr.apply_ufunc(
        crps_gaussian,
        observations,
        mu,
        sig,
        input_core_dims=[[], [], []],
        dask='parallelized',
        output_dtypes=[float],
    )
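A quick usage sketch for the wrapper above, with random data and a scalar spread; it assumes `xr` and properscoring's `crps_gaussian` are imported as in the module:

import numpy as np
import xarray as xr

# Observations on a small grid; a matching forecast mean and a scalar spread.
obs = xr.DataArray(np.random.randn(4, 5), dims=("lat", "lon"))
mu = obs + 0.1 * np.random.randn(4, 5)
sig = 1.0   # scalar; broadcast against obs inside xr_crps_gaussian

crps = xr_crps_gaussian(obs, mu, sig)
print(crps.dims)   # ('lat', 'lon') -- one score per grid point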
Example #22
Source File: dataset.py From typhon with MIT License

def _ensure_large_enough(self, arr, cont, N, newsize, frac_done):
    """Allocate new space while adding gran to data

    Helper for _add_gran_to_data, part of the read_period family of
    helpers.  Does NOT add cont to arr!
    """
    if isinstance(cont, xarray.Dataset):
        raise NotImplementedError("Not used for xarray datasets. "
            "But see version history at "
            "https://arts.mi.uni-hamburg.de/trac/rt/browser/typhon/trunk/typhon/datasets/dataset.py?rev=10396#L462 "
            "if there is a wish to reimplemented!")
    else:
        if newsize * arr.itemsize > self.maxsize:
            raise MemoryError("This dataset is too large "
                "for typhons little mind. Continuing might "
                "ultimately need {:,.0f} MiB of RAM. This exceeds my "
                "maximum (self.maxsize) of {:,.0f} MiB. "
                "Sorry! ".format(
                    newsize*arr.itemsize/MiB, self.maxsize/MiB))
        logger.debug(
            "New size ({:d} items, {:,.0f} MiB) would exceed allocated "
            "size ({:d} items, {:,.0f} MiB). I'm {:.3%} "
            "through. Allocating new: {:d} items, {:,.0f} "
            "MiB. New size: {:d} items, {:,.0f} "
            "MiB.".format(N+cont.size, (cont.nbytes+arr.nbytes)/MiB,
                          arr.size, arr.nbytes/MiB, frac_done,
                          newsize-arr.size,
                          (newsize-arr.size)*arr.itemsize/MiB,
                          newsize, newsize*arr.itemsize/MiB))
        mod = (numpy.ma if hasattr(arr, "mask") else numpy)
        arr = mod.concatenate(
            (arr, mod.zeros(dtype=arr.dtype, shape=newsize-arr.size)))
    return arr
Example #23
Source File: probabilistic.py From xskillscore with Apache License 2.0

def xr_crps_quadrature(x, cdf_or_dist, xmin=None, xmax=None, tol=1e-6):
    """
    xarray version of properscoring.crps_quadrature: Continuous Ranked
    Probability Score with numerical integration of the normal distribution

    Parameters
    ----------
    x : xarray.Dataset or xarray.DataArray
        Observations associated with the forecast distribution ``cdf_or_dist``.
    cdf_or_dist : callable or scipy.stats.distribution
        Function which returns the cumulative density of the forecast
        distribution at value x.

    Returns
    -------
    xarray.Dataset or xarray.DataArray

    See Also
    --------
    properscoring.crps_quadrature
    xarray.apply_ufunc
    """
    return xr.apply_ufunc(
        crps_quadrature,
        x,
        cdf_or_dist,
        xmin,
        xmax,
        tol,
        input_core_dims=[[], [], [], [], []],
        dask='parallelized',
        output_dtypes=[float],
    )
Example #24
Source File: probabilistic.py From xskillscore with Apache License 2.0

def xr_brier_score(observations, forecasts):
    """
    xarray version of properscoring.brier_score: Calculate Brier score (BS).

    ..math:
        BS(p, k) = (p_1 - k)^2,

    Parameters
    ----------
    observations : xarray.Dataset or xarray.DataArray
        The observations or set of observations.
    forecasts : xarray.Dataset or xarray.DataArray
        The forecasts associated with the observations.

    Returns
    -------
    xarray.Dataset or xarray.DataArray

    References
    ----------
    Gneiting, Tilmann, and Adrian E Raftery. “Strictly Proper Scoring Rules,
    Prediction, and Estimation.” Journal of the American Statistical
    Association 102, no. 477 (March 1, 2007): 359–78.
    https://doi.org/10/c6758w.

    See Also
    --------
    properscoring.brier_score
    xarray.apply_ufunc
    """
    return xr.apply_ufunc(
        brier_score,
        observations,
        forecasts,
        input_core_dims=[[], []],
        dask='parallelized',
        output_dtypes=[float],
    )
Example #25
Source File: test_frame.py From vnpy_crypto with MIT License

def test_to_xarray_index_types(self, index):
    from xarray import Dataset

    index = getattr(tm, 'make{}'.format(index))
    df = DataFrame({'a': list('abc'),
                    'b': list(range(1, 4)),
                    'c': np.arange(3, 6).astype('u1'),
                    'd': np.arange(4.0, 7.0, dtype='float64'),
                    'e': [True, False, True],
                    'f': pd.Categorical(list('abc')),
                    'g': pd.date_range('20130101', periods=3),
                    'h': pd.date_range('20130101', periods=3,
                                       tz='US/Eastern')}
                   )

    df.index = index(3)
    df.index.name = 'foo'
    df.columns.name = 'bar'
    result = df.to_xarray()
    assert result.dims['foo'] == 3
    assert len(result.coords) == 1
    assert len(result.data_vars) == 8
    assert_almost_equal(list(result.coords.keys()), ['foo'])
    assert isinstance(result, Dataset)

    # idempotency
    # categoricals are not preserved
    # datetimes w/tz are not preserved
    # column names are lost
    expected = df.copy()
    expected['f'] = expected['f'].astype(object)
    expected['h'] = expected['h'].astype('datetime64[ns]')
    expected.columns.name = None
    assert_frame_equal(result.to_dataframe(), expected,
                       check_index_type=False,
                       check_categorical=False)
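For context, DataFrame.to_xarray is the pandas entry point being exercised here; a tiny sketch of the index-to-coordinate mapping it performs (column and index names are invented):

import numpy as np
import pandas as pd

# The DataFrame index becomes the single dimension coordinate of the
# resulting Dataset; each column becomes a data variable along it.
df = pd.DataFrame({"a": list("abc"), "b": np.arange(3)},
                  index=pd.Index([10, 20, 30], name="foo"))
ds = df.to_xarray()
print(ds.dims)                   # {'foo': 3}
print(list(ds.data_vars))        # ['a', 'b']
print(ds.coords["foo"].values)   # [10 20 30]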
Example #26
Source File: test_frame.py From vnpy_crypto with MIT License

def test_to_xarray(self):
    from xarray import Dataset

    df = DataFrame({'a': list('abc'),
                    'b': list(range(1, 4)),
                    'c': np.arange(3, 6).astype('u1'),
                    'd': np.arange(4.0, 7.0, dtype='float64'),
                    'e': [True, False, True],
                    'f': pd.Categorical(list('abc')),
                    'g': pd.date_range('20130101', periods=3),
                    'h': pd.date_range('20130101', periods=3,
                                       tz='US/Eastern')}
                   )

    df.index.name = 'foo'
    result = df[0:0].to_xarray()
    assert result.dims['foo'] == 0
    assert isinstance(result, Dataset)

    # available in 0.7.1
    # MultiIndex
    df.index = pd.MultiIndex.from_product([['a'], range(3)],
                                          names=['one', 'two'])
    result = df.to_xarray()
    assert result.dims['one'] == 1
    assert result.dims['two'] == 3
    assert len(result.coords) == 2
    assert len(result.data_vars) == 8
    assert_almost_equal(list(result.coords.keys()), ['one', 'two'])
    assert isinstance(result, Dataset)

    result = result.to_dataframe()
    expected = df.copy()
    expected['f'] = expected['f'].astype(object)
    expected['h'] = expected['h'].astype('datetime64[ns]')
    expected.columns.name = None
    assert_frame_equal(result, expected,
                       check_index_type=False)
Example #27
Source File: test_netcdf4.py From typhon with MIT License

def test_scalar_masked(self):
    """Test if scalar masked values read OK

    Test for issue #277
    """
    fh = NetCDF4()
    with tempfile.TemporaryDirectory() as tdir:
        tfile = os.path.join(tdir, "testfile.nc")
        before = xr.Dataset({"a": xr.DataArray(42)})
        before["a"].encoding = {"_FillValue": 42}
        fh.write(before, tfile)
        after = fh.read(tfile)
        assert np.isnan(after["a"])  # fill value should become nan
Example #28
Source File: test_nsview.py From spyder-kernels with MIT License

def test_default_display():
    """Tests for default_display."""
    # Display of defaultdict
    assert (value_to_display(COMPLEX_OBJECT) ==
            'defaultdict object of collections module')

    # Display of array of COMPLEX_OBJECT
    assert (value_to_display(np.array(COMPLEX_OBJECT)) ==
            'ndarray object of numpy module')

    # Display of Dataset
    assert (value_to_display(DATASET) ==
            'Dataset object of xarray.core.dataset module')
Example #29
Source File: test_nsview.py From spyder-kernels with MIT License

def test_list_display():
    """Tests for display of lists."""
    long_list = list(range(100))

    # Simple list
    assert value_to_display([1, 2, 3]) == '[1, 2, 3]'

    # Long list
    assert (value_to_display(long_list) ==
            '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ...]')

    # Short list of lists
    assert (value_to_display([long_list] * 3) ==
            '[[0, 1, 2, 3, 4, ...], [0, 1, 2, 3, 4, ...], [0, 1, 2, 3, 4, ...]]')

    # Long list of lists
    result = '[' + ''.join('[0, 1, 2, 3, 4, ...], '*10)[:-2] + ']'
    assert value_to_display([long_list] * 10) == result[:70] + ' ...'

    # Multiple level lists
    assert (value_to_display([[1, 2, 3, [4], 5]] + long_list) ==
            '[[1, 2, 3, [...], 5], 0, 1, 2, 3, 4, 5, 6, 7, 8, ...]')
    assert value_to_display([1, 2, [DF]]) == '[1, 2, [Dataframe]]'
    assert value_to_display([1, 2, [[DF], DATASET]]) == '[1, 2, [[...], Dataset]]'

    # List of complex object
    assert value_to_display([COMPLEX_OBJECT]) == '[defaultdict]'

    # List of composed objects
    li = [COMPLEX_OBJECT, DATASET, 1, {1:2, 3:4}, DF]
    result = '[defaultdict, Dataset, 1, {1:2, 3:4}, Dataframe]'
    assert value_to_display(li) == result

    # List starting with a non-supported object (#5313)
    supported_types = tuple(get_supported_types()['editable'])
    li = [len, 1]
    assert value_to_display(li) == '[builtin_function_or_method, 1]'
    assert is_supported(li, filters=supported_types)
Example #30
Source File: dataset.py From typhon with MIT License

def _correct_overallocation(arr, N):
    if isinstance(arr, xarray.Dataset):
        raise RuntimeError("We shouldn't be here. Ever.")
    logger.debug("Correcting overallocation ({:d}->{:d})".format(
        arr.size, N))
    return arr[:N]