Python xarray.Dataset() Examples
The following are 30 code examples of xarray.Dataset(), collected from open-source projects. The source file, project, and license are noted above each example, so you can trace each snippet back to its original context. You may also want to browse the other functions and classes that the xarray module provides.
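Most of the examples below assume familiarity with the Dataset constructor itself. As a quick, hedged refresher (the variable and coordinate names here are invented for illustration), a Dataset maps variable names to (dimensions, values) pairs, with shared coordinates attached separately:

import numpy as np
import xarray as xr

# Minimal Dataset: one 2-D variable on dimensions ("time", "x"),
# with a coordinate attached to each dimension.
ds = xr.Dataset(
    data_vars={"temperature": (("time", "x"), np.random.rand(3, 4))},
    coords={
        "time": np.array(["2020-01-01", "2020-01-02", "2020-01-03"],
                         dtype="datetime64[ns]"),
        "x": np.arange(4),
    },
)
print(ds["temperature"].dims)   # ('time', 'x')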
Example #1
Source File: test_utils.py From typhon with MIT License

def test_undo_xarray_floatification(self):
    ds = xarray.Dataset(
        {"a": (["x"], numpy.array([1, 2, 3], dtype="f4")),
         "b": (["x"], numpy.array([2.0, 3.0, 4.0])),
         "c": (["x"], numpy.array(
             ["2010-01-01", "2010-01-02", "2010-01-03"], dtype="M8"))})
    ds["a"].encoding = {"dtype": numpy.dtype("i4"), "_FillValue": 1234}
    # c should NOT be converted because it's a time
    ds["c"].encoding = {"dtype": numpy.dtype("i8"), "_FillValue": 12345}
    ds2 = utils.undo_xarray_floatification(ds)
    assert ds is not ds2  # has to be a copy
    assert ds["a"].encoding == ds2["a"].encoding
    assert numpy.allclose(ds["a"], ds2["a"])
    assert ds2["a"].dtype == ds2["a"].encoding["dtype"]
    assert (ds2["c"] == ds["c"]).all()
    assert ds2["c"].dtype == ds["c"].dtype
    assert ds2["b"].dtype == ds["b"].dtype
Example #2
Source File: statistics.py From esmlab with Apache License 2.0

def rmse(x, y, dim):
    """Compute Root Mean Squared Error.

    Parameters
    ----------
    x : Dataset, DataArray, GroupBy, Variable, numpy/dask arrays or scalars
        Mix of labeled and/or unlabeled arrays to which to apply the function.
    y : Dataset, DataArray, GroupBy, Variable, numpy/dask arrays or scalars
        Mix of labeled and/or unlabeled arrays to which to apply the function.
    dim : str
        The dimension to apply the rmse along.

    Returns
    -------
    Root Mean Squared Error
        Single value or tuple of Dataset, DataArray, Variable, dask.array.Array
        or numpy.ndarray, the first type on that list to appear on an input.
    """
    return xs.rmse(x, y, dim)
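A short usage sketch, assuming (as the wrapper above suggests) that `xs` refers to the xskillscore package:

import numpy as np
import xarray as xr
import xskillscore as xs  # assumed to be the `xs` used by the wrapper

x = xr.DataArray(np.random.rand(5, 3), dims=("time", "station"))
y = xr.DataArray(np.random.rand(5, 3), dims=("time", "station"))

# Reduce over "time"; the result keeps the remaining "station" dimension.
err = xs.rmse(x, y, "time")
print(err.dims)   # ('station',)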
Example #3
Source File: data_loader.py From aospy with Apache License 2.0

def set_grid_attrs_as_coords(ds):
    """Set available grid attributes as coordinates in a given Dataset.

    Grid attributes are assumed to have their internal aospy names. Grid
    attributes are set as coordinates, such that they are carried by all
    selected DataArrays with overlapping index dimensions.

    Parameters
    ----------
    ds : Dataset
        Input data

    Returns
    -------
    Dataset
        Dataset with grid attributes set as coordinates
    """
    grid_attrs_in_ds = set(GRID_ATTRS.keys()).intersection(
        set(ds.coords) | set(ds.data_vars))
    ds = ds.set_coords(grid_attrs_in_ds)
    return ds
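The promotion relies on xarray's Dataset.set_coords; a small sketch of what that buys you, using hypothetical variable names rather than aospy's GRID_ATTRS:

import numpy as np
import xarray as xr

# "t" is a data variable; "zsurf" is a grid attribute stored as a data
# variable.  Promoting it with set_coords makes it ride along with any
# DataArray selected from the Dataset.
ds = xr.Dataset({
    "t": (("lat", "lon"), np.random.rand(2, 3)),
    "zsurf": (("lat", "lon"), np.zeros((2, 3))),
})
ds = ds.set_coords(["zsurf"])
print(list(ds.data_vars))         # ['t']
print("zsurf" in ds["t"].coords)  # True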
Example #4
Source File: test_mask.py From verde with BSD 3-Clause "New" or "Revised" License

def test_distance_mask_grid():
    "Check that the mask works for grid input"
    region = (0, 5, -10, -4)
    shape = (7, 6)
    east, north = grid_coordinates(region, shape=shape)
    coords = {"easting": east[0, :], "northing": north[:, 0]}
    data_vars = {"scalars": (["northing", "easting"], np.ones(shape))}
    grid = xr.Dataset(data_vars, coords=coords)
    masked = distance_mask((2.5, -7.5), maxdist=2, grid=grid)
    true = [
        [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
        [np.nan, np.nan, 1, 1, np.nan, np.nan],
        [np.nan, 1, 1, 1, 1, np.nan],
        [np.nan, 1, 1, 1, 1, np.nan],
        [np.nan, np.nan, 1, 1, np.nan, np.nan],
        [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
        [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
    ]
    npt.assert_array_equal(true, masked.scalars.values)
Example #5
Source File: statistics.py From esmlab with Apache License 2.0

def mae(x, y, dim):
    """Compute Mean Absolute Error.

    Parameters
    ----------
    x : Dataset, DataArray, GroupBy, Variable, numpy/dask arrays or scalars
        Mix of labeled and/or unlabeled arrays to which to apply the function.
    y : Dataset, DataArray, GroupBy, Variable, numpy/dask arrays or scalars
        Mix of labeled and/or unlabeled arrays to which to apply the function.
    dim : str
        The dimension to apply the mae along.

    Returns
    -------
    Mean Absolute Error
        Single value or tuple of Dataset, DataArray, Variable, dask.array.Array
        or numpy.ndarray, the first type on that list to appear on an input.
    """
    return xs.mae(x, y, dim)
Example #6
Source File: test_accessor_probabilistic.py From xskillscore with Apache License 2.0

def test_crps_gaussian_accessor(o, f, dask_bool, outer_bool):
    if dask_bool:
        o = o.chunk()
        f = f.chunk()
    mu = f.mean('member')
    sig = f.std('member')
    actual = crps_gaussian(o, mu, sig)

    ds = xr.Dataset()
    ds['o'] = o
    ds['mu'] = mu
    ds['sig'] = sig
    if outer_bool:
        ds = ds.drop_vars('mu')
        expected = ds.xs.crps_gaussian('o', mu, sig)
    else:
        expected = ds.xs.crps_gaussian('o', 'mu', 'sig')
    assert_allclose(actual, expected)
Example #7
Source File: dataset.py From typhon with MIT License

def find_granules_sorted(self, dt_start=None, dt_end=None,
                         include_last_before=False, **extra):
    """Yield all granules, sorted by times.

    For documentation, see :func:`~Dataset.find_granules`.
    """
    allgran = list(self.find_granules(dt_start, dt_end,
                                      include_last_before, **extra))

    # I've been through all granules at least once, so all should be
    # cached now; no need for additional hints when granule timeinfo
    # obtainable only with hints from subdir, which is not included in
    # the re-matching method
    if extra.get("return_time", False):
        yield from sorted(allgran)
    else:
        yield from sorted(allgran, key=self.get_times_for_granule)
Example #8
Source File: dataset.py From typhon with MIT License

def get_additional_field(self, M, fld):
    """Get additional field.

    Get field from other dataset, original objects, or otherwise.
    To be implemented by subclass implementations.

    Exact fields depend on subclass.

    Arguments:

        M (ndarray): ndarray with existing data
            A (masked) array with a dtype such as returned from
            `self.read <Dataset.read>`.

        fld (str): Additional field to read from original data

    Returns:

        ndarray with fields of M + fld.
    """
    raise NotImplementedError("Must be implemented by child-class")
Example #9
Source File: dataset.py From typhon with MIT License

def _add_cont_to_arr(self, arr, N, cont):
    """Changes arr in-situ, does not return"""
    if isinstance(cont, xarray.Dataset):
        # we should already know it's large enough
        # for arr[self.time_field] I start at N
        # for the other time coordinates at the relative "speed" they
        # are behind N
        # but this is not guaranteed to be regular so I would need to
        # keep trac of each individually, or inspect it on-the-fly
        # this approximation may be good enough for pre-allocation
        # (which is approximate anyway), when actually storing we need
        # to do a better job… for each time coordinate, check when it
        # “dies”
        raise NotImplementedError("This is not used for xarrays. "
            "But see comment in source-code for some thoughts.")
    else:
        arr[N:(N+cont.size)] = cont
        #arr = self._finalise_arr(arr, N)
Example #10
Source File: dataset.py From typhon with MIT License

def __init__(self, **kwargs):
    """Initialise a Dataset object.

    All keyword arguments will be translated into attributes.
    Does not take positional arguments.

    Note that if you create a dataset with a name that already exists,
    the existing object is returned, but __init__ is still called
    (Python does this, see
    https://docs.python.org/3.7/reference/datamodel.html#object.__new__).
    """
    self.mandatory_fields = set()
    for (k, v) in kwargs.items():
        setattr(self, k, v)
    self.setlocal()
    if self.my_pseudo_fields is None:
        self.my_pseudo_fields = collections.OrderedDict()
Example #11
Source File: test_netcdf4.py From typhon with MIT License

def test_scalefactor(self):
    """Test if scale factors written/read correctly
    """
    fh = NetCDF4()
    with tempfile.TemporaryDirectory() as tdir:
        tfile = os.path.join(tdir, "testfile.nc")
        before = xr.Dataset(
            {"a": xr.DataArray(
                np.array([0.1, 0.2]))})
        before["a"].encoding = {
            "scale_factor": 0.1,
            "_FillValue": 42,
            "dtype": "int16"}
        fh.write(before, tfile)
        after = fh.read(tfile)
        assert np.allclose(before["a"], after["a"])
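The packing behaviour being tested comes from xarray's encoding handling rather than from typhon itself; a minimal sketch of the same round trip using plain xarray in place of typhon's NetCDF4 file handler:

import os
import tempfile
import numpy as np
import xarray as xr

# The encoding dict controls how values are packed on disk
# (scale_factor, _FillValue, on-disk dtype) and unpacked on read.
with tempfile.TemporaryDirectory() as tdir:
    path = os.path.join(tdir, "packed.nc")
    before = xr.Dataset({"a": xr.DataArray(np.array([0.1, 0.2]))})
    before["a"].encoding = {"scale_factor": 0.1, "_FillValue": 42,
                            "dtype": "int16"}
    before.to_netcdf(path)
    after = xr.open_dataset(path)
    assert np.allclose(before["a"], after["a"])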
Example #12
Source File: test_netcdf4.py From typhon with MIT License

def test_times(self):
    """Test if times are read correctly
    """
    fh = NetCDF4()
    with tempfile.TemporaryDirectory() as tdir:
        tfile = os.path.join(tdir, "testfile.nc")
        before = xr.Dataset(
            {"a": xr.DataArray(
                np.array(
                    ["2019-02-14T09:00:00", "2019-02-14T09:00:01"],
                    dtype="M8[ns]"))})
        before["a"].encoding = {
            "units": "seconds since 2019-02-14 09:00:00",
            "scale_factor": 0.1}
        fh.write(before, tfile)
        after = fh.read(tfile)
        assert np.array_equal(before["a"], after["a"])
Example #13
Source File: common.py From typhon with MIT License

def get_xarray_group(dataset, group):
    """Get pseudo group from xarray.Dataset

    Args:
        dataset: A xarray.Dataset object with pseudo groups.
        group: The name of the group (can also be a subgroup).

    Returns:
        A xarray.Dataset with the pseudo group.
    """
    if not group.endswith("/"):
        group += "/"

    group_vars = [
        var
        for var in dataset.variables
        if var.startswith(group)
    ]

    if not group_vars:
        raise KeyError(f"The group {group} was not found!")

    return dataset[group_vars]
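The "pseudo groups" here are nothing more than a slash convention in variable names; a rough usage sketch (the variable names are made up for illustration):

import numpy as np
import xarray as xr

# Variables are grouped only by a "/" prefix in their names.
ds = xr.Dataset({
    "MHS/bt": (("scanline",), np.random.rand(4)),
    "MHS/lat": (("scanline",), np.linspace(-60, 60, 4)),
    "AVHRR/bt": (("scanline",), np.random.rand(4)),
})
mhs = get_xarray_group(ds, "MHS")   # Dataset with "MHS/bt" and "MHS/lat"
print(list(mhs.variables))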
Example #14
Source File: dataset.py From typhon with MIT License

def _apply_limits_and_filters(self, cont, limits, simple_filters):
    if isinstance(cont, xarray.Dataset):
        if len(limits) > 0:
            raise NotImplementedError(
                "limits not implemented on xarray datasets")
        oldsize = cont[self.time_field].size
        for f in simple_filters:
            cont = f(cont)
        logger.debug("Filters reduced number from "
                     "{:d} to {:d}".format(
                         oldsize, cont[self.time_field].size))
        return cont
    oldsize = cont.size
    cont = tpmath.array.limit_ndarray(cont, limits)
    for f in simple_filters:
        cont = f(cont)
    if cont.size < oldsize:
        logger.debug("Applying limitations, reducing "
                     "{:d} to {:d}".format(oldsize, cont.size))
    return cont
Example #15
Source File: test_collocations.py From typhon with MIT License

def test_collocate_collapse_expand(self):
    """Test whether collocating, collapsing and expanding work"""
    collocator = Collocator()
    test = xr.Dataset({
        "time": ("time", np.arange("2000", "2010", dtype="M8[Y]")),
        "lat": ("time", np.arange(10)),
        "lon": ("time", np.arange(10)),
    })

    collocations = collocator.collocate(
        test, test, max_interval="30 days", max_distance="150 miles"
    )

    collapsed = collapse(collocations)
    expanded = expand(collocations)
Example #16
Source File: common.py From typhon with MIT License

def read(self, file_info, fields=None, **kwargs):
    """Read a CSV file and return an xarray.Dataset with its content

    Args:
        file_info: Path and name of the file as string or FileInfo object.
        fields: Field that you want to extract from the file. If not given,
            all fields are going to be extracted.
        **kwargs: Additional keyword arguments for the pandas function
            `pandas.read_csv`. See for more details:
            https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html

    Returns:
        A xarray.Dataset object.
    """
    data = pd.read_csv(file_info.path, **kwargs).to_xarray()

    if fields is None:
        return data
    else:
        return data[fields]
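The core of this handler is a single chained pandas call; a minimal sketch of that idea without typhon's FileInfo wrapper (the CSV content here is invented):

import io
import pandas as pd

# to_xarray() turns the DataFrame into a Dataset whose dimension is the
# DataFrame index; each column becomes a data variable.
csv = io.StringIO("time,lat,lon\n0,10.0,20.0\n1,11.0,21.0\n")
data = pd.read_csv(csv).to_xarray()
print(list(data.data_vars))   # ['time', 'lat', 'lon']
print(data.dims)              # dimension named after the DataFrame index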
Example #17
Source File: collocator.py From typhon with MIT License

def check_collocation_data(dataset):
    """Check whether the dataset fulfills the standard of collocated data

    Args:
        dataset: A xarray.Dataset object

    Raises:
        An InvalidCollocationData error if the dataset did not pass the test.
    """
    mandatory_fields = ["Collocations/pairs", "Collocations/group"]

    for mandatory_field in mandatory_fields:
        if mandatory_field not in dataset.variables:
            raise InvalidCollocationData(
                f"Could not find the field '{mandatory_field}'!"
            )
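Since the check only tests for the presence of two pseudo-group variables, a sketch of a dataset that would pass it looks roughly like this (shapes and values are made up purely for illustration):

import numpy as np
import xarray as xr

# Minimal dataset carrying the two mandatory pseudo-group fields.
collocations = xr.Dataset({
    "Collocations/pairs": (("group", "collocation"),
                           np.array([[0, 1], [1, 0]])),
    "Collocations/group": (("group",), np.array([0, 1])),
})
check_collocation_data(collocations)   # passes silently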
Example #18
Source File: common.py From typhon with MIT License

def write(self, data, file_info, **kwargs):
    """Write a xarray.Dataset to a CSV file.

    Args:
        data: An DataGroup object that should be saved.
        file_info: Path and name of the file as string or FileInfo object.
        **kwargs: Additional keyword arguments for
            `pandas.Dataframe.to_csv`. See for more details:
            https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_csv.html

    Returns:
        None
    """
    data.to_dataframe().to_csv(file_info.path, **kwargs)
Example #19
Source File: calc.py From aospy with Apache License 2.0

def region_calcs(self, arr, func):
    """Perform a calculation for all regions."""
    # Get pressure values for data output on hybrid vertical coordinates.
    bool_pfull = (self.def_vert and
                  self.dtype_in_vert == internal_names.ETA_STR and
                  self.dtype_out_vert is False)
    if bool_pfull:
        pfull_data = self._get_input_data(_P_VARS[self.dtype_in_vert],
                                          self.start_date, self.end_date)
        pfull = self._full_to_yearly_ts(
            pfull_data, arr[internal_names.TIME_WEIGHTS_STR]
        ).rename('pressure')
    # Loop over the regions, performing the calculation.
    reg_dat = {}
    for reg in self.region:
        # Just pass along the data if averaged already.
        if 'av' in self.dtype_in_time:
            data_out = reg.ts(arr)
        # Otherwise perform the calculation.
        else:
            method = getattr(reg, func)
            data_out = method(arr)
        if bool_pfull:
            # Don't apply e.g. standard deviation to coordinates.
            if func not in ['av', 'ts']:
                method = reg.ts
            # Convert Pa to hPa
            coord = method(pfull) * 1e-2
            data_out = data_out.assign_coords(
                **{reg.name + '_pressure': coord}
            )
        reg_dat.update(**{reg.name: data_out})
    return xr.Dataset(reg_dat)
Example #20
Source File: common.py From typhon with MIT License

def read(self, file_info, fields=None, mapping=None):
    """Read and parse HDF4 files and load them to a xarray.Dataset

    Args:
        file_info: Path and name of the file as string or FileInfo object.
        fields: Field names that you want to extract from this file as a
            list.
        mapping: A dictionary that maps old field names to new field names.
            If given, `fields` must contain the old field names.

    Returns:
        A xarray.Dataset object.
    """
    if fields is None:
        raise NotImplementedError(
            "You have to set field names. Loading the complete file is not"
            " yet implemented!"
        )

    dataset = xr.Dataset()

    # Files in HDF4 format are not very pretty. This code is taken from
    # http://hdfeos.org/zoo/OTHER/2010128055614_21420_CS_2B-GEOPROF_GRANULE_P_R04_E03.hdf.py
    # and adapted by John Mrziglod.
    file = HDF.HDF(file_info.path)

    try:
        vs = file.vstart()

        for field in fields:
            # Add the field data to the dataset.
            dataset[field] = self._get_field(vs, field)
    except Exception as e:
        raise e
    finally:
        file.close()

    return _xarray_rename_fields(dataset, mapping)
Example #21
Source File: probabilistic.py From xskillscore with Apache License 2.0

def xr_crps_gaussian(observations, mu, sig):
    """
    xarray version of properscoring.crps_gaussian: Continuous Ranked
    Probability Score with a Gaussian distribution.

    Parameters
    ----------
    observations : xarray.Dataset or xarray.DataArray
        The observations or set of observations.
    mu : xarray.Dataset or xarray.DataArray
        The mean of the forecast normal distribution.
    sig : xarray.Dataset or xarray.DataArray
        The standard deviation of the forecast distribution.

    Returns
    -------
    xarray.Dataset or xarray.DataArray

    See Also
    --------
    properscoring.crps_gaussian
    xarray.apply_ufunc
    """
    # check if same dimensions
    if isinstance(mu, (int, float)):
        mu = xr.DataArray(mu)
    if isinstance(sig, (int, float)):
        sig = xr.DataArray(sig)
    if mu.dims != observations.dims:
        observations, mu = xr.broadcast(observations, mu)
    if sig.dims != observations.dims:
        observations, sig = xr.broadcast(observations, sig)
    return xr.apply_ufunc(
        crps_gaussian,
        observations,
        mu,
        sig,
        input_core_dims=[[], [], []],
        dask='parallelized',
        output_dtypes=[float],
    )
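A quick usage sketch for the wrapper above, with random data and a scalar spread; it assumes `xr` and properscoring's `crps_gaussian` are imported as in the module:

import numpy as np
import xarray as xr

# Observations on a small grid; a matching forecast mean and a scalar spread.
obs = xr.DataArray(np.random.randn(4, 5), dims=("lat", "lon"))
mu = obs + 0.1 * np.random.randn(4, 5)
sig = 1.0   # scalar; broadcast against obs inside xr_crps_gaussian

crps = xr_crps_gaussian(obs, mu, sig)
print(crps.dims)   # ('lat', 'lon') -- one score per grid point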
Example #22
Source File: dataset.py From typhon with MIT License

def _ensure_large_enough(self, arr, cont, N, newsize, frac_done):
    """Allocate new space while adding gran to data

    Helper for _add_gran_to_data, part of the read_period family of
    helpers.  Does NOT add cont to arr!
    """
    if isinstance(cont, xarray.Dataset):
        raise NotImplementedError("Not used for xarray datasets. "
            "But see version history at "
            "https://arts.mi.uni-hamburg.de/trac/rt/browser/typhon/trunk/typhon/datasets/dataset.py?rev=10396#L462 "
            "if there is a wish to reimplemented!")
    else:
        if newsize * arr.itemsize > self.maxsize:
            raise MemoryError("This dataset is too large "
                "for typhons little mind. Continuing might "
                "ultimately need {:,.0f} MiB of RAM. This exceeds my "
                "maximum (self.maxsize) of {:,.0f} MiB. "
                "Sorry! ".format(
                    newsize*arr.itemsize/MiB, self.maxsize/MiB))
        logger.debug(
            "New size ({:d} items, {:,.0f} MiB) would exceed allocated "
            "size ({:d} items, {:,.0f} MiB). I'm {:.3%} "
            "through. Allocating new: {:d} items, {:,.0f} "
            "MiB. New size: {:d} items, {:,.0f} "
            "MiB.".format(N+cont.size, (cont.nbytes+arr.nbytes)/MiB,
                          arr.size, arr.nbytes/MiB, frac_done,
                          newsize-arr.size,
                          (newsize-arr.size)*arr.itemsize/MiB,
                          newsize, newsize*arr.itemsize/MiB))
        mod = (numpy.ma if hasattr(arr, "mask") else numpy)
        arr = mod.concatenate(
            (arr, mod.zeros(dtype=arr.dtype, shape=newsize-arr.size)))
    return arr
Example #23
Source File: probabilistic.py From xskillscore with Apache License 2.0

def xr_crps_quadrature(x, cdf_or_dist, xmin=None, xmax=None, tol=1e-6):
    """
    xarray version of properscoring.crps_quadrature: Continuous Ranked
    Probability Score with numerical integration of the normal distribution

    Parameters
    ----------
    x : xarray.Dataset or xarray.DataArray
        Observations associated with the forecast distribution ``cdf_or_dist``.
    cdf_or_dist : callable or scipy.stats.distribution
        Function which returns the cumulative density of the forecast
        distribution at value x.

    Returns
    -------
    xarray.Dataset or xarray.DataArray

    See Also
    --------
    properscoring.crps_quadrature
    xarray.apply_ufunc
    """
    return xr.apply_ufunc(
        crps_quadrature,
        x,
        cdf_or_dist,
        xmin,
        xmax,
        tol,
        input_core_dims=[[], [], [], [], []],
        dask='parallelized',
        output_dtypes=[float],
    )
Example #24
Source File: probabilistic.py From xskillscore with Apache License 2.0

def xr_brier_score(observations, forecasts):
    """
    xarray version of properscoring.brier_score: Calculate Brier score (BS).

    ..math:
        BS(p, k) = (p_1 - k)^2,

    Parameters
    ----------
    observations : xarray.Dataset or xarray.DataArray
        The observations or set of observations.
    forecasts : xarray.Dataset or xarray.DataArray
        The forecasts associated with the observations.

    Returns
    -------
    xarray.Dataset or xarray.DataArray

    References
    ----------
    Gneiting, Tilmann, and Adrian E Raftery. “Strictly Proper Scoring Rules,
    Prediction, and Estimation.” Journal of the American Statistical
    Association 102, no. 477 (March 1, 2007): 359–78.
    https://doi.org/10/c6758w.

    See Also
    --------
    properscoring.brier_score
    xarray.apply_ufunc
    """
    return xr.apply_ufunc(
        brier_score,
        observations,
        forecasts,
        input_core_dims=[[], []],
        dask='parallelized',
        output_dtypes=[float],
    )
Example #25
Source File: test_frame.py From vnpy_crypto with MIT License

def test_to_xarray_index_types(self, index):
    from xarray import Dataset

    index = getattr(tm, 'make{}'.format(index))
    df = DataFrame({'a': list('abc'),
                    'b': list(range(1, 4)),
                    'c': np.arange(3, 6).astype('u1'),
                    'd': np.arange(4.0, 7.0, dtype='float64'),
                    'e': [True, False, True],
                    'f': pd.Categorical(list('abc')),
                    'g': pd.date_range('20130101', periods=3),
                    'h': pd.date_range('20130101', periods=3,
                                       tz='US/Eastern')}
                   )

    df.index = index(3)
    df.index.name = 'foo'
    df.columns.name = 'bar'
    result = df.to_xarray()
    assert result.dims['foo'] == 3
    assert len(result.coords) == 1
    assert len(result.data_vars) == 8
    assert_almost_equal(list(result.coords.keys()), ['foo'])
    assert isinstance(result, Dataset)

    # idempotency
    # categoricals are not preserved
    # datetimes w/tz are not preserved
    # column names are lost
    expected = df.copy()
    expected['f'] = expected['f'].astype(object)
    expected['h'] = expected['h'].astype('datetime64[ns]')
    expected.columns.name = None
    assert_frame_equal(result.to_dataframe(), expected,
                       check_index_type=False,
                       check_categorical=False)
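For context, DataFrame.to_xarray is the pandas entry point being exercised here; a tiny sketch of the index-to-coordinate mapping it performs (column and index names are invented):

import numpy as np
import pandas as pd

# The DataFrame index becomes the single dimension coordinate of the
# resulting Dataset; each column becomes a data variable along it.
df = pd.DataFrame({"a": list("abc"), "b": np.arange(3)},
                  index=pd.Index([10, 20, 30], name="foo"))
ds = df.to_xarray()
print(ds.dims)                   # {'foo': 3}
print(list(ds.data_vars))        # ['a', 'b']
print(ds.coords["foo"].values)   # [10 20 30]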
Example #26
Source File: test_frame.py From vnpy_crypto with MIT License

def test_to_xarray(self):
    from xarray import Dataset

    df = DataFrame({'a': list('abc'),
                    'b': list(range(1, 4)),
                    'c': np.arange(3, 6).astype('u1'),
                    'd': np.arange(4.0, 7.0, dtype='float64'),
                    'e': [True, False, True],
                    'f': pd.Categorical(list('abc')),
                    'g': pd.date_range('20130101', periods=3),
                    'h': pd.date_range('20130101', periods=3,
                                       tz='US/Eastern')}
                   )

    df.index.name = 'foo'
    result = df[0:0].to_xarray()
    assert result.dims['foo'] == 0
    assert isinstance(result, Dataset)

    # available in 0.7.1
    # MultiIndex
    df.index = pd.MultiIndex.from_product([['a'], range(3)],
                                          names=['one', 'two'])
    result = df.to_xarray()
    assert result.dims['one'] == 1
    assert result.dims['two'] == 3
    assert len(result.coords) == 2
    assert len(result.data_vars) == 8
    assert_almost_equal(list(result.coords.keys()), ['one', 'two'])
    assert isinstance(result, Dataset)

    result = result.to_dataframe()
    expected = df.copy()
    expected['f'] = expected['f'].astype(object)
    expected['h'] = expected['h'].astype('datetime64[ns]')
    expected.columns.name = None
    assert_frame_equal(result, expected,
                       check_index_type=False)
Example #27
Source File: test_netcdf4.py From typhon with MIT License

def test_scalar_masked(self):
    """Test if scalar masked values read OK

    Test for issue #277
    """
    fh = NetCDF4()
    with tempfile.TemporaryDirectory() as tdir:
        tfile = os.path.join(tdir, "testfile.nc")
        before = xr.Dataset({"a": xr.DataArray(42)})
        before["a"].encoding = {"_FillValue": 42}
        fh.write(before, tfile)
        after = fh.read(tfile)
        assert np.isnan(after["a"])  # fill value should become nan
Example #28
Source File: test_nsview.py From spyder-kernels with MIT License

def test_default_display():
    """Tests for default_display."""
    # Display of defaultdict
    assert (value_to_display(COMPLEX_OBJECT) ==
            'defaultdict object of collections module')

    # Display of array of COMPLEX_OBJECT
    assert (value_to_display(np.array(COMPLEX_OBJECT)) ==
            'ndarray object of numpy module')

    # Display of Dataset
    assert (value_to_display(DATASET) ==
            'Dataset object of xarray.core.dataset module')
Example #29
Source File: test_nsview.py From spyder-kernels with MIT License

def test_list_display():
    """Tests for display of lists."""
    long_list = list(range(100))

    # Simple list
    assert value_to_display([1, 2, 3]) == '[1, 2, 3]'

    # Long list
    assert (value_to_display(long_list) ==
            '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ...]')

    # Short list of lists
    assert (value_to_display([long_list] * 3) ==
            '[[0, 1, 2, 3, 4, ...], [0, 1, 2, 3, 4, ...], [0, 1, 2, 3, 4, ...]]')

    # Long list of lists
    result = '[' + ''.join('[0, 1, 2, 3, 4, ...], '*10)[:-2] + ']'
    assert value_to_display([long_list] * 10) == result[:70] + ' ...'

    # Multiple level lists
    assert (value_to_display([[1, 2, 3, [4], 5]] + long_list) ==
            '[[1, 2, 3, [...], 5], 0, 1, 2, 3, 4, 5, 6, 7, 8, ...]')
    assert value_to_display([1, 2, [DF]]) == '[1, 2, [Dataframe]]'
    assert value_to_display([1, 2, [[DF], DATASET]]) == '[1, 2, [[...], Dataset]]'

    # List of complex object
    assert value_to_display([COMPLEX_OBJECT]) == '[defaultdict]'

    # List of composed objects
    li = [COMPLEX_OBJECT, DATASET, 1, {1:2, 3:4}, DF]
    result = '[defaultdict, Dataset, 1, {1:2, 3:4}, Dataframe]'
    assert value_to_display(li) == result

    # List starting with a non-supported object (#5313)
    supported_types = tuple(get_supported_types()['editable'])
    li = [len, 1]
    assert value_to_display(li) == '[builtin_function_or_method, 1]'
    assert is_supported(li, filters=supported_types)
Example #30
Source File: dataset.py From typhon with MIT License

def _correct_overallocation(arr, N):
    if isinstance(arr, xarray.Dataset):
        raise RuntimeError("We shouldn't be here. Ever.")
    logger.debug("Correcting overallocation ({:d}->{:d})".format(
        arr.size, N))
    return arr[:N]