Python xarray.open_zarr() Examples

The following are 30 code examples of xarray.open_zarr(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module xarray, or try the search function.
Example #1
Source File: test_chunkstore.py    From xcube with MIT License 7 votes vote down vote up
def gen_index_var(dims, shape, chunks):
    """Create a lazily computed index variable backed by a ``ChunkStore``.

    Each chunk is zero-filled except for its first ``2 * ndim`` flattened
    elements, which receive, per dimension, the chunk's start offset
    followed by its stop offset.

    :param dims: dimension names passed on to ``ChunkStore``
    :param shape: overall shape passed on to ``ChunkStore``
    :param chunks: chunk sizes passed on to ``ChunkStore``
    :return: the ``__index_var__`` variable of the opened dataset
    """

    # noinspection PyUnusedLocal
    def get_chunk(cube_store: ChunkStore, name: str, index: Tuple[int, ...]) -> bytes:
        block = np.zeros(cube_store.chunks, dtype=np.uint64)
        flat = block.ravel()
        # Writes through ``flat`` must land in ``block``, so a copy is an error.
        if flat.base is not block:
            raise ValueError('view expected')
        if flat.size < cube_store.ndim * 2:
            raise ValueError('size too small')
        for axis in range(cube_store.ndim):
            start = cube_store.chunks[axis] * index[axis]
            stop = start + cube_store.chunks[axis]
            flat[2 * axis] = start
            flat[2 * axis + 1] = stop
        return block.tobytes()

    store = ChunkStore(dims, shape, chunks)
    store.add_lazy_array('__index_var__', '<u8', get_chunk=get_chunk)

    return xr.open_zarr(store).__index_var__
Example #2
Source File: stores.py    From xarray-simlab with BSD 3-Clause "New" or "Revised" License 7 votes vote down vote up
def open_as_xr_dataset(self) -> xr.Dataset:
        """Open the zarr group of this store as an xarray dataset.

        In-memory stores are opened without chunking and loaded eagerly.
        Other stores are opened with ``chunks="auto"``, and only the
        dimensionless data variables are loaded up front.
        """
        chunks = None if self.in_memory else "auto"

        ds = xr.open_zarr(
            self.zgroup.store,
            group=self.zgroup.path,
            chunks=chunks,
            consolidated=self.consolidated,
            # disable mask (not nice with zarr default fill_value=0)
            mask_and_scale=False,
        )

        if self.in_memory:
            # lazy loading may be confusing for the default, in-memory option
            ds.load()
            return ds

        # load scalar data vars (there might be many of them: model params)
        for data_var in ds.data_vars.values():
            if data_var.dims:
                continue
            data_var.load()

        return ds
Example #3
Source File: test_timeslice.py    From xcube with MIT License 7 votes vote down vote up
def test_update_corrupt_cube(self):
        """Inserting into a cube whose first two dimensions were swapped must fail."""
        self.write_cube('2019-01-01', 3)

        cube = xr.open_zarr(self.CUBE_PATH)
        # Swap the first two entries of both shape and dims.
        size_0, size_1, size_2 = cube.precipitation.shape
        dim_0, dim_1, dim_2 = cube.precipitation.dims
        swapped_values = cube.precipitation.values.reshape((size_1, size_0, size_2))
        cube['precipitation'] = xr.DataArray(swapped_values,
                                             dims=(dim_1, dim_0, dim_2),
                                             coords=cube.precipitation.coords)
        cube.to_zarr(self.CUBE_PATH_2)

        with self.assertRaises(ValueError) as cm:
            insert_time_slice(self.CUBE_PATH_2, 2, self.make_slice('2019-01-02T06:30'))
        self.assertEqual("dimension 'time' of variable 'precipitation' must be first dimension",
                         str(cm.exception))
Example #4
Source File: test_timeslice.py    From xcube with MIT License 6 votes vote down vote up
def test_insert_time_slice(self):
        """Insert three slices into a 10-step cube and check the resulting time axis."""
        self.write_cube('2019-01-02', 10)

        insertions = ((5, '2019-01-06T02:00'),
                      (10, '2019-01-10T02:00'),
                      (0, '2019-01-01T02:00'))
        for insert_index, time_label in insertions:
            insert_time_slice(self.CUBE_PATH, insert_index, self.make_slice(time_label))

        cube = xr.open_zarr(self.CUBE_PATH)
        expected_labels = [
            '2019-01-01T14:00', '2019-01-02T12:00',
            '2019-01-03T12:00', '2019-01-04T12:00',
            '2019-01-05T12:00', '2019-01-06T12:00',
            '2019-01-06T14:00', '2019-01-07T12:00',
            '2019-01-08T12:00', '2019-01-09T12:00',
            '2019-01-10T12:00', '2019-01-10T14:00',
            '2019-01-11T12:00',
        ]
        expected = np.array(expected_labels, dtype=cube.time.dtype)
        self.assertEqual(13, cube.time.size)
        self.assertEqual(None, cube.time.chunks)
        np.testing.assert_equal(cube.time.values, expected)
Example #5
Source File: mldataset.py    From xcube with MIT License 6 votes vote down vote up
def _get_dataset_lazily(self, index: int, parameters: Dict[str, Any]) -> xr.Dataset:
        """
        Open the dataset belonging to the level at *index* from the local file system.

        A level stored as a ``.link`` file is resolved first: the file's content
        is taken as the dataset path, and a relative path is joined onto the
        parent directory of ``self._dir_path``.

        :param index: the level index
        :param parameters: keyword arguments passed to xr.open_zarr()
        :return: the dataset for the level at *index*.
        """
        ext, level_path = self._level_paths[index]
        if ext == ".link":
            with open(level_path, "r") as link_file:
                level_path = link_file.read()
            # a relative link target is resolved against the levels directory
            if not os.path.isabs(level_path):
                level_path = os.path.join(os.path.dirname(self._dir_path), level_path)
        with measure_time(tag=f"opened local dataset {level_path} for level {index}"):
            dataset = xr.open_zarr(level_path, **parameters)
            return assert_cube(dataset, name=level_path)
Example #6
Source File: mldataset.py    From xcube with MIT License 6 votes vote down vote up
def _get_dataset_lazily(self, index: int, parameters: Dict[str, Any]) -> xr.Dataset:
        """
        Read the dataset for the level at given *index* from S3 object storage.

        A level stored as a ``.link`` file is resolved first: the file's content
        is taken as the dataset path, and a relative path is joined onto the
        parent directory of ``self._dir_path``. The resulting store is wrapped
        in a ``zarr.LRUStoreCache`` when ``get_chunk_cache_capacity(index)``
        returns a non-zero size.

        :param index: the level index
        :param parameters: keyword arguments passed to xr.open_zarr()
        :return: the dataset for the level at *index*.
        """
        ext, level_path = self._level_paths[index]
        if ext == ".link":
            # The link file is only read here, so it must be opened in read
            # mode; write mode would truncate it and make read() fail.
            with self._s3_file_system.open(level_path, "r") as fp:
                level_path = fp.read()
                # if file_path is a relative path, resolve it against the levels directory
                if not os.path.isabs(level_path):
                    base_dir = os.path.dirname(self._dir_path)
                    level_path = os.path.join(base_dir, level_path)
        store = s3fs.S3Map(root=level_path, s3=self._s3_file_system, check=False)
        max_size = self.get_chunk_cache_capacity(index)
        if max_size:
            # Cache chunks only when a capacity has been configured.
            store = zarr.LRUStoreCache(store, max_size=max_size)
        with measure_time(tag=f"opened remote dataset {level_path} for level {index}"):
            # Use consolidated metadata only if the store actually provides it.
            consolidated = self._s3_file_system.exists(f'{level_path}/.zmetadata')
            return assert_cube(xr.open_zarr(store, consolidated=consolidated, **parameters), name=level_path)
Example #7
Source File: test_gen.py    From xcube with MIT License 6 votes vote down vote up
def test_process_inputs_replace_multiple_zarr(self):
        """Generate a cube from four inputs, one of them repeated, and check the replace step."""
        input_names = (
            '20170101-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc',
            '20170102-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc',
            '20170103-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc',
            '20170102-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc',
        )
        input_paths = [get_inputdata_path(input_name) for input_name in input_names]
        status, output = gen_cube_wrapper(input_paths, 'l2c.zarr', no_sort_mode=True)
        self.assertEqual(True, status)

        expected_messages = (
            '\nstep 9 of 9: creating input slice in l2c.zarr...\n',
            '\nstep 9 of 9: appending input slice to l2c.zarr...\n',
            '\nstep 9 of 9: replacing input slice at index 1 in l2c.zarr...\n',
        )
        for message in expected_messages:
            self.assertTrue(message in output)

        extra_attrs = {
            'date_modified': None,
            'time_coverage_start': '2016-12-31T12:00:00.000000000',
            'time_coverage_end': '2017-01-03T12:00:00.000000000',
        }
        self.assert_cube_ok(xr.open_zarr('l2c.zarr'),
                            expected_time_dim=3,
                            expected_extra_attrs=extra_attrs)
        metadata_path = os.path.join('l2c.zarr', '.zmetadata')
        self.assertTrue(os.path.exists(metadata_path))
Example #8
Source File: mldataset.py    From xcube with MIT License 6 votes vote down vote up
def open_ml_dataset_from_local_fs(path: str,
                                  data_format: str = None,
                                  ds_id: str = None,
                                  exception_type: type = ValueError,
                                  **kwargs) -> MultiLevelDataset:
    """
    Open a multi-level dataset from the local file system.

    :param path: path of the dataset
    :param data_format: format name; guessed from *path* when not given
    :param ds_id: dataset identifier handed to the created multi-level dataset
    :param exception_type: exception class raised for an unrecognized format
    :param kwargs: keyword arguments forwarded to the opener for the format
    :return: the multi-level dataset
    """
    if not data_format:
        data_format = guess_ml_dataset_format(path)

    if data_format == FORMAT_NAME_NETCDF4:
        with measure_time(tag=f"opened local NetCDF dataset {path}"):
            netcdf_ds = assert_cube(xr.open_dataset(path, **kwargs))
            return BaseMultiLevelDataset(netcdf_ds, ds_id=ds_id)

    if data_format == FORMAT_NAME_ZARR:
        with measure_time(tag=f"opened local zarr dataset {path}"):
            zarr_ds = assert_cube(xr.open_zarr(path, **kwargs))
            return BaseMultiLevelDataset(zarr_ds, ds_id=ds_id)

    if data_format == FORMAT_NAME_LEVELS:
        with measure_time(tag=f"opened local levels dataset {path}"):
            return FileStorageMultiLevelDataset(path, ds_id=ds_id, zarr_kwargs=kwargs)

    raise exception_type(f'Unrecognized multi-level dataset format {data_format!r} for path {path!r}')
Example #9
Source File: xarray_container.py    From intake-xarray with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def __init__(self, url, headers, **kwargs):
        """
        Initialise local xarray, whose dask arrays contain tasks that pull data

        The metadata contains a key "internal", which is a result of running
        ``serialize_zarr_ds`` on the xarray on the server. It is a dict
        containing the metadata parts of the original dataset (i.e., the
        keys with names like ".z*"). This can be opened by xarray as-is, and
        will make a local xarray object. In ``._get_schema()``, the numpy
        parts (coordinates) are fetched and the dask-array parts (variables)
        have their dask graphs redefined to tasks that fetch data from the
        server.
        """
        import xarray as xr
        super(RemoteXarray, self).__init__(url, headers, **kwargs)
        # No schema is stored at construction time.
        self._schema = None
        # ``self.metadata`` is read only after the base initialiser has run.
        self._ds = xr.open_zarr(self.metadata['internal'])
Example #10
Source File: test_mldataset.py    From xcube with MIT License 6 votes vote down vote up
def _write_test_cube_pyramid(cls):
        """Create the 'xcube-test' bucket and write a one-variable test pyramid into it."""
        # The bucket must exist before the test pyramid is written.
        boto3.client('s3').create_bucket(Bucket='xcube-test', ACL='public-read')

        # Build a multi-level dataset holding just the variable "conc_chl".
        this_dir = os.path.dirname(__file__)
        demo_cube_path = os.path.join(this_dir, '../../examples/serve/demo/cube-1-250-250.zarr')
        demo_cube = xr.open_zarr(demo_cube_path)
        single_var_cube = xr.Dataset({'conc_chl': demo_cube.conc_chl})
        ml_dataset = BaseMultiLevelDataset(single_var_cube)

        # Write the pyramid.
        fake_credentials = {'provider_access_key_id': 'test_fake_id',
                            'provider_secret_access_key': 'test_fake_secret'}
        write_levels(ml_dataset,
                     'https://s3.amazonaws.com/xcube-test/cube-1-250-250.levels',
                     client_kwargs=fake_credentials)
Example #11
Source File: utilities.py    From minian with GNU General Public License v3.0 6 votes vote down vote up
def open_minian(dpath, fname='minian', backend='netcdf', chunks=None, post_process=None):
    """Open a minian dataset stored under *dpath*.

    :param dpath: directory containing the dataset
    :param fname: dataset name; ``'.nc'`` is appended for the netcdf backend,
        while for the zarr backend it names a directory of zarr stores
    :param backend: ``'netcdf'`` or ``'zarr'``
    :param chunks: chunking for the zarr backend; ``'auto'`` expands to
        ``'auto'`` for every dimension. The netcdf backend does not consult
        it and always opens with ``'auto'`` chunks on every dimension.
    :param post_process: optional callable invoked as
        ``post_process(ds, mpath)``; its return value replaces the dataset
    :return: the opened dataset
    :raises NotImplementedError: if *backend* is neither of the two above
    """
    # Backend names are compared by value: ``is`` tests object identity and
    # only matches string literals by an interning accident.
    if backend == 'netcdf':
        fname = fname + '.nc'
        mpath = pjoin(dpath, fname)
        # Open once, unchunked, only to learn the dimension names.
        with xr.open_dataset(mpath) as ds:
            dims = ds.dims
        # NOTE: every dimension is auto-chunked regardless of ``chunks``;
        # this keeps the behaviour callers have always observed.
        chunks = {d: 'auto' for d in dims}
        ds = xr.open_dataset(os.path.join(dpath, fname), chunks=chunks)
        if post_process:
            ds = post_process(ds, mpath)
        return ds
    elif backend == 'zarr':
        mpath = pjoin(dpath, fname)
        # Each sub-directory of mpath is opened as a separate zarr store.
        dslist = [xr.open_zarr(pjoin(mpath, d)) for d in listdir(mpath) if isdir(pjoin(mpath, d))]
        ds = xr.merge(dslist)
        if chunks == 'auto':
            chunks = {d: 'auto' for d in ds.dims}
        if post_process:
            ds = post_process(ds, mpath)
        return ds.chunk(chunks)
    else:
        raise NotImplementedError("backend {} not supported".format(backend))
Example #12
Source File: test_prune.py    From xcube with MIT License 6 votes vote down vote up
def test_no_dry_run(self):
        """Prune for real: only metadata files remain and the cube still opens."""
        result = self.invoke_cli(['prune', self.TEST_CUBE])
        self.assertEqual(0, result.exit_code)
        expected_stdout = ("Opening cube from 'test.zarr'...\n"
                           "Identifying empty blocks...\n"
                           "Deleting 24 empty block file(s) for variable 'precipitation'...\n"
                           "Deleting 24 empty block file(s) for variable 'temperature'...\n"
                           "Done, 48 block file(s) deleted.\n")
        self.assertEqual(expected_stdout, result.stdout)

        expected_file_names = sorted(['.zarray', '.zattrs'])
        for var_name in ('precipitation', 'temperature'):
            self.assertEqual(expected_file_names, sorted(os.listdir(f'test.zarr/{var_name}')))

        ds = xr.open_zarr('test.zarr')
        assert_cube(ds)
        for var_name in ('precipitation', 'temperature'):
            self.assertIn(var_name, ds)
            self.assertEqual((3, 180, 360), ds[var_name].shape)
            self.assertEqual(('time', 'lat', 'lon'), ds[var_name].dims)
Example #13
Source File: test_prune.py    From xcube with MIT License 6 votes vote down vote up
def test_dry_run(self):
        """Prune with --dry-run: all 24 chunk files per variable must still exist."""
        result = self.invoke_cli(['prune', self.TEST_CUBE, "--dry-run"])
        self.assertEqual(0, result.exit_code)
        expected_stdout = ("Opening cube from 'test.zarr'...\n"
                           "Identifying empty blocks...\n"
                           "Deleting 24 empty block file(s) for variable 'precipitation'...\n"
                           "Deleting 24 empty block file(s) for variable 'temperature'...\n"
                           "Done, 48 block file(s) deleted.\n")
        self.assertEqual(expected_stdout, result.stdout)

        # Chunk files are named "<i>.<j>.<k>" over a 3 x 2 x 4 chunk grid.
        chunk_file_names = [f'{i}.{j}.{k}'
                            for i in range(3)
                            for j in range(2)
                            for k in range(4)]
        expected_file_names = sorted(['.zarray', '.zattrs'] + chunk_file_names)
        for var_name in ('precipitation', 'temperature'):
            self.assertEqual(expected_file_names, sorted(os.listdir(f'test.zarr/{var_name}')))

        ds = xr.open_zarr('test.zarr')
        assert_cube(ds)
        for var_name in ('precipitation', 'temperature'):
            self.assertIn(var_name, ds)
            self.assertEqual((3, 180, 360), ds[var_name].shape)
            self.assertEqual(('time', 'lat', 'lon'), ds[var_name].dims)
Example #14
Source File: test_rectify.py    From xcube with MIT License 6 votes vote down vote up
def test_rectify_without_vars(self):
        """Test that rectify selects all variables when --var is not given."""

        # The image geometry is given explicitly through --size, --point and
        # --res; letting ImageGeom derive it currently triggers an
        # "invalid y_min" ValueError. These options can be dropped once
        # Issue #303 has been fixed.
        cli_args = ['rectify',
                    '--size', '508,253',
                    '--point', '-179.5,-89.5',
                    '--res', '0.7071067811865475',
                    TEST_ZARR_DIR]
        result = self.invoke_cli(cli_args)
        self.assertEqual(0, result.exit_code)

        expected_stdout = ("Opening dataset from 'test.zarr'...\n"
                           "Rectifying...\n"
                           "Writing rectified dataset to 'out.zarr'...\n"
                           "Done.\n")
        self.assertEqual(expected_stdout, result.stdout)
        self.assertTrue(os.path.isdir('out.zarr'))

        ds = xr.open_zarr('out.zarr')
        assert_cube(ds)
        for var_name in ('precipitation', 'temperature', 'soil_moisture'):
            self.assertIn(var_name, ds)
Example #15
Source File: io.py    From cate with MIT License 5 votes vote down vote up
def read_zarr(path: str,
              file_system: str = 'Local',
              drop_variables: VarNamesLike.TYPE = None,
              decode_cf: bool = True,
              decode_times: bool = True,
              normalize: bool = True) -> xr.Dataset:
    """
    Read a dataset from a Zarr directory, Zarr ZIP archive, or remote Zarr object storage.

    For the Zarr format, refer to http://zarr.readthedocs.io/en/stable/.

    :param path: Zarr directory path, Zarr ZIP archive path, or object storage path or bucket name.
    :param file_system: File system identifier, "Local" is your locally mounted file system,
           for Amazon S3 use "S3", for general Object Storage use "OBS".
    :param drop_variables: List of variables to be dropped.
    :param decode_cf: Whether to decode CF attributes and coordinate variables.
    :param decode_times: Whether to decode time information (convert time coordinates to ``datetime`` objects).
    :param normalize: Whether to normalize the dataset's geo- and time-coding upon opening. See operation ``normalize``.
    """
    drop_variables = VarNamesLike.convert(drop_variables)

    # Decide what to hand to xarray: the plain path, or an anonymous S3 mapping.
    if file_system == 'Local':
        path_or_store = path
    elif file_system in ('S3', 'OBS'):
        import s3fs
        anonymous_fs = s3fs.S3FileSystem(anon=True)
        path_or_store = s3fs.S3Map(path, s3=anonymous_fs)
    else:
        raise ValidationError(f'Unknown file_system {file_system!r}')

    ds = xr.open_zarr(path_or_store,
                      drop_variables=drop_variables,
                      decode_cf=decode_cf,
                      decode_times=decode_times)

    if not normalize:
        return ds
    return adjust_temporal_attrs(normalize_op(ds))
Example #16
Source File: merge_util.py    From intake-esm with Apache License 2.0 5 votes vote down vote up
def _open_asset(path, data_format, zarr_kwargs, cdf_kwargs, preprocess, varname):
    """Open one asset as an xarray dataset and tag it with its variable name.

    *path* may be a plain path or an ``fsspec`` mapping. For a mapping whose
    protocol is http, https, file or ``None``, the mapping's root is handed
    to xarray in place of the mapping itself. The opened dataset gets the
    ``intake_esm_varname`` attribute and is passed through *preprocess* when
    one is given.
    """
    protocol = None
    root = path
    if isinstance(path, fsspec.mapping.FSMap):
        protocol = path.fs.protocol
        if isinstance(protocol, list):
            protocol = tuple(protocol)

        # The root is what gets logged; for the protocols below it is also
        # what gets opened.
        root = path.root
        if protocol is None or protocol in {'http', 'https', 'file'}:
            path = root

    if data_format == 'zarr':
        logger.debug(f'Opening zarr store: {root} - protocol: {protocol}')
        try:
            ds = xr.open_zarr(path, **zarr_kwargs)
        except Exception as err:
            logger.error(f'Failed to open zarr store with zarr_kwargs={zarr_kwargs}')
            raise err
    else:
        logger.debug(f'Opening netCDF/HDF dataset: {root} - protocol: {protocol}')
        try:
            ds = xr.open_dataset(path, **cdf_kwargs)
        except Exception as err:
            logger.error(f'Failed to open netCDF/HDF dataset with cdf_kwargs={cdf_kwargs}')
            raise err

    ds.attrs['intake_esm_varname'] = varname

    if preprocess is not None:
        logger.debug(f'Applying pre-processing with {preprocess.__name__} function')
        return preprocess(ds)
    return ds
Example #17
Source File: dsio.py    From xcube with MIT License 5 votes vote down vote up
def read(self,
             path: str,
             client_kwargs: Dict[str, Any] = None,
             **kwargs) -> xr.Dataset:
        """
        Open a zarr dataset from *path*.

        A string *path* is resolved through ``get_path_or_obs_store``, which
        also reports whether to use consolidated metadata. A positive
        ``max_cache_size`` keyword wraps the resolved store in a
        ``zarr.LRUStoreCache``. Anything that is not a string is handed to
        xarray unchanged, with ``consolidated=False``.

        :param path: dataset path
        :param client_kwargs: keyword arguments passed to ``get_path_or_obs_store``
        :param kwargs: remaining keyword arguments passed to xr.open_zarr()
        :return: the opened dataset
        """
        if not isinstance(path, str):
            return xr.open_zarr(path, consolidated=False, **kwargs)

        store, consolidated = get_path_or_obs_store(path, client_kwargs, mode='r')
        if 'max_cache_size' in kwargs:
            # Always remove the key so it is not forwarded to xr.open_zarr().
            cache_size = kwargs.pop('max_cache_size')
            if cache_size > 0:
                store = zarr.LRUStoreCache(store, max_size=cache_size)
        return xr.open_zarr(store, consolidated=consolidated, **kwargs)
Example #18
Source File: timeslice.py    From xcube with MIT License 5 votes vote down vote up
def find_time_slice(store: Union[str, MutableMapping],
                    time_stamp: Union[np.datetime64, np.ndarray],
                    time_eps: np.timedelta64 = DEFAULT_TIME_EPS) -> Tuple[int, str]:
    """
    Find time index and update mode for *time_stamp* in ZARR dataset given by *store*.

    The time coordinate is scanned front to back. The first entry closer to
    *time_stamp* than *time_eps* yields 'replace'; the first entry later than
    *time_stamp* yields 'insert'.

    :param store: A zarr store.
    :param time_stamp: Time stamp to find index for.
    :param time_eps: Time epsilon for equality comparison, defaults to 1 millisecond.
    :return: A tuple (time_index, 'insert') or (time_index, 'replace') if an index was found,
        (-1, 'create') or (-1, 'append') otherwise.
    """
    try:
        cube = xr.open_zarr(store)
    except ValueError:
        # ValueError raised if cube store does not exist
        try:
            cube = xr.open_dataset(store)
        except FileNotFoundError:
            return -1, 'create'

    # TODO (forman): optimise following naive search by bi-sectioning or so
    cube_times = cube.time
    for time_index in range(cube_times.size):
        cube_time = cube_times[time_index]
        if abs(time_stamp - cube_time) < time_eps:
            return time_index, 'replace'
        if time_stamp < cube_time:
            return time_index, 'insert'

    return -1, 'append'
Example #19
Source File: compute.py    From xcube with MIT License 5 votes vote down vote up
def _gen_index_var(cube_schema: CubeSchema):
    """Create a lazily computed index variable matching *cube_schema*.

    Each chunk is zero-filled except for its first ``2 * ndim`` flattened
    elements, which receive, per dimension, the chunk's start offset
    followed by its stop offset. The coordinates of *cube_schema* are
    assigned to the returned variable.
    """

    # noinspection PyUnusedLocal
    def get_chunk(cube_store: ChunkStore, name: str, index: Tuple[int, ...]) -> bytes:
        block = np.zeros(cube_store.chunks, dtype=np.uint64)
        flat = block.ravel()
        # Writes through ``flat`` must land in ``block``, so a copy is an error.
        if flat.base is not block:
            raise ValueError('view expected')
        if flat.size < cube_store.ndim * 2:
            raise ValueError('size too small')
        for axis in range(cube_store.ndim):
            start = cube_store.chunks[axis] * index[axis]
            stop = start + cube_store.chunks[axis]
            flat[2 * axis] = start
            flat[2 * axis + 1] = stop
        return block.tobytes()

    store = ChunkStore(cube_schema.dims, cube_schema.shape, cube_schema.chunks)
    store.add_lazy_array('__index_var__', '<u8', get_chunk=get_chunk)

    index_var = xr.open_zarr(store).__index_var__
    return index_var.assign_coords(**cube_schema.coords)
Example #20
Source File: unchunk.py    From xcube with MIT License 5 votes vote down vote up
def unchunk_dataset(dataset_path: str, var_names: Sequence[str] = None, coords_only: bool = False):
    """
    Unchunk dataset variables in-place.

    Without *var_names*, all coordinate variables (if *coords_only*) or all
    variables of the dataset are selected. Given names are validated against
    the dataset before anything is unchunked.

    :param dataset_path: Path to ZARR dataset directory.
    :param var_names: Optional list of variable names.
    :param coords_only: Un-chunk coordinate variables only.
    :raise ValueError: if *dataset_path* has no ``.zgroup`` file, or if a given
        name is not a (coordinate) variable of the dataset.
    """

    if not os.path.isfile(os.path.join(dataset_path, '.zgroup')):
        raise ValueError(f'{dataset_path!r} is not a valid Zarr directory')

    with xr.open_zarr(dataset_path) as dataset:
        if var_names is None:
            var_names = list(dataset.coords if coords_only else dataset.variables)
        elif coords_only:
            for var_name in var_names:
                if var_name not in dataset.coords:
                    raise ValueError(f'variable {var_name!r} is not a coordinate variable in {dataset_path!r}')
        else:
            for var_name in var_names:
                if var_name not in dataset.variables:
                    raise ValueError(f'variable {var_name!r} is not a variable in {dataset_path!r}')

    _unchunk_vars(dataset_path, var_names)
Example #21
Source File: dataset.py    From xcube with MIT License 5 votes vote down vote up
def open_data(self, data_id: str, **open_params) -> xr.Dataset:
        """Open *data_id* with xr.open_zarr(), forwarding *open_params* unchanged."""
        dataset = xr.open_zarr(data_id, **open_params)
        return dataset
Example #22
Source File: dataset.py    From xcube with MIT License 5 votes vote down vote up
def open_data(self, data_id: str, **open_params) -> xr.Dataset:
        """
        Open the zarr dataset *data_id* from S3.

        The file system stored on the instance is used when there is one;
        otherwise one is obtained from *open_params*. A bucket name taken from
        *open_params* is prefixed to *data_id* when it is non-empty.

        :raise DataStoreError: if creating the mapping or opening it raises ValueError
        """
        s3_fs = self._s3_fs
        if s3_fs is None:
            s3_fs, open_params = self.consume_s3fs_params(open_params)
        bucket_name, open_params = self.consume_bucket_name_param(open_params)
        try:
            root = f'{bucket_name}/{data_id}' if bucket_name else data_id
            store = s3fs.S3Map(root=root, s3=s3_fs, check=False)
            return xr.open_zarr(store, **open_params)
        except ValueError as e:
            raise DataStoreError(f'{e}') from e

    # noinspection PyMethodMayBeStatic 
Example #23
Source File: test_stores.py    From xarray-simlab with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_write_input_xr_dataset(self, in_ds, store):
        """The written input dataset equals ``in_ds`` minus its output variable."""
        store.write_input_xr_dataset()
        written = xr.open_zarr(store.zgroup.store, chunks=None)

        # output variables are not part of the written input dataset
        del in_ds["add__offset"]

        xr.testing.assert_equal(written, in_ds)

        # output variable attrs must have been stripped before saving
        output_vars = written.xsimlab.output_vars
        assert not output_vars
Example #24
Source File: test_drivers.py    From xarray-simlab with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_run_model_get_results(self, in_dataset, out_dataset, xarray_driver):
        """Running the model yields a new dataset identical to ``out_dataset``."""
        xarray_driver.run_model()
        actual = xarray_driver.get_results()

        # skip attributes added by xr.open_zarr from check
        for variable in actual.variables.values():
            variable.attrs.pop("_FillValue", None)

        assert actual is not out_dataset
        loaded = actual.load()
        xr.testing.assert_identical(loaded, out_dataset)
Example #25
Source File: process.py    From echopype with Apache License 2.0 5 votes vote down vote up
def Process(nc_path):
    """
    Create the processing object that matches the echosounder of a data file.

    The file is opened once to read its ``keywords`` attribute, which selects
    the class that is instantiated.

    Parameters
    ----------
    nc_path : str
        The path to a .nc or .zarr file generated by `echopype`

    Returns
    -------
        A ``ProcessEK60``, ``ProcessEK80`` or ``ProcessAZFP`` object,
        depending on the type of echosounder the file was produced with

    Raises
    ------
    ValueError
        If the file name ends in neither ``.nc`` nor ``.zarr``, if the opened
        file has no ``keywords`` attribute, or if its value is not one of
        "EK60", "EK80" or "AZFP".
    """

    fname = os.path.basename(nc_path)
    ext = os.path.splitext(fname)[1]

    # Reject unknown formats before touching any opener.
    if not fname.endswith(('.nc', '.zarr')):
        raise ValueError(f"{ext} is not a valid file format.")
    opener = xr.open_dataset if fname.endswith('.nc') else xr.open_zarr

    # Open the file in order to determine what echosounder produced the original dataset
    with opener(nc_path) as nc_file:
        try:
            echo_type = nc_file.keywords
        except AttributeError:
            raise ValueError("This file is incompatible with echopype functions.")

    # Return the specific Process object
    if echo_type == "EK60":
        return ProcessEK60(nc_path)
    if echo_type == "EK80":
        return ProcessEK80(nc_path)
    if echo_type == "AZFP":
        return ProcessAZFP(nc_path)
    raise ValueError("Unsupported file type")
Example #26
Source File: preprocessing.py    From DLWP with MIT License 5 votes vote down vote up
def open(self, **kwargs):
        """
        Load the file named by the instance's _predictor_file attribute into self.data.

        A file name ending in '.zarr' is opened with xarray.open_zarr(); any
        other name is opened with xarray.open_dataset().

        :param kwargs: passed to xarray.open_dataset() or xarray.open_zarr()
        """
        is_zarr = self._predictor_file.endswith('.zarr')
        opener = xr.open_zarr if is_zarr else xr.open_dataset
        self.data = opener(self._predictor_file, **kwargs)
Example #27
Source File: xarray_container.py    From intake-xarray with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def serialize_zarr_ds(ds):
    """Gather group/metadata information from a Zarr into dictionary repr

    A version of the dataset can be recreated, but will not be able to directly
    load data without further manipulation.

    Use as follows
    >>> out = serialize_zarr_ds(s._ds)

    and reconstitute with
    >>> d2 = xr.open_zarr(out, decode_times=False)

    (decode_times is required here because the times will be random binary
    data and not be decodable)

    Parameters
    ----------
    ds: xarray dataset

    Returns
    -------
    dictionary with .z* keys for the various elements of the original dataset.
    """
    import dask
    # Target that ``to_zarr`` writes into; it is also the return value.
    s = ZarrSerialiser()
    try:
        # Keep a copy so the caller's attrs can be restored in ``finally``.
        attrs = ds.attrs.copy()
        ds.attrs.pop('_ARRAY_DIMENSIONS', None)  # zarr implementation detail
        # compute=False defers the write; the task graph is reachable as ``x.dask``.
        x = ds.to_zarr(s, compute=False)
        # Replace the graph with a plain dict so its entries can be reassigned.
        x.dask = dict(x.dask)
        for k, v in x.dask.items():
            # replace the data writing funcs with no-op, so as not to waste
            # time on serialization, when all we want is metadata
            if isinstance(k, tuple) and k[0].startswith('store-'):
                x.dask[k] = (noop, ) + x.dask[k][1:]
        dask.compute(x, scheduler='threads')
    finally:
        # Undo the attrs mutation above, whether or not the write succeeded.
        ds.attrs = attrs
    return s
Example #28
Source File: test_convert_ek60.py    From echopype with Apache License 2.0 5 votes vote down vote up
def test_convert_zarr():
    """Convert the raw file to zarr and compare its Beam group with the reference file."""
    converter = Convert(raw_path)
    converter.raw2zarr()
    ds_beam = xr.open_zarr(converter.zarr_path, group='Beam')
    with xr.open_dataset(test_path) as ds_test:
        powers_match = np.allclose(ds_test.power, ds_beam.backscatter_r)
        assert powers_match

    # Delete the non-empty output folder
    shutil.rmtree(converter.zarr_path, ignore_errors=True)
Example #29
Source File: test_gen.py    From xcube with MIT License 5 votes vote down vote up
def test_handle_360_lon(self):
        """No longitude of the generated cube may exceed 180 degrees."""
        input_path = get_inputdata_path(
            '20170101120000-UKMO-L4_GHRSST-SSTfnd-OSTIAanom-GLOB-v02.0-fv02.0.nc')
        status, output = gen_cube_wrapper([input_path], 'l2c-single.zarr', no_sort_mode=False)
        self.assertEqual(True, status)

        ds = xr.open_zarr('l2c-single.zarr')
        self.assertIn('lon', ds.coords)
        exceeds_180 = np.any(ds.coords['lon'] > 180.)
        self.assertFalse(exceeds_180)
Example #30
Source File: xzarr.py    From intake-xarray with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def _open_dataset(self):
        """Create a mapper for ``self.urlpath`` and open it as a zarr dataset.

        The mapper is kept on ``self._mapper`` and the dataset on ``self._ds``.
        """
        from fsspec import get_mapper
        import xarray as xr

        mapper = get_mapper(self.urlpath, **self.storage_options)
        self._mapper = mapper
        self._ds = xr.open_zarr(mapper, **self.kwargs)