Python xarray.open_zarr() Examples
The following are 30 code examples of xarray.open_zarr(). Each example is taken from an open-source project; you can go to the original project or source file by following the links above each example.
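Before the project-specific examples, here is a minimal sketch of the call itself; the store path is hypothetical:

import xarray as xr

# Open a Zarr store lazily as an xarray.Dataset; data variables are read
# on demand as dask arrays rather than loaded into memory up front.
ds = xr.open_zarr('sst-cube.zarr')

# If consolidated metadata (.zmetadata) was written alongside the store,
# opening it requires far fewer reads on remote object storage.
ds = xr.open_zarr('sst-cube.zarr', consolidated=True)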
Example #1
Source File: test_chunkstore.py From xcube with MIT License
def gen_index_var(dims, shape, chunks):
    # noinspection PyUnusedLocal
    def get_chunk(cube_store: ChunkStore, name: str, index: Tuple[int, ...]) -> bytes:
        data = np.zeros(cube_store.chunks, dtype=np.uint64)
        data_view = data.ravel()
        if data_view.base is not data:
            raise ValueError('view expected')
        if data_view.size < cube_store.ndim * 2:
            raise ValueError('size too small')
        for i in range(cube_store.ndim):
            j1 = cube_store.chunks[i] * index[i]
            j2 = j1 + cube_store.chunks[i]
            data_view[2 * i] = j1
            data_view[2 * i + 1] = j2
        return data.tobytes()

    store = ChunkStore(dims, shape, chunks)
    store.add_lazy_array('__index_var__', '<u8', get_chunk=get_chunk)

    ds = xr.open_zarr(store)
    return ds.__index_var__
Example #2
Source File: stores.py From xarray-simlab with BSD 3-Clause "New" or "Revised" License
def open_as_xr_dataset(self) -> xr.Dataset:
    if self.in_memory:
        chunks = None
    else:
        chunks = "auto"

    ds = xr.open_zarr(
        self.zgroup.store,
        group=self.zgroup.path,
        chunks=chunks,
        consolidated=self.consolidated,
        # disable mask (not nice with zarr default fill_value=0)
        mask_and_scale=False,
    )

    if self.in_memory:
        # lazy loading may be confusing for the default, in-memory option
        ds.load()
    else:
        # load scalar data vars (there might be many of them: model params)
        for da in ds.data_vars.values():
            if not da.dims:
                da.load()

    return ds
Example #3
Source File: test_timeslice.py From xcube with MIT License
def test_update_corrupt_cube(self):
    self.write_cube('2019-01-01', 3)

    cube = xr.open_zarr(self.CUBE_PATH)
    t, y, x = cube.precipitation.shape
    new_shape = y, t, x
    t, y, x = cube.precipitation.dims
    new_dims = y, t, x
    cube['precipitation'] = xr.DataArray(cube.precipitation.values.reshape(new_shape),
                                         dims=new_dims,
                                         coords=cube.precipitation.coords)
    cube.to_zarr(self.CUBE_PATH_2)

    with self.assertRaises(ValueError) as cm:
        insert_time_slice(self.CUBE_PATH_2, 2, self.make_slice('2019-01-02T06:30'))
    self.assertEqual("dimension 'time' of variable 'precipitation' must be first dimension",
                     f"{cm.exception}")
Example #4
Source File: test_timeslice.py From xcube with MIT License
def test_insert_time_slice(self):
    self.write_cube('2019-01-02', 10)

    insert_time_slice(self.CUBE_PATH, 5, self.make_slice('2019-01-06T02:00'))
    insert_time_slice(self.CUBE_PATH, 10, self.make_slice('2019-01-10T02:00'))
    insert_time_slice(self.CUBE_PATH, 0, self.make_slice('2019-01-01T02:00'))

    cube = xr.open_zarr(self.CUBE_PATH)
    expected = np.array(['2019-01-01T14:00', '2019-01-02T12:00', '2019-01-03T12:00',
                         '2019-01-04T12:00', '2019-01-05T12:00', '2019-01-06T12:00',
                         '2019-01-06T14:00', '2019-01-07T12:00', '2019-01-08T12:00',
                         '2019-01-09T12:00', '2019-01-10T12:00', '2019-01-10T14:00',
                         '2019-01-11T12:00'], dtype=cube.time.dtype)
    self.assertEqual(13, cube.time.size)
    self.assertEqual(None, cube.time.chunks)
    np.testing.assert_equal(cube.time.values, expected)
Example #5
Source File: mldataset.py From xcube with MIT License
def _get_dataset_lazily(self, index: int, parameters: Dict[str, Any]) -> xr.Dataset:
    """
    Read the dataset for the level at given *index*.

    :param index: the level index
    :param parameters: keyword arguments passed to xr.open_zarr()
    :return: the dataset for the level at *index*.
    """
    ext, level_path = self._level_paths[index]
    if ext == ".link":
        with open(level_path, "r") as fp:
            level_path = fp.read()
            # if file_path is a relative path, resolve it against the levels directory
            if not os.path.isabs(level_path):
                base_dir = os.path.dirname(self._dir_path)
                level_path = os.path.join(base_dir, level_path)
    with measure_time(tag=f"opened local dataset {level_path} for level {index}"):
        return assert_cube(xr.open_zarr(level_path, **parameters), name=level_path)
Example #6
Source File: mldataset.py From xcube with MIT License
def _get_dataset_lazily(self, index: int, parameters: Dict[str, Any]) -> xr.Dataset:
    """
    Read the dataset for the level at given *index*.

    :param index: the level index
    :param parameters: keyword arguments passed to xr.open_zarr()
    :return: the dataset for the level at *index*.
    """
    ext, level_path = self._level_paths[index]
    if ext == ".link":
        # open the link file for reading to obtain the actual level path
        with self._s3_file_system.open(level_path, "r") as fp:
            level_path = fp.read()
            # if file_path is a relative path, resolve it against the levels directory
            if not os.path.isabs(level_path):
                base_dir = os.path.dirname(self._dir_path)
                level_path = os.path.join(base_dir, level_path)

    store = s3fs.S3Map(root=level_path, s3=self._s3_file_system, check=False)
    max_size = self.get_chunk_cache_capacity(index)
    if max_size:
        store = zarr.LRUStoreCache(store, max_size=max_size)

    with measure_time(tag=f"opened remote dataset {level_path} for level {index}"):
        consolidated = self._s3_file_system.exists(f'{level_path}/.zmetadata')
        return assert_cube(xr.open_zarr(store, consolidated=consolidated, **parameters),
                           name=level_path)
Example #7
Source File: test_gen.py From xcube with MIT License
def test_process_inputs_replace_multiple_zarr(self):
    status, output = gen_cube_wrapper(
        [get_inputdata_path('20170101-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'),
         get_inputdata_path('20170102-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'),
         get_inputdata_path('20170103-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc'),
         get_inputdata_path('20170102-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc')],
        'l2c.zarr', no_sort_mode=True)
    self.assertEqual(True, status)
    self.assertTrue('\nstep 9 of 9: creating input slice in l2c.zarr...\n' in output)
    self.assertTrue('\nstep 9 of 9: appending input slice to l2c.zarr...\n' in output)
    self.assertTrue('\nstep 9 of 9: replacing input slice at index 1 in l2c.zarr...\n' in output)
    self.assert_cube_ok(xr.open_zarr('l2c.zarr'),
                        expected_time_dim=3,
                        expected_extra_attrs=dict(date_modified=None,
                                                  time_coverage_start='2016-12-31T12:00:00.000000000',
                                                  time_coverage_end='2017-01-03T12:00:00.000000000'))
    self.assertTrue(os.path.exists(os.path.join('l2c.zarr', '.zmetadata')))
Example #8
Source File: mldataset.py From xcube with MIT License
def open_ml_dataset_from_local_fs(path: str,
                                  data_format: str = None,
                                  ds_id: str = None,
                                  exception_type: type = ValueError,
                                  **kwargs) -> MultiLevelDataset:
    data_format = data_format or guess_ml_dataset_format(path)
    if data_format == FORMAT_NAME_NETCDF4:
        with measure_time(tag=f"opened local NetCDF dataset {path}"):
            ds = assert_cube(xr.open_dataset(path, **kwargs))
            return BaseMultiLevelDataset(ds, ds_id=ds_id)
    elif data_format == FORMAT_NAME_ZARR:
        with measure_time(tag=f"opened local zarr dataset {path}"):
            ds = assert_cube(xr.open_zarr(path, **kwargs))
            return BaseMultiLevelDataset(ds, ds_id=ds_id)
    elif data_format == FORMAT_NAME_LEVELS:
        with measure_time(tag=f"opened local levels dataset {path}"):
            return FileStorageMultiLevelDataset(path, ds_id=ds_id, zarr_kwargs=kwargs)
    raise exception_type(f'Unrecognized multi-level dataset format {data_format!r} for path {path!r}')
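As a rough usage sketch for the function above: the path below is hypothetical, and get_dataset() is assumed to be the level accessor of xcube's MultiLevelDataset interface.

# open a local multi-resolution pyramid from a .levels directory (hypothetical path)
ml_ds = open_ml_dataset_from_local_fs('demo/cube-1-250-250.levels')
base_ds = ml_ds.get_dataset(0)  # assumed accessor for the finest pyramid level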
Example #9
Source File: xarray_container.py From intake-xarray with BSD 2-Clause "Simplified" License
def __init__(self, url, headers, **kwargs):
    """
    Initialise local xarray, whose dask arrays contain tasks that pull data.

    The metadata contains a key "internal", which is a result of running
    ``serialize_zarr_ds`` on the xarray on the server. It is a dict
    containing the metadata parts of the original dataset (i.e., the keys
    with names like ".z*"). This can be opened by xarray as-is, and will
    make a local xarray object. In ``._get_schema()``, the numpy parts
    (coordinates) are fetched and the dask-array parts (variables) have
    their dask graphs redefined to tasks that fetch data from the server.
    """
    import xarray as xr
    super(RemoteXarray, self).__init__(url, headers, **kwargs)
    self._schema = None
    self._ds = xr.open_zarr(self.metadata['internal'])
Example #10
Source File: test_mldataset.py From xcube with MIT License
def _write_test_cube_pyramid(cls):
    # Create bucket 'xcube-test', so it exists before we write a test pyramid
    s3_conn = boto3.client('s3')
    s3_conn.create_bucket(Bucket='xcube-test', ACL='public-read')

    # Create a test cube pyramid with just one variable "conc_chl"
    zarr_path = os.path.join(os.path.dirname(__file__),
                             '../../examples/serve/demo/cube-1-250-250.zarr')
    base_dataset = xr.open_zarr(zarr_path)
    base_dataset = xr.Dataset(dict(conc_chl=base_dataset.conc_chl))
    ml_dataset = BaseMultiLevelDataset(base_dataset)

    # Write test cube pyramid
    write_levels(ml_dataset,
                 'https://s3.amazonaws.com/xcube-test/cube-1-250-250.levels',
                 client_kwargs=dict(provider_access_key_id='test_fake_id',
                                    provider_secret_access_key='test_fake_secret'))
Example #11
Source File: utilities.py From minian with GNU General Public License v3.0
def open_minian(dpath, fname='minian', backend='netcdf', chunks=None, post_process=None):
    if backend == 'netcdf':
        fname = fname + '.nc'
        mpath = pjoin(dpath, fname)
        # peek at the on-disk dims first, so 'auto' chunking can be resolved
        with xr.open_dataset(mpath) as ds:
            dims = ds.dims
        if chunks == 'auto':
            chunks = dict([(d, 'auto') for d in dims])
        ds = xr.open_dataset(os.path.join(dpath, fname), chunks=chunks)
        if post_process:
            ds = post_process(ds, mpath)
        return ds
    elif backend == 'zarr':
        mpath = pjoin(dpath, fname)
        dslist = [xr.open_zarr(pjoin(mpath, d))
                  for d in listdir(mpath) if isdir(pjoin(mpath, d))]
        ds = xr.merge(dslist)
        if chunks == 'auto':
            chunks = dict([(d, 'auto') for d in ds.dims])
        if post_process:
            ds = post_process(ds, mpath)
        return ds.chunk(chunks)
    else:
        raise NotImplementedError("backend {} not supported".format(backend))
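A hypothetical call for the zarr branch above, assuming minian results were written as per-variable zarr groups under /data/session1/minian:

# merge all per-variable zarr groups into one dataset, with automatic chunking
ds = open_minian('/data/session1', backend='zarr', chunks='auto')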
Example #12
Source File: test_prune.py From xcube with MIT License
def test_no_dry_run(self):
    result = self.invoke_cli(['prune', self.TEST_CUBE])
    self.assertEqual(0, result.exit_code)
    self.assertEqual("Opening cube from 'test.zarr'...\n"
                     "Identifying empty blocks...\n"
                     "Deleting 24 empty block file(s) for variable 'precipitation'...\n"
                     "Deleting 24 empty block file(s) for variable 'temperature'...\n"
                     "Done, 48 block file(s) deleted.\n",
                     result.stdout)

    expected_file_names = sorted(['.zarray', '.zattrs'])
    self.assertEqual(expected_file_names, sorted(os.listdir('test.zarr/precipitation')))
    self.assertEqual(expected_file_names, sorted(os.listdir('test.zarr/temperature')))

    ds = xr.open_zarr('test.zarr')
    assert_cube(ds)
    self.assertIn('precipitation', ds)
    self.assertEqual((3, 180, 360), ds.precipitation.shape)
    self.assertEqual(('time', 'lat', 'lon'), ds.precipitation.dims)
    self.assertIn('temperature', ds)
    self.assertEqual((3, 180, 360), ds.temperature.shape)
    self.assertEqual(('time', 'lat', 'lon'), ds.temperature.dims)
Example #13
Source File: test_prune.py From xcube with MIT License
def test_dry_run(self):
    result = self.invoke_cli(['prune', self.TEST_CUBE, "--dry-run"])
    self.assertEqual(0, result.exit_code)
    self.assertEqual("Opening cube from 'test.zarr'...\n"
                     "Identifying empty blocks...\n"
                     "Deleting 24 empty block file(s) for variable 'precipitation'...\n"
                     "Deleting 24 empty block file(s) for variable 'temperature'...\n"
                     "Done, 48 block file(s) deleted.\n",
                     result.stdout)

    expected_file_names = sorted(['.zarray', '.zattrs',
                                  '0.0.0', '0.0.1', '0.0.2', '0.0.3',
                                  '0.1.0', '0.1.1', '0.1.2', '0.1.3',
                                  '1.0.0', '1.0.1', '1.0.2', '1.0.3',
                                  '1.1.0', '1.1.1', '1.1.2', '1.1.3',
                                  '2.0.0', '2.0.1', '2.0.2', '2.0.3',
                                  '2.1.0', '2.1.1', '2.1.2', '2.1.3'])
    self.assertEqual(expected_file_names, sorted(os.listdir('test.zarr/precipitation')))
    self.assertEqual(expected_file_names, sorted(os.listdir('test.zarr/temperature')))

    ds = xr.open_zarr('test.zarr')
    assert_cube(ds)
    self.assertIn('precipitation', ds)
    self.assertEqual((3, 180, 360), ds.precipitation.shape)
    self.assertEqual(('time', 'lat', 'lon'), ds.precipitation.dims)
    self.assertIn('temperature', ds)
    self.assertEqual((3, 180, 360), ds.temperature.shape)
    self.assertEqual(('time', 'lat', 'lon'), ds.temperature.dims)
Example #14
Source File: test_rectify.py From xcube with MIT License
def test_rectify_without_vars(self):
    """Test that rectify selects all variables when --var is not given."""
    # For now, specify the image geometry explicitly with --size, --point,
    # and --res to avoid triggering an "invalid y_min" ValueError when
    # ImageGeom tries to determine it automatically. Once Issue #303 has
    # been fixed, these options can be omitted.
    result = self.invoke_cli(['rectify',
                              '--size', '508,253',
                              '--point', '-179.5,-89.5',
                              '--res', '0.7071067811865475',
                              TEST_ZARR_DIR])
    self.assertEqual(0, result.exit_code)
    self.assertEqual('Opening dataset from \'test.zarr\'...\n'
                     'Rectifying...\n'
                     'Writing rectified dataset to \'out.zarr\'...\n'
                     'Done.\n',
                     result.stdout)
    self.assertTrue(os.path.isdir('out.zarr'))

    ds = xr.open_zarr('out.zarr')
    assert_cube(ds)
    self.assertIn('precipitation', ds)
    self.assertIn('temperature', ds)
    self.assertIn('soil_moisture', ds)
Example #15
Source File: io.py From cate with MIT License
def read_zarr(path: str,
              file_system: str = 'Local',
              drop_variables: VarNamesLike.TYPE = None,
              decode_cf: bool = True,
              decode_times: bool = True,
              normalize: bool = True) -> xr.Dataset:
    """
    Read a dataset from a Zarr directory, Zarr ZIP archive, or remote Zarr object storage.
    For the Zarr format, refer to http://zarr.readthedocs.io/en/stable/.

    :param path: Zarr directory path, Zarr ZIP archive path, or object storage path or bucket name.
    :param file_system: File system identifier, "Local" is your locally mounted file system,
           for Amazon S3 use "S3", for general Object Storage use "OBS".
    :param drop_variables: List of variables to be dropped.
    :param decode_cf: Whether to decode CF attributes and coordinate variables.
    :param decode_times: Whether to decode time information
           (convert time coordinates to ``datetime`` objects).
    :param normalize: Whether to normalize the dataset's geo- and time-coding upon opening.
           See operation ``normalize``.
    """
    drop_variables = VarNamesLike.convert(drop_variables)
    if file_system == 'Local':
        ds = xr.open_zarr(path,
                          drop_variables=drop_variables,
                          decode_cf=decode_cf,
                          decode_times=decode_times)
    elif file_system == 'S3' or file_system == 'OBS':
        import s3fs
        store = s3fs.S3Map(path, s3=s3fs.S3FileSystem(anon=True))
        ds = xr.open_zarr(store,
                          drop_variables=drop_variables,
                          decode_cf=decode_cf,
                          decode_times=decode_times)
    else:
        raise ValidationError(f'Unknown file_system {file_system!r}')
    if normalize:
        return adjust_temporal_attrs(normalize_op(ds))
    return ds
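Two hypothetical calls illustrating the file_system switch above; the bucket and paths are made up:

# local Zarr directory
ds_local = read_zarr('/data/cubes/sst.zarr')

# anonymous read from S3 object storage, skipping cate's normalization step
ds_remote = read_zarr('my-bucket/sst.zarr', file_system='S3', normalize=False)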
Example #16
Source File: merge_util.py From intake-esm with Apache License 2.0
def _open_asset(path, data_format, zarr_kwargs, cdf_kwargs, preprocess, varname):
    protocol = None
    root = path
    if isinstance(path, fsspec.mapping.FSMap):
        protocol = path.fs.protocol
        if isinstance(protocol, list):
            protocol = tuple(protocol)

        if protocol in {'http', 'https', 'file'} or protocol is None:
            path = path.root
            root = path
        else:
            root = path.root

    if data_format == 'zarr':
        logger.debug(f'Opening zarr store: {root} - protocol: {protocol}')
        try:
            ds = xr.open_zarr(path, **zarr_kwargs)
        except Exception as e:
            logger.error(f'Failed to open zarr store with zarr_kwargs={zarr_kwargs}')
            raise e
    else:
        logger.debug(f'Opening netCDF/HDF dataset: {root} - protocol: {protocol}')
        try:
            ds = xr.open_dataset(path, **cdf_kwargs)
        except Exception as e:
            logger.error(f'Failed to open netCDF/HDF dataset with cdf_kwargs={cdf_kwargs}')
            raise e

    ds.attrs['intake_esm_varname'] = varname

    if preprocess is None:
        return ds
    logger.debug(f'Applying pre-processing with {preprocess.__name__} function')
    return preprocess(ds)
Example #17
Source File: dsio.py From xcube with MIT License
def read(self, path: str, client_kwargs: Dict[str, Any] = None, **kwargs) -> xr.Dataset:
    path_or_store = path
    consolidated = False
    if isinstance(path, str):
        path_or_store, consolidated = get_path_or_obs_store(path_or_store,
                                                            client_kwargs,
                                                            mode='r')
        if 'max_cache_size' in kwargs:
            max_cache_size = kwargs.pop('max_cache_size')
            if max_cache_size > 0:
                path_or_store = zarr.LRUStoreCache(path_or_store, max_size=max_cache_size)
    return xr.open_zarr(path_or_store, consolidated=consolidated, **kwargs)
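Assuming the surrounding class is xcube's ZarrDatasetIO (not shown in this snippet) and the bucket path is hypothetical, a call enabling a 1 GiB in-memory chunk cache might look like this:

# cache up to 1 GiB of decoded chunks via zarr.LRUStoreCache (assumed class name)
ds = ZarrDatasetIO().read('my-bucket/cube.zarr', max_cache_size=2 ** 30)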
Example #18
Source File: timeslice.py From xcube with MIT License
def find_time_slice(store: Union[str, MutableMapping],
                    time_stamp: Union[np.datetime64, np.ndarray],
                    time_eps: np.timedelta64 = DEFAULT_TIME_EPS) -> Tuple[int, str]:
    """
    Find time index and update mode for *time_stamp* in ZARR dataset given by *store*.

    :param store: A zarr store.
    :param time_stamp: Time stamp to find index for.
    :param time_eps: Time epsilon for equality comparison, defaults to 1 millisecond.
    :return: A tuple (time_index, 'insert') or (time_index, 'replace') if an index was found,
             (-1, 'create') or (-1, 'append') otherwise.
    """
    try:
        cube = xr.open_zarr(store)
    except ValueError:
        # ValueError raised if cube store does not exist
        try:
            cube = xr.open_dataset(store)
        except FileNotFoundError:
            return -1, 'create'

    # TODO (forman): optimise following naive search by bi-sectioning or so
    for i in range(cube.time.size):
        time = cube.time[i]
        if abs(time_stamp - time) < time_eps:
            return i, 'replace'
        if time_stamp < time:
            return i, 'insert'

    return -1, 'append'
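A minimal sketch of how the result might drive an update, reusing insert_time_slice from the test examples above; the store path and slice dataset are hypothetical:

index, mode = find_time_slice('cube.zarr', np.datetime64('2019-01-02T12:00'))
if mode == 'insert':
    insert_time_slice('cube.zarr', index, time_slice_ds)  # time_slice_ds: an xr.Dataset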
Example #19
Source File: compute.py From xcube with MIT License
def _gen_index_var(cube_schema: CubeSchema):
    dims = cube_schema.dims
    shape = cube_schema.shape
    chunks = cube_schema.chunks

    # noinspection PyUnusedLocal
    def get_chunk(cube_store: ChunkStore, name: str, index: Tuple[int, ...]) -> bytes:
        data = np.zeros(cube_store.chunks, dtype=np.uint64)
        data_view = data.ravel()
        if data_view.base is not data:
            raise ValueError('view expected')
        if data_view.size < cube_store.ndim * 2:
            raise ValueError('size too small')
        for i in range(cube_store.ndim):
            j1 = cube_store.chunks[i] * index[i]
            j2 = j1 + cube_store.chunks[i]
            data_view[2 * i] = j1
            data_view[2 * i + 1] = j2
        return data.tobytes()

    store = ChunkStore(dims, shape, chunks)
    store.add_lazy_array('__index_var__', '<u8', get_chunk=get_chunk)

    dataset = xr.open_zarr(store)
    index_var = dataset.__index_var__
    index_var = index_var.assign_coords(**cube_schema.coords)
    return index_var
Example #20
Source File: unchunk.py From xcube with MIT License
def unchunk_dataset(dataset_path: str, var_names: Sequence[str] = None, coords_only: bool = False):
    """
    Unchunk dataset variables in-place.

    :param dataset_path: Path to ZARR dataset directory.
    :param var_names: Optional list of variable names.
    :param coords_only: Un-chunk coordinate variables only.
    """
    is_zarr = os.path.isfile(os.path.join(dataset_path, '.zgroup'))
    if not is_zarr:
        raise ValueError(f'{dataset_path!r} is not a valid Zarr directory')

    with xr.open_zarr(dataset_path) as dataset:
        if var_names is None:
            if coords_only:
                var_names = list(dataset.coords)
            else:
                var_names = list(dataset.variables)
        else:
            for var_name in var_names:
                if coords_only:
                    if var_name not in dataset.coords:
                        raise ValueError(f'variable {var_name!r} '
                                         f'is not a coordinate variable in {dataset_path!r}')
                else:
                    if var_name not in dataset.variables:
                        raise ValueError(f'variable {var_name!r} '
                                         f'is not a variable in {dataset_path!r}')

    _unchunk_vars(dataset_path, var_names)
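A hypothetical call: after writing a cube, coordinate variables can be rewritten as single chunks so readers need only one fetch per coordinate.

# rewrite each coordinate variable of the (hypothetical) cube as a single chunk
unchunk_dataset('cube.zarr', coords_only=True)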
Example #21
Source File: dataset.py From xcube with MIT License
def open_data(self, data_id: str, **open_params) -> xr.Dataset:
    return xr.open_zarr(data_id, **open_params)
Example #22
Source File: dataset.py From xcube with MIT License
def open_data(self, data_id: str, **open_params) -> xr.Dataset:
    s3_fs = self._s3_fs
    if s3_fs is None:
        s3_fs, open_params = self.consume_s3fs_params(open_params)
    bucket_name, open_params = self.consume_bucket_name_param(open_params)
    try:
        return xr.open_zarr(s3fs.S3Map(root=f'{bucket_name}/{data_id}' if bucket_name else data_id,
                                       s3=s3_fs,
                                       check=False),
                            **open_params)
    except ValueError as e:
        raise DataStoreError(f'{e}') from e

# noinspection PyMethodMayBeStatic
Example #23
Source File: test_stores.py From xarray-simlab with BSD 3-Clause "New" or "Revised" License
def test_write_input_xr_dataset(self, in_ds, store):
    store.write_input_xr_dataset()
    ds = xr.open_zarr(store.zgroup.store, chunks=None)

    # output variables removed
    del in_ds["add__offset"]

    xr.testing.assert_equal(ds, in_ds)

    # check output variables attrs removed before saving input dataset
    assert not ds.xsimlab.output_vars
Example #24
Source File: test_drivers.py From xarray-simlab with BSD 3-Clause "New" or "Revised" License
def test_run_model_get_results(self, in_dataset, out_dataset, xarray_driver):
    xarray_driver.run_model()
    out_ds_actual = xarray_driver.get_results()

    # skip attributes added by xr.open_zarr from check
    for xr_var in out_ds_actual.variables.values():
        xr_var.attrs.pop("_FillValue", None)

    assert out_ds_actual is not out_dataset
    xr.testing.assert_identical(out_ds_actual.load(), out_dataset)
Example #25
Source File: process.py From echopype with Apache License 2.0
def Process(nc_path):
    """
    Provides data analysis and computation tools for sonar data in netCDF form.

    Parameters
    ----------
    nc_path : str
        The path to a .nc or .zarr file generated by `echopype`

    Returns
    -------
    Returns a specialized Process object depending on the type of echosounder
    the .nc file was produced with
    """
    fname = os.path.basename(nc_path)
    _, ext = os.path.splitext(fname)
    if fname.endswith('.nc'):
        open_dataset = xr.open_dataset
    elif fname.endswith('.zarr'):
        open_dataset = xr.open_zarr
    else:
        raise ValueError(f"{ext} is not a valid file format.")

    # Open nc file in order to determine what echosounder produced the original dataset
    with open_dataset(nc_path) as nc_file:
        try:
            echo_type = nc_file.keywords
        except AttributeError:
            raise ValueError("This file is incompatible with echopype functions.")

    # Returns specific Process object
    if echo_type == "EK60":
        return ProcessEK60(nc_path)
    elif echo_type == "EK80":
        return ProcessEK80(nc_path)
    elif echo_type == "AZFP":
        return ProcessAZFP(nc_path)
    else:
        raise ValueError("Unsupported file type")
Example #26
Source File: preprocessing.py From DLWP with MIT License
def open(self, **kwargs):
    """
    Open the dataset pointed to by the instance's _predictor_file attribute onto self.data

    :param kwargs: passed to xarray.open_dataset() or xarray.open_zarr()
    """
    if self._predictor_file.endswith('.zarr'):
        self.data = xr.open_zarr(self._predictor_file, **kwargs)
    else:
        self.data = xr.open_dataset(self._predictor_file, **kwargs)
Example #27
Source File: xarray_container.py From intake-xarray with BSD 2-Clause "Simplified" License
def serialize_zarr_ds(ds):
    """Gather group/metadata information from a Zarr into dictionary repr

    A version of the dataset can be recreated, but will not be able to
    directly load data without further manipulation.

    Use as follows

    >>> out = serialize_zarr(s._ds)

    and reconstitute with

    >>> d2 = xr.open_zarr(out, decode_times=False)

    (decode_times is required here because the times will be random binary
    data and not be decodable)

    Parameters
    ----------
    ds: xarray dataset

    Returns
    -------
    dictionary with .z* keys for the various elements of the original dataset.
    """
    import dask
    s = ZarrSerialiser()
    try:
        attrs = ds.attrs.copy()
        ds.attrs.pop('_ARRAY_DIMENSIONS', None)  # zarr implementation detail
        x = ds.to_zarr(s, compute=False)
        x.dask = dict(x.dask)
        for k, v in x.dask.items():
            # replace the data writing funcs with no-op, so as not to waste
            # time on serialization, when all we want is metadata
            if isinstance(k, tuple) and k[0].startswith('store-'):
                x.dask[k] = (noop, ) + x.dask[k][1:]
        dask.compute(x, scheduler='threads')
    finally:
        ds.attrs = attrs
    return s
Example #28
Source File: test_convert_ek60.py From echopype with Apache License 2.0
def test_convert_zarr():
    tmp = Convert(raw_path)
    tmp.raw2zarr()

    ds_beam = xr.open_zarr(tmp.zarr_path, group='Beam')
    with xr.open_dataset(test_path) as ds_test:
        assert np.allclose(ds_test.power, ds_beam.backscatter_r)

    shutil.rmtree(tmp.zarr_path, ignore_errors=True)  # Delete non-empty folder
Example #29
Source File: test_gen.py From xcube with MIT License
def test_handle_360_lon(self):
    status, output = gen_cube_wrapper(
        [get_inputdata_path('20170101120000-UKMO-L4_GHRSST-SSTfnd-OSTIAanom-GLOB-v02.0-fv02.0.nc')],
        'l2c-single.zarr', no_sort_mode=False)
    self.assertEqual(True, status)

    ds = xr.open_zarr('l2c-single.zarr')
    self.assertIn('lon', ds.coords)
    self.assertFalse(np.any(ds.coords['lon'] > 180.))
Example #30
Source File: xzarr.py From intake-xarray with BSD 2-Clause "Simplified" License
def _open_dataset(self):
    import xarray as xr
    from fsspec import get_mapper

    self._mapper = get_mapper(self.urlpath, **self.storage_options)
    self._ds = xr.open_zarr(self._mapper, **self.kwargs)