Python xarray.open_mfdataset() Examples

The following are 30 code examples of xarray.open_mfdataset(), drawn from open-source projects. The source file, project, and license are noted above each example.
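Before the project examples, a minimal sketch of the call itself (the file paths here are hypothetical): xarray.open_mfdataset() opens several files lazily and combines them into a single Dataset, either by matching coordinate values or by nested concatenation along an explicit dimension.

import xarray as xr

# Combine files by inspecting and aligning their coordinate values.
ds = xr.open_mfdataset('data/temperature_*.nc', combine='by_coords')

# Or concatenate an explicit list of files, in list order, along 'time'.
ds = xr.open_mfdataset(['jan.nc', 'feb.nc', 'mar.nc'],
                       combine='nested', concat_dim='time')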
Example #1
Source File: model.py    From aospy with Apache License 2.0
def _get_grid_files(self):
        """Get the files holding grid data for an aospy object."""
        grid_file_paths = self.grid_file_paths
        datasets = []
        if isinstance(grid_file_paths, str):
            grid_file_paths = [grid_file_paths]
        for path in grid_file_paths:
            try:
                ds = xr.open_dataset(path, decode_times=False)
            except (TypeError, AttributeError):
                ds = xr.open_mfdataset(path, decode_times=False,
                                       combine='by_coords').load()
            except (RuntimeError, OSError) as e:
                msg = str(e) + ': {}'.format(path)
                raise RuntimeError(msg)
            datasets.append(ds)
        return tuple(datasets) 
Example #2
Source File: sarah.py    From atlite with GNU General Public License v3.0
def prepare_meta_sarah(xs, ys, year, month, template_sis, template_sid, module, resolution=resolution):
    fns = [next(glob.iglob(t.format(year=year, month=month)))
           for t in (template_sis, template_sid)]

    with xr.open_mfdataset(fns, compat='identical') as ds:
        ds = _rename_and_clean_coords(ds)
        ds = ds.coords.to_dataset()

        t = pd.Timestamp(year=year, month=month, day=1)
        ds['time'] = pd.date_range(t, t + pd.DateOffset(months=1),
                                   freq='1h', closed='left')

        if resolution is not None:
            def p(s):
                s += 0.1*resolution
                return s - (s % resolution)
            xs = np.arange(p(xs.start), p(xs.stop) + 1.1*resolution, resolution)
            ys = np.arange(p(ys.start), p(ys.stop) - 0.1*resolution, - resolution)
            ds = ds.sel(x=xs, y=ys, method='nearest')
        else:
            ds = ds.sel(x=as_slice(xs), y=as_slice(ys))

        return ds.load() 
Example #3
Source File: imagery.py    From glmtools with BSD 3-Clause "New" or "Revised" License
def open_glm_time_series(filenames, chunks=None):
    """ Convenience function for combining individual 1-min GLM gridded imagery
    files into a single xarray.Dataset with a time dimension.
    
    Creates an index on the time dimension.
    
    The time dimension will be in the order in which the files are listed
    due to the behavior of combine='nested' in open_mfdataset.
    
    Adjusts the time_coverage_start and time_coverage_end metadata.
    """
    # Need to fix time_coverage_start and _end in concat dataset
    starts = [t for t in gen_file_times(filenames)]
    ends = [t for t in gen_file_times(filenames, time_attr='time_coverage_end')]
    
    d = xr.open_mfdataset(filenames, concat_dim='time', chunks=chunks, combine='nested')
    d['time'] = starts
    d = d.set_index({'time':'time'})
    d = d.set_coords('time')
    
    d.attrs['time_coverage_start'] = pd.Timestamp(min(starts)).isoformat()
    d.attrs['time_coverage_end'] = pd.Timestamp(max(ends)).isoformat()

    return d 
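A hedged usage sketch for the function above (the directory and file pattern are hypothetical): because combine='nested' concatenates in the order the files are listed, sorting the glob results keeps the time dimension monotonic.

import glob

filenames = sorted(glob.glob('glm_grids/OR_GLM-L2-GLMC-M3_*.nc'))
glm = open_glm_time_series(filenames, chunks={'time': 1})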
Example #4
Source File: __main__.py    From cfgrib with Apache License 2.0
def to_netcdf(inpaths, outpath, cdm, engine):
    import cf2cdm
    import xarray as xr

    # NOTE: noop if no input argument
    if len(inpaths) == 0:
        return

    if not outpath:
        outpath = os.path.splitext(inpaths[0])[0] + '.nc'

    ds = xr.open_mfdataset(inpaths, engine=engine, combine='by_coords')
    if cdm:
        coord_model = getattr(cf2cdm, cdm)
        ds = cf2cdm.translate_coords(ds, coord_model=coord_model)
    ds.to_netcdf(outpath) 
Example #5
Source File: netcdf.py    From intake-xarray with BSD 2-Clause "Simplified" License
def _open_dataset(self):
        import xarray as xr
        url = self.urlpath

        kwargs = self.xarray_kwargs

        if "*" in url or isinstance(url, list):
            _open_dataset = xr.open_mfdataset
            if self.pattern:
                kwargs.update(preprocess=self._add_path_to_ds)
            if self.combine is not None:
                if 'combine' in kwargs:
                    raise Exception("Setting 'combine' argument twice  in the catalog is invalid")
                kwargs.update(combine=self.combine)
            if self.concat_dim is not None:
                if 'concat_dim' in kwargs:
                    raise Exception("Setting 'concat_dim' argument twice  in the catalog is invalid")
                kwargs.update(concat_dim=self.concat_dim)
        else:
            _open_dataset = xr.open_dataset
        url = fsspec.open_local(url, **self.storage_options)

        self._ds = _open_dataset(url, chunks=self.chunks, **kwargs) 
Example #6
Source File: cfsr.py    From DLWP with MIT License
def open(self, exact_dates=True, concat_dim='time', **dataset_kwargs):
        """
        Open an xarray multi-file Dataset for the processed files with dates set using set_dates(), retrieve(), or
        write(). Once opened, this Dataset is accessible by self.Dataset.

        :param exact_dates: bool: if True, set the Dataset to have the exact dates of this instance; otherwise,
            keep all of the monthly dates in the opened files
        :param concat_dim: passed to xarray.open_mfdataset()
        :param dataset_kwargs: kwargs passed to xarray.open_mfdataset()
        """
        nc_file_dir = '%s/processed' % self._root_directory
        if not self.dataset_dates:
            raise ValueError("use set_dates() to specify times of data to load")
        dates_index = pd.DatetimeIndex(self.dataset_dates).sort_values()
        months = dates_index.to_period('M')
        unique_months = months.unique()
        nc_files = ['%s/%sfcst_%s.nc' % (nc_file_dir, self._file_id, d.strftime('%Y%m')) for d in unique_months]
        self.Dataset = xr.open_mfdataset(nc_files, concat_dim=concat_dim, **dataset_kwargs)
        if exact_dates:
            self.Dataset = self.Dataset.sel(time=self.dataset_dates)
        self.dataset_variables = list(self.Dataset.variables.keys()) 
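A hedged usage sketch for this method (the class name and constructor argument are hypothetical, standing in for whichever DLWP object exposes it): set the dates first, then open, optionally forwarding open_mfdataset() keywords through dataset_kwargs.

cfsr = CFSReanalysis(root_directory='/data/cfsr')   # hypothetical instantiation
cfsr.set_dates(pd.date_range('2000-01-01', '2000-03-31', freq='6H').tolist())
cfsr.open(exact_dates=True, combine='nested')
print(cfsr.Dataset)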
Example #7
Source File: cfsr.py    From DLWP with MIT License
def open(self, exact_dates=True, concat_dim='time', **dataset_kwargs):
        """
        Open an xarray multi-file Dataset for the processed files with dates set using set_dates(), retrieve(), or
        write(). Once opened, this Dataset is accessible by self.Dataset.

        :param exact_dates: bool: if True, set the Dataset to have the exact dates of this instance; otherwise,
            keep all of the monthly dates in the opened files
        :param concat_dim: passed to xarray.open_mfdataset()
        :param dataset_kwargs: kwargs passed to xarray.open_mfdataset()
        """
        nc_file_dir = '%s/processed' % self._root_directory
        if not self.dataset_dates:
            raise ValueError("use set_dates() to specify times of data to load")
        dates_index = pd.DatetimeIndex(self.dataset_dates).sort_values()
        months = dates_index.to_period('M')
        unique_months = months.unique()
        nc_files = ['%s/%s%s.nc' % (nc_file_dir, self._file_id, d.strftime('%Y%m')) for d in unique_months]
        self.Dataset = xr.open_mfdataset(nc_files, concat_dim=concat_dim, **dataset_kwargs)
        if exact_dates:
            self.Dataset = self.Dataset.sel(time=self.dataset_dates)
        self.dataset_variables = list(self.Dataset.variables.keys()) 
Example #8
Source File: data.py    From psyplot with GNU General Public License v2.0
def _open_ds_from_store(fname, store_mod=None, store_cls=None, **kwargs):
    """Open a dataset and return it"""
    if isinstance(fname, xr.Dataset):
        return fname
    if not isstring(fname):
        try:  # test iterable
            fname[0]
        except TypeError:
            pass
        else:
            if store_mod is not None and store_cls is not None:
                if isstring(store_mod):
                    store_mod = repeat(store_mod)
                if isstring(store_cls):
                    store_cls = repeat(store_cls)
                fname = [_open_store(sm, sc, f)
                         for sm, sc, f in zip(store_mod, store_cls, fname)]
                kwargs['engine'] = None
                kwargs['lock'] = False
                return open_mfdataset(fname, **kwargs)
    if store_mod is not None and store_cls is not None:
        fname = _open_store(store_mod, store_cls, fname)
    return open_dataset(fname, **kwargs) 
Example #9
Source File: test_subset.py    From xclim with Apache License 2.0
def test_dataset(self):
        da = xr.open_mfdataset(
            [self.nc_file, self.nc_file.replace("tasmax", "tasmin")],
            combine="by_coords",
        )
        out = subset.subset_bbox(da, lon_bnds=self.lon, lat_bnds=self.lat)
        assert np.all(out.lon >= np.min(self.lon))
        assert np.all(out.lon <= np.max(self.lon))
        assert np.all(out.lat >= np.min(self.lat))
        assert np.all(out.lat <= np.max(self.lat))
        np.testing.assert_array_equal(out.tasmin.shape, out.tasmax.shape) 
Example #10
Source File: data_loader.py    From aospy with Apache License 2.0
def _preprocess_and_rename_grid_attrs(func, grid_attrs=None, **kwargs):
    """Call a custom preprocessing method first then rename grid attrs.

    This wrapper is needed to generate a single function to pass to the
    ``preprocess`` argument of xr.open_mfdataset.  It makes sure that the
    user-specified preprocess function is called on the loaded Dataset before
    aospy's is applied.  An example for why this might be needed is output from
    the WRF model; one needs to add a CF-compliant units attribute to the time
    coordinate of all input files, because it is not present by default.

    Parameters
    ----------
    func : function
       An arbitrary function to call before calling
       ``grid_attrs_to_aospy_names`` in ``_load_data_from_disk``.  Must take
       an xr.Dataset as an argument as well as ``**kwargs``.
    grid_attrs : dict (optional)
        Overriding dictionary of grid attributes mapping aospy internal
        names to names of grid attributes used in a particular model.

    Returns
    -------
    function
        A function that calls the provided function ``func`` on the Dataset
        before calling ``grid_attrs_to_aospy_names``; this is meant to be
        passed as a ``preprocess`` argument to ``xr.open_mfdataset``.
    """

    def func_wrapper(ds):
        return grid_attrs_to_aospy_names(func(ds, **kwargs), grid_attrs)
    return func_wrapper 
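For context, a hedged sketch of the pattern this wrapper serves (the file glob and units string are illustrative, not aospy's actual values): xr.open_mfdataset applies the preprocess function to each per-file Dataset before combining, which is where a fix like adding a missing time-units attribute must happen.

import xarray as xr

def add_time_units(ds):
    # Illustrative fix-up: attach a CF-compliant units attribute to the
    # time coordinate before any downstream renaming or decoding runs.
    ds['time'].attrs.setdefault('units', 'hours since 2000-01-01')
    return ds

ds = xr.open_mfdataset('wrfout_*.nc', preprocess=add_time_units,
                       combine='by_coords', decode_times=False)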
Example #11
Source File: netcdf_index.py    From cosima-cookbook with Apache License 2.0
def decode_time(dataset, time_units, offset):
    """
    Decode and offset time axis for a single dataset (preprocessing step for open_mfdataset).

    See get_nc_variable for documentation on the arguments.
    """

    if 'time' in dataset.coords:
        calendar = None
        if time_units is None:
            time_units = dataset.time.units
            if 'calendar' in dataset.time.attrs:
                calendar = dataset.time.calendar
            elif 'calendar_type' in dataset.time.attrs:
                calendar = dataset.time.calendar_type
        if offset is not None:
            dataset = rebase_dataset(dataset, time_units, offset=offset)
        try:
            decoded_time = xr.conventions.times.decode_cf_datetime(dataset.time, time_units, calendar=calendar)
        except AttributeError:  # xr.conventions.times is absent in older xarray (pre-0.10.2 ?)
            decoded_time = xr.conventions.decode_cf_datetime(dataset.time, time_units)
        dataset.coords['time'] = ('time', decoded_time,
                                  {'long_name': 'time', 'decoded_using': time_units})

    return dataset 
Example #12
Source File: ek60.py    From echopype with Apache License 2.0
def _combine_files(self):
        # Do nothing if there is nothing to combine (no temporary files were written)
        if not self._temp_path:
            return
        save_path = self.save_path
        split = os.path.splitext(self.save_path)
        all_temp = os.listdir(self._temp_dir)
        file_groups = [[]]
        # Split the files in the temp directory into range_bin groups
        i = 0
        while i < len(all_temp):
            file_groups[-1].append(os.path.join(self._temp_dir, all_temp[i]))
            if "_part" in all_temp[i]:
                i += 1
                file_groups.append([os.path.join(self._temp_dir, all_temp[i])])
            i += 1
        for n, file_group in enumerate(file_groups):
            if len(file_groups) > 1:
                # Construct a new path with _part[n] if there are multiple range_bin lengths
                save_path = split[0] + '_part%02d' % (n + 1) + split[1]
            # Open multiple files as one dataset of each group and save them into a single file
            with xr.open_dataset(file_group[0], group='Provenance') as ds_prov:
                ds_prov.to_netcdf(path=save_path, mode='w', group='Provenance')
            with xr.open_dataset(file_group[0], group='Sonar') as ds_sonar:
                ds_sonar.to_netcdf(path=save_path, mode='a', group='Sonar')
            with xr.open_mfdataset(file_group, group='Beam', combine='by_coords') as ds_beam:
                ds_beam.to_netcdf(path=save_path, mode='a', group='Beam')
            with xr.open_dataset(file_group[0], group='Environment') as ds_env:
                ds_env.to_netcdf(path=save_path, mode='a', group='Environment')
            with xr.open_mfdataset(file_group, group='Platform', combine='by_coords') as ds_plat:
                ds_plat.to_netcdf(path=save_path, mode='a', group='Platform')
            with xr.open_mfdataset(file_group, group='Platform/NMEA',
                                   combine='nested', concat_dim='time', decode_times=False) as ds_nmea:
                ds_nmea.to_netcdf(path=save_path, mode='a', group='Platform/NMEA')

        # Delete temporary folder:
        shutil.rmtree(self._temp_dir) 
Example #13
Source File: ek80.py    From echopype with Apache License 2.0
def _combine_files(self):
        # Do nothing if there is nothing to combine (no temporary files were written)
        if not self._temp_path:
            return
        save_path = self.save_path
        split = os.path.splitext(self.save_path)
        all_temp = os.listdir(self._temp_dir)
        # Group files into cw (index 0) and broadband files (index 1)
        file_groups = [[], []]
        for f in all_temp:
            if "_cw" in f:
                file_groups[0].append(os.path.join(self._temp_dir, f))
            else:
                file_groups[1].append(os.path.join(self._temp_dir, f))

        for n, file_group in enumerate(file_groups):
            if len(file_groups) > 1:
                if not file_groups[n]:
                    # Skip saving either bb or cw if only one or the other is present
                    continue
                save_path = split[0] + '_cw' + split[1] if n == 0 else self.save_path
            # Open multiple files as one dataset of each group and save them into a single file
            with xr.open_dataset(file_group[0], group='Provenance') as ds_prov:
                ds_prov.to_netcdf(path=save_path, mode='w', group='Provenance')
            with xr.open_dataset(file_group[0], group='Sonar') as ds_sonar:
                ds_sonar.to_netcdf(path=save_path, mode='a', group='Sonar')
            with xr.open_mfdataset(file_group, group='Beam', combine='by_coords') as ds_beam:
                ds_beam.to_netcdf(path=save_path, mode='a', group='Beam')
            with xr.open_dataset(file_group[0], group='Environment') as ds_env:
                ds_env.to_netcdf(path=save_path, mode='a', group='Environment')
            with xr.open_mfdataset(file_group, group='Platform', combine='by_coords') as ds_plat:
                ds_plat.to_netcdf(path=save_path, mode='a', group='Platform')
            with xr.open_mfdataset(file_group, group='Platform/NMEA',
                                   combine='nested', concat_dim='time', decode_times=False) as ds_nmea:
                ds_nmea.to_netcdf(path=save_path, mode='a', group='Platform/NMEA')

        # Delete temporary folder:
        shutil.rmtree(self._temp_dir) 
Example #14
Source File: azfp.py    From echopype with Apache License 2.0
def _combine_files(self):
        # Do nothing if there is nothing to combine (no temporary files were written)
        if not self._temp_path:
            return
        save_path = self.save_path
        all_temp = os.listdir(self._temp_dir)
        file_groups = [[]]
        file_groups[0] = [os.path.join(self._temp_dir, file) for file in all_temp]
        for file_group in file_groups:
            # Open multiple files as one dataset of each group and save them into a single file
            with xr.open_dataset(file_group[0], group='Provenance') as ds_prov:
                ds_prov.to_netcdf(path=save_path, mode='w', group='Provenance')
            with xr.open_dataset(file_group[0], group='Sonar') as ds_sonar:
                ds_sonar.to_netcdf(path=save_path, mode='a', group='Sonar')
            with xr.open_mfdataset(file_group, group='Beam', combine='by_coords') as ds_beam:
                ds_beam.to_netcdf(path=save_path, mode='a', group='Beam')
            with xr.open_mfdataset(file_group, group='Environment', combine='by_coords') as ds_env:
                ds_env.to_netcdf(path=save_path, mode='a', group='Environment')
            # The platform group for AZFP does not have coordinates, so it must be handled differently from EK60
            with xr.open_dataset(file_group[0], group='Platform') as ds_plat:
                ds_plat.to_netcdf(path=save_path, mode='a', group='Platform')
            # EK60 does not have the "vendor specific" group
            with xr.open_mfdataset(file_group, group='Vendor', combine='by_coords') as ds_vend:
                ds_vend.to_netcdf(path=save_path, mode='a', group='Vendor')

        # Delete temporary folder:
        shutil.rmtree(self._temp_dir) 
Example #15
Source File: metsim.py    From MetSim with GNU General Public License v3.0
def open_output(self):
        filenames = [self._get_output_filename(times) for times in self._times]
        return xr.open_mfdataset(filenames) 
Example #16
Source File: io.py    From MetSim with GNU General Public License v3.0
def read_netcdf(data_handle, domain=None, is_worker=False,
                start=None, stop=None, calendar='standard',
                var_dict=None) -> xr.Dataset:
    """Read in a NetCDF file"""
    if '*' in data_handle:
        ds = xr.open_mfdataset(data_handle)
    else:
        ds = xr.open_dataset(data_handle)

    if domain is not None:
        ds = ds.sel({k: domain[k]
                     for k in list(domain.dims.keys())
                     if k in list(ds.dims.keys())})
    else:
        dims_wo_coords = set(ds.dims) - set(ds.coords)
        for d in dims_wo_coords:
            if is_worker:
                logger = logging.getLogger('MetSim')
                logger.warning(
                    'Setting sequential coordinate on dimension {}'.format(d))
            ds[d] = np.arange(0, len(ds[d]))

    if 'time' in ds.coords:
        if isinstance(ds.indexes['time'], xr.CFTimeIndex):
            ds['time'] = ds.indexes['time'].to_datetimeindex()
        ds['time'] = (ds.indexes['time'] -
                      pd.Timedelta('11H59M59S')).round('D')

    if var_dict is not None:
        var_list = list(var_dict.keys())
        ds = ds[var_list]
        ds = ds.rename(var_dict)

    if start is not None or stop is not None:
        ds = ds.sel(time=slice(start, stop))
        dates = ds.indexes['time']
        ds['day_of_year'] = xr.Variable(('time', ), dates.dayofyear)

    return ds 
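A hedged usage sketch (the path and variable names are hypothetical): the keys of var_dict select variables as named in the files, the values are the names they are renamed to, and start/stop then slice the time axis.

forcing = read_netcdf('forcings_*.nc',
                      start='1990-01-01', stop='1990-12-31',
                      var_dict={'prec': 'precip', 't_max': 'tmax'})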
Example #17
Source File: cross_registration.py    From minian with GNU General Public License v3.0
def load_cnm_dataset_mf(path, pattern=r'^cnm\.nc$', concat_dim='session'):
    path = os.path.normpath(path)
    cnmlist = []
    for dirpath, dirnames, fnames in os.walk(path):
        cnmnames = filter(lambda fn: re.search(pattern, fn), fnames)
        cnmpath = [os.path.join(dirpath, cnm) for cnm in cnmnames]
        cnmlist += cnmpath
    if len(cnmlist) > 1:
        return xr.open_mfdataset(cnmlist, concat_dim=concat_dim)
    else:
        print("No CNMF dataset found under path: {}".format(path))
        return None 
Example #18
Source File: track_model_results.py    From oggm with BSD 3-Clause "New" or "Revised" License
def track_avg_temp_full_period(self, gdirs):
        self.cfg_init()
        path = os.path.join(cfg.PATHS['working_dir'], 'climate_input*.nc')
        with xr.open_mfdataset(path, combine='by_coords') as ds:
            return float(ds.temp.mean()) 
Example #19
Source File: test_subset.py    From xclim with Apache License 2.0
def test_dataset(self):
        da = xr.open_mfdataset(
            [self.nc_file, self.nc_file.replace("tasmax", "tasmin")],
            combine="by_coords",
        )
        lon = -72.4
        lat = 46.1
        out = subset.subset_gridpoint(da, lon=lon, lat=lat)
        np.testing.assert_almost_equal(out.lon, lon, 1)
        np.testing.assert_almost_equal(out.lat, lat, 1)
        np.testing.assert_array_equal(out.tasmin.shape, out.tasmax.shape) 
Example #20
Source File: run_xarray.py    From recipy with Apache License 2.0
def open_mfdataset_glob(self):
        """
        Use xarray.open_mfdataset to read multiple netcdf files with a glob
        pattern.
        """
        pattern = os.path.join(self.data_dir, "*PropertiesRhineMeuse30min.nc")
        xarray.open_mfdataset(pattern) 
Example #21
Source File: run_xarray.py    From recipy with Apache License 2.0
def open_mfdataset_list(self):
        """
        Use xarray.open_mfdataset to read multiple netcdf files from a list.
        """
        file_names = [os.path.join(self.data_dir, f)
                      for f in ('soilPropertiesRhineMeuse30min.nc',
                                'topoPropertiesRhineMeuse30min.nc')]
        xarray.open_mfdataset(file_names) 
Example #22
Source File: era5.py    From DLWP with MIT License
def open(self, **dataset_kwargs):
        """
        Open an xarray multi-file Dataset for the processed files. Set the variables and levels with the instance
        set_variables and set_levels methods. Once opened, this Dataset is accessible by self.Dataset.

        :param dataset_kwargs: kwargs passed to xarray.open_mfdataset()
        """
        if len(self.dataset_variables) == 0:
            raise ValueError('set the variables to open with the set_variables() method')
        if len(self.dataset_levels) == 0:
            raise ValueError('set the pressure levels to open with the set_levels() method')
        self._set_file_names()
        self.Dataset = xr.open_mfdataset(self.raw_files, **dataset_kwargs)
        self.dataset_dates = self.Dataset['time'] 
Example #23
Source File: track_model_results.py    From oggm with BSD 3-Clause "New" or "Revised" License
def track_start_volume(self, gdirs):
        self.cfg_init()
        path = os.path.join(cfg.PATHS['working_dir'], 'run_output_tstar*.nc')
        with xr.open_mfdataset(path, combine='by_coords') as ds:
            return float(ds.volume.sum(dim='rgi_id').isel(time=0)) * 1e-9 
Example #24
Source File: track_model_results.py    From oggm with BSD 3-Clause "New" or "Revised" License
def track_tstar_run_final_volume(self, gdirs):
        self.cfg_init()
        path = os.path.join(cfg.PATHS['working_dir'], 'run_output_tstar*.nc')
        with xr.open_mfdataset(path, combine='by_coords') as ds:
            return float(ds.volume.sum(dim='rgi_id').isel(time=-1)) * 1e-9 
Example #25
Source File: track_model_results.py    From oggm with BSD 3-Clause "New" or "Revised" License
def track_1990_run_final_volume(self, gdirs):
        self.cfg_init()
        path = os.path.join(cfg.PATHS['working_dir'], 'run_output_pd*.nc')
        with xr.open_mfdataset(path, combine='by_coords') as ds:
            return float(ds.volume.sum(dim='rgi_id').isel(time=-1)) * 1e-9 
Example #26
Source File: NetCDFField.py    From skinnywms with Apache License 2.0
def get_fields(self):
        with closing(xr.open_mfdataset(self.path)) as ds:
            return self._get_fields(ds) 
Example #27
Source File: track_model_results.py    From oggm with BSD 3-Clause "New" or "Revised" License
def track_avg_prcp_full_period(self, gdirs):
        self.cfg_init()
        path = os.path.join(cfg.PATHS['working_dir'], 'climate_input*.nc')
        with xr.open_mfdataset(path, combine='by_coords') as ds:
            return float(ds.prcp.mean()) 
Example #28
Source File: data_loader.py    From aospy with Apache License 2.0
def _load_data_from_disk(file_set, preprocess_func=lambda ds: ds,
                         data_vars='minimal', coords='minimal',
                         grid_attrs=None, **kwargs):
    """Load a Dataset from a list or glob-string of files.

    Datasets from files are concatenated along time,
    and all grid attributes are renamed to their aospy internal names.

    Parameters
    ----------
    file_set : list or str
        List of paths to files or glob-string
    preprocess_func : function (optional)
        Custom function to call before applying any aospy logic
        to the loaded dataset
    data_vars : str (default 'minimal')
        Mode for concatenating data variables in call to ``xr.open_mfdataset``
    coords : str (default 'minimal')
        Mode for concatenating coordinate variables in call to
        ``xr.open_mfdataset``.
    grid_attrs : dict
        Overriding dictionary of grid attributes mapping aospy internal
        names to names of grid attributes used in a particular model.

    Returns
    -------
    Dataset

    """
    apply_preload_user_commands(file_set)
    func = _preprocess_and_rename_grid_attrs(preprocess_func, grid_attrs,
                                             **kwargs)
    return xr.open_mfdataset(
        file_set,
        preprocess=func,
        combine='by_coords',
        decode_times=False,
        decode_coords=False,
        mask_and_scale=True,
        data_vars=data_vars,
        coords=coords,
    ) 
Example #29
Source File: data.py    From psyplot with GNU General Public License v2.0
def open_mfdataset(paths, decode_cf=True, decode_times=True,
                   decode_coords=True, engine=None, gridfile=None,
                   t_format=None, **kwargs):
    """
    Open multiple files as a single dataset.

    This function is essentially the same as the :func:`xarray.open_mfdataset`
    function but (as the :func:`open_dataset`) supports additional decoding
    and the ``'gdal'`` engine.
    You can further specify the `t_format` parameter to get the time
    information from the file names and use the result to concatenate the
    files along the time dimension.

    Parameters
    ----------
    %(xarray.open_mfdataset.parameters.no_engine)s
    %(open_dataset.parameters.engine)s
    %(get_tdata.parameters.t_format)s
    %(CFDecoder.decode_coords.parameters.gridfile)s

    Returns
    -------
    xarray.Dataset
        The dataset that contains the variables from `paths`"""
    if t_format is not None or engine == 'gdal':
        if isinstance(paths, six.string_types):
            paths = sorted(glob(paths))
        if not paths:
            raise IOError('no files to open')
    if t_format is not None:
        time, paths = get_tdata(t_format, paths)
        kwargs['concat_dim'] = 'time'
        if xr_version > (0, 11):
            kwargs['combine'] = 'nested'
    if engine == 'gdal':
        from psyplot.gdal_store import GdalStore
        paths = list(map(GdalStore, paths))
        engine = None
        kwargs['lock'] = False

    ds = xr.open_mfdataset(
        paths, decode_cf=decode_cf, decode_times=decode_times, engine=engine,
        decode_coords=False, **kwargs)
    if decode_cf:
        ds = CFDecoder.decode_ds(ds, gridfile=gridfile,
                                 decode_coords=decode_coords,
                                 decode_times=decode_times)
    ds.psy._concat_dim = kwargs.get('concat_dim')
    if t_format is not None:
        ds['time'] = time
    return ds 
Example #30
Source File: netcdf_data.py    From Ocean-Data-Map-Project with GNU General Public License v3.0
def __enter__(self):
        if not self.meta_only:
            # Don't decode times since we do it anyway.
            decode_times = False

            if self._nc_files:
                try:
                    self.dataset = xarray.open_mfdataset(
                        self._nc_files,
                        decode_times=decode_times,
                        chunks=200,
                    )
                except xarray.core.variable.MissingDimensionsError:
                    # xarray won't open FVCOM files due to dimension/coordinate/variable label
                    # duplication issue, so fall back to using netCDF4.Dataset()
                    self.dataset = netCDF4.MFDataset(self._nc_files)
            else:
                try:
                    # Handle list of URLs for staggered grid velocity field datasets
                    url = self.url if isinstance(self.url, list) else [self.url]
                    # This will raise a FutureWarning for xarray>=0.12.2.
                    # That warning should be resolvable by changing to:
                    # fields = xarray.open_mfdataset(self.url, combine="by_coords", decode_times=decode_times)
                    fields = xarray.open_mfdataset(url, decode_times=decode_times)
                except xarray.core.variable.MissingDimensionsError:
                    # xarray won't open FVCOM files due to dimension/coordinate/variable label
                    # duplication issue, so fall back to using netCDF4.Dataset()
                    fields = netCDF4.Dataset(self.url)
                if getattr(self._dataset_config, "geo_ref", {}):
                    drop_variables = self._dataset_config.geo_ref.get("drop_variables", [])
                    geo_refs = xarray.open_dataset(
                        self._dataset_config.geo_ref["url"], drop_variables=drop_variables,
                    )
                    fields = fields.merge(geo_refs)
                self.dataset = fields

            if self._grid_angle_file_url:
                angle_file = xarray.open_dataset(
                    self._grid_angle_file_url,
                    drop_variables=[self._dataset_config.lat_var_key, self._dataset_config.lon_var_key]
                )
                self.dataset = self.dataset.merge(angle_file)
                angle_file.close()

            self._dataset_open = True

        return self