Python xarray.open_mfdataset() Examples

The following are 30 code examples of xarray.open_mfdataset(), drawn from open-source projects. The source file, project, and license are noted above each example.
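Before the project examples, a minimal sketch of the call itself (the file paths here are hypothetical): xarray.open_mfdataset() opens several files lazily and combines them into a single Dataset, either by matching coordinate values or by nested concatenation along an explicit dimension.

import xarray as xr

# Combine files by inspecting and aligning their coordinate values.
ds = xr.open_mfdataset('data/temperature_*.nc', combine='by_coords')

# Or concatenate an explicit list of files, in list order, along 'time'.
ds = xr.open_mfdataset(['jan.nc', 'feb.nc', 'mar.nc'],
                       combine='nested', concat_dim='time')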
Example #1
Source File: model.py    From aospy with Apache License 2.0
def _get_grid_files(self):
        """Get the files holding grid data for an aospy object."""
        grid_file_paths = self.grid_file_paths
        datasets = []
        if isinstance(grid_file_paths, str):
            grid_file_paths = [grid_file_paths]
        for path in grid_file_paths:
            try:
                ds = xr.open_dataset(path, decode_times=False)
            except (TypeError, AttributeError):
                ds = xr.open_mfdataset(path, decode_times=False,
                                       combine='by_coords').load()
            except (RuntimeError, OSError) as e:
                msg = str(e) + ': {}'.format(path)
                raise RuntimeError(msg)
            datasets.append(ds)
        return tuple(datasets) 
Example #2
Source File: sarah.py    From atlite with GNU General Public License v3.0
def prepare_meta_sarah(xs, ys, year, month, template_sis, template_sid, module, resolution=resolution):
    fns = [next(glob.iglob(t.format(year=year, month=month)))
           for t in (template_sis, template_sid)]

    with xr.open_mfdataset(fns, compat='identical') as ds:
        ds = _rename_and_clean_coords(ds)
        ds = ds.coords.to_dataset()

        t = pd.Timestamp(year=year, month=month, day=1)
        ds['time'] = pd.date_range(t, t + pd.DateOffset(months=1),
                                   freq='1h', closed='left')

        if resolution is not None:
            def p(s):
                s += 0.1*resolution
                return s - (s % resolution)
            xs = np.arange(p(xs.start), p(xs.stop) + 1.1*resolution, resolution)
            ys = np.arange(p(ys.start), p(ys.stop) - 0.1*resolution, - resolution)
            ds = ds.sel(x=xs, y=ys, method='nearest')
        else:
            ds = ds.sel(x=as_slice(xs), y=as_slice(ys))

        return ds.load() 
Example #3
Source File: imagery.py    From glmtools with BSD 3-Clause "New" or "Revised" License
def open_glm_time_series(filenames, chunks=None):
    """ Convenience function for combining individual 1-min GLM gridded imagery
    files into a single xarray.Dataset with a time dimension.
    
    Creates an index on the time dimension.
    
    The time dimension will be in the order in which the files are listed
    due to the behavior of combine='nested' in open_mfdataset.
    
    Adjusts the time_coverage_start and time_coverage_end metadata.
    """
    # Need to fix time_coverage_start and _end in concat dataset
    starts = [t for t in gen_file_times(filenames)]
    ends = [t for t in gen_file_times(filenames, time_attr='time_coverage_end')]
    
    d = xr.open_mfdataset(filenames, concat_dim='time', chunks=chunks, combine='nested')
    d['time'] = starts
    d = d.set_index({'time':'time'})
    d = d.set_coords('time')
    
    d.attrs['time_coverage_start'] = pd.Timestamp(min(starts)).isoformat()
    d.attrs['time_coverage_end'] = pd.Timestamp(max(ends)).isoformat()

    return d 
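A hedged usage sketch for the function above (the directory and file pattern are hypothetical): because combine='nested' concatenates in the order the files are listed, sorting the glob results keeps the time dimension monotonic.

import glob

filenames = sorted(glob.glob('glm_grids/OR_GLM-L2-GLMC-M3_*.nc'))
glm = open_glm_time_series(filenames, chunks={'time': 1})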
Example #4
Source File: __main__.py    From cfgrib with Apache License 2.0
def to_netcdf(inpaths, outpath, cdm, engine):
    import cf2cdm
    import xarray as xr

    # NOTE: noop if no input argument
    if len(inpaths) == 0:
        return

    if not outpath:
        outpath = os.path.splitext(inpaths[0])[0] + '.nc'

    ds = xr.open_mfdataset(inpaths, engine=engine, combine='by_coords')
    if cdm:
        coord_model = getattr(cf2cdm, cdm)
        ds = cf2cdm.translate_coords(ds, coord_model=coord_model)
    ds.to_netcdf(outpath) 
Example #5
Source File: netcdf.py    From intake-xarray with BSD 2-Clause "Simplified" License
def _open_dataset(self):
        import xarray as xr
        url = self.urlpath

        kwargs = self.xarray_kwargs

        if "*" in url or isinstance(url, list):
            _open_dataset = xr.open_mfdataset
            if self.pattern:
                kwargs.update(preprocess=self._add_path_to_ds)
            if self.combine is not None:
                if 'combine' in kwargs:
                    raise Exception("Setting 'combine' argument twice  in the catalog is invalid")
                kwargs.update(combine=self.combine)
            if self.concat_dim is not None:
                if 'concat_dim' in kwargs:
                    raise Exception("Setting 'concat_dim' argument twice  in the catalog is invalid")
                kwargs.update(concat_dim=self.concat_dim)
        else:
            _open_dataset = xr.open_dataset
        url = fsspec.open_local(url, **self.storage_options)

        self._ds = _open_dataset(url, chunks=self.chunks, **kwargs) 
Example #6
Source File: cfsr.py    From DLWP with MIT License
def open(self, exact_dates=True, concat_dim='time', **dataset_kwargs):
        """
        Open an xarray multi-file Dataset for the processed files with dates set using set_dates(), retrieve(), or
        write(). Once opened, this Dataset is accessible by self.Dataset.

        :param exact_dates: bool: if True, set the Dataset to have the exact dates of this instance; otherwise,
            keep all of the monthly dates in the opened files
        :param concat_dim: passed to xarray.open_mfdataset()
        :param dataset_kwargs: kwargs passed to xarray.open_mfdataset()
        """
        nc_file_dir = '%s/processed' % self._root_directory
        if not self.dataset_dates:
            raise ValueError("use set_dates() to specify times of data to load")
        dates_index = pd.DatetimeIndex(self.dataset_dates).sort_values()
        months = dates_index.to_period('M')
        unique_months = months.unique()
        nc_files = ['%s/%sfcst_%s.nc' % (nc_file_dir, self._file_id, d.strftime('%Y%m')) for d in unique_months]
        self.Dataset = xr.open_mfdataset(nc_files, concat_dim=concat_dim, **dataset_kwargs)
        if exact_dates:
            self.Dataset = self.Dataset.sel(time=self.dataset_dates)
        self.dataset_variables = list(self.Dataset.variables.keys()) 
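A hedged usage sketch for this method (the class name and constructor argument are hypothetical, standing in for whichever DLWP object exposes it): set the dates first, then open, optionally forwarding open_mfdataset() keywords through dataset_kwargs.

cfsr = CFSReanalysis(root_directory='/data/cfsr')   # hypothetical instantiation
cfsr.set_dates(pd.date_range('2000-01-01', '2000-03-31', freq='6H').tolist())
cfsr.open(exact_dates=True, combine='nested')
print(cfsr.Dataset)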
Example #7
Source File: cfsr.py    From DLWP with MIT License
def open(self, exact_dates=True, concat_dim='time', **dataset_kwargs):
        """
        Open an xarray multi-file Dataset for the processed files with dates set using set_dates(), retrieve(), or
        write(). Once opened, this Dataset is accessible by self.Dataset.

        :param exact_dates: bool: if True, set the Dataset to have the exact dates of this instance; otherwise,
            keep all of the monthly dates in the opened files
        :param concat_dim: passed to xarray.open_mfdataset()
        :param dataset_kwargs: kwargs passed to xarray.open_mfdataset()
        """
        nc_file_dir = '%s/processed' % self._root_directory
        if not self.dataset_dates:
            raise ValueError("use set_dates() to specify times of data to load")
        dates_index = pd.DatetimeIndex(self.dataset_dates).sort_values()
        months = dates_index.to_period('M')
        unique_months = months.unique()
        nc_files = ['%s/%s%s.nc' % (nc_file_dir, self._file_id, d.strftime('%Y%m')) for d in unique_months]
        self.Dataset = xr.open_mfdataset(nc_files, concat_dim=concat_dim, **dataset_kwargs)
        if exact_dates:
            self.Dataset = self.Dataset.sel(time=self.dataset_dates)
        self.dataset_variables = list(self.Dataset.variables.keys()) 
Example #8
Source File: data.py    From psyplot with GNU General Public License v2.0
def _open_ds_from_store(fname, store_mod=None, store_cls=None, **kwargs):
    """Open a dataset and return it"""
    if isinstance(fname, xr.Dataset):
        return fname
    if not isstring(fname):
        try:  # test iterable
            fname[0]
        except TypeError:
            pass
        else:
            if store_mod is not None and store_cls is not None:
                if isstring(store_mod):
                    store_mod = repeat(store_mod)
                if isstring(store_cls):
                    store_cls = repeat(store_cls)
                fname = [_open_store(sm, sc, f)
                         for sm, sc, f in zip(store_mod, store_cls, fname)]
                kwargs['engine'] = None
                kwargs['lock'] = False
                return open_mfdataset(fname, **kwargs)
    if store_mod is not None and store_cls is not None:
        fname = _open_store(store_mod, store_cls, fname)
    return open_dataset(fname, **kwargs) 
Example #9
Source File: test_subset.py    From xclim with Apache License 2.0
def test_dataset(self):
        da = xr.open_mfdataset(
            [self.nc_file, self.nc_file.replace("tasmax", "tasmin")],
            combine="by_coords",
        )
        out = subset.subset_bbox(da, lon_bnds=self.lon, lat_bnds=self.lat)
        assert np.all(out.lon >= np.min(self.lon))
        assert np.all(out.lon <= np.max(self.lon))
        assert np.all(out.lat >= np.min(self.lat))
        assert np.all(out.lat <= np.max(self.lat))
        np.testing.assert_array_equal(out.tasmin.shape, out.tasmax.shape) 
Example #10
Source File: data_loader.py    From aospy with Apache License 2.0
def _preprocess_and_rename_grid_attrs(func, grid_attrs=None, **kwargs):
    """Call a custom preprocessing method first then rename grid attrs.

    This wrapper is needed to generate a single function to pass to the
    ``preprocess`` argument of xr.open_mfdataset.  It makes sure that the
    user-specified preprocess function is called on the loaded Dataset before
    aospy's is applied.  An example for why this might be needed is output from
    the WRF model; one needs to add a CF-compliant units attribute to the time
    coordinate of all input files, because it is not present by default.

    Parameters
    ----------
    func : function
       An arbitrary function to call before calling
       ``grid_attrs_to_aospy_names`` in ``_load_data_from_disk``.  Must take
       an xr.Dataset as an argument as well as ``**kwargs``.
    grid_attrs : dict (optional)
        Overriding dictionary of grid attributes mapping aospy internal
        names to names of grid attributes used in a particular model.

    Returns
    -------
    function
        A function that calls the provided function ``func`` on the Dataset
        before calling ``grid_attrs_to_aospy_names``; this is meant to be
        passed as a ``preprocess`` argument to ``xr.open_mfdataset``.
    """

    def func_wrapper(ds):
        return grid_attrs_to_aospy_names(func(ds, **kwargs), grid_attrs)
    return func_wrapper 
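For context, a hedged sketch of the pattern this wrapper serves (the file glob and units string are illustrative, not aospy's actual values): xr.open_mfdataset applies the preprocess function to each per-file Dataset before combining, which is where a fix like adding a missing time-units attribute must happen.

import xarray as xr

def add_time_units(ds):
    # Illustrative fix-up: attach a CF-compliant units attribute to the
    # time coordinate before any downstream renaming or decoding runs.
    ds['time'].attrs.setdefault('units', 'hours since 2000-01-01')
    return ds

ds = xr.open_mfdataset('wrfout_*.nc', preprocess=add_time_units,
                       combine='by_coords', decode_times=False)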
Example #11
Source File: netcdf_index.py    From cosima-cookbook with Apache License 2.0
def decode_time(dataset, time_units, offset):
    """
    Decode and offset time axis for a single dataset (preprocessing step for open_mfdataset).

    See get_nc_variable for documentation on the arguments.
    """

    if 'time' in dataset.coords:
        calendar = None
        if time_units is None:
            time_units = dataset.time.units
            if 'calendar' in dataset.time.attrs:
                calendar = dataset.time.calendar
            elif 'calendar_type' in dataset.time.attrs:
                calendar = dataset.time.calendar_type
        if offset is not None:
            dataset = rebase_dataset(dataset, time_units, offset=offset)
        try:
            decoded_time = xr.conventions.times.decode_cf_datetime(dataset.time, time_units, calendar=calendar)
        except AttributeError:  # xr.conventions.times is absent in older xarray (pre-0.10.2 ?)
            decoded_time = xr.conventions.decode_cf_datetime(dataset.time, time_units)
        dataset.coords['time'] = ('time', decoded_time,
                                  {'long_name': 'time', 'decoded_using': time_units})

    return dataset 
Example #12
Source File: ek60.py    From echopype with Apache License 2.0
def _combine_files(self):
        # Do nothing if there is nothing to combine (no temporary files were written)
        if not self._temp_path:
            return
        save_path = self.save_path
        split = os.path.splitext(self.save_path)
        all_temp = os.listdir(self._temp_dir)
        file_groups = [[]]
        # Split the files in the temp directory into range_bin groups
        i = 0
        while i < len(all_temp):
            file_groups[-1].append(os.path.join(self._temp_dir, all_temp[i]))
            if "_part" in all_temp[i]:
                i += 1
                file_groups.append([os.path.join(self._temp_dir, all_temp[i])])
            i += 1
        for n, file_group in enumerate(file_groups):
            if len(file_groups) > 1:
                # Construct a new path with _part[n] if there are multiple range_bin lengths
                save_path = split[0] + '_part%02d' % (n + 1) + split[1]
            # Open multiple files as one dataset of each group and save them into a single file
            with xr.open_dataset(file_group[0], group='Provenance') as ds_prov:
                ds_prov.to_netcdf(path=save_path, mode='w', group='Provenance')
            with xr.open_dataset(file_group[0], group='Sonar') as ds_sonar:
                ds_sonar.to_netcdf(path=save_path, mode='a', group='Sonar')
            with xr.open_mfdataset(file_group, group='Beam', combine='by_coords') as ds_beam:
                ds_beam.to_netcdf(path=save_path, mode='a', group='Beam')
            with xr.open_dataset(file_group[0], group='Environment') as ds_env:
                ds_env.to_netcdf(path=save_path, mode='a', group='Environment')
            with xr.open_mfdataset(file_group, group='Platform', combine='by_coords') as ds_plat:
                ds_plat.to_netcdf(path=save_path, mode='a', group='Platform')
            with xr.open_mfdataset(file_group, group='Platform/NMEA',
                                   combine='nested', concat_dim='time', decode_times=False) as ds_nmea:
                ds_nmea.to_netcdf(path=save_path, mode='a', group='Platform/NMEA')

        # Delete temporary folder:
        shutil.rmtree(self._temp_dir) 
Example #13
Source File: ek80.py    From echopype with Apache License 2.0
def _combine_files(self):
        # Do nothing if there is nothing to combine (no temporary files were written)
        if not self._temp_path:
            return
        save_path = self.save_path
        split = os.path.splitext(self.save_path)
        all_temp = os.listdir(self._temp_dir)
        # Group files into cw (index 0) and broadband files (index 1)
        file_groups = [[], []]
        for f in all_temp:
            if "_cw" in f:
                file_groups[0].append(os.path.join(self._temp_dir, f))
            else:
                file_groups[1].append(os.path.join(self._temp_dir, f))

        for n, file_group in enumerate(file_groups):
            if len(file_groups) > 1:
                if not file_groups[n]:
                    # Skip saving either bb or cw if only one or the other is present
                    continue
                save_path = split[0] + '_cw' + split[1] if n == 0 else self.save_path
            # Open multiple files as one dataset of each group and save them into a single file
            with xr.open_dataset(file_group[0], group='Provenance') as ds_prov:
                ds_prov.to_netcdf(path=save_path, mode='w', group='Provenance')
            with xr.open_dataset(file_group[0], group='Sonar') as ds_sonar:
                ds_sonar.to_netcdf(path=save_path, mode='a', group='Sonar')
            with xr.open_mfdataset(file_group, group='Beam', combine='by_coords') as ds_beam:
                ds_beam.to_netcdf(path=save_path, mode='a', group='Beam')
            with xr.open_dataset(file_group[0], group='Environment') as ds_env:
                ds_env.to_netcdf(path=save_path, mode='a', group='Environment')
            with xr.open_mfdataset(file_group, group='Platform', combine='by_coords') as ds_plat:
                ds_plat.to_netcdf(path=save_path, mode='a', group='Platform')
            with xr.open_mfdataset(file_group, group='Platform/NMEA',
                                   combine='nested', concat_dim='time', decode_times=False) as ds_nmea:
                ds_nmea.to_netcdf(path=save_path, mode='a', group='Platform/NMEA')

        # Delete temporary folder:
        shutil.rmtree(self._temp_dir) 
Example #14
Source File: azfp.py    From echopype with Apache License 2.0
def _combine_files(self):
        # Do nothing if there is nothing to combine (no temporary files were written)
        if not self._temp_path:
            return
        save_path = self.save_path
        all_temp = os.listdir(self._temp_dir)
        file_groups = [[]]
        file_groups[0] = [os.path.join(self._temp_dir, file) for file in all_temp]
        for file_group in file_groups:
            # Open multiple files as one dataset of each group and save them into a single file
            with xr.open_dataset(file_group[0], group='Provenance') as ds_prov:
                ds_prov.to_netcdf(path=save_path, mode='w', group='Provenance')
            with xr.open_dataset(file_group[0], group='Sonar') as ds_sonar:
                ds_sonar.to_netcdf(path=save_path, mode='a', group='Sonar')
            with xr.open_mfdataset(file_group, group='Beam', combine='by_coords') as ds_beam:
                ds_beam.to_netcdf(path=save_path, mode='a', group='Beam')
            with xr.open_mfdataset(file_group, group='Environment', combine='by_coords') as ds_env:
                ds_env.to_netcdf(path=save_path, mode='a', group='Environment')
            # The platform group for AZFP does not have coordinates, so it must be handled differently from EK60
            with xr.open_dataset(file_group[0], group='Platform') as ds_plat:
                ds_plat.to_netcdf(path=save_path, mode='a', group='Platform')
            # EK60 does not have the "vendor specific" group
            with xr.open_mfdataset(file_group, group='Vendor', combine='by_coords') as ds_vend:
                ds_vend.to_netcdf(path=save_path, mode='a', group='Vendor')

        # Delete temporary folder:
        shutil.rmtree(self._temp_dir) 
Example #15
Source File: metsim.py    From MetSim with GNU General Public License v3.0
def open_output(self):
        filenames = [self._get_output_filename(times) for times in self._times]
        return xr.open_mfdataset(filenames) 
Example #16
Source File: io.py    From MetSim with GNU General Public License v3.0
def read_netcdf(data_handle, domain=None, is_worker=False,
                start=None, stop=None, calendar='standard',
                var_dict=None) -> xr.Dataset:
    """Read in a NetCDF file"""
    if '*' in data_handle:
        ds = xr.open_mfdataset(data_handle)
    else:
        ds = xr.open_dataset(data_handle)

    if domain is not None:
        ds = ds.sel({k: domain[k]
                     for k in list(domain.dims.keys())
                     if k in list(ds.dims.keys())})
    else:
        dims_wo_coords = set(ds.dims) - set(ds.coords)
        for d in dims_wo_coords:
            if is_worker:
                logger = logging.getLogger('MetSim')
                logger.warning(
                    'Setting sequential coordinate on dimension {}'.format(d))
            ds[d] = np.arange(0, len(ds[d]))

    if 'time' in ds.coords:
        if isinstance(ds.indexes['time'], xr.CFTimeIndex):
            ds['time'] = ds.indexes['time'].to_datetimeindex()
        ds['time'] = (ds.indexes['time'] -
                      pd.Timedelta('11H59M59S')).round('D')

    if var_dict is not None:
        var_list = list(var_dict.keys())
        ds = ds[var_list]
        ds = ds.rename(var_dict)

    if start is not None or stop is not None:
        ds = ds.sel(time=slice(start, stop))
        dates = ds.indexes['time']
        ds['day_of_year'] = xr.Variable(('time', ), dates.dayofyear)

    return ds 
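A hedged usage sketch (the path and variable names are hypothetical): the keys of var_dict select variables as named in the files, the values are the names they are renamed to, and start/stop then slice the time axis.

forcing = read_netcdf('forcings_*.nc',
                      start='1990-01-01', stop='1990-12-31',
                      var_dict={'prec': 'precip', 't_max': 'tmax'})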
Example #17
Source File: cross_registration.py    From minian with GNU General Public License v3.0
def load_cnm_dataset_mf(path, pattern=r'^cnm\.nc$', concat_dim='session'):
    path = os.path.normpath(path)
    cnmlist = []
    for dirpath, dirnames, fnames in os.walk(path):
        cnmnames = filter(lambda fn: re.search(pattern, fn), fnames)
        cnmpath = [os.path.join(dirpath, cnm) for cnm in cnmnames]
        cnmlist += cnmpath
    if len(cnmlist) > 1:
        return xr.open_mfdataset(cnmlist, concat_dim=concat_dim)
    else:
        print("No CNMF dataset found under path: {}".format(path))
        return None 
Example #18
Source File: track_model_results.py    From oggm with BSD 3-Clause "New" or "Revised" License
def track_avg_temp_full_period(self, gdirs):
        self.cfg_init()
        path = os.path.join(cfg.PATHS['working_dir'], 'climate_input*.nc')
        with xr.open_mfdataset(path, combine='by_coords') as ds:
            return float(ds.temp.mean()) 
Example #19
Source File: test_subset.py    From xclim with Apache License 2.0
def test_dataset(self):
        da = xr.open_mfdataset(
            [self.nc_file, self.nc_file.replace("tasmax", "tasmin")],
            combine="by_coords",
        )
        lon = -72.4
        lat = 46.1
        out = subset.subset_gridpoint(da, lon=lon, lat=lat)
        np.testing.assert_almost_equal(out.lon, lon, 1)
        np.testing.assert_almost_equal(out.lat, lat, 1)
        np.testing.assert_array_equal(out.tasmin.shape, out.tasmax.shape) 
Example #20
Source File: run_xarray.py    From recipy with Apache License 2.0
def open_mfdataset_glob(self):
        """
        Use xarray.open_mfdataset to read multiple netcdf files with a glob
        pattern.
        """
        pattern = os.path.join(self.data_dir, "*PropertiesRhineMeuse30min.nc")
        xarray.open_mfdataset(pattern) 
Example #21
Source File: run_xarray.py    From recipy with Apache License 2.0
def open_mfdataset_list(self):
        """
        Use xarray.open_mfdataset to read multiple netcdf files from a list.
        """
        file_names = [os.path.join(self.data_dir, f)
                      for f in ('soilPropertiesRhineMeuse30min.nc',
                                'topoPropertiesRhineMeuse30min.nc')]
        xarray.open_mfdataset(file_names) 
Example #22
Source File: era5.py    From DLWP with MIT License
def open(self, **dataset_kwargs):
        """
        Open an xarray multi-file Dataset for the processed files. Set the variables and levels with the instance
        set_variables and set_levels methods. Once opened, this Dataset is accessible by self.Dataset.

        :param dataset_kwargs: kwargs passed to xarray.open_mfdataset()
        """
        if len(self.dataset_variables) == 0:
            raise ValueError('set the variables to open with the set_variables() method')
        if len(self.dataset_levels) == 0:
            raise ValueError('set the pressure levels to open with the set_levels() method')
        self._set_file_names()
        self.Dataset = xr.open_mfdataset(self.raw_files, **dataset_kwargs)
        self.dataset_dates = self.Dataset['time'] 
Example #23
Source File: track_model_results.py    From oggm with BSD 3-Clause "New" or "Revised" License
def track_start_volume(self, gdirs):
        self.cfg_init()
        path = os.path.join(cfg.PATHS['working_dir'], 'run_output_tstar*.nc')
        with xr.open_mfdataset(path, combine='by_coords') as ds:
            return float(ds.volume.sum(dim='rgi_id').isel(time=0)) * 1e-9 
Example #24
Source File: track_model_results.py    From oggm with BSD 3-Clause "New" or "Revised" License
def track_tstar_run_final_volume(self, gdirs):
        self.cfg_init()
        path = os.path.join(cfg.PATHS['working_dir'], 'run_output_tstar*.nc')
        with xr.open_mfdataset(path, combine='by_coords') as ds:
            return float(ds.volume.sum(dim='rgi_id').isel(time=-1)) * 1e-9 
Example #25
Source File: track_model_results.py    From oggm with BSD 3-Clause "New" or "Revised" License
def track_1990_run_final_volume(self, gdirs):
        self.cfg_init()
        path = os.path.join(cfg.PATHS['working_dir'], 'run_output_pd*.nc')
        with xr.open_mfdataset(path, combine='by_coords') as ds:
            return float(ds.volume.sum(dim='rgi_id').isel(time=-1)) * 1e-9 
Example #26
Source File: NetCDFField.py    From skinnywms with Apache License 2.0
def get_fields(self):
        with closing(xr.open_mfdataset(self.path)) as ds:
            return self._get_fields(ds) 
Example #27
Source File: track_model_results.py    From oggm with BSD 3-Clause "New" or "Revised" License
def track_avg_prcp_full_period(self, gdirs):
        self.cfg_init()
        path = os.path.join(cfg.PATHS['working_dir'], 'climate_input*.nc')
        with xr.open_mfdataset(path, combine='by_coords') as ds:
            return float(ds.prcp.mean()) 
Example #28
Source File: data_loader.py    From aospy with Apache License 2.0
def _load_data_from_disk(file_set, preprocess_func=lambda ds: ds,
                         data_vars='minimal', coords='minimal',
                         grid_attrs=None, **kwargs):
    """Load a Dataset from a list or glob-string of files.

    Datasets from files are concatenated along time,
    and all grid attributes are renamed to their aospy internal names.

    Parameters
    ----------
    file_set : list or str
        List of paths to files or glob-string
    preprocess_func : function (optional)
        Custom function to call before applying any aospy logic
        to the loaded dataset
    data_vars : str (default 'minimal')
        Mode for concatenating data variables in call to ``xr.open_mfdataset``
    coords : str (default 'minimal')
        Mode for concatenating coordinate variables in call to
        ``xr.open_mfdataset``.
    grid_attrs : dict
        Overriding dictionary of grid attributes mapping aospy internal
        names to names of grid attributes used in a particular model.

    Returns
    -------
    Dataset

    """
    apply_preload_user_commands(file_set)
    func = _preprocess_and_rename_grid_attrs(preprocess_func, grid_attrs,
                                             **kwargs)
    return xr.open_mfdataset(
        file_set,
        preprocess=func,
        combine='by_coords',
        decode_times=False,
        decode_coords=False,
        mask_and_scale=True,
        data_vars=data_vars,
        coords=coords,
    ) 
Example #29
Source File: data.py    From psyplot with GNU General Public License v2.0
def open_mfdataset(paths, decode_cf=True, decode_times=True,
                   decode_coords=True, engine=None, gridfile=None,
                   t_format=None, **kwargs):
    """
    Open multiple files as a single dataset.

    This function is essentially the same as the :func:`xarray.open_mfdataset`
    function but (as the :func:`open_dataset`) supports additional decoding
    and the ``'gdal'`` engine.
    You can further specify the `t_format` parameter to get the time
    information from the file names and use the result to concatenate the
    files along the time dimension.

    Parameters
    ----------
    %(xarray.open_mfdataset.parameters.no_engine)s
    %(open_dataset.parameters.engine)s
    %(get_tdata.parameters.t_format)s
    %(CFDecoder.decode_coords.parameters.gridfile)s

    Returns
    -------
    xarray.Dataset
        The dataset that contains the variables from `paths`"""
    if t_format is not None or engine == 'gdal':
        if isinstance(paths, six.string_types):
            paths = sorted(glob(paths))
        if not paths:
            raise IOError('no files to open')
    if t_format is not None:
        time, paths = get_tdata(t_format, paths)
        kwargs['concat_dim'] = 'time'
        if xr_version > (0, 11):
            kwargs['combine'] = 'nested'
    if engine == 'gdal':
        from psyplot.gdal_store import GdalStore
        paths = list(map(GdalStore, paths))
        engine = None
        kwargs['lock'] = False

    ds = xr.open_mfdataset(
        paths, decode_cf=decode_cf, decode_times=decode_times, engine=engine,
        decode_coords=False, **kwargs)
    if decode_cf:
        ds = CFDecoder.decode_ds(ds, gridfile=gridfile,
                                 decode_coords=decode_coords,
                                 decode_times=decode_times)
    ds.psy._concat_dim = kwargs.get('concat_dim')
    if t_format is not None:
        ds['time'] = time
    return ds 
Example #30
Source File: netcdf_data.py    From Ocean-Data-Map-Project with GNU General Public License v3.0
def __enter__(self):
        if not self.meta_only:
            # Don't decode times since we do it anyway.
            decode_times = False

            if self._nc_files:
                try:
                    self.dataset = xarray.open_mfdataset(
                        self._nc_files,
                        decode_times=decode_times,
                        chunks=200,
                    )
                except xarray.core.variable.MissingDimensionsError:
                    # xarray won't open FVCOM files due to dimension/coordinate/variable label
                    # duplication issue, so fall back to using netCDF4.Dataset()
                    self.dataset = netCDF4.MFDataset(self._nc_files)
            else:
                try:
                    # Handle list of URLs for staggered grid velocity field datasets
                    url = self.url if isinstance(self.url, list) else [self.url]
                    # This will raise a FutureWarning for xarray>=0.12.2.
                    # That warning should be resolvable by changing to:
                    # fields = xarray.open_mfdataset(self.url, combine="by_coords", decode_times=decode_times)
                    fields = xarray.open_mfdataset(url, decode_times=decode_times)
                except xarray.core.variable.MissingDimensionsError:
                    # xarray won't open FVCOM files due to dimension/coordinate/variable label
                    # duplication issue, so fall back to using netCDF4.Dataset()
                    fields = netCDF4.Dataset(self.url)
                if getattr(self._dataset_config, "geo_ref", {}):
                    drop_variables = self._dataset_config.geo_ref.get("drop_variables", [])
                    geo_refs = xarray.open_dataset(
                        self._dataset_config.geo_ref["url"], drop_variables=drop_variables,
                    )
                    fields = fields.merge(geo_refs)
                self.dataset = fields

            if self._grid_angle_file_url:
                angle_file = xarray.open_dataset(
                    self._grid_angle_file_url,
                    drop_variables=[self._dataset_config.lat_var_key, self._dataset_config.lon_var_key]
                )
                self.dataset = self.dataset.merge(angle_file)
                angle_file.close()

            self._dataset_open = True

        return self