Python dask.array.compute() Examples
The following are 30 code examples of dask.array.compute(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module dask.array, or try the search function.
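As a quick orientation before the examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of what compute() does: operations on dask arrays only build a lazy task graph, and calling .compute() executes that graph and returns a concrete NumPy result.

import dask.array as da

# Build a lazy 100x100 array; nothing is computed yet.
x = da.ones((100, 100), chunks=(50, 50))
total = x.sum()         # still lazy: a dask array wrapping a task graph
print(total.compute())  # executes the graph, returns a NumPy scalar: 10000.0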
Example #1
Source File: k_means.py From dask-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def fit(self, X, y=None):
    X = self._check_array(X)
    labels, centroids, inertia, n_iter = k_means(
        X,
        self.n_clusters,
        oversampling_factor=self.oversampling_factor,
        random_state=self.random_state,
        init=self.init,
        return_n_iter=True,
        max_iter=self.max_iter,
        init_max_iter=self.init_max_iter,
        tol=self.tol,
    )
    self.cluster_centers_ = centroids
    self.labels_ = labels
    self.inertia_ = inertia.compute().item()
    self.n_iter_ = n_iter
    self.n_features_in_ = X.shape[1]
    return self
Example #2
Source File: geometry.py From pyresample with GNU Lesser General Public License v3.0 | 6 votes |
def get_geostationary_angle_extent(geos_area):
    """Get the max earth (vs space) viewing angles in x and y."""
    # get some projection parameters
    a, b = proj4_radius_parameters(geos_area.proj_dict)
    req = a / 1000.0
    rp = b / 1000.0
    h = geos_area.proj_dict['h'] / 1000.0 + req

    # compute some constants
    aeq = 1 - req ** 2 / (h ** 2)
    ap_ = 1 - rp ** 2 / (h ** 2)

    # generate points around the north hemisphere in satellite projection
    # make it a bit smaller so that we stay inside the valid area
    xmax = np.arccos(np.sqrt(aeq))
    ymax = np.arccos(np.sqrt(ap_))
    return xmax, ymax
Example #3
Source File: xarr.py From pyresample with GNU Lesser General Public License v3.0 | 6 votes |
def query_no_distance(target_lons, target_lats, valid_output_index,
                      kdtree, neighbours, epsilon, radius):
    """Query the kdtree. No distances are returned."""
    voi = valid_output_index
    voir = da.ravel(voi)
    target_lons_valid = da.ravel(target_lons)[voir]
    target_lats_valid = da.ravel(target_lats)[voir]

    coords = lonlat2xyz(target_lons_valid, target_lats_valid)
    distance_array, index_array = kdtree.query(
        coords.compute(),
        k=neighbours,
        eps=epsilon,
        distance_upper_bound=radius)

    return index_array
Example #4
Source File: xarr.py From pyresample with GNU Lesser General Public License v3.0 | 6 votes |
def _get_corner_dask(stride, valid, in_x, in_y, index_array):
    """Get closest set of coordinates from the *valid* locations."""
    # Find the closest valid pixels, if any
    idxs = np.argmax(valid, axis=1)
    # Check which of these were actually valid
    invalid = np.invert(np.max(valid, axis=1))

    # idxs = idxs.compute()
    index_array = index_array.compute()

    # Replace invalid points with np.nan
    x__ = in_x[stride, idxs]  # TODO: daskify
    x__ = da.where(invalid, np.nan, x__)
    y__ = in_y[stride, idxs]  # TODO: daskify
    y__ = da.where(invalid, np.nan, y__)
    idx = index_array[stride, idxs]  # TODO: daskify

    return x__, y__, idx
Example #5
Source File: catalog.py From nbodykit with GNU General Public License v3.0 | 6 votes |
def persist(self, columns=None):
    """
    Return a CatalogSource, where the selected columns are
    computed and persisted in memory.
    """
    import dask.array as da
    if columns is None:
        columns = self.columns

    r = {}
    for key in columns:
        r[key] = self[key]

    r = da.compute(r)[0]  # particularity of dask

    from nbodykit.source.catalog.array import ArrayCatalog
    c = ArrayCatalog(r, comm=self.comm)
    c.attrs.update(self.attrs)
    return c
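The r = da.compute(r)[0] line above relies on dask traversing containers: compute() accepts nested lists/dicts of dask collections and always returns a tuple with one entry per positional argument, hence the [0]. A minimal sketch of that behavior (toy arrays, not nbodykit code):

import dask.array as da

r = {'x': da.ones(3, chunks=3), 'y': da.zeros(3, chunks=3)}
(computed,) = da.compute(r)  # one positional argument -> one-element tuple
print(computed['x'])         # array([1., 1., 1.])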
Example #6
Source File: test_bilinear.py From pyresample with GNU Lesser General Public License v3.0 | 6 votes |
def test_compute_indices(self, mock_setattr):
    """Test running .compute() for indices."""
    from pyresample.bilinear.xarr import (XArrayResamplerBilinear,
                                          CACHE_INDICES)
    resampler = XArrayResamplerBilinear(self.source_def, self.target_def,
                                        self.radius)

    # Set indices to Numpy arrays
    for idx in CACHE_INDICES:
        setattr(resampler, idx, np.array([]))
    resampler._compute_indices()
    # None of the indices should have been reassigned
    mock_setattr.assert_not_called()

    # Set indices to a Mock object
    arr = mock.MagicMock()
    for idx in CACHE_INDICES:
        setattr(resampler, idx, arr)
    resampler._compute_indices()
    # All the indices should have been reassigned
    self.assertEqual(mock_setattr.call_count, len(CACHE_INDICES))
    # The compute should have been called the same number of times
    self.assertEqual(arr.compute.call_count, len(CACHE_INDICES))
Example #7
Source File: test_catalog.py From nbodykit with GNU General Public License v3.0 | 6 votes |
def test_slice(comm):
    source = UniformCatalog(nbar=2e-4, BoxSize=512., seed=42, comm=comm)
    source['NZ'] = 1

    # slice a subset
    subset = source[:10]
    assert all(col in subset for col in source.columns)
    assert isinstance(subset, source.__class__)
    assert len(subset) == 10
    assert_array_equal(subset['Position'], source['Position'].compute()[:10])

    subset = source[[0, 1, 2]]
    assert_array_equal(subset['Position'], source['Position'].compute()[[0, 1, 2]])

    # cannot slice with list of floats
    with pytest.raises(KeyError):
        subset = source[[0.0, 1.0, 2.0]]

    # missing column
    with pytest.raises(KeyError):
        col = source['BAD_COLUMN']
Example #8
Source File: xarr.py From pyresample with GNU Lesser General Public License v3.0 | 6 votes |
def _create_resample_kdtree(self):
    """Set up kd tree on input."""
    # Get input information
    valid_input_index, source_lons, source_lats = \
        _get_valid_input_index_dask(self.source_geo_def,
                                    self.target_geo_def,
                                    self.reduce_data,
                                    self.radius_of_influence)

    # FIXME: Is dask smart enough to only compute the pixels we end up
    #        using even with this complicated indexing
    input_coords = lonlat2xyz(source_lons, source_lats)
    valid_input_index = da.ravel(valid_input_index)
    input_coords = input_coords[valid_input_index, :]
    input_coords = input_coords.compute()
    # Build kd-tree on input
    input_coords = input_coords.astype(np.float)
    valid_input_index, input_coords = da.compute(valid_input_index,
                                                 input_coords)
    return valid_input_index, KDTree(input_coords)
Example #9
Source File: __init__.py From satpy with GNU General Public License v3.0 | 6 votes |
def compute_writer_results(results):
    """Compute all the given dask graphs `results` so that the files are
    saved.

    Args:
        results (iterable): Iterable of dask graphs resulting from calls
            to `scn.save_datasets(..., compute=False)`

    """
    if not results:
        return

    sources, targets, delayeds = split_results(results)

    # one or more writers have targets that we need to close in the future
    if targets:
        delayeds.append(da.store(sources, targets, compute=False))

    if delayeds:
        da.compute(delayeds)

    if targets:
        for target in targets:
            if hasattr(target, 'close'):
                target.close()
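The function above defers the actual writing by collecting delayed stores and computing them all at once. A standalone sketch of that da.store(..., compute=False) pattern, using a toy source and target rather than satpy's writers:

import dask.array as da
import numpy as np

source = da.ones((4, 4), chunks=(2, 2))
target = np.zeros((4, 4))                         # anything supporting __setitem__
stored = da.store(source, target, compute=False)  # delayed; nothing written yet
da.compute(stored)                                # chunks are now written into target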
Example #10
Source File: test_bilinear.py From pyresample with GNU Lesser General Public License v3.0 | 6 votes |
def test_get_valid_input_index_dask(self):
    """Test finding valid indices for reduced input data."""
    from pyresample.bilinear.xarr import _get_valid_input_index_dask

    # Do not reduce data
    vii, lons, lats = _get_valid_input_index_dask(self.source_def,
                                                  self.target_def,
                                                  False, self.radius)
    self.assertEqual(vii.shape, (self.source_def.size, ))
    self.assertTrue(vii.dtype == np.bool)
    # No data has been reduced, whole input is used
    self.assertTrue(vii.compute().all())

    # Reduce data
    vii, lons, lats = _get_valid_input_index_dask(self.source_def,
                                                  self.target_def,
                                                  True, self.radius)
    # 2700 valid input points
    self.assertEqual(vii.compute().sum(), 2700)
Example #11
Source File: test_bilinear.py From pyresample with GNU Lesser General Public License v3.0 | 6 votes |
def test_solve_quadratic(self):
    """Test solving quadratic equation."""
    from pyresample.bilinear.xarr import (_solve_quadratic_dask,
                                          _calc_abc_dask)

    res = _solve_quadratic_dask(1, 0, 0).compute()
    self.assertEqual(res, 0.0)
    res = _solve_quadratic_dask(1, 2, 1).compute()
    self.assertTrue(np.isnan(res))
    res = _solve_quadratic_dask(1, 2, 1, min_val=-2.).compute()
    self.assertEqual(res, -1.0)

    # Test that small adjustments work
    pt_1, pt_2, pt_3, pt_4 = self.pts_vert_parallel
    pt_1 = self.pts_vert_parallel[0].copy()
    pt_1[0][0] += 1e-7
    res = _calc_abc_dask(pt_1, pt_2, pt_3, pt_4, 0.0, 0.0)
    res = _solve_quadratic_dask(res[0], res[1], res[2]).compute()
    self.assertAlmostEqual(res[0], 0.5, 5)

    res = _calc_abc_dask(pt_1, pt_3, pt_2, pt_4, 0.0, 0.0)
    res = _solve_quadratic_dask(res[0], res[1], res[2]).compute()
    self.assertAlmostEqual(res[0], 0.5, 5)
Example #12
Source File: test_bilinear.py From pyresample with GNU Lesser General Public License v3.0 | 5 votes |
def test_lonlat2xyz(self):
    """Test conversion from geographic to cartesian 3D coordinates."""
    from pyresample.bilinear.xarr import lonlat2xyz
    from pyresample import CHUNK_SIZE

    lons, lats = self.target_def.get_lonlats(chunks=CHUNK_SIZE)
    res = lonlat2xyz(lons, lats)
    self.assertEqual(res.shape, (self.target_def.size, 3))
    vals = [3188578.91069278, -612099.36103276, 5481596.63569999]
    self.assertTrue(np.allclose(res.compute()[0, :], vals))
Example #13
Source File: test_bucket.py From pyresample with GNU Lesser General Public License v3.0 | 5 votes |
def test_get_average(self):
    """Test averaging bucket resampling."""
    data = da.from_array(np.array([[2., 4.], [3., np.nan]]),
                         chunks=self.chunks)

    # Without pre-calculated indices
    with dask.config.set(scheduler=CustomScheduler(max_computes=0)):
        result = self.resampler.get_average(data)
    result = result.compute()
    self.assertEqual(np.nanmax(result), 3.)
    self.assertTrue(np.any(np.isnan(result)))

    # Use a fill value other than np.nan
    with dask.config.set(scheduler=CustomScheduler(max_computes=0)):
        result = self.resampler.get_average(data, fill_value=-1)
    result = result.compute()
    self.assertEqual(np.max(result), 3.)
    self.assertEqual(np.min(result), -1)
    self.assertFalse(np.any(np.isnan(result)))

    # Test masking all-NaN bins
    data = da.from_array(np.array([[np.nan, np.nan], [np.nan, np.nan]]),
                         chunks=self.chunks)
    with dask.config.set(scheduler=CustomScheduler(max_computes=0)):
        result = self.resampler.get_average(data, mask_all_nan=True)
    self.assertTrue(np.all(np.isnan(result)))

    # By default all-NaN bins have a value of NaN
    with dask.config.set(scheduler=CustomScheduler(max_computes=0)):
        result = self.resampler.get_average(data)
    self.assertTrue(np.all(np.isnan(result)))
Example #14
Source File: test_bucket.py From pyresample with GNU Lesser General Public License v3.0 | 5 votes |
def test_resample_bucket_fractions(self):
    """Test fraction calculations for categorical data."""
    data = da.from_array(np.array([[2, 4], [2, 2]]),
                         chunks=self.chunks)
    categories = [1, 2, 3, 4]
    with dask.config.set(scheduler=CustomScheduler(max_computes=0)):
        result = self.resampler.get_fractions(data, categories=categories)
    self.assertEqual(set(categories), set(result.keys()))
    res = result[1].compute()
    self.assertTrue(np.nanmax(res) == 0.)
    res = result[2].compute()
    self.assertTrue(np.nanmax(res) == 1.)
    self.assertTrue(np.nanmin(res) == 0.5)
    res = result[3].compute()
    self.assertTrue(np.nanmax(res) == 0.)
    res = result[4].compute()
    self.assertTrue(np.nanmax(res) == 0.5)
    self.assertTrue(np.nanmin(res) == 0.)
    # There should be NaN values
    self.assertTrue(np.any(np.isnan(res)))

    # Use a fill value
    with dask.config.set(scheduler=CustomScheduler(max_computes=0)):
        result = self.resampler.get_fractions(data, categories=categories,
                                              fill_value=-1)
    # There should not be any NaN values
    for i in categories:
        res = result[i].compute()
        self.assertFalse(np.any(np.isnan(res)))
        self.assertTrue(np.min(res) == -1)

    # No categories given, need to compute the data once to get
    # the categories
    with dask.config.set(scheduler=CustomScheduler(max_computes=1)):
        result = self.resampler.get_fractions(data, categories=None)
Example #15
Source File: test_bilinear.py From pyresample with GNU Lesser General Public License v3.0 | 5 votes |
def test_get_sample_from_bil_info(self):
    """Test bilinear interpolation as a whole."""
    from pyresample.bilinear.xarr import XArrayResamplerBilinear

    resampler = XArrayResamplerBilinear(self.source_def, self.target_def,
                                        self.radius)
    _ = resampler.get_bil_info()

    # Sample from data1
    res = resampler.get_sample_from_bil_info(self.data1)
    res = res.compute()
    # Check a couple of values
    self.assertEqual(res.values[1, 1], 1.)
    self.assertTrue(np.isnan(res.values[0, 3]))
    # Check that the values haven't gone down or up a lot
    self.assertAlmostEqual(np.nanmin(res.values), 1.)
    self.assertAlmostEqual(np.nanmax(res.values), 1.)
    # Check that dimensions are the same
    self.assertEqual(res.dims, self.data1.dims)

    # Sample from data1, custom fill value
    res = resampler.get_sample_from_bil_info(self.data1, fill_value=-1.0)
    res = res.compute()
    self.assertEqual(np.nanmin(res.values), -1.)

    # Sample from integer data
    res = resampler.get_sample_from_bil_info(self.data1.astype(np.uint8),
                                             fill_value=None)
    res = res.compute()
    # Six values should be filled with zeros, which is the
    # default fill_value for integer data
    self.assertEqual(np.sum(res == 0), 6)
Example #16
Source File: test_bilinear.py From pyresample with GNU Lesser General Public License v3.0 | 5 votes |
def test_get_bounding_corners_dask(self):
    """Test finding surrounding bounding corners."""
    import dask.array as da
    from pyresample.bilinear.xarr import (_get_input_xy_dask,
                                          _get_bounding_corners_dask)
    from pyresample._spatial_mp import Proj
    from pyresample import CHUNK_SIZE

    proj = Proj(self.target_def.proj_str)
    out_x, out_y = self.target_def.get_proj_coords(chunks=CHUNK_SIZE)
    out_x = da.ravel(out_x)
    out_y = da.ravel(out_y)
    in_x, in_y = _get_input_xy_dask(self.source_def, proj,
                                    da.from_array(self.valid_input_index),
                                    da.from_array(self.index_array))
    pt_1, pt_2, pt_3, pt_4, ia_ = _get_bounding_corners_dask(
        in_x, in_y, out_x, out_y,
        self.neighbours,
        da.from_array(self.index_array))

    self.assertTrue(pt_1.shape == pt_2.shape == pt_3.shape ==
                    pt_4.shape == (self.target_def.size, 2))
    self.assertTrue(ia_.shape == (self.target_def.size, 4))

    # Check which of the locations has four valid X/Y pairs by
    # finding where there are non-NaN values
    res = da.sum(pt_1 + pt_2 + pt_3 + pt_4, axis=1).compute()
    self.assertEqual(np.sum(~np.isnan(res)), 10)
Example #17
Source File: test_bilinear.py From pyresample with GNU Lesser General Public License v3.0 | 5 votes |
def test_get_corner_dask(self):
    """Test finding the closest corners."""
    import dask.array as da
    from pyresample.bilinear.xarr import (_get_corner_dask,
                                          _get_input_xy_dask)
    from pyresample import CHUNK_SIZE
    from pyresample._spatial_mp import Proj

    proj = Proj(self.target_def.proj_str)
    in_x, in_y = _get_input_xy_dask(self.source_def, proj,
                                    da.from_array(self.valid_input_index),
                                    da.from_array(self.index_array))
    out_x, out_y = self.target_def.get_proj_coords(chunks=CHUNK_SIZE)
    out_x = da.ravel(out_x)
    out_y = da.ravel(out_y)

    # Some copy&paste from the code to get the input
    out_x_tile = np.reshape(np.tile(out_x, self.neighbours),
                            (self.neighbours, out_x.size)).T
    out_y_tile = np.reshape(np.tile(out_y, self.neighbours),
                            (self.neighbours, out_y.size)).T
    x_diff = out_x_tile - in_x
    y_diff = out_y_tile - in_y
    stride = np.arange(x_diff.shape[0])

    # Use lower left source pixels for testing
    valid = (x_diff > 0) & (y_diff > 0)
    x_3, y_3, idx_3 = _get_corner_dask(stride, valid, in_x, in_y,
                                       da.from_array(self.index_array))

    self.assertTrue(x_3.shape == y_3.shape == idx_3.shape ==
                    (self.target_def.size, ))
    # Four locations have no data to the lower left of them (the
    # bottom row of the area)
    self.assertEqual(np.sum(np.isnan(x_3.compute())), 4)
Example #18
Source File: k_means.py From dask-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def init_pp(X, n_clusters, random_state):
    """K-means initialization using k-means++.

    This uses scikit-learn's implementation.
    """
    x_squared_norms = row_norms(X, squared=True).compute()
    logger.info("Initializing with k-means++")
    with _timer("initialization of %2d centers" % n_clusters,
                _logger=logger):
        # XXX: Using a private scikit-learn API
        centers = _k_init(
            X, n_clusters, random_state=random_state,
            x_squared_norms=x_squared_norms
        )

    return centers
Example #19
Source File: __init__.py From satpy with GNU General Public License v3.0 | 5 votes |
def save_dataset(self, dataset, filename=None, fill_value=None,
                 compute=True, **kwargs):
    """Save the ``dataset`` to a given ``filename``.

    This method must be overloaded by the subclass.

    Args:
        dataset (xarray.DataArray): Dataset to save using this writer.
        filename (str): Optionally specify the filename to save this
            dataset to. If not provided then `filename` which can be
            provided to the init method will be used and formatted by
            dataset attributes.
        fill_value (int or float): Replace invalid values in the dataset
            with this fill value if applicable to this writer.
        compute (bool): If `True` (default), compute and save the dataset.
            If `False` return either a :doc:`dask:delayed` object or tuple
            of (source, target). See the return values below for more
            information.
        **kwargs: Other keyword arguments for this particular writer.

    Returns:
        Value returned depends on `compute`. If `compute` is `True` then
        the return value is the result of computing a :doc:`dask:delayed`
        object or running :func:`dask.array.store`. If `compute` is
        `False` then the returned value is either a :doc:`dask:delayed`
        object that can be computed using `delayed.compute()` or a tuple
        of (source, target) that should be passed to
        :func:`dask.array.store`. If target is provided then the caller
        is responsible for calling `target.close()` if the target has
        this method.

    """
    raise NotImplementedError(
        "Writer '%s' has not implemented dataset saving" % (self.name, ))
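Given the return contract documented above, a caller using compute=False might dispatch on the result like this. A hedged sketch only: save_deferred is a hypothetical helper, and the writer/dataset arguments are placeholders, not satpy API.

import dask.array as da

def save_deferred(writer, dataset):
    """Handle both documented return types of save_dataset(compute=False)."""
    res = writer.save_dataset(dataset, compute=False)
    if isinstance(res, tuple):        # (source, target) pair
        source, target = res
        da.store(source, target)
        if hasattr(target, 'close'):  # caller is responsible for closing
            target.close()
    else:                             # a dask delayed object
        res.compute()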
Example #20
Source File: __init__.py From satpy with GNU General Public License v3.0 | 5 votes |
def save_dataset(self, dataset, filename=None, fill_value=None,
                 overlay=None, decorate=None, compute=True, **kwargs):
    """Save the ``dataset`` to a given ``filename``.

    This method creates an enhanced image using :func:`get_enhanced_image`.
    The image is then passed to :meth:`save_image`. See both of these
    functions for more details on the arguments passed to this method.

    """
    img = get_enhanced_image(dataset.squeeze(), enhance=self.enhancer,
                             overlay=overlay, decorate=decorate,
                             fill_value=fill_value)
    return self.save_image(img, filename=filename, compute=compute,
                           fill_value=fill_value, **kwargs)
Example #21
Source File: __init__.py From satpy with GNU General Public License v3.0 | 5 votes |
def save_image(self, img, filename=None, compute=True, **kwargs):
    """Save Image object to a given ``filename``.

    Args:
        img (trollimage.xrimage.XRImage): Image object to save to disk.
        filename (str): Optionally specify the filename to save this
            dataset to. It may include string formatting patterns that
            will be filled in by dataset attributes.
        compute (bool): If `True` (default), compute and save the dataset.
            If `False` return either a :doc:`dask:delayed` object or tuple
            of (source, target). See the return values below for more
            information.
        **kwargs: Other keyword arguments to pass to this writer.

    Returns:
        Value returned depends on `compute`. If `compute` is `True` then
        the return value is the result of computing a :doc:`dask:delayed`
        object or running :func:`dask.array.store`. If `compute` is
        `False` then the returned value is either a :doc:`dask:delayed`
        object that can be computed using `delayed.compute()` or a tuple
        of (source, target) that should be passed to
        :func:`dask.array.store`. If target is provided then the caller
        is responsible for calling `target.close()` if the target has
        this method.

    """
    raise NotImplementedError("Writer '%s' has not implemented image saving"
                              % (self.name,))
Example #22
Source File: test_generic_image.py From satpy with GNU General Public License v3.0 | 5 votes |
def test_GenericImageFileHandler(self):
    """Test direct use of the reader."""
    from satpy.readers.generic_image import GenericImageFileHandler
    from satpy.readers.generic_image import mask_image_data

    fname = os.path.join(self.base_dir, 'test_rgba.tif')
    fname_info = {'start_time': self.date}
    ftype_info = {}
    reader = GenericImageFileHandler(fname, fname_info, ftype_info)

    class Foo(object):
        """Mock class for dataset id"""

        def __init__(self):
            self.name = 'image'

    foo = Foo()

    self.assertTrue(reader.file_content)
    self.assertEqual(reader.finfo['filename'], fname)
    self.assertEqual(reader.finfo['start_time'], self.date)
    self.assertEqual(reader.finfo['end_time'], self.date)
    self.assertEqual(reader.area, self.area_def)
    self.assertEqual(reader.get_area_def(None), self.area_def)
    self.assertEqual(reader.start_time, self.date)
    self.assertEqual(reader.end_time, self.date)

    dataset = reader.get_dataset(foo, None)
    self.assertTrue(isinstance(dataset, xr.DataArray))
    self.assertTrue('crs' in dataset.attrs)
    self.assertTrue('transform' in dataset.attrs)
    self.assertTrue(np.all(np.isnan(dataset.data[:, :10, :10].compute())))

    # Test masking of floats
    data = self.scn['rgba']
    self.assertRaises(ValueError, mask_image_data, data / 255.)
    data = data.astype(np.uint32)
    self.assertTrue(data.bands.size == 4)
    data = mask_image_data(data)
    self.assertTrue(data.bands.size == 3)
Example #23
Source File: impute.py From dask-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _fit_array(self, X):
    if self.strategy not in {"mean", "constant"}:
        msg = "Can only use strategy='mean' or 'constant' with Dask Array."
        raise ValueError(msg)

    if self.strategy == "mean":
        statistics = da.nanmean(X, axis=0).compute()
    else:
        statistics = np.full(X.shape[1], self.fill_value, dtype=X.dtype)

    (self.statistics_,) = da.compute(statistics)
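Note the final line: da.compute() passes plain (non-dask) objects through unchanged, which is why the same one-element unpacking works whether statistics is already a NumPy array (the "constant" branch) or was computed from a dask array. A tiny sketch of that pass-through behavior:

import dask.array as da
import numpy as np

(a,) = da.compute(np.arange(3))            # non-dask input is returned as-is
(b,) = da.compute(da.arange(3, chunks=3))  # dask input is evaluated
print(a, b)  # [0 1 2] [0 1 2]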
Example #24
Source File: impute.py From dask-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _fit_frame(self, X):
    if self.strategy == "mean":
        avg = X.mean(axis=0).values
    elif self.strategy == "median":
        avg = X.quantile().values
    elif self.strategy == "constant":
        avg = np.full(len(X.columns), self.fill_value)
    else:
        avg = [X[col].value_counts().nlargest(1).index
               for col in X.columns]
        avg = np.concatenate(*dask.compute(avg))

    self.statistics_ = pd.Series(dask.compute(avg)[0], index=X.columns)
Example #25
Source File: k_means.py From dask-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _check_array(self, X):
    if isinstance(X, pd.DataFrame):
        X = X.values

    if isinstance(X, dd.DataFrame):
        X = X.to_dask_array(lengths=True)

    X = check_array(
        X,
        accept_dask_dataframe=False,
        accept_unknown_chunks=False,
        accept_sparse=False,
        remove_zero_chunks=True,
    )

    if X.dtype == "int32":
        X = X.astype("float32")
    elif X.dtype == "int64":
        X = X.astype("float64")

    if isinstance(X, np.ndarray):
        X = da.from_array(X, chunks=(max(1, len(X) // cpu_count()),
                                     X.shape[-1]))

    bad = (da.isnull(X).any(), da.isinf(X).any())
    if any(*compute(bad)):
        msg = (
            "Input contains NaN, infinity or a value too large for "
            "dtype('float64')."
        )
        raise ValueError(msg)
    return X
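The bad = (...) / compute(bad) idiom above batches two validity checks into a single evaluation of the graph instead of two separate .compute() calls over X. A stripped-down sketch of the same idiom (toy data, not dask-ml code):

import dask.array as da
from dask import compute

X = da.random.random((100, 4), chunks=(50, 4))
bad = (da.isnull(X).any(), da.isinf(X).any())  # two lazy boolean scalars
has_nan, has_inf = compute(*bad)               # both evaluated in one pass over X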
Example #26
Source File: _distributed.py From scanpy with BSD 3-Clause "New" or "Revised" License | 5 votes |
def materialize_as_ndarray(a):
    """Convert distributed arrays to ndarrays."""
    if type(a) in (list, tuple):
        if da is not None and any(isinstance(arr, da.Array) for arr in a):
            return da.compute(*a, sync=True)
        return tuple(np.asarray(arr) for arr in a)
    return np.asarray(a)
Example #27
Source File: test_catalog.py From nbodykit with GNU General Public License v3.0 | 5 votes |
def test_getitem_columns(comm):
    source = UniformCatalog(nbar=2e-4, BoxSize=512., seed=42, comm=comm)

    # bad column name
    with pytest.raises(KeyError):
        subset = source[['Position', 'BAD_COLUMN']]

    subset = source[['Position']]
    for col in subset:
        assert_array_equal(subset[col].compute(), source[col].compute())
Example #28
Source File: catalog.py From nbodykit with GNU General Public License v3.0 | 5 votes |
def __getitem__(self, key):
    # compute dask index b/c they are not fully supported
    if isinstance(key, da.Array):
        key = self.catalog.compute(key)

    # base class behavior
    d = da.Array.__getitem__(self, key)

    # return a ColumnAccessor (okay b/c __setitem__ checks for
    # circular references)
    toret = ColumnAccessor(self.catalog, d)
    toret.attrs.update(self.attrs)
    return toret
Example #29
Source File: catalog.py From nbodykit with GNU General Public License v3.0 | 5 votes |
def compute(self):
    return self.catalog.compute(self)
Example #30
Source File: catalog.py From nbodykit with GNU General Public License v3.0 | 5 votes |
def __str__(self):
    r = da.Array.__str__(self)
    if len(self) > 0:
        r = r + " first: %s" % str(self[0].compute())
    if len(self) > 1:
        r = r + " last: %s" % str(self[-1].compute())
    return r