Python dask.distributed.LocalCluster() Examples
The following are 6 code examples of dask.distributed.LocalCluster(), taken from open-source projects; the source file, project, and license for each example are noted above it.
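Before the project examples, here is a minimal sketch of the common pattern these snippets share: start a LocalCluster, attach a Client to it, run some work, then shut both down. The worker counts and array sizes below are arbitrary illustrations, not values taken from the examples.

from dask.distributed import Client, LocalCluster
import dask.array as da

if __name__ == "__main__":
    # Start a local cluster and connect a client to it.
    cluster = LocalCluster(n_workers=2, threads_per_worker=1)
    client = Client(cluster)
    try:
        # Any dask computation now runs on the local cluster.
        x = da.random.random((10_000, 10_000), chunks=(1_000, 1_000))
        print(x.sum().compute())
    finally:
        client.close()
        cluster.close()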
Example #1
Source File: FEMSolver.py From florence with MIT License
def LaunchDaskDistributedClient(self, scheduler_ip=None, scheduler_port=None):

    if self.parallel and self.parallel_model == "dask" and self.is_dask_scheduler_initialised is False:

        from multiprocessing.pool import ThreadPool
        try:
            import dask
            from dask.distributed import Client, LocalCluster
        except ImportError:
            raise ImportError("dask is not installed. Install it using 'pip install dask[complete]'")

        dask.config.set(pool=ThreadPool(self.no_of_cpu_cores))

        # INITIALISE CLUSTER
        if scheduler_ip is None:
            cluster = LocalCluster(n_workers=self.no_of_cpu_cores, processes=False, threads_per_worker=None)
            client = Client(cluster)
        else:
            client = Client(scheduler_ip)

        self.dask_client = client
        self.is_dask_scheduler_initialised = True
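Note that this example builds the cluster with processes=False, so the requested workers run as threads inside the calling process; when a scheduler_ip is supplied, LocalCluster is skipped entirely and the Client connects to that existing scheduler instead.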
Example #2
Source File: iblpipe.py From ibllib with MIT License
def create_cluster(self):
    self.cluster = LocalCluster(n_workers=1, processes=False, silence_logs=logging.DEBUG)
    self.client = Client(self.cluster)
Example #3
Source File: executor.py From dagster with Apache License 2.0
def build_dict(self, pipeline_name):
    '''Returns a dict we can use for kwargs passed to dask client instantiation.

    Intended to be used like:

    with dask.distributed.Client(**cfg.build_dict()) as client:
        << use client here >>

    '''
    if self.cluster_type in ['yarn', 'pbs', 'moab', 'sge', 'lsf', 'slurm', 'oar', 'kube']:
        dask_cfg = {'name': pipeline_name}
    else:
        dask_cfg = {}

    if self.cluster_configuration:
        for k, v in self.cluster_configuration.items():
            dask_cfg[k] = v

    # if address is set, don't add LocalCluster args
    # context: https://github.com/dask/distributed/issues/3313
    if (self.cluster_type == 'local') and ('address' not in dask_cfg):
        # We set threads_per_worker because Dagster is not thread-safe. Even though
        # processes=True by default, there is a clever piece of machinery
        # (dask.distributed.deploy.local.nprocesses_nthreads) that automagically makes execution
        # multithreaded by default when the number of available cores is greater than 4.
        # See: https://github.com/dagster-io/dagster/issues/2181
        # We may want to try to figure out a way to enforce this on remote Dask clusters against
        # which users run Dagster workloads.
        dask_cfg['threads_per_worker'] = 1

    return dask_cfg
Example #4
Source File: prune.py From pySCENIC with GNU General Public License v3.0
def _prepare_client(client_or_address, num_workers):
    """
    :param client_or_address: one of:
        * None
        * verbatim: 'local'
        * string address
        * a Client instance
    :return: a tuple: (Client instance, shutdown callback function).
    :raises: ValueError if no valid client input was provided.
    """
    # Credits to Thomas Moerman (arboreto package):
    # https://github.com/tmoerman/arboreto/blob/482ce8598da5385eb0e01a50362cb2b1e6f66a41/arboreto/algo.py#L145-L191
    if client_or_address is None or str(client_or_address).lower() == 'local':
        local_cluster = LocalCluster(n_workers=num_workers, threads_per_worker=1)
        client = Client(local_cluster)

        def close_client_and_local_cluster(verbose=False):
            if verbose:
                LOGGER.info('shutting down client and local cluster')
            client.close()
            local_cluster.close()

        return client, close_client_and_local_cluster
    elif isinstance(client_or_address, str) and client_or_address.lower() != 'local':
        client = Client(client_or_address)

        def close_client(verbose=False):
            if verbose:
                LOGGER.info('shutting down client')
            client.close()

        return client, close_client
    elif isinstance(client_or_address, Client):
        def close_dummy(verbose=False):
            if verbose:
                LOGGER.info('not shutting down client, client was created externally')
            return None

        return client_or_address, close_dummy
    else:
        raise ValueError("Invalid client specified {}".format(str(client_or_address)))
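A hypothetical usage sketch for the (client, shutdown callback) pair returned above; the call site and variable names here are illustrative and not taken from pySCENIC:

client, shutdown_callback = _prepare_client(None, num_workers=4)
try:
    pass  # submit Dask work through `client` here
finally:
    shutdown_callback(verbose=True)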
Example #5
Source File: hpc-grnboost.py From pySCENIC with GNU General Public License v3.0
def run(cfg_fname):
    # Read configuration file.
    cfg = ConfigParser()
    cfg.read(cfg_fname)

    # Set logging level.
    logging_debug_opt = cfg["params"]["debug"].lower().strip() in {"yes", "true", "y"}
    LOGGER.addHandler(create_logging_handler(logging_debug_opt))
    LOGGER.setLevel(logging.DEBUG)

    # Derive file names.
    #mtx_fnames = list(mapcat(glob.glob, cfg['data']['mtx_fnames'].split(";")))
    mtx_fnames = glob.glob(cfg['data']['mtx_fnames'])
    tfs = load_tf_names(cfg['data']['tfs_fname'])

    # Derive cluster information.
    not_cluster_ip = 'scheduler_ip' not in cfg['params']
    if not_cluster_ip:
        local_cluster = LocalCluster(n_workers=int(cfg['params']['num_cores']), threads_per_worker=1)
        client = Client(local_cluster)
    else:
        class DummyClient:
            def close(self):
                pass
        local_cluster = DummyClient()
        client = cfg['params']['scheduler_ip']

    # Remove fnames that already have a corresponding results file.
    def add_output(fname, out_folder):
        basename = os.path.splitext(os.path.basename(fname))[0]
        return fname, os.path.join(out_folder, "{}.net.csv".format(basename))

    out_folder = cfg['data']['out_folder']
    for in_fname, out_fname in filter(lambda t: not os.path.exists(t[1]),
                                      map(partial(add_output, out_folder=out_folder), mtx_fnames)):
        LOGGER.info("Running GRNboost for {}.".format(in_fname))
        try:
            process(in_fname, tfs, out_fname, client)
        except ValueError as e:
            LOGGER.error("Unable to process {} because of \"{}\". Stacktrace:".format(in_fname, str(e)))
            LOGGER.error(traceback.format_exc())

    if not_cluster_ip:
        client.close()
        local_cluster.close()

    print("{} - Done.".format(datetime.datetime.now()))
Example #6
Source File: sum.py From ml-on-gcp with Apache License 2.0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--xdim', type=int, default=500000)
    parser.add_argument('--ydim', type=int, default=500000)
    parser.add_argument('--x_chunk_size', type=int, default=10000)
    parser.add_argument('--y_chunk_size', type=int, default=10000)
    parser.add_argument('--use_gpus_only', action="store_true")
    parser.add_argument('--n_gpus', type=int, default=1)
    parser.add_argument('--use_cpus_only', action="store_true")
    parser.add_argument('--n_cpu_sockets', type=int, default=1)
    parser.add_argument('--n_cpu_cores_per_socket', type=int, default=1)
    parser.add_argument('--use_distributed_dask', action="store_true")
    args = parser.parse_args()

    sched_ip, sched_uri = get_scheduler_info()

    if args.use_distributed_dask:
        print('Using Distributed Dask')
        client = Client(sched_uri)
    elif args.use_gpus_only:
        print('Using GPUs and Local Dask')
        cluster = LocalCUDACluster(ip=sched_ip, n_workers=args.n_gpus)
        client = Client(cluster)
    elif args.use_cpus_only:
        print('Using CPUs and Local Dask')
        cluster = LocalCluster(ip=sched_ip,
                               n_workers=args.n_cpu_sockets,
                               threads_per_worker=args.n_cpu_cores_per_socket)
        client = Client(cluster)
    else:
        print("Exiting...")
        sys.exit(-1)

    start = time.time()

    if args.use_gpus_only:
        print('Allocating and initializing arrays using GPU memory with CuPY')
        rs = da.random.RandomState(RandomState=cupy.random.RandomState)
    elif args.use_cpus_only:
        print('Allocating and initializing arrays using CPU memory')
        rs = da.random.RandomState()

    x = create_data(rs, args.xdim, args.ydim, args.x_chunk_size, args.y_chunk_size)

    print('Array size: {:.2f} TB. Computing parallel sum . . .'.format(x.nbytes / 1e12))
    run(x)

    end = time.time()
    delta = (end - start)

    print('Processing complete.')
    print('Wall time create data + computation time: {:10.8f} seconds'.format(delta))

    del x