Python dask.distributed.LocalCluster() Examples

The following are 6 code examples of dask.distributed.LocalCluster(), drawn from open-source projects. The source project, file, and license for each example are noted in its header. You may also want to look at the other available functions and classes of the dask.distributed module.
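Every example below follows the same basic pattern: construct a LocalCluster (a scheduler plus workers on the local machine) and hand it to a Client, which then routes submitted work to those workers. A minimal sketch of that pattern, with illustrative worker counts:

from dask.distributed import Client, LocalCluster

# Start a scheduler and four single-threaded workers on this machine.
cluster = LocalCluster(n_workers=4, threads_per_worker=1)
client = Client(cluster)

# Work submitted through the client runs on the local workers.
future = client.submit(sum, [1, 2, 3])
print(future.result())  # 6

# Shut down the client and cluster when finished.
client.close()
cluster.close()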
Example #1
Source File: FEMSolver.py    From florence with MIT License
def LaunchDaskDistributedClient(self, scheduler_ip=None, scheduler_port=None):

        if self.parallel and self.parallel_model == "dask" and self.is_dask_scheduler_initialised is False:

            from multiprocessing.pool import ThreadPool
            try:
                import dask
                from dask.distributed import Client, LocalCluster
            except ImportError:
                raise ImportError("dask is not installed. Install it 'using pip install dask[complete]'")

            dask.config.set(pool=ThreadPool(self.no_of_cpu_cores))
            # INITIALISE CLUSTER
            if scheduler_ip is None:
                cluster = LocalCluster(n_workers=self.no_of_cpu_cores, processes=False, threads_per_worker=None)
                client = Client(cluster)
            else:
                client = Client(scheduler_ip)

            self.dask_client = client

            self.is_dask_scheduler_initialised = True 
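The method above only launches the client; a matching tear-down that disconnects it once the solve is finished might look like the sketch below (ShutDownDaskDistributedClient is an illustrative name, not necessarily the method florence actually provides):

def ShutDownDaskDistributedClient(self):
        # Hypothetical counterpart to LaunchDaskDistributedClient: disconnect
        # the client and reset the flag so the solver can launch a new one.
        if self.is_dask_scheduler_initialised:
            self.dask_client.close()
            self.is_dask_scheduler_initialised = False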
Example #2
Source File: iblpipe.py    From ibllib with MIT License
def create_cluster(self):
        # Single worker running in-process (processes=False), with the
        # cluster's log level set to DEBUG so worker output stays visible.
        self.cluster = LocalCluster(
            n_workers=1, processes=False, silence_logs=logging.DEBUG)
        self.client = Client(self.cluster)
Example #3
Source File: executor.py    From dagster with Apache License 2.0
def build_dict(self, pipeline_name):
        '''Returns a dict we can use for kwargs passed to dask client instantiation.

        Intended to be used like:

        with dask.distributed.Client(**cfg.build_dict()) as client:
            << use client here >>

        '''
        if self.cluster_type in ['yarn', 'pbs', 'moab', 'sge', 'lsf', 'slurm', 'oar', 'kube']:
            dask_cfg = {'name': pipeline_name}
        else:
            dask_cfg = {}

        if self.cluster_configuration:
            for k, v in self.cluster_configuration.items():
                dask_cfg[k] = v

        # if address is set, don't add LocalCluster args
        # context: https://github.com/dask/distributed/issues/3313
        if (self.cluster_type == 'local') and ('address' not in dask_cfg):
            # We set threads_per_worker because Dagster is not thread-safe. Even though
            # processes=True by default, there is a clever piece of machinery
            # (dask.distributed.deploy.local.nprocesses_nthreads) that automagically makes execution
            # multithreaded by default when the number of available cores is greater than 4.
            # See: https://github.com/dagster-io/dagster/issues/2181
            # We may want to try to figure out a way to enforce this on remote Dask clusters against
            # which users run Dagster workloads.
            dask_cfg['threads_per_worker'] = 1

        return dask_cfg 
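A hedged usage sketch of the docstring's pattern for the local case; cfg stands for an already-built executor config object, and the pipeline name is illustrative:

from dask.distributed import Client

dask_cfg = cfg.build_dict('hello_pipeline')
# With cluster_type == 'local' and no 'address' configured, dask_cfg is
# {'threads_per_worker': 1}, so Client(**dask_cfg) starts an implicit
# LocalCluster whose workers are single-threaded.
with Client(**dask_cfg) as client:
    print(client.scheduler_info()['workers'])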
Example #4
Source File: prune.py    From pySCENIC with GNU General Public License v3.0
def _prepare_client(client_or_address, num_workers):
    """
    :param client_or_address: one of:
           * None
           * verbatim: 'local'
           * string address
           * a Client instance
    :return: a tuple: (Client instance, shutdown callback function).
    :raises: ValueError if no valid client input was provided.
    """
    # Credits to Thomas Moerman (arboreto package):
    # https://github.com/tmoerman/arboreto/blob/482ce8598da5385eb0e01a50362cb2b1e6f66a41/arboreto/algo.py#L145-L191

    if client_or_address is None or str(client_or_address).lower() == 'local':
        local_cluster = LocalCluster(n_workers=num_workers,
                                     threads_per_worker=1)
        client = Client(local_cluster)

        def close_client_and_local_cluster(verbose=False):
            if verbose:
                LOGGER.info('shutting down client and local cluster')

            client.close()
            local_cluster.close()

        return client, close_client_and_local_cluster

    elif isinstance(client_or_address, str) and client_or_address.lower() != 'local':
        client = Client(client_or_address)

        def close_client(verbose=False):
            if verbose:
                LOGGER.info('shutting down client')

            client.close()

        return client, close_client

    elif isinstance(client_or_address, Client):

        def close_dummy(verbose=False):
            if verbose:
                LOGGER.info('not shutting down client, client was created externally')

            return None

        return client_or_address, close_dummy

    else:
        raise ValueError("Invalid client specified {}".format(str(client_or_address))) 
Example #5
Source File: hpc-grnboost.py    From pySCENIC with GNU General Public License v3.0
def run(cfg_fname):
    # Read configuration file.
    cfg = ConfigParser()
    cfg.read(cfg_fname)

    # Set logging level.
    logging_debug_opt = cfg["params"]["debug"].lower().strip() in {"yes", "true", "y"}
    LOGGER.addHandler(create_logging_handler(logging_debug_opt))
    LOGGER.setLevel(logging.DEBUG)

    # Derive file names.
    #mtx_fnames = list(mapcat(glob.glob, cfg['data']['mtx_fnames'].split(";")))
    mtx_fnames = glob.glob(cfg['data']['mtx_fnames'])
    tfs = load_tf_names(cfg['data']['tfs_fname'])

    # Derive cluster information.
    not_cluster_ip = 'scheduler_ip' not in cfg['params']
    if not_cluster_ip:
        local_cluster = LocalCluster(n_workers=int(cfg['params']['num_cores']),
                                     threads_per_worker=1)
        client = Client(local_cluster)
    else:
        class DummyClient:
            def close(self):
                pass
        local_cluster = DummyClient()
        client = cfg['params']['scheduler_ip']

    # Remove fnames that already have a corresponding results file.
    def add_output(fname, out_folder):
        basename = os.path.splitext(os.path.basename(fname))[0]
        return fname, os.path.join(out_folder, "{}.net.csv".format(basename))
    out_folder = cfg['data']['out_folder']
    for in_fname, out_fname in filter(lambda t: not os.path.exists(t[1]),
                                    map(partial(add_output, out_folder=out_folder),
                                        mtx_fnames)):
        LOGGER.info("Running GRNboost for {}.".format(in_fname))
        try:
            process(in_fname, tfs, out_fname, client)
        except ValueError as e:
            LOGGER.error("Unable to process {} because of \"{}\". Stacktrace:".format(in_fname, str(e)))
            LOGGER.error(traceback.format_exc())

    if not_cluster_ip:
        client.close()
        local_cluster.close()

    print("{} - Done.".format(datetime.datetime.now())) 
Example #6
Source File: sum.py    From ml-on-gcp with Apache License 2.0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--xdim', type=int, default=500000)
    parser.add_argument('--ydim', type=int, default=500000)
    parser.add_argument('--x_chunk_size', type=int, default=10000)
    parser.add_argument('--y_chunk_size', type=int, default=10000)
    parser.add_argument('--use_gpus_only', action="store_true")
    parser.add_argument('--n_gpus', type=int, default=1)
    parser.add_argument('--use_cpus_only', action="store_true")
    parser.add_argument('--n_cpu_sockets', type=int, default=1)
    parser.add_argument('--n_cpu_cores_per_socket', type=int, default=1)
    parser.add_argument('--use_distributed_dask', action="store_true")
    args = parser.parse_args()

    sched_ip, sched_uri = get_scheduler_info()

    if args.use_distributed_dask:
        print('Using Distributed Dask')
        client = Client(sched_uri)
    elif args.use_gpus_only:
        print('Using GPUs and Local Dask')
        cluster = LocalCUDACluster(ip=sched_ip, n_workers=args.n_gpus)
        client = Client(cluster)
    elif args.use_cpus_only:
        print('Using CPUs and Local Dask')
        cluster = LocalCluster(ip=sched_ip, n_workers=args.n_cpu_sockets,
                               threads_per_worker=args.n_cpu_cores_per_socket)
        client = Client(cluster)
    else:
        print("Exiting...")
        sys.exit(-1)

    start = time.time()
    if args.use_gpus_only:
        print('Allocating and initializing arrays using GPU memory with CuPY')
        rs = da.random.RandomState(RandomState=cupy.random.RandomState)
    elif args.use_cpus_only:
        print('Allocating and initializing arrays using CPU memory')
        rs = da.random.RandomState()
    x = create_data(rs, args.xdim, args.ydim, args.x_chunk_size,
                    args.y_chunk_size)
    print('Array size: {:.2f} TB.  Computing parallel sum . . .'.format(
        x.nbytes / 1e12))
    run(x)
    end = time.time()
    delta = (end - start)

    print('Processing complete.')
    print('Wall time create data + computation time: {:10.8f} seconds'.format(
        delta))

    del x
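Note that LocalCUDACluster in the GPU branch comes from the dask_cuda package rather than dask.distributed. A possible CPU-only invocation on a single machine (the flag values are illustrative; the flags themselves are defined by the argparse setup above):

python sum.py --use_cpus_only --n_cpu_sockets 2 --n_cpu_cores_per_socket 8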