Python dask.distributed.LocalCluster() Examples

The following are 6 code examples of dask.distributed.LocalCluster(), drawn from open-source projects. The source project, file, and license for each example are noted in its header. You may also want to look at the other available functions and classes of the dask.distributed module.
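Every example below follows the same basic pattern: construct a LocalCluster (a scheduler plus workers on the local machine) and hand it to a Client, which then routes submitted work to those workers. A minimal sketch of that pattern, with illustrative worker counts:

from dask.distributed import Client, LocalCluster

# Start a scheduler and four single-threaded workers on this machine.
cluster = LocalCluster(n_workers=4, threads_per_worker=1)
client = Client(cluster)

# Work submitted through the client runs on the local workers.
future = client.submit(sum, [1, 2, 3])
print(future.result())  # 6

# Shut down the client and cluster when finished.
client.close()
cluster.close()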
Example #1
Source File: FEMSolver.py    From florence with MIT License
def LaunchDaskDistributedClient(self, scheduler_ip=None, scheduler_port=None):

        if self.parallel and self.parallel_model == "dask" and self.is_dask_scheduler_initialised is False:

            from multiprocessing.pool import ThreadPool
            try:
                import dask
                from dask.distributed import Client, LocalCluster
            except ImportError:
                raise ImportError("dask is not installed. Install it 'using pip install dask[complete]'")

            dask.config.set(pool=ThreadPool(self.no_of_cpu_cores))
            # INITIALISE CLUSTER
            if scheduler_ip is None:
                cluster = LocalCluster(n_workers=self.no_of_cpu_cores, processes=False, threads_per_worker=None)
                client = Client(cluster)
            else:
                client = Client(scheduler_ip)

            self.dask_client = client

            self.is_dask_scheduler_initialised = True 
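The method above only launches the client; a matching tear-down that disconnects it once the solve is finished might look like the sketch below (ShutDownDaskDistributedClient is an illustrative name, not necessarily the method florence actually provides):

def ShutDownDaskDistributedClient(self):
        # Hypothetical counterpart to LaunchDaskDistributedClient: disconnect
        # the client and reset the flag so the solver can launch a new one.
        if self.is_dask_scheduler_initialised:
            self.dask_client.close()
            self.is_dask_scheduler_initialised = False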
Example #2
Source File: iblpipe.py    From ibllib with MIT License
def create_cluster(self):
        # Single worker running in-process (processes=False), with the
        # cluster's log level set to DEBUG so worker output stays visible.
        self.cluster = LocalCluster(
            n_workers=1, processes=False, silence_logs=logging.DEBUG)
        self.client = Client(self.cluster)
Example #3
Source File: executor.py    From dagster with Apache License 2.0
def build_dict(self, pipeline_name):
        '''Returns a dict we can use for kwargs passed to dask client instantiation.

        Intended to be used like:

        with dask.distributed.Client(**cfg.build_dict()) as client:
            << use client here >>

        '''
        if self.cluster_type in ['yarn', 'pbs', 'moab', 'sge', 'lsf', 'slurm', 'oar', 'kube']:
            dask_cfg = {'name': pipeline_name}
        else:
            dask_cfg = {}

        if self.cluster_configuration:
            for k, v in self.cluster_configuration.items():
                dask_cfg[k] = v

        # if address is set, don't add LocalCluster args
        # context: https://github.com/dask/distributed/issues/3313
        if (self.cluster_type == 'local') and ('address' not in dask_cfg):
            # We set threads_per_worker because Dagster is not thread-safe. Even though
            # processes=True by default, there is a clever piece of machinery
            # (dask.distributed.deploy.local.nprocesses_nthreads) that automagically makes execution
            # multithreaded by default when the number of available cores is greater than 4.
            # See: https://github.com/dagster-io/dagster/issues/2181
            # We may want to try to figure out a way to enforce this on remote Dask clusters against
            # which users run Dagster workloads.
            dask_cfg['threads_per_worker'] = 1

        return dask_cfg 
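A hedged usage sketch of the docstring's pattern for the local case; cfg stands for an already-built executor config object, and the pipeline name is illustrative:

from dask.distributed import Client

dask_cfg = cfg.build_dict('hello_pipeline')
# With cluster_type == 'local' and no 'address' configured, dask_cfg is
# {'threads_per_worker': 1}, so Client(**dask_cfg) starts an implicit
# LocalCluster whose workers are single-threaded.
with Client(**dask_cfg) as client:
    print(client.scheduler_info()['workers'])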
Example #4
Source File: prune.py    From pySCENIC with GNU General Public License v3.0
def _prepare_client(client_or_address, num_workers):
    """
    :param client_or_address: one of:
           * None
           * verbatim: 'local'
           * string address
           * a Client instance
    :return: a tuple: (Client instance, shutdown callback function).
    :raises: ValueError if no valid client input was provided.
    """
    # Credits to Thomas Moerman (arboreto package):
    # https://github.com/tmoerman/arboreto/blob/482ce8598da5385eb0e01a50362cb2b1e6f66a41/arboreto/algo.py#L145-L191

    if client_or_address is None or str(client_or_address).lower() == 'local':
        local_cluster = LocalCluster(n_workers=num_workers,
                                     threads_per_worker=1)
        client = Client(local_cluster)

        def close_client_and_local_cluster(verbose=False):
            if verbose:
                LOGGER.info('shutting down client and local cluster')

            client.close()
            local_cluster.close()

        return client, close_client_and_local_cluster

    elif isinstance(client_or_address, str) and client_or_address.lower() != 'local':
        client = Client(client_or_address)

        def close_client(verbose=False):
            if verbose:
                LOGGER.info('shutting down client')

            client.close()

        return client, close_client

    elif isinstance(client_or_address, Client):

        def close_dummy(verbose=False):
            if verbose:
                LOGGER.info('not shutting down client, client was created externally')

            return None

        return client_or_address, close_dummy

    else:
        raise ValueError("Invalid client specified {}".format(str(client_or_address))) 
Example #5
Source File: hpc-grnboost.py    From pySCENIC with GNU General Public License v3.0
def run(cfg_fname):
    # Read configuration file.
    cfg = ConfigParser()
    cfg.read(cfg_fname)

    # Set logging level.
    logging_debug_opt = cfg["params"]["debug"].lower().strip() in {"yes", "true", "y"}
    LOGGER.addHandler(create_logging_handler(logging_debug_opt))
    LOGGER.setLevel(logging.DEBUG)

    # Derive file names.
    #mtx_fnames = list(mapcat(glob.glob, cfg['data']['mtx_fnames'].split(";")))
    mtx_fnames = glob.glob(cfg['data']['mtx_fnames'])
    tfs = load_tf_names(cfg['data']['tfs_fname'])

    # Derive cluster information.
    not_cluster_ip = 'scheduler_ip' not in cfg['params']
    if not_cluster_ip:
        local_cluster = LocalCluster(n_workers=int(cfg['params']['num_cores']),
                                     threads_per_worker=1)
        client = Client(local_cluster)
    else:
        class DummyClient:
            def close(self):
                pass
        local_cluster = DummyClient()
        client = cfg['params']['scheduler_ip']

    # Remove fnames that already have a corresponding results file.
    def add_output(fname, out_folder):
        basename = os.path.splitext(os.path.basename(fname))[0]
        return fname, os.path.join(out_folder, "{}.net.csv".format(basename))
    out_folder = cfg['data']['out_folder']
    for in_fname, out_fname in filter(lambda t: not os.path.exists(t[1]),
                                    map(partial(add_output, out_folder=out_folder),
                                        mtx_fnames)):
        LOGGER.info("Running GRNboost for {}.".format(in_fname))
        try:
            process(in_fname, tfs, out_fname, client)
        except ValueError as e:
            LOGGER.error("Unable to process {} because of \"{}\". Stacktrace:".format(in_fname, str(e)))
            LOGGER.error(traceback.format_exc())

    if not_cluster_ip:
        client.close()
        local_cluster.close()

    print("{} - Done.".format(datetime.datetime.now())) 
Example #6
Source File: sum.py    From ml-on-gcp with Apache License 2.0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--xdim', type=int, default=500000)
    parser.add_argument('--ydim', type=int, default=500000)
    parser.add_argument('--x_chunk_size', type=int, default=10000)
    parser.add_argument('--y_chunk_size', type=int, default=10000)
    parser.add_argument('--use_gpus_only', action="store_true")
    parser.add_argument('--n_gpus', type=int, default=1)
    parser.add_argument('--use_cpus_only', action="store_true")
    parser.add_argument('--n_cpu_sockets', type=int, default=1)
    parser.add_argument('--n_cpu_cores_per_socket', type=int, default=1)
    parser.add_argument('--use_distributed_dask', action="store_true")
    args = parser.parse_args()

    sched_ip, sched_uri = get_scheduler_info()

    if args.use_distributed_dask:
        print('Using Distributed Dask')
        client = Client(sched_uri)
    elif args.use_gpus_only:
        print('Using GPUs and Local Dask')
        cluster = LocalCUDACluster(ip=sched_ip, n_workers=args.n_gpus)
        client = Client(cluster)
    elif args.use_cpus_only:
        print('Using CPUs and Local Dask')
        cluster = LocalCluster(ip=sched_ip, n_workers=args.n_cpu_sockets,
                               threads_per_worker=args.n_cpu_cores_per_socket)
        client = Client(cluster)
    else:
        print("Exiting...")
        sys.exit(-1)

    start = time.time()
    if args.use_gpus_only:
        print('Allocating and initializing arrays using GPU memory with CuPY')
        rs = da.random.RandomState(RandomState=cupy.random.RandomState)
    elif args.use_cpus_only:
        print('Allocating and initializing arrays using CPU memory')
        rs = da.random.RandomState()
    x = create_data(rs, args.xdim, args.ydim, args.x_chunk_size,
                    args.y_chunk_size)
    print('Array size: {:.2f} TB.  Computing parallel sum . . .'.format(
        x.nbytes / 1e12))
    run(x)
    end = time.time()
    delta = (end - start)

    print('Processing complete.')
    print('Wall time create data + computation time: {:10.8f} seconds'.format(
        delta))

    del x
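Note that LocalCUDACluster in the GPU branch comes from the dask_cuda package rather than dask.distributed. A possible CPU-only invocation on a single machine (the flag values are illustrative; the flags themselves are defined by the argparse setup above):

python sum.py --use_cpus_only --n_cpu_sockets 2 --n_cpu_cores_per_socket 8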