Python torch.multiprocessing.Pool() Examples

The following are 15 code examples of torch.multiprocessing.Pool(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the module torch.multiprocessing, or try the search function.
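Since torch.multiprocessing is a drop-in replacement for the standard multiprocessing module, Pool exposes the familiar API. As a minimal, self-contained sketch of the pattern most of the examples below follow (the square function is purely illustrative):

from torch.multiprocessing import Pool

def square(x):
    # Worker functions must be defined at module top level so they can be pickled.
    return x * x

if __name__ == "__main__":
    with Pool(processes=4) as pool:
        results = pool.map(square, range(8))
    print(results)  # [0, 1, 4, 9, 16, 25, 36, 49]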
Example #1
Source File: build_data.py    From ParlAI with MIT License
def _download_multiprocess_map_chunk(pool_tup):
    """
    Helper function for Pool imap_unordered.

    Apparently function must be pickable (which apparently means must be
    defined at the top level of a module and can't be a lamdba) to be used in
    imap_unordered. Has to do with how it's passed to the subprocess.

    :param pool_tup: is a tuple where first arg is an array of tuples of url
    and dest file name for the current chunk and second arg is function to be
    called.
    :return: an array of tuples
    """
    items = pool_tup[0]
    path = pool_tup[1]
    fn = pool_tup[2]
    return [fn(it[0], path, it[1]) for it in items] 
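For context, a hedged sketch of how a chunk helper like this is typically driven with imap_unordered, assuming _download_multiprocess_map_chunk is defined in the same module; the URLs, paths, and downloader below are illustrative stand-ins, not ParlAI's actual code:

from torch.multiprocessing import Pool

def _fake_download(url, path, fname):
    # Hypothetical downloader: pretend every URL succeeds.
    return url, True

if __name__ == "__main__":
    urls = [("http://example.com/a", "a.bin"), ("http://example.com/b", "b.bin")]
    chunks = [(urls[:1], "/tmp", _fake_download), (urls[1:], "/tmp", _fake_download)]
    with Pool(2) as pool:
        for chunk_results in pool.imap_unordered(_download_multiprocess_map_chunk, chunks):
            print(chunk_results)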
Example #2
Source File: dataset.py    From rising with MIT License
def load_async(pool: Pool, fn: Callable, *args, callback: Callable = None, **kwargs) -> Any:
    """
    Load data asynchronously and serialize data via dill

    Args:
        pool: multiprocessing pool to use for :func:`apply_async`
        fn: function to load a single sample
        *args: positional arguments to dump with dill
        callback: optional callback; defaults to None.
        **kwargs: keyword arguments to dump with dill

    Returns:
        Any: reference to obtain data with :func:`get`

    """

    if not DILL_AVAILABLE:
        raise RuntimeError('dill is not installed. For async loading '
                           'please install it')

    payload = dill.dumps((fn, args, kwargs))
    return pool.apply_async(dill_helper, (payload,), callback=callback) 
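The dill_helper referenced here is not shown in this snippet. A minimal sketch of what such a helper has to do, deserialize the payload and invoke the function, could look like this (a sketch, not necessarily rising's exact implementation):

import dill

def dill_helper(payload):
    # Unpack the (fn, args, kwargs) tuple serialized by load_async and call it.
    fn, args, kwargs = dill.loads(payload)
    return fn(*args, **kwargs)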
Example #3
Source File: async_featurization.py    From chemprop with MIT License
def async_mol2graph(q: Queue, 
                    data: MoleculeDataset, 
                    args: Namespace,
                    num_iters: int,
                    iter_size: int,
                    exit_q: Queue,
                    last_batch: bool=False):
    batches = []
    for i in range(0, num_iters, iter_size):  # will only go up to max size of queue, then yield
        if not last_batch and i + args.batch_size > len(data):
            break
        batch = MoleculeDataset(data[i:i + args.batch_size])
        batches.append(batch)
        if len(batches) == args.batches_per_queue_group:  # many at a time, since synchronization is expensive
            with Pool() as pool:
                processed_batches = pool.map(mol2graph_helper, [(batch, args) for batch in batches])
            q.put(processed_batches)
            batches = []
    if len(batches) > 0:
        with Pool() as pool:
            processed_batches = pool.map(mol2graph_helper, [(batch, args) for batch in batches])
        q.put(processed_batches)
    exit_q.get()  # block until the main process says to exit; otherwise the end of the queue apparently can't be read and the process crashes
Example #4
Source File: util.py    From ConvLab with MIT License
def parallelize(fn, args, num_cpus=NUM_CPUS):
    '''
    Run fn in parallel over args and return the results in the same order as args.
    args should be a list of argument tuples.
    @returns {list} results Output of fn, order preserved per args.
    '''
    pool = mp.Pool(num_cpus, maxtasksperchild=1)
    results = pool.starmap(fn, args)
    pool.close()
    pool.join()
    return results 
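A quick usage sketch, assuming parallelize and NUM_CPUS are defined at module level as above; the add function is purely illustrative:

import torch.multiprocessing as mp

NUM_CPUS = 4

def add(a, b):
    return a + b

if __name__ == "__main__":
    print(parallelize(add, [(1, 2), (3, 4), (5, 6)]))  # [3, 7, 11]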
Example #5
Source File: decode_full_model.py    From fast_abs_rl with MIT License
def rerank_mp(all_beams, ext_inds):
    beam_lists = [all_beams[i: i+n] for i, n in ext_inds if n > 0]
    with mp.Pool(8) as pool:
        reranked = pool.map(rerank_one, beam_lists)
    return list(concat(reranked)) 
Example #6
Source File: dataset.py    From rising with MIT License
def load_multi_process(self, load_fn: Callable, path: Sequence) -> List:
        """
        Helper function to load dataset with multiple processes

        Args:
            load_fn:  function to load a single sample
            path:  a sequence of paths which should be loaded

        Returns:
            list: loaded data

        """

        _processes = cpu_count() if self._num_workers is None else self._num_workers

        if self._verbosity:
            pbar = tqdm(total=len(path), unit='samples', desc="Loading Samples")

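            # Note: apply_async callbacks run in the parent process as each job
            # finishes, so updating the tqdm bar here is safe.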
            def update(*a):
                pbar.update(1)

            callback = update
        else:
            callback = None

        with Pool(processes=_processes) as pool:
            jobs = [load_async(pool, load_fn, p, callback=callback) for p in path]
            _data = [j.get() for j in jobs]
        return _data 
Example #7
Source File: test_dataset.py    From rising with MIT License
def test_load_async(self):
        callback = Mock()

        with Pool(processes=1) as p:
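            # A lambda works here because load_async serializes the callable with
            # dill, which, unlike the standard pickle module, can handle lambdas.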
            ref = load_async(p, lambda x: x, 0, callback=callback)
            self.assertEqual(ref.get(), 0)

        callback.assert_called_once() 
Example #8
Source File: base.py    From hypothesis with BSD 3-Clause "New" or "Revised" License
def __init__(self, simulator, workers=2):
        super(ParallelSimulator, self).__init__()
        self.pool = Pool(processes=workers)
        self.simulator = simulator
        self.workers = workers 
Example #9
Source File: mcmc.py    From hypothesis with BSD 3-Clause "New" or "Revised" License
def sample(self, observations, num_samples, thetas=None):
        assert thetas is None or len(thetas) == self.chains
        self.sampler.reset()
        if thetas is None:
            thetas = self._prepare_thetas()
        pool = Pool(processes=self.workers)
        arguments = self._prepare_arguments(observations, thetas, num_samples)
        chains = pool.map(self.sample_chain, arguments)
        pool.close()
        pool.join()

        return chains 
Example #10
Source File: abc.py    From hypothesis with BSD 3-Clause "New" or "Revised" License
def __init__(self, abc, workers=2):
        super(ParallelApproximateBayesianComputation, self).__init__()
        self.abc = abc
        self.pool = Pool(processes=workers)
        self.workers = workers 
Example #11
Source File: util.py    From SLM-Lab with MIT License
def parallelize(fn, args, num_cpus=NUM_CPUS):
    '''
    Run fn in parallel over args and return the results in the same order as args.
    args should be a list of argument tuples.
    @returns {list} results Output of fn, order preserved per args.
    '''
    pool = mp.Pool(num_cpus, maxtasksperchild=1)
    results = pool.starmap(fn, args)
    pool.close()
    pool.join()
    return results 
Example #12
Source File: decode_full_model.py    From ByteCup2018 with MIT License
def rerank_mp(all_beams, ext_inds):
    beam_lists = [all_beams[i: i+n] for i, n in ext_inds if n > 0]
    with mp.Pool(8) as pool:
        reranked = pool.map(rerank_one, beam_lists)
    return list(concat(reranked)) 
Example #13
Source File: data_silo.py    From FARM with Apache License 2.0
def _get_dataset(self, filename, dicts=None):
        if not filename and not dicts:
            raise ValueError("You must either supply `filename` or `dicts`")

        # loading dicts from file (default)
        if dicts is None:
            dicts = list(self.processor.file_to_dicts(filename))
            # shuffle the list of dicts here if we later want a random dev set split from the train set
            if str(self.processor.train_filename) in str(filename):
                if not self.processor.dev_filename:
                    if self.processor.dev_split > 0.0:
                        random.shuffle(dicts)

        num_dicts = len(dicts)
        multiprocessing_chunk_size, num_cpus_used = calc_chunksize(
            num_dicts=num_dicts,
            max_processes=self.max_processes,
            max_chunksize=self.max_multiprocessing_chunksize,
        )

        with ExitStack() as stack:
            if self.max_processes > 1:  # use multiprocessing only when max_processes > 1
                p = stack.enter_context(mp.Pool(processes=num_cpus_used))

                logger.info(
                    f"Got ya {num_cpus_used} parallel workers to convert {num_dicts} dictionaries "
                    f"to pytorch datasets (chunksize = {multiprocessing_chunk_size})..."
                )
                log_ascii_workers(num_cpus_used, logger)

                results = p.imap(
                    partial(self._dataset_from_chunk, processor=self.processor),
                    grouper(dicts, multiprocessing_chunk_size),
                    chunksize=1,
                )
            else:
                logger.info(
                    f"Multiprocessing disabled, using a single worker to convert {num_dicts} "
                    f"dictionaries to pytorch datasets."
                )

                results = map(partial(self._dataset_from_chunk, processor=self.processor), grouper(dicts, num_dicts))

            datasets = []

            desc = f"Preprocessing Dataset"
            if filename:
                desc += f" {filename}"
            with tqdm(total=len(dicts), unit=' Dicts', desc=desc) as pbar:
                for dataset, tensor_names in results:
                    datasets.append(dataset)
                    # update progress bar (last step can have less dicts than actual chunk_size)
                    pbar.update(min(multiprocessing_chunk_size, pbar.total-pbar.n))
            # _dataset_from_chunk can return a None in cases where downsampling has occurred
            datasets = [d for d in datasets if d]
            concat_datasets = ConcatDataset(datasets)
            return concat_datasets, tensor_names 
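The grouper helper used above is not shown in this snippet. A generic chunking helper along these lines would fit the pattern (a sketch under that assumption, not FARM's actual implementation):

from itertools import islice

def grouper(iterable, n):
    # Yield successive chunks of at most n items from iterable.
    it = iter(iterable)
    while True:
        chunk = list(islice(it, n))
        if not chunk:
            return
        yield chunk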
Example #14
Source File: simplePhonemLearner.py    From libri-light with MIT License
def loadSeqs(self):

        # Labels
        self.seqOffset = [0]
        self.phoneLabels = []
        self.phoneOffsets = [0]
        self.data = []
        self.maxSize = 0
        self.maxSizePhone = 0

        # Data

        nprocess = min(30, len(self.seqNames))

        start_time = time.time()
        to_load = [Path(self.pathDB) / x for _, x in self.seqNames]

        with Pool(nprocess) as p:
            poolData = p.map(load, to_load)

        tmpData = []
        poolData.sort()

        totSize = 0
        minSizePhone = 1000000
        for seqName, seq in poolData:
            self.phoneLabels += self.phoneLabelsDict[seqName]
            self.phoneOffsets.append(len(self.phoneLabels))
            self.maxSizePhone = max(self.maxSizePhone,
                                    len(self.phoneLabelsDict[seqName]))
            minSizePhone = min(minSizePhone, len(
                self.phoneLabelsDict[seqName]))
            sizeSeq = seq.size(1)
            self.maxSize = max(self.maxSize, sizeSeq)
            totSize += sizeSeq
            tmpData.append(seq)
            self.seqOffset.append(self.seqOffset[-1] + sizeSeq)
            del seq
        self.data = torch.cat(tmpData, dim=1)
        self.phoneLabels = torch.tensor(self.phoneLabels, dtype=torch.long)
        print(f'Loaded {len(self.phoneOffsets)} sequences '
              f'in {time.time() - start_time:.2f} seconds')
        print(f'maxSizeSeq : {self.maxSize}')
        print(f'maxSizePhone : {self.maxSizePhone}')
        print(f"minSizePhone : {minSizePhone}")
        print(f'Total size dataset {totSize / (16000 * 3600)} hours') 
Example #15
Source File: PatchMatchOrig.py    From Deep-Image-Analogy-PyTorch with MIT License
def propagate(nnf, feat_A, feat_AP, feat_B, feat_BP, patch_size, iters=2, rand_search_radius=200):
    print("\tpatch_size:{}; num_iters:{}; rand_search_radius:{}".format(patch_size, iters, rand_search_radius))

    nnd = np.zeros(nnf.shape[:2])
    A_size = feat_A.shape[:2]
    B_size = feat_B.shape[:2]

    for ay in range(A_size[0]):
        for ax in range(A_size[1]):
            by, bx = nnf[ay, ax]
            nnd[ay, ax] = cal_dist(ay, ax, by, bx, feat_A, feat_AP, feat_B, feat_BP, A_size, B_size, patch_size)

    manager = mp.Manager()
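    # A managed queue is used because a plain multiprocessing.Queue cannot be
    # passed to pool workers as an apply_async argument; the manager proxy can be.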
    q = manager.Queue(A_size[1] * A_size[0])
    cpus = min(mp.cpu_count(), A_size[0] // 20 + 1)
    for i in range(iters):

        p = Pool(cpus)

        ay_start = 0

        while ay_start < A_size[0]:
            ax_start = 0
            while ax_start < A_size[1]:
                p.apply_async(pixelmatch, args=(q, ax_start, ay_start,
                                                cpus,
                                                nnf, nnd,
                                                A_size, B_size,
                                                feat_A, feat_AP,
                                                feat_B, feat_BP,
                                                patch_size,
                                                rand_search_radius,))

                ax_start += A_size[1] // cpus + 1
            ay_start += A_size[0] // cpus + 1

        p.close()
        p.join()

        while not q.empty():
            ax, ay, xbest, ybest, dbest = q.get()

            nnf[ay, ax] = np.array([ybest, xbest])
            nnd[ay, ax] = dbest

    return nnf, nnd