Python multiprocessing.pool.ThreadPool() Examples
The following are 30 code examples of multiprocessing.pool.ThreadPool().
You may also want to check out all available functions/classes of the module multiprocessing.pool.
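Before the project examples, here is a minimal, self-contained sketch of the typical ThreadPool workflow; the pool size and the worker function are illustrative and not taken from any project below:

from multiprocessing.pool import ThreadPool

def work(item):
    # Illustrative worker. ThreadPool runs callables in threads of the
    # current process, so closures and unpicklable objects are fine.
    return item * item

# ThreadPool mirrors the multiprocessing.Pool API (map, imap, apply_async, ...).
with ThreadPool(4) as pool:
    squares = pool.map(work, range(8))

print(squares)  # [0, 1, 4, 9, 16, 25, 36, 49]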
Example #1
Source File: FEMSolver.py From florence with MIT License
def LaunchDaskDistributedClient(self, scheduler_ip=None, scheduler_port=None):

    if self.parallel and self.parallel_model == "dask" and self.is_dask_scheduler_initialised is False:

        from multiprocessing.pool import ThreadPool
        try:
            import dask
            from dask.distributed import Client, LocalCluster
        except ImportError:
            raise ImportError("dask is not installed. Install it using 'pip install dask[complete]'")

        dask.config.set(pool=ThreadPool(self.no_of_cpu_cores))

        # INITIALISE CLUSTER
        if scheduler_ip is None:
            cluster = LocalCluster(n_workers=self.no_of_cpu_cores, processes=False, threads_per_worker=None)
            client = Client(cluster)
        else:
            client = Client(scheduler_ip)

        self.dask_client = client
        self.is_dask_scheduler_initialised = True
Example #2
Source File: diagnostics.py From universe with MIT License
def __init__(self, n, probe_key, ignore_clock_skew=False, metadata_encoding=None, disable_action_probes=False):
    # Each QR code takes about 1ms (and updates at 5fps). We do
    # our best to ensure the QR is processed in time for the next
    # step call (n/16 would put us right at the threshold).
    self.pool = pool.ThreadPool(max(int(n/4), 1))
    self.qr_pool = pool.ThreadPool(max(int(n/8), 1))
    self.lock = threading.RLock()

    self.instance_n = [None] * n
    self.ignore_clock_skew = ignore_clock_skew
    self.disable_action_probes = disable_action_probes

    self.metadata_encoding = metadata_encoding

    self.update(probe_key=probe_key, metadata_encoding=metadata_encoding)

    # only used in flashgames right now
Example #3
Source File: solver.py From dogTorch with MIT License
def save_features(model, data_loaders, args):
    model.eval()
    os.makedirs(args.features_dir, exist_ok=True)
    thread_pool = pool.ThreadPool(args.workers)
    for data_loader in data_loaders:
        data_index = 0
        for input, target, prev_absolutes, next_absolutes, _ in data_loader:
            # `async=` was renamed to `non_blocking=` (and `async` is a reserved
            # word in Python 3.7+); `volatile` is a no-op on modern PyTorch.
            input = Variable(input.cuda(non_blocking=True), volatile=True)
            features = model.feats(input).data.cpu()

            features_to_save = []
            for feature in features:
                relpath = data_loader.dataset.get_relpath(data_index)
                feature_path = os.path.join(args.features_dir, relpath + '.pytar')
                features_to_save.append((feature, feature_path))
                data_index += 1
            thread_pool.map(_save_tensor, features_to_save)
Example #4
Source File: data.py From vaegan-celebs-keras with MIT License
def celeba_loader(batch_size, normalize=True, num_child=4, seed=0, workers=8):
    rng = np.random.RandomState(seed)
    images = glob.glob(images_path)
    with Pool(workers) as p:
        while True:
            rng.shuffle(images)
            for s in range(0, len(images), batch_size):
                e = s + batch_size
                batch_names = images[s:e]
                batch_images = p.map(_load_image, batch_names)
                batch_images = np.stack(batch_images)

                if normalize:
                    batch_images = batch_images / 127.5 - 1.
                    # To be sure
                    batch_images = np.clip(batch_images, -1., 1.)

                # Yield the same batch num_child times since the images will be consumed
                # by num_child different child generators
                for i in range(num_child):
                    yield batch_images
Example #5
Source File: spark_dataset_converter.py From petastorm with Apache License 2.0
def _check_dataset_file_median_size(url_list):
    fs, path_list = get_filesystem_and_path_or_paths(url_list)
    RECOMMENDED_FILE_SIZE_BYTES = 50 * 1024 * 1024

    # TODO: also check file size for other file systems.
    if isinstance(fs, LocalFileSystem):
        pool = ThreadPool(64)
        try:
            file_size_list = pool.map(os.path.getsize, path_list)
            if len(file_size_list) > 1:
                mid_index = len(file_size_list) // 2
                median_size = sorted(file_size_list)[mid_index]  # take the larger one if tie
                if median_size < RECOMMENDED_FILE_SIZE_BYTES:
                    logger.warning('The median size %d B (< 50 MB) of the parquet files is too small. '
                                   'Total size: %d B. Increase the median file size by calling df.repartition(n) or '
                                   'df.coalesce(n), which might help improve the performance. Parquet files: %s, ...',
                                   median_size, sum(file_size_list), url_list[0])
        finally:
            pool.close()
            pool.join()
Example #6
Source File: InfrastructureInfo.py From im with GNU General Public License v3.0
def destroy_vms(self, auth):
    """
    Destroy all the VMs
    """
    delete_list = list(reversed(self.get_vm_list()))

    exceptions = []
    if Config.MAX_SIMULTANEOUS_LAUNCHES > 1:
        pool = ThreadPool(processes=Config.MAX_SIMULTANEOUS_LAUNCHES)
        pool.map(
            lambda vm: vm.delete(delete_list, auth, exceptions),
            delete_list
        )
        pool.close()
    else:
        # If IM server is the first VM, then it will be the last destroyed
        for vm in delete_list:
            vm.delete(delete_list, auth, exceptions)

    if exceptions:
        msg = ""
        for e in exceptions:
            msg += str(e) + "\n"
        raise Exception("Error destroying the infrastructure: \n%s" % msg)
Example #7
Source File: migrate.py From pyspider with Apache License 2.0
def migrate(pool, from_connection, to_connection):
    """
    Migrate tool for pyspider
    """
    f = connect_database(from_connection)
    t = connect_database(to_connection)

    if isinstance(f, ProjectDB):
        for each in f.get_all():
            each = unicode_obj(each)
            logging.info("projectdb: %s", each['name'])
            t.drop(each['name'])
            t.insert(each['name'], each)
    elif isinstance(f, TaskDB):
        pool = Pool(pool)
        pool.map(
            lambda x, f=from_connection, t=to_connection: taskdb_migrating(x, f, t),
            f.projects)
    elif isinstance(f, ResultDB):
        pool = Pool(pool)
        pool.map(
            lambda x, f=from_connection, t=to_connection: resultdb_migrating(x, f, t),
            f.projects)
Example #8
Source File: ctaHistoryData.py From vnpy_crypto with MIT License
def downloadAllFuturesDailyBar(self):
    """Download daily bars of the dominant contracts for all futures products"""
    start = time()
    print(u'Starting download of daily dominant-contract bars for all futures')

    productSymbolSet = self.readFuturesProductSymbol()

    print(u'Symbol list loaded successfully, product symbols: %s' % productSymbolSet)

    # A thread pool was also tested here, but the download function involves a lot
    # of data format conversion and is CPU-heavy, so multithreading brought no
    # significant improvement in efficiency.
    #p = ThreadPool(10)
    #p.map(self.downloadFuturesDailyBar, productSymbolSet)
    #p.close()
    #p.join()

    for productSymbol in productSymbolSet:
        self.downloadFuturesDailyBar(productSymbol + '0000')

    print(u'Daily dominant-contract bars for all futures downloaded, took %s seconds' % (time() - start))

#----------------------------------------------------------------------
Example #9
Source File: multithread.py From vnpy_crypto with MIT License
def test_multithread_stringio_read_csv(self):
    # see gh-11786
    max_row_range = 10000
    num_files = 100

    bytes_to_df = [
        '\n'.join(
            ['%d,%d,%d' % (i, i, i) for i in range(max_row_range)]
        ).encode() for j in range(num_files)]
    files = [BytesIO(b) for b in bytes_to_df]

    # read all files in many threads
    pool = ThreadPool(8)

    results = pool.map(self.read_csv, files)
    first_result = results[0]

    for result in results:
        tm.assert_frame_equal(first_result, result)
Example #10
Source File: agent.py From fairseq with MIT License
def decode(self, session, low=0, high=100000, num_thread=10):
    corpus_info = session.corpus_info()
    high = min(corpus_info["num_sentences"] - 1, high)
    if low >= high:
        return

    t0 = time.time()
    if num_thread > 1:
        # use the requested number of worker threads (was hardcoded to 10)
        with Pool(num_thread) as p:
            p.map(
                partial(self._decode_one, session),
                [sent_id for sent_id in range(low, high + 1)]
            )
    else:
        for sent_id in range(low, high + 1):
            self._decode_one(session, sent_id)

    print(f'Finished {low} to {high} in {time.time() - t0}s')
Example #11
Source File: _compression.py From arctic with GNU Lesser General Public License v2.1
def set_compression_pool_size(pool_size):
    """
    Set the size of the compression workers thread pool.
    If the pool is already created, it waits until all jobs are finished, and then proceeds with setting the new size.

    Parameters
    ----------
    pool_size : `int`
        The size of the pool (must be a positive integer)

    Returns
    -------
    `None`
    """
    pool_size = int(pool_size)
    if pool_size < 1:
        raise ValueError("The compression thread pool size cannot be of size {}".format(pool_size))

    global _compress_thread_pool
    if _compress_thread_pool is not None:
        _compress_thread_pool.close()
        _compress_thread_pool.join()
    _compress_thread_pool = ThreadPool(pool_size)
Example #12
Source File: selenium_downloader.py From fetchman with Apache License 2.0
def download(self, batch):
    if self.driver_pool_size:
        pool = Pool(processes=self.driver_pool_size)
    else:
        pool = Pool(processes=default_settings.DRIVER_POOL_SIZE)
    results = []
    for request in batch:
        results.append(pool.apply_async(self.download_one, (request,)))
    pool.close()
    pool.join()
    true_responses = []
    for result in results:
        true_response = result.get()
        true_responses.append(true_response)
        FetchManLogger.logger.info(true_response)
    return true_responses
Example #13
Source File: test_multi_thread.py From recruit with Apache License 2.0
def test_multi_thread_string_io_read_csv(all_parsers):
    # see gh-11786
    parser = all_parsers
    max_row_range = 10000
    num_files = 100

    bytes_to_df = [
        "\n".join(
            ["%d,%d,%d" % (i, i, i) for i in range(max_row_range)]
        ).encode() for _ in range(num_files)]
    files = [BytesIO(b) for b in bytes_to_df]

    # Read all files in many threads.
    pool = ThreadPool(8)

    results = pool.map(parser.read_csv, files)
    first_result = results[0]

    for result in results:
        tm.assert_frame_equal(first_result, result)
Example #14
Source File: dataset.py From MONAI with Apache License 2.0
def __init__(
    self, data, transform: Callable, cache_num: int = sys.maxsize, cache_rate: float = 1.0, num_workers: int = 0
):
    """
    Args:
        data (Iterable): input data to load and transform to generate dataset for model.
        transform: transforms to execute operations on input data.
        cache_num: number of items to be cached. Default is `sys.maxsize`.
            will take the minimum of (cache_num, data_length x cache_rate, data_length).
        cache_rate: percentage of cached data in total, default is 1.0 (cache all).
            will take the minimum of (cache_num, data_length x cache_rate, data_length).
        num_workers: the number of worker threads to use.
            If 0 a single thread will be used. Default is 0.
    """
    if not isinstance(transform, Compose):
        transform = Compose(transform)
    super().__init__(data, transform)
    self.cache_num = min(cache_num, int(len(self) * cache_rate), len(self))
    if self.cache_num > 0:
        self._cache = [None] * self.cache_num
        if num_workers > 0:
            self._item_processed = 0
            self._thread_lock = threading.Lock()
            with ThreadPool(num_workers) as p:
                p.map(
                    self._load_cache_item_thread,
                    [(i, data[i], transform.transforms) for i in range(self.cache_num)],
                )
        else:
            for i in range(self.cache_num):
                self._cache[i] = self._load_cache_item(data[i], transform.transforms)
                progress_bar(i + 1, self.cache_num, "Load and cache transformed data: ")
Example #15
Source File: project.py From signac with BSD 3-Clause "New" or "Revised" License
def _update_in_memory_cache(self):
    "Update the in-memory state point cache to reflect the workspace."
    logger.debug("Updating in-memory cache...")
    start = time.time()
    job_ids = set(self._job_dirs())
    cached_ids = set(self._sp_cache)
    to_add = job_ids.difference(cached_ids)
    to_remove = cached_ids.difference(job_ids)
    if to_add or to_remove:
        for _id in to_remove:
            del self._sp_cache[_id]

        def _add(_id):
            self._sp_cache[_id] = self._get_statepoint_from_workspace(_id)

        to_add_chunks = split_and_print_progress(
            iterable=list(to_add),
            num_chunks=max(1, min(100, int(len(to_add) / 1000))),
            write=logger.info,
            desc="Read metadata: ")

        with ThreadPool() as pool:
            for chunk in to_add_chunks:
                pool.map(_add, chunk)

        delta = time.time() - start
        logger.debug("Updated in-memory cache in {:.3f} seconds.".format(delta))
        return to_add, to_remove
    else:
        logger.debug("In-memory cache is up to date.")
Example #16
Source File: test_h5store.py From signac with BSD 3-Clause "New" or "Revised" License
def test_multithreading(self):

    def set_x(x):
        self.get_h5store()['x'] = x

    with closing(ThreadPool(2)) as pool:
        pool.map(set_x, range(100))
    pool.join()

    assert self.get_h5store()['x'] in set(range(100))
Example #17
Source File: test_h5store.py From signac with BSD 3-Clause "New" or "Revised" License
def test_multithreading_with_error(self):

    def set_x(x):
        self.get_h5store()['x'] = x
        if x == 50:
            raise RuntimeError()

    with pytest.raises(RuntimeError):
        with closing(ThreadPool(2)) as pool:
            pool.map(set_x, range(100))
        pool.join()

    assert self.get_h5store()['x'] in set(range(100))
Example #18
Source File: remote.py From testplan with Apache License 2.0
def _start_thread_pool(self):
    size = len(self._instances)
    try:
        if size > 2:
            self.pool = ThreadPool(5 if size > 5 else size)
    except Exception as exc:
        if isinstance(exc, AttributeError):
            self.logger.warning(
                "Please upgrade to the suggested python interpreter."
            )
Example #19
Source File: AffineInvariantFeatures.py From DoNotSnap with GNU General Public License v3.0
def __init__(self, detector, extractor):
    self.detector = detector
    self.extractor = extractor
    self.pool = ThreadPool(processes=cv2.getNumberOfCPUs())
Example #20
Source File: batch.py From pyEX with Apache License 2.0
def bulkMinuteBars(symbol, dates, token='', version='', filter=''):
    '''fetch many dates worth of minute-bars for a given symbol'''
    _raiseIfNotStr(symbol)
    dates = [_strOrDate(date) for date in dates]
    list_orig = dates.__class__

    args = []
    for date in dates:
        args.append((symbol, '1d', date, token, version, filter))

    pool = ThreadPool(20)
    rets = pool.starmap(chart, args)
    pool.close()

    return list_orig(itertools.chain(*rets))
Example #21
Source File: utils.py From kickoff-player with GNU General Public License v3.0
def thread_pool(callback, args, flatten=True):
    pool = ThreadPool(processes=cpu_count())
    data = pool.map(callback, args)

    pool.close()
    pool.join()

    if flatten:
        data = flatten_list(data)

    return data
Example #22
Source File: utils.py From bioconda-utils with MIT License
def fetch(cls, urls, descs, cb, datas):
    """Fetch data from URLs.

    This will use asyncio to manage a pool of connections at once, speeding up
    download as compared to iterative use of ``requests`` significantly. It will
    also retry on non-permanent HTTP error codes (i.e. 429, 502, 503 and 504).

    Args:
      urls: List of URLS
      descs: Matching list of descriptions (for progress display)
      cb: As each download is completed, data is passed through this function.
          Use to e.g. offload json parsing into download loop.
      datas: Matching list of data passed through to ``cb``.
    """
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    if loop.is_running():
        logger.warning("Running AsyncRequests.fetch from within running loop")
        # Workaround the fact that asyncio's loop is marked as not-reentrant
        # (it is apparently easy to patch, but not desired by the devs)
        with ThreadPool(1) as pool:
            res = pool.apply(cls.fetch, (urls, descs, cb, datas))
        return res

    task = asyncio.ensure_future(cls.async_fetch(urls, descs, cb, datas))
    try:
        loop.run_until_complete(task)
    except KeyboardInterrupt:
        task.cancel()
        loop.run_forever()
        task.exception()
    return task.result()
Example #23
Source File: api_client.py From APIv3-python-library with MIT License
@property
def pool(self):
    # Lazily create the ThreadPool on first access (see __init__ in Example #24).
    if self._pool is None:
        self._pool = ThreadPool()
    return self._pool
Example #24
Source File: api_client.py From APIv3-python-library with MIT License
def __init__(self, configuration=None, header_name=None, header_value=None, cookie=None):
    if configuration is None:
        configuration = Configuration()
    self.configuration = configuration

    # Use the pool property to lazily initialize the ThreadPool.
    self._pool = None
    self.rest_client = rest.RESTClientObject(configuration)
    self.default_headers = {}
    if header_name is not None:
        self.default_headers[header_name] = header_value
    self.cookie = cookie
    # Set default User-Agent.
    self.user_agent = 'Swagger-Codegen/1.0.0/python'
Example #25
Source File: __init__.py From oss-ftp with MIT License
def Pool(processes=None, initializer=None, initargs=()):
    from multiprocessing.pool import ThreadPool
    return ThreadPool(processes, initializer, initargs)
Example #26
Source File: __init__.py From BinderFilter with MIT License
def Pool(processes=None, initializer=None, initargs=()):
    from multiprocessing.pool import ThreadPool
    return ThreadPool(processes, initializer, initargs)
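Examples #25 and #26 are identical and appear to be vendored copies of the factory behind the stdlib's multiprocessing.dummy module, which exposes the multiprocessing.Pool constructor signature backed by threads. A short usage sketch of that stdlib equivalent, assuming only the standard library:

from multiprocessing.dummy import Pool  # thread-backed drop-in for multiprocessing.Pool

def double(n):
    return n * 2  # runs in a worker thread, not a subprocess

pool = Pool(processes=4)
try:
    print(pool.map(double, range(5)))  # [0, 2, 4, 6, 8]
finally:
    pool.close()
    pool.join()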
Example #27
Source File: _parallel_backends.py From mlens with MIT License
def configure(self, n_jobs=1, parallel=None, **backend_args):
    """Build a process or thread pool and return the number of workers"""
    n_jobs = self.effective_n_jobs(n_jobs)
    if n_jobs == 1:
        # Avoid unnecessary overhead and use sequential backend instead.
        raise FallbackToBackend(SequentialBackend())

    self.parallel = parallel
    self._pool = ThreadPool(n_jobs)
    return n_jobs
Example #28
Source File: async_pubsub.py From monaco with MIT License
def __init__(self, connection_pool, threadpool_size=5, **kwargs):
    super(AsyncPubSub, self).__init__(connection_pool, **kwargs)
    if not hasattr(threading.current_thread(), "_children"):
        threading.current_thread()._children = WeakKeyDictionary()
    self.threadpool = ThreadPool(threadpool_size)
    self.running = []
Example #29
Source File: slave.py From monaco with MIT License
def __init__(self):
    monaco = schema.Monaco()
    self.r = redis.StrictRedis(port=config['mgmt_port'])
    # ternaries are always a bad idea. this is a mess of exceptions waiting to cascade so FIXME
    if self.r.info()['role'] == 'master':
        self.rmaster = redis.StrictRedis(port=config['mgmt_port'])
    else:
        self.rmaster = redis.StrictRedis(host=self.r.info()['master_host'], port=config['mgmt_port'],
                                         socket_connect_timeout=1, socket_timeout=1)
    monaco.refresh(self.r)
    node_id = monaco.node_ids_by_hostname[config['hostname']]
    self.node = schema.MonacoNode(node_id=node_id)
    self.health_data = {}   # dictionary of app_id -> DB health
    self.app_clients = {}   # dictionary of app_id -> redis clients
    self.rps = redis.StrictRedis(port=config['mgmt_port'])
    self.pubsub = self.rps.pubsub(ignore_subscribe_messages=True)
    self.lock = threading.Lock()
    self._subscriptions = {}
    self.logger = logging.getLogger('monaco.slave')
    self.redmanager = RedisMgmt()
    self.nutmanager = NutMgmt()

    # for slave based health-checks
    self.sched = Scheduler(daemon=True)
    self.sched.start()
    self.sched.add_interval_job(self.node_health, seconds=5)  # TODO: Tune
    self.health_check_pool = ThreadPool(10)

    atexit.register(lambda: self.sched.shutdown(wait=False))
Example #30
Source File: core.py From deplicate with MIT License
def _splitpaths(paths, followlinks):
    with closing(ThreadPool()) as pool:
        upaths = pool.imap(fsdecode, paths)
        return splitpaths(set(upaths), followlinks)