Python concurrent.futures.as_completed() Examples

The following are 30 code examples of concurrent.futures.as_completed(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module concurrent.futures, or try the search function.
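For orientation, here is a minimal, self-contained sketch of the canonical as_completed() pattern, using only standard-library calls; the URLs and the load_url helper are illustrative placeholders, not part of any project below. as_completed() takes an iterable of futures and yields each future as soon as it finishes, in completion order rather than submission order; calling result() on a yielded future returns its value or re-raises the exception from the worker.

import concurrent.futures
import urllib.request

URLS = ['https://www.python.org/', 'https://docs.python.org/']  # placeholder URLs

def load_url(url, timeout):
    # Fetch a single URL and return its body.
    with urllib.request.urlopen(url, timeout=timeout) as conn:
        return conn.read()

with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
    future_to_url = {executor.submit(load_url, url, 10): url for url in URLS}
    # Futures arrive in completion order, not submission order.
    for future in concurrent.futures.as_completed(future_to_url):
        url = future_to_url[future]
        try:
            data = future.result()  # re-raises any exception raised in the worker
        except Exception as exc:
            print('%r generated an exception: %s' % (url, exc))
        else:
            print('%r page is %d bytes' % (url, len(data)))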
Example #1
Source File: ilsvrc_det.py    From gluon-cv with Apache License 2.0
def par_crop(args):
    """
    Dataset curation, crop data and transform the format of a label
    """
    crop_path = os.path.join(args.download_dir, './crop{:d}'.format(args.instance_size))
    if not os.path.isdir(crop_path): makedirs(crop_path)
    VID_base_path = os.path.join(args.download_dir, './ILSVRC')
    ann_base_path = os.path.join(VID_base_path, 'Annotations/DET/train/')
    sub_sets = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i')
    for sub_set in sub_sets:
        sub_set_base_path = os.path.join(ann_base_path, sub_set)
        if 'a' == sub_set:
            xmls = sorted(glob.glob(os.path.join(sub_set_base_path, '*', '*.xml')))
        else:
            xmls = sorted(glob.glob(os.path.join(sub_set_base_path, '*.xml')))
        n_imgs = len(xmls)
        sub_set_crop_path = os.path.join(crop_path, sub_set)
        with futures.ProcessPoolExecutor(max_workers=args.num_threads) as executor:
            fs = [executor.submit(crop_xml, args, xml, sub_set_crop_path, args.instance_size) for xml in xmls]
            for i, f in enumerate(futures.as_completed(fs)):
                printProgress(i, n_imgs, prefix=sub_set, suffix='Done ', barLength=80) 
Example #2
Source File: ilsvrc_vid.py    From gluon-cv with Apache License 2.0
def par_crop(args, ann_base_path):
    """
    Dataset curation, crop data and transform the format of the label
    Parameters
    ----------
    ann_base_path: str, Annotations base path
    """
    crop_path = os.path.join(args.download_dir, './crop{:d}'.format(int(args.instance_size)))
    if not os.path.isdir(crop_path):
        makedirs(crop_path)
    sub_sets = sorted({'a', 'b', 'c', 'd', 'e'})
    for sub_set in sub_sets:
        sub_set_base_path = os.path.join(ann_base_path, sub_set)
        videos = sorted(os.listdir(sub_set_base_path))
        n_videos = len(videos)
        with futures.ProcessPoolExecutor(max_workers=args.num_threads) as executor:
            fs = [executor.submit(crop_video, args, sub_set, video, crop_path, ann_base_path) for video in videos]
            for i, f in enumerate(futures.as_completed(fs)):
                # Write progress to error so that it can be seen
                printProgress(i, n_videos, prefix=sub_set, suffix='Done ', barLength=40) 
Example #3
Source File: flags_threadpool_ac.py    From concurrency2017 with MIT License
def download_many(cc_list):
    cc_list = cc_list[:5]  # <1>
    with futures.ThreadPoolExecutor(max_workers=3) as executor:  # <2>
        to_do = []
        for cc in sorted(cc_list):  # <3>
            future = executor.submit(download_one, cc)  # <4>
            to_do.append(future)  # <5>
            msg = 'Scheduled for {}: {}'
            print(msg.format(cc, future))  # <6>

        results = []
        for future in futures.as_completed(to_do):  # <7>
            res = future.result()  # <8>
            msg = '{} result: {!r}'
            print(msg.format(future, res))  # <9>
            results.append(res)

    return len(results) 
Example #4
Source File: chromeboy.py    From falsy with MIT License
def run(self, data, max=4):
        results = []
        with futures.ThreadPoolExecutor(max_workers=max) as executor:
            future_to_url = {}
            for i, payload in enumerate(data):
                payload['chrome_id'] = i
                future_to_url[executor.submit(self.run1, payload)] = payload
                # future_to_url[executor.submit(self.run1_core, payload, browser, begin_time)] = payload
            for future in futures.as_completed(future_to_url):
                url = future_to_url[future]
                try:
                    data = future.result()
                except Exception as exc:
                    print('%r generated an exception: %s' % (url, exc))
                else:
                    data['chrome_id'] = url['chrome_id']
                    results.append(data)

        sorted_results = sorted(results, key=lambda tup: tup['chrome_id'])
        return sorted_results 
Example #5
Source File: stac_validator.py    From stac-validator with Apache License 2.0
def run(self, concurrent=10):
        """
        Entry point.
        :param concurrent: number of threads to use
        :return: message json
        """

        children = [self.stac_file]
        logger.info(f"Using {concurrent} threads")
        while True:
            with futures.ThreadPoolExecutor(max_workers=int(concurrent)) as executor:
                future_tasks = [executor.submit(self._validate, url) for url in children]
                children = []
                for task in futures.as_completed(future_tasks):
                    message, status, new_children = task.result()
                    self.status = self._update_status(self.status, status)
                    self.message.append(message)
                    children.extend(new_children)

            if not children:
                break

        return json.dumps(self.message) 
Example #6
Source File: servers.py    From Pyro5 with MIT License
def count(self, lines):
        # use the name server's prefix lookup to get all registered wordcounters
        with locate_ns() as ns:
            all_counters = ns.list(prefix="example.dc2.wordcount.")

        # chop the text into chunks that can be distributed across the workers
        # uses futures so that it runs the counts in parallel
        # counter is selected in a round-robin fashion from list of all available counters
        with futures.ThreadPoolExecutor() as pool:
            roundrobin_counters = cycle(all_counters.values())
            tasks = []
            for chunk in grouper(200, lines):
                tasks.append(pool.submit(self.count_chunk, next(roundrobin_counters), chunk))

            # gather the results
            print("Collecting %d results (counted in parallel)..." % len(tasks))
            totals = Counter()
            for task in futures.as_completed(tasks):
                try:
                    totals.update(task.result())
                except Pyro5.errors.CommunicationError as x:
                    raise Pyro5.errors.PyroError("Something went wrong in the server when collecting the responses: "+str(x))
            return totals 
Example #7
Source File: takeover.py    From takeover with MIT License
def runner(k):
        threadpool = thread.ThreadPoolExecutor(max_workers=k.get('threads'))
        if k.get('verbose'):
            info('Set %s threads..' % k.get('threads'))
        futures = (threadpool.submit(requester, domain, k.get("proxy"), k.get("timeout"),
                k.get("output"), k.get('process'), k.get('verbose')) for domain in k.get("domains"))
        for i, results in enumerate(thread.as_completed(futures)):
            if k.get('verbose') and k.get('d_list'):
                str_ = "{i}{b:.2f}% Domain: {d}".format(
                    i=_info(),
                    b=PERCENT(int(i), int(k.get('dict_len'))),
                    d=k.get('domains')[i])
                print_(str_)
            else:
                info('Domain: {}'.format(k.get('domains')[i]))
Example #8
Source File: osc_uploader.py    From upload-scripts with MIT License
def _visual_items_upload_with_operation(self, sequence, visual_item_upload_operation):
        items_to_upload = []
        for visual_item in sequence.visual_items:
            if str(visual_item.index) not in sequence.progress:
                items_to_upload.append(visual_item)

        with THREAD_LOCK:
            self.manager.progress_bar.update(len(sequence.visual_items) - len(items_to_upload))

        with ThreadPoolExecutor(max_workers=self.workers) as executor:
            future_events = [executor.submit(visual_item_upload_operation.upload,
                                             visual_item) for visual_item in items_to_upload]
            for completed_event in as_completed(future_events):
                uploaded, index = completed_event.result()
                with THREAD_LOCK:
                    if uploaded:
                        self.__persist_upload_index(index, sequence.path)
                        sequence.progress.append(index)
                    self.manager.progress_bar.update(1) 
Example #9
Source File: test_ddl.py    From ibis with Apache License 2.0
def test_temp_table_concurrency(con, test_data_dir):
    # we don't install futures on Windows in CI, and we can't run this
    # test there anyway, so we import here
    import concurrent.futures
    from concurrent.futures import as_completed

    def limit_10(i, hdfs_path):
        t = con.parquet_file(hdfs_path)
        return t.sort_by(t.r_regionkey).limit(1, offset=i).execute()

    nthreads = 4
    hdfs_path = pjoin(test_data_dir, 'parquet/tpch_region')

    with concurrent.futures.ThreadPoolExecutor(max_workers=nthreads) as e:
        futures = [e.submit(limit_10, i, hdfs_path) for i in range(nthreads)]
    assert all(map(len, (future.result() for future in as_completed(futures)))) 
Example #10
Source File: bfile.py    From Sitadel with GNU General Public License v3.0
def process(self, start_url, crawled_urls):
        self.output.info("Checking common backup files..")
        db = self.datastore.open("bfile.txt", "r")
        dbfiles = [x.strip() for x in db.readlines()]
        db1 = self.datastore.open("cfile.txt", "r")
        dbfiles1 = [x.strip() for x in db1.readlines()]
        urls = []
        for b in dbfiles:
            for d in dbfiles1:
                bdir = b.replace("[name]", d)
                urls.append(urljoin(str(start_url), str(bdir)))
        # We launch ThreadPoolExecutor with max_workers set to None to use the default number of workers
        # https://docs.python.org/3/library/concurrent.futures.html
        with ThreadPoolExecutor(max_workers=None) as executor:
            futures = [executor.submit(self.check_url, url) for url in urls]
            try:
                for future in as_completed(futures):
                    future.result()
            except KeyboardInterrupt:
                executor.shutdown(False)
                raise 
Example #11
Source File: admin.py    From Sitadel with GNU General Public License v3.0
def process(self, start_url, crawled_urls):
        self.output.info("Checking admin interfaces...")
        with self.datastore.open("admin.txt", "r") as db:
            dbfiles = [x.strip() for x in db.readlines()]
            urls = map(
                lambda adminpath: urljoin(str(start_url), str(adminpath)), dbfiles
            )
            # We launch ThreadPoolExecutor with max_workers set to None to use the default number of workers
            # https://docs.python.org/3/library/concurrent.futures.html
            with ThreadPoolExecutor(max_workers=None) as executor:
                futures = [executor.submit(self.check_url, url) for url in urls]
                try:
                    for future in as_completed(futures):
                        future.result()
                except KeyboardInterrupt:
                    executor.shutdown(False)
                    raise 
Example #12
Source File: federation_cluster.py    From FATE with Apache License 2.0
def async_get(self, name: str, tag: str, parties: list) -> typing.Generator:
        rubbish = Rubbish(name, tag)
        futures = self._check_get_status_async(name, tag, parties)
        for future in as_completed(futures):
            party = futures[future]
            obj, head, frags = future.result()
            if isinstance(obj, _DTable):
                rubbish.add_table(obj)
                yield (party, obj)
            else:
                table, key = head
                rubbish.add_obj(table, key)
                if not is_split_head(obj):
                    yield (party, obj)
                else:
                    frag_table, frag_keys = frags
                    rubbish.add_table(frag_table)
                    fragments = [frag_table.get(key) for key in frag_keys]
                    yield (party, split_get(fragments))
        yield (None, rubbish) 
Example #13
Source File: test_concurrent_futures.py    From Fluid-Designer with GNU General Public License v3.0
def test_zero_timeout(self):
        future1 = self.executor.submit(time.sleep, 2)
        completed_futures = set()
        try:
            for future in futures.as_completed(
                    [CANCELLED_AND_NOTIFIED_FUTURE,
                     EXCEPTION_FUTURE,
                     SUCCESSFUL_FUTURE,
                     future1],
                    timeout=0):
                completed_futures.add(future)
        except futures.TimeoutError:
            pass

        self.assertEqual(set([CANCELLED_AND_NOTIFIED_FUTURE,
                              EXCEPTION_FUTURE,
                              SUCCESSFUL_FUTURE]),
                         completed_futures) 
Example #14
Source File: replay_gain.py    From linux-show-player with GNU General Public License v3.0
def run(self):
        self._running = True

        with ThreadPoolExecutor(max_workers=self.threads) as executor:
            for file in self.files.keys():
                gain = GstGain(file, self.ref_level)
                self._futures[executor.submit(gain.gain)] = gain

            for future in futures_completed(self._futures):
                if self._running:
                    try:
                        self._post_process(*future.result())
                    except Exception:
                        # Call with the value stored in the GstGain object
                        self._post_process(*self._futures[future].result)
                else:
                    break

        if self._running:
            MainActionsHandler.do_action(self._action)
        else:
            logging.info('REPLAY-GAIN:: Stopped by user')

        self.on_progress.emit(-1)
        self.on_progress.disconnect() 
Example #15
Source File: main.py    From topcoder-dl with GNU General Public License v3.0
def fetch(self):
        try:
            if not os.path.exists(self.target_dir):
                os.mkdir(self.target_dir)
        except Exception as e:
            print(e)
        self.page = urllib2.urlopen(self.base_url)
        self.data = BeautifulSoup(self.page.read(), "lxml")
        if not self.flag:
            table = self.data.findAll("table")[0]
            all_a = table.findAll("a")
            member_a = table.findAll("a", class_="tc_coder coder")
            all_set = set(all_a)
            member_set = set(member_a)
            post = list(set(all_set).difference(member_set))
        else:
            post = [self.base_url]

        with ThreadPoolExecutor(max_workers=4) as executor:
            future_to_url = {
                executor.submit(self.download, url): url for url in post}
            for future in as_completed(future_to_url):
                url = future_to_url[future] 
Example #16
Source File: test_concurrent_futures.py    From ironpython3 with Apache License 2.0
def test_zero_timeout(self):
        future1 = self.executor.submit(time.sleep, 2)
        completed_futures = set()
        try:
            for future in futures.as_completed(
                    [CANCELLED_AND_NOTIFIED_FUTURE,
                     EXCEPTION_FUTURE,
                     SUCCESSFUL_FUTURE,
                     future1],
                    timeout=0):
                completed_futures.add(future)
        except futures.TimeoutError:
            pass

        self.assertEqual(set([CANCELLED_AND_NOTIFIED_FUTURE,
                              EXCEPTION_FUTURE,
                              SUCCESSFUL_FUTURE]),
                         completed_futures) 
Example #17
Source File: ldap.py    From Sitadel with GNU General Public License v3.0
def process(self, start_url, crawled_urls):
        self.output.info("Checking ldap injection...")
        db = self.datastore.open("ldap.txt", "r")
        dbfiles = [x.strip() for x in db]

        for payload in dbfiles:
            with ThreadPoolExecutor(max_workers=None) as executor:
                futures = [
                    executor.submit(self.attack, payload, url) for url in crawled_urls
                ]
                try:
                    # Collect results per payload; otherwise only the last payload's futures are checked
                    for future in as_completed(futures):
                        future.result()
                except KeyboardInterrupt:
                    executor.shutdown(False)
                    raise 
Example #18
Source File: downloader.py    From PyCon-Mobile-App with GNU General Public License v3.0
def _check_executor(self, dt):
        start = time()
        try:
            for future in as_completed(self._futures[:], 0):
                self._futures.remove(future)
                try:
                    result = future.result()
                except Exception:
                    traceback.print_exc()
                    # make an error tile?
                    continue
                if result is None:
                    continue
                callback, args = result
                callback(*args)

                # cap executor time in order to prevent too much slowness;
                # seems to work quite well with big zoom-in/out
                if time() - start > self.cap_time:
                    break
        except TimeoutError:
            pass 
Example #19
Source File: algorithms_distances.py    From struc2vec with MIT License
def exec_bfs(G,workers,calcUntilLayer):

    futures = {}
    degreeList = {}

    t0 = time()
    vertices = G.keys()
    parts = workers
    chunks = partition(vertices,parts)

    with ProcessPoolExecutor(max_workers=workers) as executor:

        part = 1
        for c in chunks:
            job = executor.submit(getDegreeListsVertices,G,c,calcUntilLayer)
            futures[job] = part
            part += 1

        for job in as_completed(futures):
            dl = job.result()
            v = futures[job]
            degreeList.update(dl)

    logging.info("Saving degreeList on disk...")
    saveVariableOnDisk(degreeList,'degreeList')
    t1 = time()
    logging.info('Execution time - BFS: {}m'.format((t1-t0)/60))


    return 
Example #20
Source File: dir.py    From Sitadel with GNU General Public License v3.0
def process(self, start_url, crawled_urls):
        self.output.info("Checking common dirs..")
        with self.datastore.open("cdir.txt", "r") as db:
            dbfiles = [x.strip() for x in db.readlines()]
            urls = map(lambda d: urljoin(str(start_url), str(d)), dbfiles)
            # We launch ThreadPoolExecutor with max_workers set to None to use the default number of workers
            # https://docs.python.org/3/library/concurrent.futures.html
            with ThreadPoolExecutor(max_workers=None) as executor:
                futures = [executor.submit(self.check_url, url) for url in urls]
                try:
                    for future in as_completed(futures):
                        future.result()
                except KeyboardInterrupt:
                    executor.shutdown(False)
                    raise 
Example #21
Source File: backdoor.py    From Sitadel with GNU General Public License v3.0
def process(self, start_url, crawled_urls):
        self.output.info("Checking common backdoors...")
        with self.datastore.open("backdoor.txt", "r") as db:
            dbfiles = [x.strip() for x in db.readlines()]
            urls = map(lambda backdoor: urljoin(str(start_url), str(backdoor)), dbfiles)
            # We launch ThreadPoolExecutor with max_workers set to None to use the default number of workers
            # https://docs.python.org/3/library/concurrent.futures.html
            with ThreadPoolExecutor(max_workers=None) as executor:
                futures = [executor.submit(self.check_url, url) for url in urls]
                try:
                    for future in as_completed(futures):
                        future.result()
                except KeyboardInterrupt:
                    executor.shutdown(False)
                    raise 
Example #22
Source File: client_graphics.py    From Pyro5 with MIT License
def draw_results(self):
        for task in futures.as_completed(self.tasks):
            y, pixeldata = task.result()
            self.img.put(pixeldata, (0, y))
            self.root.update()
        duration = time.time() - self.start_time
        print("Calculation took: %.2f seconds" % duration) 
Example #23
Source File: log.py    From Sitadel with GNU General Public License v3.0
def process(self, start_url, crawled_urls):
        self.output.info("Checking common log files..")
        with self.datastore.open("log.txt", "r") as db:
            dbfiles = [x.strip() for x in db.readlines()]
            urls = map(lambda log: urljoin(str(start_url), str(log)), dbfiles)
            # We launch ThreadPoolExecutor with max_workers set to None to use the default number of workers
            # https://docs.python.org/3/library/concurrent.futures.html
            with ThreadPoolExecutor(max_workers=None) as executor:
                futures = [executor.submit(self.check_url, url) for url in urls]
                try:
                    for future in as_completed(futures):
                        future.result()
                except KeyboardInterrupt:
                    executor.shutdown(False)
                    raise 
Example #24
Source File: file.py    From Sitadel with GNU General Public License v3.0
def process(self, start_url, crawled_urls):
        self.output.info("Checking common files...")
        with self.datastore.open("cfile.txt", "r") as db:
            dbfiles = [x.strip() for x in db.readlines()]
            urls = map(lambda filex: urljoin(str(start_url), str(filex)), dbfiles)
            # We launch ThreadPoolExecutor with max_workers set to None to use the default number of workers
            # https://docs.python.org/3/library/concurrent.futures.html
            with ThreadPoolExecutor(max_workers=None) as executor:
                futures = [executor.submit(self.check_url, url) for url in urls]
                try:
                    for future in as_completed(futures):
                        future.result()
                except KeyboardInterrupt:
                    executor.shutdown(False)
                    raise 
Example #25
Source File: test_fix_point.py    From FATE with Apache License 2.0
def submit(func, *args, **kwargs):
    with ProcessPoolExecutor() as pool:
        num = NUM_HOSTS + 1
        result = [None] * num
        futures = {}
        for _idx in range(num):
            kv = kwargs.copy()
            kv["idx"] = _idx
            futures[pool.submit(func, *args, **kv)] = _idx
        for future in as_completed(futures):
            result[futures[future]] = future.result()
        return result 
Example #26
Source File: getgroups-esi.py    From yamlloader with MIT License
def getgroups(grouplist):
    groupfuture=[]
    print "getgroups"
    for groupid in grouplist:
        if isinstance(groupid,basestring) and groupid.startswith("https"):
            groupfuture.append(session.get(str(groupid)))
        else:
            groupfuture.append(session.get(grouplookupurl.format(groupid)))
    badlist=[]
    pbar = tqdm(total=len(grouplist))
    for groupdata in as_completed(groupfuture):
        if groupdata.result().status_code==200:
            itemjson=groupdata.result().json()
            item=itemjson.get('group_id')
            if int(item) in sdegrouplist:
                try:
                    connection.execute(invGroups.update().where(invGroups.c.groupID == literal_column(str(item))),
                               groupID=item,
                               groupName=itemjson['name'],
                               categoryID=itemjson.get('category_id',None),
                               published=itemjson.get('published',False),
                               )
                except:
                    pass
            else:
                connection.execute(invGroups.insert(),
                           groupID=item,
                           groupName=itemjson['name'],
                           categoryID=itemjson.get('category_id', None),
                           published=itemjson.get('published', False),
                           )
        else:
            badlist.append(groupdata.result().url)
            print groupdata.result().url
        pbar.update(1)
    return badlist 
Example #27
Source File: algorithms.py    From struc2vec with MIT License
def generate_random_walks(num_walks,walk_length,workers,vertices):

    logging.info('Loading distances_nets on disk...')

    graphs = restoreVariableFromDisk('distances_nets_graphs')
    alias_method_j = restoreVariableFromDisk('nets_weights_alias_method_j')
    alias_method_q = restoreVariableFromDisk('nets_weights_alias_method_q')
    amount_neighbours = restoreVariableFromDisk('amount_neighbours')

    logging.info('Creating RWs...')
    t0 = time()
    
    walks = deque()
    initialLayer = 0

    if(workers > num_walks):
        workers = num_walks

    with ProcessPoolExecutor(max_workers=workers) as executor:
        futures = {}
        for walk_iter in range(num_walks):
            random.shuffle(vertices)
            job = executor.submit(exec_ramdom_walks_for_chunck,vertices,graphs,alias_method_j,alias_method_q,walk_length,amount_neighbours)
            futures[job] = walk_iter
            #part += 1
        logging.info("Receiving results...")
        for job in as_completed(futures):
            walk = job.result()
            r = futures[job]
            logging.info("Iteration {} executed.".format(r))
            walks.extend(walk)
            del futures[job]


    t1 = time()
    logging.info('RWs created. Time: {}m'.format((t1-t0)/60))
    logging.info("Saving Random Walks on disk...")
    save_random_walks(walks) 
Example #28
Source File: algorithms_distances.py    From struc2vec with MIT License
def exec_bfs_compact(G,workers,calcUntilLayer):

    futures = {}
    degreeList = {}

    t0 = time()
    vertices = G.keys()
    parts = workers
    chunks = partition(vertices,parts)

    logging.info('Capturing larger degree...')
    maxDegree = 0
    for v in vertices:
        if(len(G[v]) > maxDegree):
            maxDegree = len(G[v])
    logging.info('Larger degree captured')

    with ProcessPoolExecutor(max_workers=workers) as executor:

        part = 1
        for c in chunks:
            job = executor.submit(getCompactDegreeListsVertices,G,c,maxDegree,calcUntilLayer)
            futures[job] = part
            part += 1

        for job in as_completed(futures):
            dl = job.result()
            v = futures[job]
            degreeList.update(dl)

    logging.info("Saving degreeList on disk...")
    saveVariableOnDisk(degreeList,'compactDegreeList')
    t1 = time()
    logging.info('Execution time - BFS: {}m'.format((t1-t0)/60))


    return 
Example #29
Source File: xss.py    From Sitadel with GNU General Public License v3.0
def process(self, start_url, crawled_urls):
        db = self.datastore.open("xss.txt", "r")
        dbfiles = [x.strip() for x in db]
        self.output.info("Checking cross site scripting...")
        for payload in dbfiles:
            with ThreadPoolExecutor(max_workers=None) as executor:
                futures = [
                    executor.submit(self.attack, payload, url) for url in crawled_urls
                ]
                try:
                    # Collect results per payload; otherwise only the last payload's futures are checked
                    for future in as_completed(futures):
                        future.result()
                except KeyboardInterrupt:
                    executor.shutdown(False)
                    raise 
Example #30
Source File: clean_dataset.py    From nima.pytorch with MIT License
def remove_all_not_found_image(df: pd.DataFrame, path_to_images: Path, num_workers: int) -> pd.DataFrame:
    futures = []
    results = []
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        for df_batch in np.array_split(df, num_workers):
            future = executor.submit(_remove_all_not_found_image, df=df_batch, path_to_images=path_to_images)
            futures.append(future)
        for future in tqdm(as_completed(futures), total=len(futures)):
            results.append(future.result())
    new_df = pd.concat(results)
    return new_df