Python concurrent.futures.as_completed() Examples

The following are 30 code examples of concurrent.futures.as_completed(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module concurrent.futures, or try the search function.
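For orientation, here is a minimal, self-contained sketch of the canonical as_completed() pattern, using only standard-library calls; the URLs and the load_url helper are illustrative placeholders, not part of any project below. as_completed() takes an iterable of futures and yields each future as soon as it finishes, in completion order rather than submission order; calling result() on a yielded future returns its value or re-raises the exception from the worker.

import concurrent.futures
import urllib.request

URLS = ['https://www.python.org/', 'https://docs.python.org/']  # placeholder URLs

def load_url(url, timeout):
    # Fetch a single URL and return its body.
    with urllib.request.urlopen(url, timeout=timeout) as conn:
        return conn.read()

with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
    future_to_url = {executor.submit(load_url, url, 10): url for url in URLS}
    # Futures arrive in completion order, not submission order.
    for future in concurrent.futures.as_completed(future_to_url):
        url = future_to_url[future]
        try:
            data = future.result()  # re-raises any exception raised in the worker
        except Exception as exc:
            print('%r generated an exception: %s' % (url, exc))
        else:
            print('%r page is %d bytes' % (url, len(data)))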
Example #1
Source File: ilsvrc_det.py    From gluon-cv with Apache License 2.0
def par_crop(args):
    """
    Dataset curation, crop data and transform the format of a label
    """
    crop_path = os.path.join(args.download_dir, './crop{:d}'.format(args.instance_size))
    if not os.path.isdir(crop_path): makedirs(crop_path)
    VID_base_path = os.path.join(args.download_dir, './ILSVRC')
    ann_base_path = os.path.join(VID_base_path, 'Annotations/DET/train/')
    sub_sets = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i')
    for sub_set in sub_sets:
        sub_set_base_path = os.path.join(ann_base_path, sub_set)
        if 'a' == sub_set:
            xmls = sorted(glob.glob(os.path.join(sub_set_base_path, '*', '*.xml')))
        else:
            xmls = sorted(glob.glob(os.path.join(sub_set_base_path, '*.xml')))
        n_imgs = len(xmls)
        sub_set_crop_path = os.path.join(crop_path, sub_set)
        with futures.ProcessPoolExecutor(max_workers=args.num_threads) as executor:
            fs = [executor.submit(crop_xml, args, xml, sub_set_crop_path, args.instance_size) for xml in xmls]
            for i, f in enumerate(futures.as_completed(fs)):
                printProgress(i, n_imgs, prefix=sub_set, suffix='Done ', barLength=80) 
Example #2
Source File: ilsvrc_vid.py    From gluon-cv with Apache License 2.0
def par_crop(args, ann_base_path):
    """
    Dataset curation, crop data and transform the format of the label
    Parameters
    ----------
    ann_base_path: str, Annotations base path
    """
    crop_path = os.path.join(args.download_dir, './crop{:d}'.format(int(args.instance_size)))
    if not os.path.isdir(crop_path):
        makedirs(crop_path)
    sub_sets = sorted({'a', 'b', 'c', 'd', 'e'})
    for sub_set in sub_sets:
        sub_set_base_path = os.path.join(ann_base_path, sub_set)
        videos = sorted(os.listdir(sub_set_base_path))
        n_videos = len(videos)
        with futures.ProcessPoolExecutor(max_workers=args.num_threads) as executor:
            fs = [executor.submit(crop_video, args, sub_set, video, crop_path, ann_base_path) for video in videos]
            for i, f in enumerate(futures.as_completed(fs)):
                # Write progress to error so that it can be seen
                printProgress(i, n_videos, prefix=sub_set, suffix='Done ', barLength=40) 
Example #3
Source File: flags_threadpool_ac.py    From concurrency2017 with MIT License
def download_many(cc_list):
    cc_list = cc_list[:5]  # <1>
    with futures.ThreadPoolExecutor(max_workers=3) as executor:  # <2>
        to_do = []
        for cc in sorted(cc_list):  # <3>
            future = executor.submit(download_one, cc)  # <4>
            to_do.append(future)  # <5>
            msg = 'Scheduled for {}: {}'
            print(msg.format(cc, future))  # <6>

        results = []
        for future in futures.as_completed(to_do):  # <7>
            res = future.result()  # <8>
            msg = '{} result: {!r}'
            print(msg.format(future, res))  # <9>
            results.append(res)

    return len(results) 
Example #4
Source File: chromeboy.py    From falsy with MIT License
def run(self, data, max=4):
        results = []
        with futures.ThreadPoolExecutor(max_workers=max) as executor:
            future_to_url = {}
            for i, payload in enumerate(data):
                payload['chrome_id'] = i
                future_to_url[executor.submit(self.run1, payload)] = payload
                # future_to_url[executor.submit(self.run1_core, payload, browser, begin_time)] = payload
            for future in futures.as_completed(future_to_url):
                url = future_to_url[future]
                try:
                    data = future.result()
                except Exception as exc:
                    print('%r generated an exception: %s' % (url, exc))
                else:
                    data['chrome_id'] = url['chrome_id']
                    results.append(data)

        sorted_results = sorted(results, key=lambda tup: tup['chrome_id'])
        return sorted_results 
Example #5
Source File: stac_validator.py    From stac-validator with Apache License 2.0
def run(self, concurrent=10):
        """
        Entry point.
        :param concurrent: number of threads to use
        :return: message json
        """

        children = [self.stac_file]
        logger.info(f"Using {concurrent} threads")
        while True:
            with futures.ThreadPoolExecutor(max_workers=int(concurrent)) as executor:
                future_tasks = [executor.submit(self._validate, url) for url in children]
                children = []
                for task in futures.as_completed(future_tasks):
                    message, status, new_children = task.result()
                    self.status = self._update_status(self.status, status)
                    self.message.append(message)
                    children.extend(new_children)

            if not children:
                break

        return json.dumps(self.message) 
Example #6
Source File: servers.py    From Pyro5 with MIT License
def count(self, lines):
        # use the name server's prefix lookup to get all registered wordcounters
        with locate_ns() as ns:
            all_counters = ns.list(prefix="example.dc2.wordcount.")

        # chop the text into chunks that can be distributed across the workers
        # uses futures so that it runs the counts in parallel
        # counter is selected in a round-robin fashion from list of all available counters
        with futures.ThreadPoolExecutor() as pool:
            roundrobin_counters = cycle(all_counters.values())
            tasks = []
            for chunk in grouper(200, lines):
                tasks.append(pool.submit(self.count_chunk, next(roundrobin_counters), chunk))

            # gather the results
            print("Collecting %d results (counted in parallel)..." % len(tasks))
            totals = Counter()
            for task in futures.as_completed(tasks):
                try:
                    totals.update(task.result())
                except Pyro5.errors.CommunicationError as x:
                    raise Pyro5.errors.PyroError("Something went wrong in the server when collecting the responses: "+str(x))
            return totals 
Example #7
Source File: takeover.py    From takeover with MIT License
def runner(k):
        threadpool = thread.ThreadPoolExecutor(max_workers=k.get('threads'))
        if k.get('verbose'):
            info('Set %s threads..' % k.get('threads'))
        futures = (threadpool.submit(requester, domain, k.get("proxy"), k.get("timeout"),
                k.get("output"), k.get('process'), k.get('verbose')) for domain in k.get("domains"))
        for i, results in enumerate(thread.as_completed(futures)):
            if k.get('verbose') and k.get('d_list'):
                str_ = "{i}{b:.2f}% Domain: {d}".format(
                    i=_info(),
                    b=PERCENT(int(i), int(k.get('dict_len'))),
                    d=k.get('domains')[i])
                print_(str_)
            else:
                info('Domain: {}'.format(k.get('domains')[i]))
Example #8
Source File: osc_uploader.py    From upload-scripts with MIT License
def _visual_items_upload_with_operation(self, sequence, visual_item_upload_operation):
        items_to_upload = []
        for visual_item in sequence.visual_items:
            if str(visual_item.index) not in sequence.progress:
                items_to_upload.append(visual_item)

        with THREAD_LOCK:
            self.manager.progress_bar.update(len(sequence.visual_items) - len(items_to_upload))

        with ThreadPoolExecutor(max_workers=self.workers) as executor:
            future_events = [executor.submit(visual_item_upload_operation.upload,
                                             visual_item) for visual_item in items_to_upload]
            for completed_event in as_completed(future_events):
                uploaded, index = completed_event.result()
                with THREAD_LOCK:
                    if uploaded:
                        self.__persist_upload_index(index, sequence.path)
                        sequence.progress.append(index)
                    self.manager.progress_bar.update(1) 
Example #9
Source File: test_ddl.py    From ibis with Apache License 2.0
def test_temp_table_concurrency(con, test_data_dir):
    # we don't install futures on Windows in CI, and we can't run this
    # test there anyway, so we import here
    import concurrent.futures
    from concurrent.futures import as_completed

    def limit_10(i, hdfs_path):
        t = con.parquet_file(hdfs_path)
        return t.sort_by(t.r_regionkey).limit(1, offset=i).execute()

    nthreads = 4
    hdfs_path = pjoin(test_data_dir, 'parquet/tpch_region')

    with concurrent.futures.ThreadPoolExecutor(max_workers=nthreads) as e:
        futures = [e.submit(limit_10, i, hdfs_path) for i in range(nthreads)]
    assert all(map(len, (future.result() for future in as_completed(futures)))) 
Example #10
Source File: bfile.py    From Sitadel with GNU General Public License v3.0
def process(self, start_url, crawled_urls):
        self.output.info("Checking common backup files..")
        db = self.datastore.open("bfile.txt", "r")
        dbfiles = [x.strip() for x in db.readlines()]
        db1 = self.datastore.open("cfile.txt", "r")
        dbfiles1 = [x.strip() for x in db1.readlines()]
        urls = []
        for b in dbfiles:
            for d in dbfiles1:
                bdir = b.replace("[name]", d)
                urls.append(urljoin(str(start_url), str(bdir)))
        # We launch ThreadPoolExecutor with max_workers set to None to use the default number of workers
        # https://docs.python.org/3/library/concurrent.futures.html
        with ThreadPoolExecutor(max_workers=None) as executor:
            futures = [executor.submit(self.check_url, url) for url in urls]
            try:
                for future in as_completed(futures):
                    future.result()
            except KeyboardInterrupt:
                executor.shutdown(False)
                raise 
Example #11
Source File: admin.py    From Sitadel with GNU General Public License v3.0
def process(self, start_url, crawled_urls):
        self.output.info("Checking admin interfaces...")
        with self.datastore.open("admin.txt", "r") as db:
            dbfiles = [x.strip() for x in db.readlines()]
            urls = map(
                lambda adminpath: urljoin(str(start_url), str(adminpath)), dbfiles
            )
            # We launch ThreadPoolExecutor with max_workers set to None to use the default number of workers
            # https://docs.python.org/3/library/concurrent.futures.html
            with ThreadPoolExecutor(max_workers=None) as executor:
                futures = [executor.submit(self.check_url, url) for url in urls]
                try:
                    for future in as_completed(futures):
                        future.result()
                except KeyboardInterrupt:
                    executor.shutdown(False)
                    raise 
Example #12
Source File: federation_cluster.py    From FATE with Apache License 2.0
def async_get(self, name: str, tag: str, parties: list) -> typing.Generator:
        rubbish = Rubbish(name, tag)
        futures = self._check_get_status_async(name, tag, parties)
        for future in as_completed(futures):
            party = futures[future]
            obj, head, frags = future.result()
            if isinstance(obj, _DTable):
                rubbish.add_table(obj)
                yield (party, obj)
            else:
                table, key = head
                rubbish.add_obj(table, key)
                if not is_split_head(obj):
                    yield (party, obj)
                else:
                    frag_table, frag_keys = frags
                    rubbish.add_table(frag_table)
                    fragments = [frag_table.get(key) for key in frag_keys]
                    yield (party, split_get(fragments))
        yield (None, rubbish) 
Example #13
Source File: test_concurrent_futures.py    From Fluid-Designer with GNU General Public License v3.0
def test_zero_timeout(self):
        future1 = self.executor.submit(time.sleep, 2)
        completed_futures = set()
        try:
            for future in futures.as_completed(
                    [CANCELLED_AND_NOTIFIED_FUTURE,
                     EXCEPTION_FUTURE,
                     SUCCESSFUL_FUTURE,
                     future1],
                    timeout=0):
                completed_futures.add(future)
        except futures.TimeoutError:
            pass

        self.assertEqual(set([CANCELLED_AND_NOTIFIED_FUTURE,
                              EXCEPTION_FUTURE,
                              SUCCESSFUL_FUTURE]),
                         completed_futures) 
Example #14
Source File: replay_gain.py    From linux-show-player with GNU General Public License v3.0
def run(self):
        self._running = True

        with ThreadPoolExecutor(max_workers=self.threads) as executor:
            for file in self.files.keys():
                gain = GstGain(file, self.ref_level)
                self._futures[executor.submit(gain.gain)] = gain

            for future in futures_completed(self._futures):
                if self._running:
                    try:
                        self._post_process(*future.result())
                    except Exception:
                        # Call with the value stored in the GstGain object
                        self._post_process(*self._futures[future].result)
                else:
                    break

        if self._running:
            MainActionsHandler.do_action(self._action)
        else:
            logging.info('REPLAY-GAIN:: Stopped by user')

        self.on_progress.emit(-1)
        self.on_progress.disconnect() 
Example #15
Source File: main.py    From topcoder-dl with GNU General Public License v3.0
def fetch(self):
        try:
            if not os.path.exists(self.target_dir):
                os.mkdir(self.target_dir)
        except Exception as e:
            print(e)
        self.page = urllib2.urlopen(self.base_url)
        self.data = BeautifulSoup(self.page.read(), "lxml")
        if not self.flag:
            table = self.data.findAll("table")[0]
            all_a = table.findAll("a")
            member_a = table.findAll("a", class_="tc_coder coder")
            all_set = set(all_a)
            member_set = set(member_a)
            post = list(set(all_set).difference(member_set))
        else:
            post = [self.base_url]

        with ThreadPoolExecutor(max_workers=4) as executor:
            future_to_url = {
                executor.submit(self.download, url): url for url in post}
            for future in as_completed(future_to_url):
                url = future_to_url[future] 
Example #16
Source File: test_concurrent_futures.py    From ironpython3 with Apache License 2.0
def test_zero_timeout(self):
        future1 = self.executor.submit(time.sleep, 2)
        completed_futures = set()
        try:
            for future in futures.as_completed(
                    [CANCELLED_AND_NOTIFIED_FUTURE,
                     EXCEPTION_FUTURE,
                     SUCCESSFUL_FUTURE,
                     future1],
                    timeout=0):
                completed_futures.add(future)
        except futures.TimeoutError:
            pass

        self.assertEqual(set([CANCELLED_AND_NOTIFIED_FUTURE,
                              EXCEPTION_FUTURE,
                              SUCCESSFUL_FUTURE]),
                         completed_futures) 
Example #17
Source File: ldap.py    From Sitadel with GNU General Public License v3.0
def process(self, start_url, crawled_urls):
        self.output.info("Checking ldap injection...")
        db = self.datastore.open("ldap.txt", "r")
        dbfiles = [x.strip() for x in db]

        for payload in dbfiles:
            with ThreadPoolExecutor(max_workers=None) as executor:
                futures = [
                    executor.submit(self.attack, payload, url) for url in crawled_urls
                ]
                try:
                    # Collect results per payload; otherwise only the last payload's futures are checked
                    for future in as_completed(futures):
                        future.result()
                except KeyboardInterrupt:
                    executor.shutdown(False)
                    raise 
Example #18
Source File: downloader.py    From PyCon-Mobile-App with GNU General Public License v3.0
def _check_executor(self, dt):
        start = time()
        try:
            for future in as_completed(self._futures[:], 0):
                self._futures.remove(future)
                try:
                    result = future.result()
                except Exception:
                    traceback.print_exc()
                    # make an error tile?
                    continue
                if result is None:
                    continue
                callback, args = result
                callback(*args)

                # cap executor time in order to prevent too much slowness;
                # seems to work quite well with big zoom-in/out
                if time() - start > self.cap_time:
                    break
        except TimeoutError:
            pass 
Example #19
Source File: algorithms_distances.py    From struc2vec with MIT License
def exec_bfs(G,workers,calcUntilLayer):

    futures = {}
    degreeList = {}

    t0 = time()
    vertices = G.keys()
    parts = workers
    chunks = partition(vertices,parts)

    with ProcessPoolExecutor(max_workers=workers) as executor:

        part = 1
        for c in chunks:
            job = executor.submit(getDegreeListsVertices,G,c,calcUntilLayer)
            futures[job] = part
            part += 1

        for job in as_completed(futures):
            dl = job.result()
            v = futures[job]
            degreeList.update(dl)

    logging.info("Saving degreeList on disk...")
    saveVariableOnDisk(degreeList,'degreeList')
    t1 = time()
    logging.info('Execution time - BFS: {}m'.format((t1-t0)/60))


    return 
Example #20
Source File: dir.py    From Sitadel with GNU General Public License v3.0
def process(self, start_url, crawled_urls):
        self.output.info("Checking common dirs..")
        with self.datastore.open("cdir.txt", "r") as db:
            dbfiles = [x.strip() for x in db.readlines()]
            urls = map(lambda d: urljoin(str(start_url), str(d)), dbfiles)
            # We launch ThreadPoolExecutor with max_workers set to None to use the default number of workers
            # https://docs.python.org/3/library/concurrent.futures.html
            with ThreadPoolExecutor(max_workers=None) as executor:
                futures = [executor.submit(self.check_url, url) for url in urls]
                try:
                    for future in as_completed(futures):
                        future.result()
                except KeyboardInterrupt:
                    executor.shutdown(False)
                    raise 
Example #21
Source File: backdoor.py    From Sitadel with GNU General Public License v3.0
def process(self, start_url, crawled_urls):
        self.output.info("Checking common backdoors...")
        with self.datastore.open("backdoor.txt", "r") as db:
            dbfiles = [x.strip() for x in db.readlines()]
            urls = map(lambda backdoor: urljoin(str(start_url), str(backdoor)), dbfiles)
            # We launch ThreadPoolExecutor with max_workers set to None to use the default number of workers
            # https://docs.python.org/3/library/concurrent.futures.html
            with ThreadPoolExecutor(max_workers=None) as executor:
                futures = [executor.submit(self.check_url, url) for url in urls]
                try:
                    for future in as_completed(futures):
                        future.result()
                except KeyboardInterrupt:
                    executor.shutdown(False)
                    raise 
Example #22
Source File: client_graphics.py    From Pyro5 with MIT License
def draw_results(self):
        for task in futures.as_completed(self.tasks):
            y, pixeldata = task.result()
            self.img.put(pixeldata, (0, y))
            self.root.update()
        duration = time.time() - self.start_time
        print("Calculation took: %.2f seconds" % duration) 
Example #23
Source File: log.py    From Sitadel with GNU General Public License v3.0
def process(self, start_url, crawled_urls):
        self.output.info("Checking common log files..")
        with self.datastore.open("log.txt", "r") as db:
            dbfiles = [x.strip() for x in db.readlines()]
            urls = map(lambda log: urljoin(str(start_url), str(log)), dbfiles)
            # We launch ThreadPoolExecutor with max_workers set to None to use the default number of workers
            # https://docs.python.org/3/library/concurrent.futures.html
            with ThreadPoolExecutor(max_workers=None) as executor:
                futures = [executor.submit(self.check_url, url) for url in urls]
                try:
                    for future in as_completed(futures):
                        future.result()
                except KeyboardInterrupt:
                    executor.shutdown(False)
                    raise 
Example #24
Source File: file.py    From Sitadel with GNU General Public License v3.0
def process(self, start_url, crawled_urls):
        self.output.info("Checking common files...")
        with self.datastore.open("cfile.txt", "r") as db:
            dbfiles = [x.strip() for x in db.readlines()]
            urls = map(lambda filex: urljoin(str(start_url), str(filex)), dbfiles)
            # We launch ThreadPoolExecutor with max_workers set to None to use the default number of workers
            # https://docs.python.org/3/library/concurrent.futures.html
            with ThreadPoolExecutor(max_workers=None) as executor:
                futures = [executor.submit(self.check_url, url) for url in urls]
                try:
                    for future in as_completed(futures):
                        future.result()
                except KeyboardInterrupt:
                    executor.shutdown(False)
                    raise 
Example #25
Source File: test_fix_point.py    From FATE with Apache License 2.0
def submit(func, *args, **kwargs):
    with ProcessPoolExecutor() as pool:
        num = NUM_HOSTS + 1
        result = [None] * num
        futures = {}
        for _idx in range(num):
            kv = kwargs.copy()
            kv["idx"] = _idx
            futures[pool.submit(func, *args, **kv)] = _idx
        for future in as_completed(futures):
            result[futures[future]] = future.result()
        return result 
Example #26
Source File: getgroups-esi.py    From yamlloader with MIT License
def getgroups(grouplist):
    groupfuture=[]
    print "getgroups"
    for groupid in grouplist:
        if isinstance(groupid,basestring) and groupid.startswith("https"):
            groupfuture.append(session.get(str(groupid)))
        else:
            groupfuture.append(session.get(grouplookupurl.format(groupid)))
    badlist=[]
    pbar = tqdm(total=len(grouplist))
    for groupdata in as_completed(groupfuture):
        if groupdata.result().status_code==200:
            itemjson=groupdata.result().json()
            item=itemjson.get('group_id')
            if int(item) in sdegrouplist:
                try:
                    connection.execute(invGroups.update().where(invGroups.c.groupID == literal_column(str(item))),
                               groupID=item,
                               groupName=itemjson['name'],
                               categoryID=itemjson.get('category_id',None),
                               published=itemjson.get('published',False),
                               )
                except:
                    pass
            else:
                connection.execute(invGroups.insert(),
                           groupID=item,
                           groupName=itemjson['name'],
                           categoryID=itemjson.get('category_id', None),
                           published=itemjson.get('published', False),
                           )
        else:
            badlist.append(groupdata.result().url)
            print groupdata.result().url
        pbar.update(1)
    return badlist 
Example #27
Source File: algorithms.py    From struc2vec with MIT License
def generate_random_walks(num_walks,walk_length,workers,vertices):

    logging.info('Loading distances_nets on disk...')

    graphs = restoreVariableFromDisk('distances_nets_graphs')
    alias_method_j = restoreVariableFromDisk('nets_weights_alias_method_j')
    alias_method_q = restoreVariableFromDisk('nets_weights_alias_method_q')
    amount_neighbours = restoreVariableFromDisk('amount_neighbours')

    logging.info('Creating RWs...')
    t0 = time()
    
    walks = deque()
    initialLayer = 0

    if(workers > num_walks):
        workers = num_walks

    with ProcessPoolExecutor(max_workers=workers) as executor:
        futures = {}
        for walk_iter in range(num_walks):
            random.shuffle(vertices)
            job = executor.submit(exec_ramdom_walks_for_chunck,vertices,graphs,alias_method_j,alias_method_q,walk_length,amount_neighbours)
            futures[job] = walk_iter
            #part += 1
        logging.info("Receiving results...")
        for job in as_completed(futures):
            walk = job.result()
            r = futures[job]
            logging.info("Iteration {} executed.".format(r))
            walks.extend(walk)
            del futures[job]


    t1 = time()
    logging.info('RWs created. Time: {}m'.format((t1-t0)/60))
    logging.info("Saving Random Walks on disk...")
    save_random_walks(walks) 
Example #28
Source File: algorithms_distances.py    From struc2vec with MIT License
def exec_bfs_compact(G,workers,calcUntilLayer):

    futures = {}
    degreeList = {}

    t0 = time()
    vertices = G.keys()
    parts = workers
    chunks = partition(vertices,parts)

    logging.info('Capturing larger degree...')
    maxDegree = 0
    for v in vertices:
        if(len(G[v]) > maxDegree):
            maxDegree = len(G[v])
    logging.info('Larger degree captured')

    with ProcessPoolExecutor(max_workers=workers) as executor:

        part = 1
        for c in chunks:
            job = executor.submit(getCompactDegreeListsVertices,G,c,maxDegree,calcUntilLayer)
            futures[job] = part
            part += 1

        for job in as_completed(futures):
            dl = job.result()
            v = futures[job]
            degreeList.update(dl)

    logging.info("Saving degreeList on disk...")
    saveVariableOnDisk(degreeList,'compactDegreeList')
    t1 = time()
    logging.info('Execution time - BFS: {}m'.format((t1-t0)/60))


    return 
Example #29
Source File: xss.py    From Sitadel with GNU General Public License v3.0
def process(self, start_url, crawled_urls):
        db = self.datastore.open("xss.txt", "r")
        dbfiles = [x.strip() for x in db]
        self.output.info("Checking cross site scripting...")
        for payload in dbfiles:
            with ThreadPoolExecutor(max_workers=None) as executor:
                futures = [
                    executor.submit(self.attack, payload, url) for url in crawled_urls
                ]
                try:
                    # Collect results per payload; otherwise only the last payload's futures are checked
                    for future in as_completed(futures):
                        future.result()
                except KeyboardInterrupt:
                    executor.shutdown(False)
                    raise 
Example #30
Source File: clean_dataset.py    From nima.pytorch with MIT License
def remove_all_not_found_image(df: pd.DataFrame, path_to_images: Path, num_workers: int) -> pd.DataFrame:
    futures = []
    results = []
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        for df_batch in np.array_split(df, num_workers):
            future = executor.submit(_remove_all_not_found_image, df=df_batch, path_to_images=path_to_images)
            futures.append(future)
        for future in tqdm(as_completed(futures), total=len(futures)):
            results.append(future.result())
    new_df = pd.concat(results)
    return new_df