Python joblib.delayed() Examples
The following are 30 code examples of joblib.delayed(), collected from open-source projects. The line above each example names the project and source file it was taken from, so you can go back to the original code. You may also want to check out the other available functions and classes of the joblib module.
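Every example below follows the same basic pattern: joblib.delayed() wraps a function call lazily into a (function, args, kwargs) triple, and a generator of such triples is handed to a joblib.Parallel object, which runs the calls on worker processes or threads and returns the results as a list. As a minimal, self-contained sketch of that pattern (not taken from any of the projects below):

from math import sqrt
from joblib import Parallel, delayed

# delayed(sqrt) captures each call lazily; Parallel executes the calls
# on n_jobs workers and returns the results in order.
results = Parallel(n_jobs=2)(delayed(sqrt)(i ** 2) for i in range(10))
print(results)  # [0.0, 1.0, 2.0, ..., 9.0]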
Example #1
Source File: utils.py From nmp_qc with MIT License | 8 votes |
def get_graph_stats(graph_obj_handle, prop='degrees'):
    # if prop == 'degrees':
    num_cores = multiprocessing.cpu_count()
    inputs = [int(i*len(graph_obj_handle)/num_cores) for i in range(num_cores)] + [len(graph_obj_handle)]
    res = Parallel(n_jobs=num_cores)(delayed(get_values)(graph_obj_handle, inputs[i], inputs[i+1], prop) for i in range(num_cores))

    stat_dict = {}

    if 'degrees' in prop:
        stat_dict['degrees'] = list(set([d for core_res in res for file_res in core_res for d in file_res['degrees']]))
    if 'edge_labels' in prop:
        stat_dict['edge_labels'] = list(set([d for core_res in res for file_res in core_res for d in file_res['edge_labels']]))
    if 'target_mean' in prop or 'target_std' in prop:
        param = np.array([file_res['params'] for core_res in res for file_res in core_res])
    if 'target_mean' in prop:
        stat_dict['target_mean'] = np.mean(param, axis=0)
    if 'target_std' in prop:
        stat_dict['target_std'] = np.std(param, axis=0)

    return stat_dict
Example #2
Source File: ShotgunClassifier.py From SFA_Python with GNU General Public License v3.0 | 6 votes |
def fitEnsemble(self, normMean, samples, factor):
    minWindowLength = 5
    maxWindowLength = getMax(samples, self.MAX_WINDOW_LENGTH)
    windows = self.getWindowsBetween(minWindowLength, maxWindowLength)
    self.logger.Log("Windows: %s" % windows)

    correctTraining = 0
    self.results = []

    self.logger.Log("%s Fitting for a norm of %s" % (self.NAME, str(normMean)))
    Parallel(n_jobs=1, backend="threading")(delayed(self.fitIndividual, check_pickle=False)(normMean, samples, windows, i) for i in range(len(windows)))

    # Find best correctTraining
    for i in range(len(self.results)):
        if self.results[i].correct > correctTraining:
            correctTraining = self.results[i].correct

    # Remove Results that are no longer satisfactory
    new_results = []
    for i in range(len(self.results)):
        if self.results[i].correct >= (correctTraining * factor):
            new_results.append(self.results[i])

    return new_results, correctTraining
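Note that the SFA_Python examples pass check_pickle=False to delayed(). That keyword existed in older joblib releases but was later deprecated and removed, so recent joblib versions no longer accept it. A minimal sketch of the same store-results-via-shared-state pattern on a current joblib, with fit_one standing in (hypothetically) for self.fitIndividual:

from joblib import Parallel, delayed

def fit_one(results, window, i):
    # Hypothetical stand-in for fitIndividual: it writes its result into a
    # shared list, which is why the threading backend is used.
    results[i] = window * 2

results = [None] * 4
Parallel(n_jobs=1, backend="threading")(
    delayed(fit_one)(results, w, i) for i, w in enumerate([8, 16, 32, 64]))
print(results)  # [16, 32, 64, 128]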
Example #3
Source File: batched_inv_joblib.py From content_wmf with MIT License | 6 votes |
def recompute_factors_batched(Y, S, lambda_reg, W=None, X=None,
                              dtype='float32', batch_size=10000, n_jobs=4):
    m = S.shape[0]  # m = number of users
    f = Y.shape[1]  # f = number of factors

    YTY = np.dot(Y.T, Y)  # precompute this
    YTYpR = YTY + lambda_reg * np.eye(f)

    if W is not None:
        WX = lambda_reg * (X.dot(W)).T
    else:
        WX = None

    X_new = np.zeros((m, f), dtype=dtype)

    num_batches = int(np.ceil(m / float(batch_size)))

    res = Parallel(n_jobs=n_jobs)(delayed(solve_batch)(b, S, Y, WX, YTYpR,
                                                       batch_size, m, f, dtype)
                                  for b in xrange(num_batches))
    X_new = np.concatenate(res, axis=0)

    return X_new
Example #4
Source File: extract_frame.py From DPC with MIT License | 6 votes |
def main_kinetics400(v_root, f_root, dim=150):
    print('extracting Kinetics400 ... ')
    for basename in ['train_split', 'val_split']:
        v_root_real = v_root + '/' + basename
        if not os.path.exists(v_root_real):
            print('Wrong v_root'); sys.exit()
        f_root_real = '/scratch/local/ssd/htd/kinetics400/frame_full' + '/' + basename
        print('Extract to: \nframe: %s' % f_root_real)
        if not os.path.exists(f_root_real):
            os.makedirs(f_root_real)
        v_act_root = glob.glob(os.path.join(v_root_real, '*/'))
        v_act_root = sorted(v_act_root)

        # if resume, remember to delete the last video folder
        for i, j in tqdm(enumerate(v_act_root), total=len(v_act_root)):
            v_paths = glob.glob(os.path.join(j, '*.mp4'))
            v_paths = sorted(v_paths)
            # for resume:
            v_class = j.split('/')[-2]
            out_dir = os.path.join(f_root_real, v_class)
            if os.path.exists(out_dir):
                print(out_dir, 'exists!'); continue
            print('extracting: %s' % v_class)
            # dim = 150 (crop to 128 later) or 256 (crop to 224 later)
            Parallel(n_jobs=32)(delayed(extract_video_opencv)(p, f_root_real, dim=dim) for p in tqdm(v_paths, total=len(v_paths)))
Example #5
Source File: __init__.py From s3tk with MIT License | 6 votes |
def parallelize(bucket, only, _except, fn, args=(), versions=False):
    bucket = s3().Bucket(bucket)

    # use prefix for performance
    prefix = None
    if only:
        # get the first prefix before wildcard
        prefix = '/'.join(only.split('*')[0].split('/')[:-1])
        if prefix:
            prefix = prefix + '/'

    if versions:
        object_versions = bucket.object_versions.filter(Prefix=prefix) if prefix else bucket.object_versions.all()
        # delete markers have no size
        return Parallel(n_jobs=24)(delayed(fn)(bucket.name, ov.object_key, ov.id, *args) for ov in object_versions if object_matches(ov.object_key, only, _except) and not ov.is_latest and ov.size is not None)
    else:
        objects = bucket.objects.filter(Prefix=prefix) if prefix else bucket.objects.all()

        if only and not '*' in only:
            objects = [s3().Object(bucket, only)]

        return Parallel(n_jobs=24)(delayed(fn)(bucket.name, os.key, *args) for os in objects if object_matches(os.key, only, _except))
Example #6
Source File: convert_videos.py From dmc-net with MIT License | 6 votes |
def convert_video_wapper(src_videos, dst_videos, cmd_format, in_parallel=True):
    commands = []
    for src, dst in zip(src_videos, dst_videos):
        cmd = cmd_format.format(src, dst)
        commands.append(cmd)

    logging.info("- {} commonds to excute".format(len(commands)))

    if not in_parallel:
        for i, cmd in enumerate(commands):
            # if i % 100 == 0:
            #     logging.info("{} / {}: '{}'".format(i, len(commands), cmd))
            exe_cmd(cmd=cmd)
    else:
        num_jobs = 24
        logging.info("processing videos in parallel, num_jobs={}".format(num_jobs))
        Parallel(n_jobs=num_jobs)(delayed(exe_cmd)(cmd) for cmd in commands)
Example #7
Source File: _glm_reporter_visual_inspection_suite_.py From nistats with BSD 3-Clause "New" or "Revised" License | 6 votes |
def prefer_parallel_execution(functions_to_be_called):  # pragma: no cover
    try:
        import joblib
        import multiprocessing
    except ImportError:
        print('Joblib not installed, switching to serial execution')
        [run_function(fn) for fn in functions_to_be_called]
    else:
        try:
            import tqdm
        except ImportError:
            inputs = functions_to_be_called
        else:
            inputs = tqdm.tqdm(functions_to_be_called)
        n_jobs = multiprocessing.cpu_count()
        print('Parallelizing execution using Joblib')
        joblib.Parallel(n_jobs=n_jobs)(
            joblib.delayed(run_function)(fn) for fn in inputs)
Example #8
Source File: BOSSVSClassifier.py From SFA_Python with GNU General Public License v3.0 | 6 votes |
def fitEnsemble(self, windows, normMean, samples):
    correctTraining = 0
    self.results = []

    self.logger.Log("%s Fitting for a norm of %s" % (self.NAME, str(normMean)))
    Parallel(n_jobs=1, backend="threading")(delayed(self.fitIndividual, check_pickle=False)(normMean, samples, windows, i) for i in range(len(windows)))

    # Find best correctTraining
    for i in range(len(self.results)):
        if self.results[i].score > correctTraining:
            correctTraining = self.results[i].score

    # Remove Results that are no longer satisfactory
    new_results = []
    self.logger.Log("Stored Models for Norm=%s" % normMean)
    for i in range(len(self.results)):
        if self.results[i].score >= (correctTraining * self.factor):
            self.logger.Log("WindowLength:%s Features:%s TrainScore:%s" % (self.results[i].windowLength, self.results[i].features, self.results[i].score))
            new_results.append(self.results[i])

    return new_results
Example #9
Source File: ShotgunEnsembleClassifier.py From SFA_Python with GNU General Public License v3.0 | 6 votes |
def fitEnsemble(self, normMean, samples, factor):
    minWindowLength = 5
    maxWindowLength = getMax(samples, self.MAX_WINDOW_LENGTH)
    windows = self.getWindowsBetween(minWindowLength, maxWindowLength)
    self.logger.Log("Windows: %s" % windows)

    correctTraining = 0
    self.results = []

    self.logger.Log("%s Fitting for a norm of %s" % (self.NAME, str(normMean)))
    Parallel(n_jobs=-1, backend="threading")(delayed(self.fitIndividual, check_pickle=False)(normMean, samples, windows, i) for i in range(len(windows)))

    # Find best correctTraining
    for i in range(len(self.results)):
        if self.results[i].correct > correctTraining:
            correctTraining = self.results[i].correct

    # Remove Results that are no longer satisfactory
    new_results = []
    for i in range(len(self.results)):
        if self.results[i].correct >= (correctTraining * factor):
            new_results.append(self.results[i])

    return new_results, correctTraining
Example #10
Source File: utils.py From DeepLab_v3 with MIT License | 6 votes |
def next_minibatch(self):
    image_filenames_minibatch = self.image_filenames[self.current_index: self.current_index + self.minibatch_size]
    label_filenames_minibatch = self.label_filenames[self.current_index: self.current_index + self.minibatch_size]
    self.current_index += self.minibatch_size
    if self.current_index >= self.dataset_size:
        self.current_index = 0

    # Multithread image processing
    # Reference: https://www.kaggle.com/inoryy/fast-image-pre-process-in-parallel
    results = Parallel(n_jobs=self.num_jobs)(delayed(self.process_func)(image_filename, label_filename) for image_filename, label_filename in zip(image_filenames_minibatch, label_filenames_minibatch))
    images, labels = zip(*results)

    images = np.asarray(images)
    labels = np.asarray(labels)

    return images, labels
Example #11
Source File: BOSSEnsembleClassifier.py From SFA_Python with GNU General Public License v3.0 | 6 votes |
def fitEnsemble(self, NormMean, samples):
    correctTraining = 0
    self.results = []

    self.logger.Log("%s Fitting for a norm of %s" % (self.NAME, str(NormMean)))
    Parallel(n_jobs=1, backend="threading")(delayed(self.fitIndividual, check_pickle=False)(NormMean, samples, i) for i in range(len(self.windows)))

    # Find best correctTraining
    for i in range(len(self.results)):
        if self.results[i].score > correctTraining:
            correctTraining = self.results[i].score
    self.logger.Log("CorrectTrain for a norm of %s" % (correctTraining))

    # Remove Results that are no longer satisfactory
    new_results = []
    self.logger.Log("Stored Models for Norm=%s" % NormMean)
    for i in range(len(self.results)):
        if self.results[i].score >= (correctTraining * self.factor):
            self.logger.Log("WindowLength:%s Features:%s TrainScore:%s" % (self.results[i].windowLength, self.results[i].features, self.results[i].score))
            new_results.append(self.results[i])

    return new_results, correctTraining
Example #12
Source File: preprocessor.py From AutoSmart with GNU General Public License v3.0 | 6 votes |
def fit(self,X):
    def func(ss):
        length = len(ss.unique())
        if length >= len(ss)-10:
            return True
        else:
            return False

    df = X.data
    todo_cols = X.cat_cols

    res = Parallel(n_jobs=CONSTANT.JOBS,require='sharedmem')(delayed(func)(df[col]) for col in todo_cols)

    drop_cols = []
    for col,all_diff in zip(todo_cols,res):
        if all_diff:
            drop_cols.append(col)

    self.drop_cols = drop_cols
Example #13
Source File: librispeech.py From End-to-end-ASR-Pytorch with MIT License | 6 votes |
def __init__(self, path, split, tokenizer, bucket_size, ascending=False):
    # Setup
    self.path = path
    self.bucket_size = bucket_size

    # List all wave files
    file_list = []
    for s in split:
        split_list = list(Path(join(path, s)).rglob("*.flac"))
        assert len(split_list) > 0, "No data found @ {}".format(join(path,s))
        file_list += split_list
    # Read text
    text = Parallel(n_jobs=READ_FILE_THREADS)(
        delayed(read_text)(str(f)) for f in file_list)
    #text = Parallel(n_jobs=-1)(delayed(tokenizer.encode)(txt) for txt in text)
    text = [tokenizer.encode(txt) for txt in text]

    # Sort dataset by text length
    #file_len = Parallel(n_jobs=READ_FILE_THREADS)(delayed(getsize)(f) for f in file_list)
    self.file_list, self.text = zip(*[(f_name, txt) for f_name, txt in
                                      sorted(zip(file_list, text), reverse=not ascending, key=lambda x:len(x[1]))])
Example #14
Source File: decomposition.py From tridesclous with MIT License | 6 votes |
def transform(self, waveforms):
    #~ print('ici', waveforms.shape, self.ind_peak)
    features = waveforms[:, self.ind_peak, : ].copy()
    return features

#~ Parallel(n_jobs=n_jobs)(delayed(count_match_spikes)(sorting1.get_unit_spike_train(u1),
#~                                                     s2_spiketrains, delta_frames) for
#~                         i1, u1 in enumerate(unit1_ids))

#~ def get_pca_one_channel(wf_chan, chan, thresh, n_left, n_components_by_channel, params):
    #~ print(chan)
    #~ pca = sklearn.decomposition.IncrementalPCA(n_components=n_components_by_channel, **params)
    #~ wf_chan = waveforms[:,:,chan]
    #~ print(wf_chan.shape)
    #~ print(wf_chan[:, -n_left].shape)
    #~ keep = np.any((wf_chan>thresh) | (wf_chan<-thresh))
    #~ keep = (wf_chan[:, -n_left]>thresh) | (wf_chan[:, -n_left]<-thresh)
    #~ if keep.sum() >=n_components_by_channel:
        #~ pca.fit(wf_chan[keep, :])
        #~ return pca
    #~ else:
        #~ return None
Example #15
Source File: utils.py From contextualbandits with BSD 2-Clause "Simplified" License | 6 votes |
def partial_fit(self, X, y, classes=None):
    if self.partial_method == "gamma":
        w_all = -np.log(self
                        .random_state
                        .random(size=(X.shape[0], self.nsamples))
                        .clip(min=1e-12, max=None))
        appear_times = None
        rng = None
    elif self.partial_method == "poisson":
        w_all = None
        appear_times = self.random_state.poisson(1, size = (X.shape[0], self.nsamples))
        rng = np.arange(X.shape[0])
    else:
        raise ValueError(_unexpected_err_msg)
    Parallel(n_jobs=self.njobs, verbose=0, require="sharedmem")\
        (delayed(self._partial_fit_single)\
            (sample, w_all, appear_times, rng, X, y) \
        for sample in range(self.nsamples))
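The require="sharedmem" argument used here (and in the AutoSmart examples) tells joblib to pick a backend whose workers share the parent process's memory, in practice threads, so the delayed calls can write results into shared objects instead of returning them. A minimal sketch of that pattern with a hypothetical worker function:

import numpy as np
from joblib import Parallel, delayed

out = np.zeros(8)

def fill(i):
    # Writes into the shared array; this only works because sharedmem forces
    # a thread-based backend in which all workers see the same objects.
    out[i] = i ** 2

Parallel(n_jobs=2, require="sharedmem")(delayed(fill)(i) for i in range(8))
print(out)  # [ 0.  1.  4.  9. 16. 25. 36. 49.]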
Example #16
Source File: preprocessor.py From AutoSmart with GNU General Public License v3.0 | 6 votes |
def fit(self,X):
    def func(ss):
        length = len(ss.unique())
        if length <= 1:
            return True
        else:
            return False

    df = X.data
    todo_cols = X.cat_cols + X.multi_cat_cols + X.num_cols + X.time_cols + X.binary_cols

    res = Parallel(n_jobs=CONSTANT.JOBS,require='sharedmem')(delayed(func)(df[col]) for col in todo_cols)

    drop_cols = []
    for col,unique in zip(todo_cols,res):
        if unique:
            drop_cols.append(col)

    self.drop_cols = drop_cols
Example #17
Source File: atlas2.py From ssbio with MIT License | 6 votes |
def build_strain_specific_models(self, joblib=False, cores=1, force_rerun=False):
    """Wrapper function for _build_strain_specific_model"""
    if len(self.df_orthology_matrix) == 0:
        raise RuntimeError('Empty orthology matrix, please calculate first!')
    ref_functional_genes = [g.id for g in self.reference_gempro.functional_genes]

    log.info('Building strain specific models...')
    if joblib:
        result = DictList(Parallel(n_jobs=cores)(delayed(self._build_strain_specific_model)(s, ref_functional_genes, self.df_orthology_matrix, force_rerun=force_rerun) for s in self.strain_ids))
    # if sc:
    #     strains_rdd = sc.parallelize(self.strain_ids)
    #     result = strains_rdd.map(self._build_strain_specific_model).collect()
    else:
        result = []
        for s in tqdm(self.strain_ids):
            result.append(self._build_strain_specific_model(s, ref_functional_genes, self.df_orthology_matrix, force_rerun=force_rerun))

    for strain_id, gp_noseqs_path in result:
        self.strain_infodict[strain_id]['gp_noseqs_path'] = gp_noseqs_path
Example #18
Source File: graph.py From AutoSmart with GNU General Public License v3.0 | 6 votes |
def recognize_binary_col(self,data,cat_cols):
    def func(ss):
        ss = ss.unique()
        if len(ss) == 3:
            if pd.isna(ss).sum() == 1:
                return True
        if len(ss) == 2:
            return True
        return False

    binary_cols = []

    res = Parallel(n_jobs=CONSTANT.JOBS,require='sharedmem')(delayed(func)(data[col]) for col in cat_cols)

    for col,is_binary in zip(cat_cols,res):
        if is_binary:
            binary_cols.append(col)

    return binary_cols
Example #19
Source File: optim_par_L2M2019Ctrl_2D.py From osim-rl with MIT License | 5 votes |
def f(self, v_params):
    self.n_gen += 1
    timeout_error = True
    error_count = 0
    while timeout_error:
        try:
            v_total_reward = Parallel(n_jobs=N_PROC, timeout=TIMEOUT)\
                (delayed(f_ind)(self.n_gen, i, p) for i, p in enumerate(v_params))
            timeout_error = False
        except Exception as e_msg:
            error_count += 1
            print('\ntimeout error (x{})!!!'.format(error_count))
            #print(e_msg)

    for total_reward in v_total_reward:
        if self.best_total_reward < total_reward:
            filename = "./optim_data/cma/" + trial_name + "best_w.txt"
            print("\n")
            print("----")
            print("update the best score!!!!")
            print("\tprev = %.8f" % self.best_total_reward)
            print("\tcurr = %.8f" % total_reward)
            print("\tsave to [%s]" % filename)
            print("----")
            print("")
            self.best_total_reward = total_reward
            np.savetxt(filename, params)

    return [-r for r in v_total_reward]
Example #20
Source File: aligning-docs-by-interlinks-demo2.py From comparable-text-miner with Apache License 2.0 | 5 votes |
def main(argv):
    source_corpus_file = sys.argv[1]
    target_corpus_file = sys.argv[2]
    source_language = sys.argv[3]
    target_language = sys.argv[4]
    output_path = sys.argv[5]
    if not output_path.endswith('/'): output_path = output_path + '/'
    tp.check_dir(output_path)  # if directory does not exist, then create

    logging.info('aligning %s and %s wikipeida documents using interlanguage links', source_language, target_language)

    source_docs = tp.split_wikipedia_docs_into_array(source_corpus_file)
    logging.info('source corpus is loaded')
    target_docs = tp.split_wikipedia_docs_into_array(target_corpus_file)
    logging.info('target corpus is loaded ... start aligning ...')

    aligned_corpus = Parallel(n_jobs=3,verbose=100)(delayed(tp.aligning_doc_by_interlanguage_links)(d, target_docs, source_language, target_language, output_path) for d in source_docs)

    source_out = open(output_path + source_language + '.wiki.txt', 'w')
    target_out = open(output_path + target_language + '.wiki.txt', 'w')
    for doc_pair in aligned_corpus:
        if doc_pair[0]:  # if not None
            text_out = doc_pair[0]
            print>>source_out, text_out.encode('utf-8')
            text_out = doc_pair[1]
            print>>target_out, text_out.encode('utf-8')

##################################################################
Example #21
Source File: dataset.py From stable-baselines with MIT License | 5 votes |
def _run(self):
    start = True
    with Parallel(n_jobs=self.n_workers, batch_size="auto", backend=self.backend) as parallel:
        while start or self.infinite_loop:
            start = False

            if self.shuffle:
                np.random.shuffle(self.indices)

            for minibatch_idx in range(self.n_minibatches):
                self.start_idx = minibatch_idx * self.batch_size

                obs = self.observations[self._minibatch_indices]
                if self.load_images:
                    if self.n_workers <= 1:
                        obs = [self._make_batch_element(image_path) for image_path in obs]
                    else:
                        obs = parallel(delayed(self._make_batch_element)(image_path) for image_path in obs)
                    obs = np.concatenate(obs, axis=0)

                actions = self.actions[self._minibatch_indices]

                self.queue.put((obs, actions))

                # Free memory
                del obs

            self.queue.put(None)
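This example uses Parallel as a context manager, which joblib supports so that one pool of workers can be reused across many consecutive calls instead of being spawned again for every minibatch. A minimal sketch of that reuse pattern:

from joblib import Parallel, delayed

def square(x):
    return x * x

# The worker pool is created once and reused for each call to parallel(...).
with Parallel(n_jobs=2) as parallel:
    for batch in ([1, 2, 3], [4, 5, 6]):
        print(parallel(delayed(square)(x) for x in batch))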
Example #22
Source File: main.py From lighter with MIT License | 5 votes |
def parse_services(filenames, canaryGroup=None, profiles=[]):
    # return [parse_service(filename) for filename in filenames]
    return Parallel(n_jobs=8, backend="threading")(delayed(parse_service)(filename, canaryGroup, profiles) for filename in filenames) if filenames else []
Example #23
Source File: preprocessor.py From AutoSmart with GNU General Public License v3.0 | 5 votes |
def transform(self,X):
    def func(ss,cats):
        codes = pd.Categorical(ss,categories=cats).codes
        codes = codes.astype('float16')
        codes[codes==-1] = np.nan
        return codes

    df = X.data
    todo_cols = X.binary_cols

    res = Parallel(n_jobs=CONSTANT.JOBS,require='sharedmem')(delayed(func)(df[col],self.col2cats[col]) for col in todo_cols)

    for col,codes in zip(todo_cols,res):
        df[col] = codes
Example #24
Source File: preprocessor.py From AutoSmart with GNU General Public License v3.0 | 5 votes |
def fit(self,X):
    def func(ss):
        cats = pd.Categorical(ss).categories
        return cats

    df = X.data
    todo_cols = X.binary_cols

    res = Parallel(n_jobs=CONSTANT.JOBS,require='sharedmem')(delayed(func)(df[col]) for col in todo_cols)

    for col,cats in zip(todo_cols,res):
        self.col2cats[col] = cats
Example #25
Source File: mtag.py From mtag with GNU General Public License v3.0 | 5 votes |
def ss_estimation(args, betas, se, max_iter=1000, tol=1.0e-10, starting_params=(0.5, 1.0e-3), callback=False):
    '''
    Numerically fit the distribution of betas and standard errors to a spike slab distribution.

    Arguments:
    ----------
    betas: The Mx1 vector of betas
    se: The Mx1 vector of standard errors. Must align with the reported betas.
    max_iter: int, Maximum number of iterations
    tol: float, Tolerance used in numerical optimization (for both fatol, xatol)
    starting_params: 2-tuple: (pi_0, tau_0)
        Starting parameters for optimization. Default is 0.5, 1.0e-3
    callback: boolean, default False
        If True, the parameter values will be printed at each step of optimization.
    '''
    M, T = betas.shape
    solver_opts = dict()
    solver_opts['maxiter'] = max_iter
    solver_opts['fatol'] = tol
    solver_opts['xatol'] = tol
    solver_opts['disp'] = True
    callback = cback_print if callback else None

    arg_list_ss = [(betas[:,t], se[:,t], starting_params, solver_opts) for t in range(T)]

    ss_results = joblib.Parallel(n_jobs = args.cores, backend='multiprocessing', verbose=0, batch_size=1)(joblib.delayed(_optim_ss)(f_args) for f_args in arg_list_ss)

    return ss_results
Example #26
Source File: WEASEL.py From SFA_Python with GNU General Public License v3.0 | 5 votes |
def createWORDS(self, samples, data = 'Train'):
    self.words = [None for _ in range(len(self.windowLengths))]

    Parallel(n_jobs=1, backend="threading")(delayed(self.createWords, check_pickle=False)(samples, w, data) for w in range(len(self.windowLengths)))
    return self.words
Example #27
Source File: atlas2.py From ssbio with MIT License | 5 votes |
def load_sequences_to_strains(self, joblib=False, cores=1, force_rerun=False):
    """Wrapper function for _load_sequences_to_strain"""
    log.info('Loading sequences to strain GEM-PROs...')
    if joblib:
        result = DictList(Parallel(n_jobs=cores)(delayed(self._load_sequences_to_strain)(s, force_rerun) for s in self.strain_ids))
    else:
        result = []
        for s in tqdm(self.strain_ids):
            result.append(self._load_sequences_to_strain(s, force_rerun))

    for strain_id, gp_seqs_path in result:
        self.strain_infodict[strain_id]['gp_seqs_path'] = gp_seqs_path
Example #28
Source File: BOSSEnsembleClassifier.py From SFA_Python with GNU General Public License v3.0 | 5 votes |
def predict(self, models, samples, testing=False):
    uniqueLabels = np.unique(samples["Labels"])
    self.Label_Matrix = [[None for _ in range(len(models))] for _ in range(samples['Samples'])]
    predictedLabels = [None for _ in range(samples['Samples'])]

    Parallel(n_jobs=1, backend="threading")(delayed(self.predictIndividual, check_pickle=False)(models, samples, testing, i) for i in range(len(models)))

    maxCounts = [None for _ in range(samples['Samples'])]
    for i in range(len(self.Label_Matrix)):
        counts = {l:0 for l in uniqueLabels}
        for k in self.Label_Matrix[i]:
            if (k != None) and (list(k.keys())[0] != None):
                label = list(k.keys())[0]
                count = counts[label] if label in counts.keys() else 0
                increment = list(k.values())[0] if self.ENSEMBLE_WEIGHTS else 1
                count = increment if (count == None) else count + increment
                counts[label] = count

        maxCount = -1
        for e in uniqueLabels:  # counts.keys():
            if (predictedLabels[i] == None) or (maxCount < counts[e]) or (maxCount == counts[e]) and (predictedLabels[i] <= e):
                maxCount = counts[e]
                predictedLabels[i] = e

    correctTesting = 0
    for i in range(samples["Samples"]):
        correctTesting += 1 if samples[i].label == predictedLabels[i] else 0

    return Predictions(correctTesting, predictedLabels)
Example #29
Source File: Preprocessing.py From training_results_v0.6 with Apache License 2.0 | 5 votes |
def write_data_csv(fname, frames, preproc):
    """Write data to csv file"""
    fdata = open(fname, "w")
    dr = Parallel()(delayed(get_data)(lst,preproc) for lst in frames)
    data,result = zip(*dr)
    for entry in data:
        fdata.write(','.join(entry)+'\r\n')
    print("All finished, %d slices in total" % len(data))
    fdata.close()
    result = np.ravel(result)
    return result
Example #30
Source File: data_loader.py From srl-zoo with MIT License | 5 votes |
def _run(self):
    start = True
    with Parallel(n_jobs=self.n_workers, batch_size="auto", backend="threading") as parallel:
        while start or self.infinite_loop:
            start = False

            if self.shuffle:
                indices = np.random.permutation(self.n_minibatches).astype(np.int64)
            else:
                indices = np.arange(len(self.minibatchlist), dtype=np.int64)

            for minibatch_idx in indices:
                images = self.images_path[self.minibatchlist[minibatch_idx]]
                if self.n_workers <= 1:
                    batch = [self._makeBatchElement(image_path) for image_path in images]
                else:
                    batch = parallel(delayed(self._makeBatchElement)(image_path) for image_path in images)

                batch = th.cat(batch, dim=0)

                if self.no_targets:
                    self.queue.put(batch)
                else:
                    # th.tensor creates a copy
                    self.queue.put((batch, th.tensor(self.targets[minibatch_idx])))

                # Free memory
                del batch

            self.queue.put(None)