Python tqdm.tqdm.pandas() Examples
The following are 19 code examples of tqdm.tqdm.pandas().
Each example is an excerpt from the project and source file named above it.
You may also want to check out all available functions and classes of the module
tqdm.tqdm.
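
For context before the examples: tqdm.pandas() is a classmethod that patches pandas so that Series, DataFrame and GroupBy objects gain progress_apply and progress_map counterparts of apply/map. A minimal sketch of the typical call pattern (the data and the lambda here are illustrative, not taken from any example below):

import pandas as pd
from tqdm import tqdm

tqdm.pandas(desc="squaring")  # registers progress_apply/progress_map on pandas objects

df = pd.DataFrame({"x": range(10000)})
# behaves exactly like .apply, but renders a progress bar as it iterates
result = df["x"].progress_apply(lambda v: v * v)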
Example #1
Source File: tests_pandas.py From Tautulli with GNU General Public License v3.0
def test_pandas_apply_args_deprecation():
    """Test warning info in `pandas.Dataframe(Series).progress_apply(func, *args)`"""
    # `closing`, `StringIO`, `SkipTest` and `tqdm` come from module-level
    # imports in tests_pandas.py.
    try:
        from numpy.random import randint
        from tqdm import tqdm_pandas
        import pandas as pd
    except ImportError:
        raise SkipTest

    with closing(StringIO()) as our_file:
        tqdm_pandas(tqdm(file=our_file, leave=False, ascii=True, ncols=20))
        df = pd.DataFrame(randint(0, 50, (500, 3)))
        df.progress_apply(lambda x: None, 1)  # 1 shall cause a warning
        # Check deprecation message
        res = our_file.getvalue()
        assert all([i in res for i in (
            "TqdmDeprecationWarning", "not supported",
            "keyword arguments instead")])
Example #2
Source File: tests_pandas.py From Tautulli with GNU General Public License v3.0
def test_pandas_leave():
    """Test pandas with `leave=True`"""
    try:
        from numpy.random import randint
        import pandas as pd
    except ImportError:
        raise SkipTest

    with closing(StringIO()) as our_file:
        df = pd.DataFrame(randint(0, 100, (1000, 6)))
        tqdm.pandas(file=our_file, leave=True, ascii=True)
        df.groupby(0).progress_apply(lambda x: None)

        our_file.seek(0)

        exres = '100%|##########| 100/100'
        if exres not in our_file.read():
            our_file.seek(0)
            raise AssertionError(
                "\nExpected:\n{0}\nIn:{1}\n".format(exres, our_file.read()))
Example #3
Source File: bert.py From nyaggle with MIT License
def _process(self, X: pd.DataFrame, func: Callable[[str, np.ndarray], Any]):
    is_pandas = isinstance(X, pd.DataFrame)
    X = convert_input(X)
    tqdm.pandas()

    columns = self.text_columns or [c for c in X.columns if X[c].dtype == np.object]
    non_text_columns = [c for c in X.columns if c not in columns]

    column_names = []
    processed = []
    for c in columns:
        emb = np.vstack(X[c].progress_apply(lambda x: self._process_text(x)))
        emb = func(c, emb)
        processed.append(emb)
        column_names += [self.column_format.format(col=c, idx=i) for i in range(emb.shape[1])]

    processed_df = pd.DataFrame(np.hstack(processed), columns=column_names)

    if non_text_columns:
        X_ = X[non_text_columns].copy()
        X_ = pd.concat([X_, processed_df], axis=1)
    else:
        X_ = processed_df

    return X_ if self.return_same_type and is_pandas else X_.values
Example #4
Source File: loader.py From fine-grained-sentiment with MIT License
def create_dataloader(self,
                      df: pd.DataFrame,
                      batch_size: int = 32,
                      shuffle: bool = False,
                      valid_pct: float = None):
    "Process rows in pd.DataFrame using n_cpus and return a DataLoader"

    tqdm.pandas()
    with ProcessPoolExecutor(max_workers=n_cpu) as executor:
        result = list(
            tqdm(executor.map(self.process_row, df.iterrows(), chunksize=8192),
                 desc=f"Processing {len(df)} examples on {n_cpu} cores",
                 total=len(df)))

    features = [r[0] for r in result]
    labels = [r[1] for r in result]
    dataset = TensorDataset(torch.tensor(features, dtype=torch.long),
                            torch.tensor(labels, dtype=torch.long))

    if valid_pct is not None:
        valid_size = int(valid_pct * len(df))
        train_size = len(df) - valid_size
        valid_dataset, train_dataset = random_split(dataset, [valid_size, train_size])
        valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        return train_loader, valid_loader

    data_loader = DataLoader(dataset,
                             batch_size=batch_size,
                             num_workers=0,
                             shuffle=shuffle,
                             pin_memory=torch.cuda.is_available())
    return data_loader
Example #5
Source File: data_pack.py From MatchZoo-py with Apache License 2.0
def frame(self) -> 'DataPack.FrameView':
    """
    View the data pack as a :class:`pandas.DataFrame`.

    Returned data frame is created by merging the left data frame,
    the right dataframe and the relation data frame. Use `[]` to access
    an item or a slice of items.

    :return: A :class:`matchzoo.DataPack.FrameView` instance.

    Example:
        >>> import matchzoo as mz
        >>> data_pack = mz.datasets.toy.load_data()
        >>> type(data_pack.frame)
        <class 'matchzoo.data_pack.data_pack.DataPack.FrameView'>
        >>> frame_slice = data_pack.frame[0:5]
        >>> type(frame_slice)
        <class 'pandas.core.frame.DataFrame'>
        >>> list(frame_slice.columns)
        ['id_left', 'text_left', 'id_right', 'text_right', 'label']
        >>> full_frame = data_pack.frame()
        >>> len(full_frame) == len(data_pack)
        True

    """
    return DataPack.FrameView(self)
Example #6
Source File: tests_pandas.py From Tautulli with GNU General Public License v3.0
def test_pandas_data_frame():
    """Test pandas.DataFrame.progress_apply and .progress_applymap"""
    try:
        from numpy.random import randint
        import pandas as pd
    except ImportError:
        raise SkipTest

    with closing(StringIO()) as our_file:
        tqdm.pandas(file=our_file, leave=True, ascii=True)
        df = pd.DataFrame(randint(0, 50, (100, 200)))

        def task_func(x):
            return x + 1

        # applymap
        res1 = df.progress_applymap(task_func)
        res2 = df.applymap(task_func)
        assert res1.equals(res2)

        # apply
        for axis in [0, 1]:
            res3 = df.progress_apply(task_func, axis=axis)
            res4 = df.apply(task_func, axis=axis)
            assert res3.equals(res4)

        our_file.seek(0)
        if our_file.read().count('100%') < 3:
            our_file.seek(0)
            raise AssertionError("\nExpected:\n{0}\nIn:\n{1}\n".format(
                '100% at least three times', our_file.read()))

        # apply_map, apply axis=0, apply axis=1
        expects = ['20000/20000', '200/200', '100/100']
        for exres in expects:
            our_file.seek(0)
            if our_file.getvalue().count(exres) < 1:
                our_file.seek(0)
                raise AssertionError(
                    "\nExpected:\n{0}\nIn:\n {1}\n".format(
                        exres + " at least once.", our_file.read()))
Example #7
Source File: tests_pandas.py From Tautulli with GNU General Public License v3.0
def test_pandas_series():
    """Test pandas.Series.progress_apply and .progress_map"""
    try:
        from numpy.random import randint
        import pandas as pd
    except ImportError:
        raise SkipTest

    with closing(StringIO()) as our_file:
        tqdm.pandas(file=our_file, leave=True, ascii=True)
        series = pd.Series(randint(0, 50, (123,)))

        res1 = series.progress_apply(lambda x: x + 10)
        res2 = series.apply(lambda x: x + 10)
        assert res1.equals(res2)

        res3 = series.progress_map(lambda x: x + 10)
        res4 = series.map(lambda x: x + 10)
        assert res3.equals(res4)

        expects = ['100%', '123/123']
        for exres in expects:
            our_file.seek(0)
            if our_file.getvalue().count(exres) < 2:
                our_file.seek(0)
                raise AssertionError(
                    "\nExpected:\n{0}\nIn:\n{1}\n".format(
                        exres + " at least twice.", our_file.read()))
Example #8
Source File: classifiers.py From fine-grained-sentiment with MIT License
def predict(self, train_file: None, test_file: str, lower_case: bool) -> pd.DataFrame:
    "Use tqdm to display model prediction status bar"
    # pip install tqdm
    from tqdm import tqdm
    tqdm.pandas()
    df = self.read_data(test_file, lower_case)
    df['pred'] = df['text'].progress_apply(self.score)
    return df
Example #9
Source File: classifiers.py From fine-grained-sentiment with MIT License
def predict(self, train_file: None, test_file: str, lower_case: bool) -> pd.DataFrame:
    "Use tqdm to display model prediction status bar"
    # pip install tqdm
    from tqdm import tqdm
    tqdm.pandas()
    df = self.read_data(test_file, lower_case)
    df['pred'] = df['text'].progress_apply(self.score)
    return df
Example #10
Source File: data_pack.py From MatchZoo with Apache License 2.0
def _apply_on_text_left(self, func, rename, verbose=1):
    name = rename or 'text_left'
    if verbose:
        tqdm.pandas(desc="Processing " + name + " with " + func.__name__)
        self._left[name] = self._left['text_left'].progress_apply(func)
    else:
        self._left[name] = self._left['text_left'].apply(func)
Example #11
Source File: data_pack.py From MatchZoo with Apache License 2.0
def _apply_on_text_right(self, func, rename, verbose=1):
    name = rename or 'text_right'
    if verbose:
        tqdm.pandas(desc="Processing " + name + " with " + func.__name__)
        self._right[name] = self._right['text_right'].progress_apply(func)
    else:
        self._right[name] = self._right['text_right'].apply(func)
Example #12
Source File: data_pack.py From MatchZoo with Apache License 2.0
def frame(self) -> 'DataPack.FrameView':
    """
    View the data pack as a :class:`pandas.DataFrame`.

    Returned data frame is created by merging the left data frame,
    the right dataframe and the relation data frame. Use `[]` to access
    an item or a slice of items.

    :return: A :class:`matchzoo.DataPack.FrameView` instance.

    Example:
        >>> import matchzoo as mz
        >>> data_pack = mz.datasets.toy.load_data()
        >>> type(data_pack.frame)
        <class 'matchzoo.data_pack.data_pack.DataPack.FrameView'>
        >>> frame_slice = data_pack.frame[0:5]
        >>> type(frame_slice)
        <class 'pandas.core.frame.DataFrame'>
        >>> list(frame_slice.columns)
        ['id_left', 'text_left', 'id_right', 'text_right', 'label']
        >>> full_frame = data_pack.frame()
        >>> len(full_frame) == len(data_pack)
        True

    """
    return DataPack.FrameView(self)
Example #13
Source File: pandas.py From snorkel with Apache License 2.0
def apply(
    self,
    df: pd.DataFrame,
    progress_bar: bool = True,
    fault_tolerant: bool = False,
    return_meta: bool = False,
) -> Union[np.ndarray, Tuple[np.ndarray, ApplierMetadata]]:
    """Label Pandas DataFrame of data points with LFs.

    Parameters
    ----------
    df
        Pandas DataFrame containing data points to be labeled by LFs
    progress_bar
        Display a progress bar?
    fault_tolerant
        Output ``-1`` if LF execution fails?
    return_meta
        Return metadata from apply call?

    Returns
    -------
    np.ndarray
        Matrix of labels emitted by LFs
    ApplierMetadata
        Metadata, such as fault counts, for the apply call
    """
    f_caller = _FunctionCaller(fault_tolerant)
    apply_fn = partial(apply_lfs_to_data_point, lfs=self._lfs, f_caller=f_caller)
    call_fn = df.apply
    if progress_bar:
        tqdm.pandas()
        call_fn = df.progress_apply
    labels = call_fn(apply_fn, axis=1)
    labels_with_index = rows_to_triplets(labels)
    L = self._numpy_from_row_data(labels_with_index)
    if return_meta:
        return L, ApplierMetadata(f_caller.fault_counts)
    return L
Example #14
Source File: data_pack.py From MatchZoo-py with Apache License 2.0
def _apply_on_text_left(self, func, rename, verbose=1):
    name = rename or 'text_left'
    if verbose:
        tqdm.pandas(desc="Processing " + name + " with " + func.__name__)
        self._left[name] = self._left['text_left'].progress_apply(func)
    else:
        self._left[name] = self._left['text_left'].apply(func)
Example #15
Source File: data_pack.py From MatchZoo-py with Apache License 2.0
def _apply_on_text_right(self, func, rename, verbose=1):
    name = rename or 'text_right'
    if verbose:
        tqdm.pandas(desc="Processing " + name + " with " + func.__name__)
        self._right[name] = self._right['text_right'].progress_apply(func)
    else:
        self._right[name] = self._right['text_right'].apply(func)
Example #16
Source File: preprocess_aclImdb_v1.py From lambda-deep-learning-demo with Apache License 2.0
def process_csv(args, split_name):
    raw_csv = os.path.join(args.output_dir, split_name + "_raw.csv")
    clean_csv = os.path.join(args.output_dir, split_name + ".csv")

    data, labels = load_dataset(os.path.join(args.input_dir, split_name))

    # save as csv file, separated by tab
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    with open(raw_csv, 'w') as f:
        writer = csv.writer(f, delimiter='\t')
        for sentence, label in zip(data, labels):
            writer.writerow([sentence, label])

    data = ingest_data(raw_csv)
    tqdm.pandas(desc="progress-bar")
    data = post_process(data, args.remove_punctuation)
    data.to_csv(clean_csv, sep='\t', header=False, index=False)
Example #17
Source File: utils.py From DALEX with GNU General Public License v3.0
def aggregate_profiles(all_profiles, type, groups, intercept, span):
    if type == 'partial':
        aggregated_profiles = \
            all_profiles.groupby(['_vname_', '_label_', '_x_'] + groups)['_yhat_'].mean().reset_index()
    else:
        # split all_profiles into groups
        tqdm.pandas(desc='Calculating accumulated dependency') if type == 'accumulated' else tqdm.pandas(
            desc="Calculating conditional dependency")
        aggregated_profiles = \
            all_profiles. \
            loc[:, ["_vname_", "_label_", "_x_", "_yhat_", "_ids_", "_original_"] + groups]. \
            groupby(['_vname_', '_label_']). \
            progress_apply(lambda split_profile:
                           split_over_variables_and_labels(split_profile, type, groups, span))

    aggregated_profiles.loc[:, '_ids_'] = 0

    if type == 'partial':
        if not intercept:
            aggregated_profiles.loc[:, '_yhat_'] = \
                aggregated_profiles.loc[:, '_yhat_'] - all_profiles['_yhat_'].mean()
        aggregated_profiles = aggregated_profiles
    elif type == 'conditional':
        if not intercept:
            aggregated_profiles.loc[:, '_yhat_'] = \
                aggregated_profiles.loc[:, '_yhat_'] - all_profiles['_yhat_'].mean()
        aggregated_profiles = aggregated_profiles.reset_index().rename(columns={'level_2': '_grid_'})
    else:
        if intercept:
            aggregated_profiles.loc[:, '_yhat_'] = \
                aggregated_profiles.loc[:, '_yhat_'] + all_profiles['_yhat_'].mean()
        aggregated_profiles = aggregated_profiles.reset_index().rename(columns={'level_2': '_grid_'})

    # postprocessing
    if len(groups) != 0:
        aggregated_profiles['_groups_'] = \
            aggregated_profiles.loc[:, groups].apply(lambda row: '_'.join(row), axis=1)
        # NB: drop() returns a copy; the original source does not assign this result
        aggregated_profiles.drop(columns=groups)
        aggregated_profiles.loc[:, '_label_'] = \
            aggregated_profiles.loc[:, ['_label_', '_groups_']].apply(lambda row: '_'.join(row), axis=1)

    return aggregated_profiles
Example #18
Source File: utils.py From DALEX with GNU General Public License v3.0
def split_over_variables_and_labels(split_profile, type, groups, span):
    """
    Inner function that calculates actual conditional profiles for one variable only.
    Iterated over each variable and group.

    :param split_profile: pandas.DataFrame, one group of the dataset (with only one variable)
    :param groups: str, name of grouping variable
    :return: pd.DataFrame, dataframe with calculated conditional profile for only one variable
    """
    if split_profile.shape[0] == 0:
        return None

    if pd.api.types.is_numeric_dtype(split_profile['_x_']):
        # for continuous variables we will calculate weighted average
        # where weights come from gaussian kernel and distance between points
        # scaling factor: the range, if the range is > 0
        split_profile['_original_'] = split_profile['_original_'].astype('float')
        range_x = split_profile['_x_'].max() - split_profile['_x_'].min()
        if range_x == 0:
            range_x = 1

        # scaled differences
        diffs = (split_profile['_original_'] - split_profile['_x_']) / range_x
        split_profile['_w_'] = norm(diffs, 0, span)
    else:
        # for categorical variables we will calculate weighted average
        # but weights are 0-1, 1 if it's the same level and 0 otherwise
        split_profile['_w_'] = split_profile['_original_'] == split_profile['_x_']

    if type == 'accumulated':
        # diffs
        split_profile['_yhat_'] = split_profile. \
            groupby('_ids_')['_yhat_']. \
            transform(lambda column: column.diff())
        # diff causes NaNs at the beginning of each group
        split_profile.loc[np.isnan(split_profile['_yhat_']), '_yhat_'] = 0

    par_profile = split_profile.groupby(['_x_'] + groups). \
        apply(lambda point: (point['_yhat_'] * point['_w_']).sum() / point['_w_'].sum()
              if point['_w_'].sum() != 0 else 0)
    par_profile.name = '_yhat_'
    par_profile = par_profile.reset_index()

    if type == 'accumulated':
        if len(groups) == 0:
            par_profile['_yhat_'] = par_profile['_yhat_'].cumsum()
        else:
            par_profile['_yhat_'] = par_profile.groupby(groups)['_yhat_'].transform(
                lambda column: column.cumsum())

    return par_profile
Example #19
Source File: tests_pandas.py From Tautulli with GNU General Public License v3.0
def test_pandas_groupby_apply():
    """Test pandas.DataFrame.groupby(...).progress_apply"""
    try:
        from numpy.random import randint
        import pandas as pd
    except ImportError:
        raise SkipTest

    with closing(StringIO()) as our_file:
        tqdm.pandas(file=our_file, leave=False, ascii=True)

        df = pd.DataFrame(randint(0, 50, (500, 3)))
        df.groupby(0).progress_apply(lambda x: None)

        dfs = pd.DataFrame(randint(0, 50, (500, 3)), columns=list('abc'))
        dfs.groupby(['a']).progress_apply(lambda x: None)

        our_file.seek(0)

        # don't expect final output since no `leave` and
        # high dynamic `miniters`
        nexres = '100%|##########|'
        if nexres in our_file.read():
            our_file.seek(0)
            raise AssertionError("\nDid not expect:\n{0}\nIn:{1}\n".format(
                nexres, our_file.read()))

    with closing(StringIO()) as our_file:
        tqdm.pandas(file=our_file, leave=True, ascii=True)

        dfs = pd.DataFrame(randint(0, 50, (500, 3)), columns=list('abc'))
        dfs.loc[0] = [2, 1, 1]
        dfs['d'] = 100

        expects = ['500/500', '1/1', '4/4', '2/2']
        dfs.groupby(dfs.index).progress_apply(lambda x: None)
        dfs.groupby('d').progress_apply(lambda x: None)
        dfs.groupby(dfs.columns, axis=1).progress_apply(lambda x: None)
        dfs.groupby([2, 2, 1, 1], axis=1).progress_apply(lambda x: None)

        our_file.seek(0)
        if our_file.read().count('100%') < 4:
            our_file.seek(0)
            raise AssertionError("\nExpected:\n{0}\nIn:\n{1}\n".format(
                '100% at least four times', our_file.read()))

        for exres in expects:
            our_file.seek(0)
            if our_file.getvalue().count(exres) < 1:
                our_file.seek(0)
                raise AssertionError(
                    "\nExpected:\n{0}\nIn:\n {1}\n".format(
                        exres + " at least once.", our_file.read()))