Python pandas.read_csv() Examples
The following are 30 code examples of pandas.read_csv(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas, or try the search function.
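Before the project examples, here is a minimal standalone sketch (not taken from any of the projects below) that exercises the read_csv() options that recur throughout this page: sep, header, index_col, na_values, and comment. The file name and column layout are hypothetical, purely for illustration.

import pandas as pd

# Hypothetical file and columns, for illustration only.
df = pd.read_csv(
    "measurements.tsv",
    sep="\t",          # tab-separated values
    header=0,          # first row contains the column names
    index_col=0,       # use the first column as the row index
    na_values="n/a",   # treat the literal string "n/a" as NaN
    comment="#",       # skip lines that start with '#'
)
print(df.head())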
Example #1
Source File: NLP.py From Financial-NLP with Apache License 2.0 | 9 votes |
def load_label(self):
    """
    load label dictionary into the object.
    the format must be like this:
        积极,消极
        p1,n1
        p2,n2
        ...,...
        pk,nk
    """
    # columns: '积极' = positive words, '消极' = negative words
    table = pd.read_csv(self.label_file)
    pos = table.loc[:, '积极'].tolist()
    neg = table.loc[:, '消极'].tolist()
    self.Label_index = pos + neg
    self.Label_dict = dict(zip(pos, [1] * len(pos)))
    self.Label_dict.update(dict(zip(neg, [-1] * len(neg))))
Example #2
Source File: atomic.py From comet-commonsense with Apache License 2.0 | 8 votes |
def load_data(self, path):
    if ".pickle" in path:
        print("Loading data from: {}".format(path))
        data_utils.load_existing_data_loader(self, path)
        return True

    for split in self.data:
        file_name = "v4_atomic_{}.csv".format(map_name(split))
        df = pandas.read_csv("{}/{}".format(path, file_name), index_col=0)
        df.iloc[:, :9] = df.iloc[:, :9].apply(
            lambda col: col.apply(json.loads))

        for cat in self.categories:
            attr = df[cat]
            self.data[split]["total"] += utils.zipped_flatten(zip(
                attr.index, ["<{}>".format(cat)] * len(attr), attr.values))

    if do_take_partial_dataset(self.opt.data):
        self.data["train"]["total"] = select_partial_dataset(
            self.opt.data, self.data["train"]["total"])

    return False
Example #3
Source File: monitor.py From lirpg with MIT License | 7 votes |
def test_monitor():
    env = gym.make("CartPole-v1")
    env.seed(0)
    mon_file = "/tmp/baselines-test-%s.monitor.csv" % uuid.uuid4()
    menv = Monitor(env, mon_file)
    menv.reset()
    for _ in range(1000):
        _, _, done, _ = menv.step(0)
        if done:
            menv.reset()

    f = open(mon_file, 'rt')

    firstline = f.readline()
    assert firstline.startswith('#')
    metadata = json.loads(firstline[1:])
    assert metadata['env_id'] == "CartPole-v1"
    assert set(metadata.keys()) == {'env_id', 'gym_version', 't_start'}, "Incorrect keys in monitor metadata"

    last_logline = pandas.read_csv(f, index_col=None)
    assert set(last_logline.keys()) == {'l', 't', 'r'}, "Incorrect keys in monitor logline"
    f.close()
    os.remove(mon_file)
Example #4
Source File: monitor.py From HardRLWithYoutube with MIT License | 6 votes |
def test_monitor():
    env = gym.make("CartPole-v1")
    env.seed(0)
    mon_file = "/tmp/baselines-test-%s.monitor.csv" % uuid.uuid4()
    menv = Monitor(env, mon_file)
    menv.reset()
    for _ in range(1000):
        _, _, done, _ = menv.step(0)
        if done:
            menv.reset()

    f = open(mon_file, 'rt')

    firstline = f.readline()
    assert firstline.startswith('#')
    metadata = json.loads(firstline[1:])
    assert metadata['env_id'] == "CartPole-v1"
    assert set(metadata.keys()) == {'env_id', 'gym_version', 't_start'}, "Incorrect keys in monitor metadata"

    last_logline = pandas.read_csv(f, index_col=None)
    assert set(last_logline.keys()) == {'l', 't', 'r'}, "Incorrect keys in monitor logline"
    f.close()
    os.remove(mon_file)
Example #5
Source File: DataReader.py From tensorflow-DeepFM with MIT License | 6 votes |
def gen_feat_dict(self):
    if self.dfTrain is None:
        dfTrain = pd.read_csv(self.trainfile)
    else:
        dfTrain = self.dfTrain
    if self.dfTest is None:
        dfTest = pd.read_csv(self.testfile)
    else:
        dfTest = self.dfTest
    df = pd.concat([dfTrain, dfTest])

    self.feat_dict = {}
    tc = 0
    for col in df.columns:
        if col in self.ignore_cols:
            continue
        if col in self.numeric_cols:
            # map to a single index
            self.feat_dict[col] = tc
            tc += 1
        else:
            us = df[col].unique()
            self.feat_dict[col] = dict(zip(us, range(tc, len(us) + tc)))
            tc += len(us)
    self.feat_dim = tc
Example #6
Source File: main.py From tensorflow-DeepFM with MIT License | 6 votes |
def _load_data():
    dfTrain = pd.read_csv(config.TRAIN_FILE)
    dfTest = pd.read_csv(config.TEST_FILE)

    def preprocess(df):
        cols = [c for c in df.columns if c not in ["id", "target"]]
        df["missing_feat"] = np.sum((df[cols] == -1).values, axis=1)
        df["ps_car_13_x_ps_reg_03"] = df["ps_car_13"] * df["ps_reg_03"]
        return df

    dfTrain = preprocess(dfTrain)
    dfTest = preprocess(dfTest)

    cols = [c for c in dfTrain.columns if c not in ["id", "target"]]
    cols = [c for c in cols if (not c in config.IGNORE_COLS)]

    X_train = dfTrain[cols].values
    y_train = dfTrain["target"].values
    X_test = dfTest[cols].values
    ids_test = dfTest["id"].values
    cat_features_indices = [i for i, c in enumerate(cols) if c in config.CATEGORICAL_COLS]

    return dfTrain, dfTest, X_train, y_train, X_test, ids_test, cat_features_indices
Example #7
Source File: nistats.py From NiBetaSeries with MIT License | 6 votes |
def _lsa_events_converter(events_file):
    """Make a model where each trial has its own regressor
    using least squares all (LSA)

    Parameters
    ----------
    events_file : str
        File that contains all events from the bold run

    Returns
    -------
    events : DataFrame
        A DataFrame in which each trial has its own trial_type
    """
    import pandas as pd
    events = pd.read_csv(events_file, sep='\t')
    events['original_trial_type'] = events['trial_type']
    for cond, cond_df in events.groupby('trial_type'):
        cond_idx = cond_df.index
        for i_trial, trial_idx in enumerate(cond_idx):
            trial_name = '{0}_{1:04d}'.format(cond, i_trial + 1)
            events.loc[trial_idx, 'trial_type'] = trial_name
    return events
Example #8
Source File: test_nistats.py From NiBetaSeries with MIT License | 6 votes |
def test_select_confounds_error(confounds_file, tmp_path):
    import pandas as pd
    import numpy as np
    confounds_df = pd.read_csv(str(confounds_file), sep='\t', na_values='n/a')

    confounds_df['white_matter'][0] = np.nan

    conf_file = tmp_path / "confounds.tsv"

    confounds_df.to_csv(str(conf_file), index=False, sep='\t', na_rep='n/a')

    with pytest.raises(ValueError) as val_err:
        _select_confounds(str(conf_file), ['white_matter', 'csf'])

    assert "The selected confounds contain nans" in str(val_err.value)
Example #9
Source File: test_nistats.py From NiBetaSeries with MIT License | 6 votes |
def test_select_confounds(confounds_file, selected_confounds, nan_confounds,
                          expanded_confounds):
    import pandas as pd
    import numpy as np
    confounds_df = pd.read_csv(str(confounds_file), sep='\t', na_values='n/a')

    res_df = _select_confounds(str(confounds_file), selected_confounds)

    # check if the correct columns are selected
    assert set(expanded_confounds) == set(res_df.columns)

    # check if nans are being imputed when expected
    if nan_confounds:
        for nan_c in nan_confounds:
            vals = confounds_df[nan_c].values
            expected_result = np.nanmean(vals[vals != 0])
            assert res_df[nan_c][0] == expected_result
Example #10
Source File: core.py From neuropythy with GNU Affero General Public License v3.0 | 6 votes |
def load_LUT(filename, to='data'):
    '''
    load_LUT(filename) loads the given filename as a FreeSurfer LUT.

    The optional argument to (default: 'data') specifies how the LUT should be
    interpreted; it can be any of the following:
      * 'data' specifies that a dataframe should be returned.
    '''
    from neuropythy.util import to_dataframe
    import pandas
    # start by slurping in the text:
    dat = pandas.read_csv(filename, comment='#', sep='\s+',
                          names=['id', 'name', 'r', 'g', 'b', 'a'])
    # if all the alpha values are 0, we set them to 1 (not sure why freesurfer does this)
    dat['a'] = 255 - dat['a']
    if pimms.is_str(to):
        to = to.lower()
    if to is None:
        return dat
    elif to == 'data':
        df = to_dataframe({'id': dat['id'].values, 'name': dat['name'].values})
        df['color'] = dat.apply(lambda r: [r[k] / 255.0 for k in ['r', 'g', 'b', 'a']], axis=1)
        df.set_index('id', inplace=True)
        return df
    else:
        raise ValueError('Unknown to argument: %s' % to)

# A function to load in default data from the freesurfer home: e.g., the default LUTs
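A small portability note on the read_csv call above (an observation, not part of the original project): the separator '\s+' is written as a plain string literal, which newer Python versions flag as an invalid escape sequence; a raw string avoids the warning and behaves identically:

dat = pandas.read_csv(filename, comment='#', sep=r'\s+',
                      names=['id', 'name', 'r', 'g', 'b', 'a'])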
Example #11
Source File: bidirectional_lstm_autoencoder.py From keras-anomaly-detection with MIT License | 6 votes |
def main():
    data_dir_path = './data'
    model_dir_path = './models'
    ecg_data = pd.read_csv(data_dir_path + '/ecg_discord_test.csv', header=None)
    print(ecg_data.head())
    ecg_np_data = ecg_data.as_matrix()
    scaler = MinMaxScaler()
    ecg_np_data = scaler.fit_transform(ecg_np_data)
    print(ecg_np_data.shape)

    ae = BidirectionalLstmAutoEncoder()

    # fit the data and save model into model_dir_path
    if DO_TRAINING:
        ae.fit(ecg_np_data[:23, :], model_dir_path=model_dir_path,
               estimated_negative_sample_ratio=0.9)

    # load back the model saved in model_dir_path and detect anomaly
    ae.load_model(model_dir_path)
    anomaly_information = ae.anomaly(ecg_np_data[:23, :])
    reconstruction_error = []
    for idx, (is_anomaly, dist) in enumerate(anomaly_information):
        print('# ' + str(idx) + ' is ' + ('abnormal' if is_anomaly else 'normal') + ' (dist: ' + str(dist) + ')')
        reconstruction_error.append(dist)

    visualize_reconstruction_error(reconstruction_error, ae.threshold)
Example #12
Source File: cnn_lstm_autoencoder.py From keras-anomaly-detection with MIT License | 6 votes |
def main():
    data_dir_path = './data'
    model_dir_path = './models'
    ecg_data = pd.read_csv(data_dir_path + '/ecg_discord_test.csv', header=None)
    print(ecg_data.head())
    ecg_np_data = ecg_data.as_matrix()
    scaler = MinMaxScaler()
    ecg_np_data = scaler.fit_transform(ecg_np_data)
    print(ecg_np_data.shape)

    ae = CnnLstmAutoEncoder()

    # fit the data and save model into model_dir_path
    if DO_TRAINING:
        ae.fit(ecg_np_data[:23, :], model_dir_path=model_dir_path,
               estimated_negative_sample_ratio=0.9)

    # load back the model saved in model_dir_path and detect anomaly
    ae.load_model(model_dir_path)
    anomaly_information = ae.anomaly(ecg_np_data[:23, :])
    reconstruction_error = []
    for idx, (is_anomaly, dist) in enumerate(anomaly_information):
        print('# ' + str(idx) + ' is ' + ('abnormal' if is_anomaly else 'normal') + ' (dist: ' + str(dist) + ')')
        reconstruction_error.append(dist)

    visualize_reconstruction_error(reconstruction_error, ae.threshold)
Example #13
Source File: lstm_autoencoder.py From keras-anomaly-detection with MIT License | 6 votes |
def main():
    data_dir_path = './data'
    model_dir_path = './models'
    ecg_data = pd.read_csv(data_dir_path + '/ecg_discord_test.csv', header=None)
    print(ecg_data.head())
    ecg_np_data = ecg_data.as_matrix()
    scaler = MinMaxScaler()
    ecg_np_data = scaler.fit_transform(ecg_np_data)
    print(ecg_np_data.shape)

    ae = LstmAutoEncoder()

    # fit the data and save model into model_dir_path
    if DO_TRAINING:
        ae.fit(ecg_np_data[:23, :], model_dir_path=model_dir_path,
               estimated_negative_sample_ratio=0.9)

    # load back the model saved in model_dir_path and detect anomaly
    ae.load_model(model_dir_path)
    anomaly_information = ae.anomaly(ecg_np_data[:23, :])
    reconstruction_error = []
    for idx, (is_anomaly, dist) in enumerate(anomaly_information):
        print('# ' + str(idx) + ' is ' + ('abnormal' if is_anomaly else 'normal') + ' (dist: ' + str(dist) + ')')
        reconstruction_error.append(dist)

    visualize_reconstruction_error(reconstruction_error, ae.threshold)
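A note on the three keras-anomaly-detection examples above (an observation, not part of the original project): DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in pandas 1.0, so on a current pandas install the equivalent conversion would be:

# to_numpy() replaces the removed as_matrix() on pandas >= 0.24
ecg_np_data = ecg_data.to_numpy()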
Example #14
Source File: data_utils.py From DOTA_models with Apache License 2.0 | 6 votes |
def read_names(names_path):
    """read data from downloaded file. See SmallNames.txt for example format
    or go to https://www.kaggle.com/kaggle/us-baby-names for full lists

    Args:
        names_path: path to the csv file similar to the example type
    Returns:
        Dataset: a namedtuple of two elements: deduped names and their associated
            counts. The names contain only 26 chars and are all lower case
    """
    names_data = pd.read_csv(names_path)
    names_data.Name = names_data.Name.str.lower()
    name_data = names_data.groupby(by=["Name"])["Count"].sum()
    name_counts = np.array(name_data.tolist())
    names_deduped = np.array(name_data.index.tolist())

    Dataset = collections.namedtuple('Dataset', ['Name', 'Count'])
    return Dataset(names_deduped, name_counts)
Example #15
Source File: data2tensor.py From deep-summarization with MIT License | 6 votes |
def generate_vocabulary(self, review_summary_file):
    """
    :param review_summary_file:
    :return:
    """
    self.rev_sum_pair = pd.read_csv(review_summary_file, header=0).values

    for review, summary in self.rev_sum_pair:
        rev_lst = wordpunct_tokenize(review)
        sum_lst = wordpunct_tokenize(summary)
        self.__add_list_to_dict(rev_lst)
        self.__add_list_to_dict(sum_lst)

    # Now store the "" empty string as the last word of the vocabulary
    self.map[""] = len(self.map)
    self.revmap[len(self.map)] = ""
Example #16
Source File: mappers.py From bioservices with GNU General Public License v3.0 | 6 votes |
def get_data_from_biodbnet(self, df_hgnc):
    """keys are unique Gene names

    input is made of the df based on HGNC data web services

    uniprot accessions are duplicated sometimes. If so, this is actually the
    primary accession entry and all secondary ones.

    e.g. ABHD11 >>>> Q8N723;Q8NFV2;Q8NFV3;Q6PJU0;Q8NFV4;H7BYM8;Q8N722;Q9HBS8
    ABHDB_HUMAN Alpha/beta hydrolase domain-containing protein 11
    corresponds actually to the primary one: Q8NFV4
    """
    b = biodbnet.BioDBNet()
    res2 = b.db2db("Gene Symbol",
                   ["HGNC ID", "UniProt Accession", "UniProt Entry Name",
                    "UniProt Protein Name", "KEGG Gene ID", "Ensembl Gene ID"],
                   res.keys()[0:2000])

    import pandas as pd
    import StringIO
    c = pd.read_csv(StringIO.StringIO(res2), delimiter="\t",
                    index_col="Gene Symbol")
    return c
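The bioservices snippet above imports the Python 2 StringIO module, so it will not run on Python 3 as written. A sketch of the Python 3 equivalent of the final read_csv call (same behaviour, different import):

from io import StringIO  # Python 3 replacement for the StringIO module

c = pd.read_csv(StringIO(res2), delimiter="\t", index_col="Gene Symbol")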
Example #17
Source File: data.py From kuzushiji-recognition with MIT License | 6 votes |
def load_gt(fn, label_key='labels', has_height_width=True):
    labels = pd.read_csv(fn, dtype={'image_id': str, label_key: str})
    labels = labels.fillna('')
    labels_ = defaultdict(list)
    all_labels = set()
    for img_id, label_str in zip(labels['image_id'], labels[label_key]):
        img_labels = label_str.split(' ')
        if has_height_width:
            l, x, y, h, w = img_labels[::5], img_labels[1::5], img_labels[2::5], \
                img_labels[3::5], img_labels[4::5]
            for ll, xx, yy, hh, ww in zip(l, x, y, h, w):
                labels_[img_id].append((int(xx), int(yy), int(hh), int(ww), ll))
                all_labels.add(ll)
        else:
            l, x, y = img_labels[::3], img_labels[1::3], img_labels[2::3]
            for ll, xx, yy in zip(l, x, y):
                labels_[img_id].append((int(xx), int(yy), ll))
                all_labels.add(ll)
    label_to_int = {v: k for k, v in enumerate(sorted(list(all_labels)))}
    labels = dict(labels_)
    return labels, label_to_int
Example #18
Source File: test_control_curves.py From pywr with GNU General Public License v3.0 | 6 votes |
def test_control_curve_interpolated_json(use_parameters):
    # this is a little hack-y, as the parameters don't provide access to their
    # data once they've been initialised
    if use_parameters:
        model = load_model("reservoir_with_cc_param_values.json")
    else:
        model = load_model("reservoir_with_cc.json")
    reservoir1 = model.nodes["reservoir1"]
    model.setup()

    path = os.path.join(os.path.dirname(__file__), "models", "control_curve.csv")
    control_curve = pd.read_csv(path)["Control Curve"].values
    values = [-8, -6, -4]

    @assert_rec(model, reservoir1.cost)
    def expected_cost(timestep, si):
        # calculate expected cost manually and compare to parameter output
        volume_factor = reservoir1._current_pc[si.global_id]
        cc = control_curve[timestep.index]
        return np.interp(volume_factor, [0.0, cc, 1.0], values[::-1])

    model.run()
Example #19
Source File: test_control_curves.py From pywr with GNU General Public License v3.0 | 6 votes |
def test_circular_control_curve_interpolated_json():
    # this is a little hack-y, as the parameters don't provide access to their
    # data once they've been initialised
    model = load_model("reservoir_with_circular_cc.json")
    reservoir1 = model.nodes["reservoir1"]
    model.setup()

    path = os.path.join(os.path.dirname(__file__), "models", "control_curve.csv")
    control_curve = pd.read_csv(path)["Control Curve"].values
    values = [-8, -6, -4]

    @assert_rec(model, reservoir1.cost)
    def expected_cost(timestep, si):
        # calculate expected cost manually and compare to parameter output
        volume_factor = reservoir1._current_pc[si.global_id]
        cc = control_curve[timestep.index]
        return np.interp(volume_factor, [0.0, cc, 1.0], values[::-1])

    model.run()
Example #20
Source File: test_recorders.py From pywr with GNU General Public License v3.0 | 6 votes |
def test_seasonal_fdc_recorder(self):
    """
    Test the FlowDurationCurveRecorder
    """
    model = load_model("timeseries4.json")

    df = pandas.read_csv(os.path.join(os.path.dirname(__file__), 'models', 'timeseries3.csv'),
                         parse_dates=True, dayfirst=True, index_col=0)

    percentiles = np.linspace(20., 100., 5)

    summer_flows = df.loc[pandas.Timestamp("2014-06-01"):pandas.Timestamp("2014-08-31"), :]
    summer_fdc = np.percentile(summer_flows, percentiles, axis=0)

    model.run()

    rec = model.recorders["seasonal_fdc"]
    assert_allclose(rec.fdc, summer_fdc)
Example #21
Source File: info.py From xalpha with MIT License | 6 votes |
def _fetch_csv(self, path):
    """
    fetch the information and pricetable from path+code.csv; not recommended to use manually,
    just set the fetch label to be true when initializing the object

    :param path: string of folder path
    """
    try:
        content = pd.read_csv(path + self.code + ".csv")
        pricetable = content.iloc[1:]
        datel = list(pd.to_datetime(pricetable.date))
        self.price = pricetable[["netvalue", "totvalue", "comment"]]
        self.price["date"] = datel
        saveinfo = json.loads(content.iloc[0].date)
        if not isinstance(saveinfo, dict):
            raise FundTypeError("This csv doesn't looks like from fundinfo")
        self.segment = saveinfo["segment"]
        self.feeinfo = saveinfo["feeinfo"]
        self.name = saveinfo["name"]
        self.rate = saveinfo["rate"]
    except FileNotFoundError as e:
        # print('no saved copy of fund %s' % self.code)
        raise e
Example #22
Source File: info.py From xalpha with MIT License | 6 votes |
def _fetch_csv(self, path):
    """
    fetch the information and pricetable from path+code.csv; not recommended to use manually,
    just set the fetch label to be true when initializing the object

    :param path: string of folder path
    """
    try:
        pricetable = pd.read_csv(path + self.code + ".csv")
        datel = list(pd.to_datetime(pricetable.date))
        self.price = pricetable[["netvalue", "totvalue", "comment"]]
        self.price["date"] = datel
    except FileNotFoundError as e:
        # print('no saved copy of %s' % self.code)
        raise e
Example #23
Source File: info.py From xalpha with MIT License | 6 votes |
def _fetch_csv(self, path):
    """
    fetch the information and pricetable from path+code.csv; not recommended to use manually,
    just set the fetch label to be true when initializing the object

    :param path: string of folder path
    """
    try:
        content = pd.read_csv(path + self.code + ".csv")
        pricetable = content.iloc[1:]
        datel = list(pd.to_datetime(pricetable.date))
        self.price = pricetable[["netvalue", "totvalue", "comment"]]
        self.price["date"] = datel
        self.name = content.iloc[0].comment
    except FileNotFoundError as e:
        # print('no saved copy of %s' % self.code)
        raise e
Example #24
Source File: universal.py From xalpha with MIT License | 6 votes |
def fetch_backend(key):
    prefix = ioconf.get("prefix", "")
    key = prefix + key
    backend = ioconf.get("backend")
    path = ioconf.get("path")
    if backend == "csv":
        key = key + ".csv"

    try:
        if backend == "csv":
            df0 = pd.read_csv(os.path.join(path, key))
        elif backend == "sql":
            df0 = pd.read_sql(key, path)
        else:
            raise ValueError("no %s option for backend" % backend)
        return df0
    except (FileNotFoundError, exc.ProgrammingError, KeyError):
        return None
Example #25
Source File: runTests.py From svviz with MIT License | 6 votes |
def saveTimingInfo(summary):
    timingsPath = "test_timings.csv"
    git_version = subprocess.check_output(["git", "describe"]).strip()

    new_row = summary[["timing"]].T
    new_row["date"] = [datetime.datetime.now()]
    new_row["version"] = git_version

    if os.path.exists(timingsPath):
        timings = pandas.read_csv(timingsPath, index_col=0)
        timings = pandas.concat([timings, new_row])
    else:
        timings = new_row

    timings.to_csv(timingsPath)

    print(timings)
Example #26
Source File: monitor.py From lirpg with MIT License | 5 votes |
def load_results(dir):
    import pandas
    monitor_files = (
        glob(osp.join(dir, "*monitor.json")) +
        glob(osp.join(dir, "*monitor.csv")))  # get both csv and (old) json files
    if not monitor_files:
        raise LoadMonitorResultsError("no monitor files of the form *%s found in %s" % (Monitor.EXT, dir))
    dfs = []
    headers = []
    for fname in monitor_files:
        with open(fname, 'rt') as fh:
            if fname.endswith('csv'):
                firstline = fh.readline()
                assert firstline[0] == '#'
                header = json.loads(firstline[1:])
                df = pandas.read_csv(fh, index_col=None)
                headers.append(header)
            elif fname.endswith('json'):  # Deprecated json format
                episodes = []
                lines = fh.readlines()
                header = json.loads(lines[0])
                headers.append(header)
                for line in lines[1:]:
                    episode = json.loads(line)
                    episodes.append(episode)
                df = pandas.DataFrame(episodes)
            else:
                assert 0, 'unreachable'
            df['t'] += header['t_start']
        dfs.append(df)
    df = pandas.concat(dfs)
    df.sort_values('t', inplace=True)
    df.reset_index(inplace=True)
    df['t'] -= min(header['t_start'] for header in headers)
    df.headers = headers  # HACK to preserve backwards compatibility
    return df
Example #27
Source File: logger.py From lirpg with MIT License | 5 votes |
def read_csv(fname):
    import pandas
    return pandas.read_csv(fname, index_col=None, comment='#')
Example #28
Source File: test_basset_model.py From models with MIT License | 5 votes |
def test_ref_seq():
    # Get pure fasta predictions
    model_dir = model_root + "./"
    model = kipoi.get_model(model_dir, source="dir")
    # The preprocessor
    Dataloader = kipoi.get_dataloader_factory(model_dir, source="dir")
    dataloader_arguments = {
        "fasta_file": "/nfs/research1/stegle/users/rkreuzhu/opt/manuscript_code/data/raw/dataloader_files/shared/hg19.fa",
        "intervals_file": "test_files/test_encode_roadmap.bed"
    }
    # predict using results
    preds = model.pipeline.predict(dataloader_arguments)
    #
    res_orig = pd.read_csv("/nfs/research1/stegle/users/rkreuzhu/deeplearning/Basset/data/test_encode_roadmap_short_pred.txt", "\t", header=None)
    assert np.isclose(preds, res_orig.values, atol=1e-3).all()
Example #29
Source File: DataReader.py From tensorflow-DeepFM with MIT License | 5 votes |
def parse(self, infile=None, df=None, has_label=False):
    assert not ((infile is None) and (df is None)), "infile or df at least one is set"
    assert not ((infile is not None) and (df is not None)), "only one can be set"
    if infile is None:
        dfi = df.copy()
    else:
        dfi = pd.read_csv(infile)
    if has_label:
        y = dfi["target"].values.tolist()
        dfi.drop(["id", "target"], axis=1, inplace=True)
    else:
        ids = dfi["id"].values.tolist()
        dfi.drop(["id"], axis=1, inplace=True)
    # dfi for feature index
    # dfv for feature value which can be either binary (1/0) or float (e.g., 10.24)
    dfv = dfi.copy()
    for col in dfi.columns:
        if col in self.feat_dict.ignore_cols:
            dfi.drop(col, axis=1, inplace=True)
            dfv.drop(col, axis=1, inplace=True)
            continue
        if col in self.feat_dict.numeric_cols:
            dfi[col] = self.feat_dict.feat_dict[col]
        else:
            dfi[col] = dfi[col].map(self.feat_dict.feat_dict[col])
            dfv[col] = 1.

    # list of list of feature indices of each sample in the dataset
    Xi = dfi.values.tolist()
    # list of list of feature values of each sample in the dataset
    Xv = dfv.values.tolist()
    if has_label:
        return Xi, Xv, y
    else:
        return Xi, Xv, ids
Example #30
Source File: some_python.py From jupyterlab_code_formatter with MIT License | 5 votes |
def read(fp):
    df = (pd.read_csv(fp)
            .rename(columns=str.lower)
            .drop('unnamed: 36', axis=1)
            .pipe(extract_city_name)
            .pipe(time_to_datetime, ['dep_time', 'arr_time', 'crs_arr_time', 'crs_dep_time'])
            .assign(fl_date=lambda x: pd.to_datetime(x['fl_date']),
                    dest=lambda x: pd.Categorical(x['dest']),
                    origin=lambda x: pd.Categorical(x['origin']),
                    tail_num=lambda x: pd.Categorical(x['tail_num']),
                    unique_carrier=lambda x: pd.Categorical(x['unique_carrier']),
                    cancellation_code=lambda x: pd.Categorical(x['cancellation_code'])))
    return df