Python pandas.ExcelFile() Examples
The following are 30
code examples of pandas.ExcelFile().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
pandas
, or try the search function
.
Example #1
Source File: test_excel.py From Computable with MIT License | 7 votes |
def test_parse_cols_int(self): _skip_if_no_openpyxl() _skip_if_no_xlrd() suffix = ['xls', 'xlsx', 'xlsm'] for s in suffix: pth = os.path.join(self.dirpath, 'test.%s' % s) xls = ExcelFile(pth) df = xls.parse('Sheet1', index_col=0, parse_dates=True, parse_cols=3) df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True) df2 = df2.reindex(columns=['A', 'B', 'C']) df3 = xls.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True, parse_cols=3) # TODO add index to xls file) tm.assert_frame_equal(df, df2, check_names=False) tm.assert_frame_equal(df3, df2, check_names=False)
Example #2
Source File: simu_agent.py From OpenData with Apache License 2.0 | 7 votes |
def get_data(self, index): prog_cod = index_map[index] url = "https://www.barclayhedge.com/cgi-bin/barclay_stats/ghsndx.cgi" param = { 'dump': 'excel', 'prog_cod': prog_cod, } response = self.do_request(url, param=param, method='POST', type='binary') if response is not None: excel = pd.ExcelFile(io.BytesIO(response)) df = excel.parse('Sheet1').dropna(how='all').copy().reset_index().drop(0) df.columns = ['year', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'YTD'] df = df.set_index('year') return df, '' return None, "获取数据失败"
Example #3
Source File: own_tech.py From MultipleFactorRiskModel with MIT License | 6 votes |
def load_file(file_name, sheet_name_list): ''' load xlsx file into a dictionary indexed by sheet names :param string file_name:name of file :param [string] sheet_name_list: name of selected sheets in the xlsx file :return: {string:DataFrame} raw_data: {name of sheet:pure data retrieved from xlsx with column and index 0,1,2,...} ''' print 'loading file...' cut_head = 2 file = pd.ExcelFile(file_name) raw_data = {} # iterate over every sheet and retrieve useful data into raw_data for i in range(len(sheet_name_list)): print 'parsing sheet', sheet_name_list[i] # parse a sheet from the whole file into a DataFrame with headers cut off temp = file.parse(sheet_name_list[i]).iloc[cut_head:, :] # now temp.dtype = object,because the data read in contains string.Here convert it to float temp = temp.astype(np.float) # reset index and column with 0,1,2,..., temp.columns = range(temp.shape[1]) temp.index = range(temp.shape[0]) temp.fillna(0, inplace=True) raw_data[sheet_name_list[i]] = temp return raw_data
Example #4
Source File: test_excel.py From Computable with MIT License | 6 votes |
def test_parse_cols_list(self): _skip_if_no_openpyxl() _skip_if_no_xlrd() suffix = ['xls', 'xlsx', 'xlsm'] for s in suffix: pth = os.path.join(self.dirpath, 'test.%s' % s) xls = ExcelFile(pth) df = xls.parse('Sheet1', index_col=0, parse_dates=True, parse_cols=[0, 2, 3]) df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True) df2 = df2.reindex(columns=['B', 'C']) df3 = xls.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True, parse_cols=[0, 2, 3]) # TODO add index to xls file) tm.assert_frame_equal(df, df2, check_names=False) tm.assert_frame_equal(df3, df2, check_names=False)
Example #5
Source File: xls2json.py From indy-ssivc-tutorial with Apache License 2.0 | 6 votes |
def xls2csv(inputfile): data_xls = pd.ExcelFile(inputfile) # print data_xls.sheet_names # print len(data_xls.sheet_names) csvs = [] for tab in data_xls.sheet_names: if ".csv" in tab: # Get the names of the columns column_list = [] df_column = pd.read_excel(data_xls, tab).columns for i in df_column: column_list.append(i) # Create a converter so column data are all strings converter = {col: str for col in column_list} # Read the excel tabs tab_xls = pd.read_excel(data_xls, tab, index_col=None, converters=converter) tab_xls.to_csv(tab, encoding='utf-8', index=False) csvs.append(tab) return csvs
Example #6
Source File: plexos.py From GridCal with GNU General Public License v3.0 | 6 votes |
def parse_excel(fname): """ Parse excel export of the plexos file :param fname: complete path to the file """ excel = pd.ExcelFile(fname) print('Reading objects...') objects = excel.parse(sheet_name='Objects') print('Reading Memberships...') memberships = excel.parse(sheet_name='Memberships') print('Reading Properties...') properties = excel.parse(sheet_name='Properties') properties.rename(columns={'filename': 'path'}) excel.close() # file_dict = {row['child_object']: row['filename'] for i, row in properties.iterrows()} return objects, memberships, properties
Example #7
Source File: xlsx_compare_test.py From xgbfir with MIT License | 6 votes |
def _compare_xlsx(self, file1, file2, rtol=1e-02, atol=1e-03): # print("requested compare: {} and {}".format(file1, file2)) xl1 = pd.ExcelFile(file1) xl2 = pd.ExcelFile(file2) self.assertEqual(xl1.sheet_names, xl2.sheet_names) for sheet in xl1.sheet_names: # print("Prrocessing sheet {}".format(sheet)) df1 = xl1.parse(sheet) df2 = xl2.parse(sheet) columns1 = list(df1) columns2 = list(df2) self.assertEqual(len(columns1), len(columns2)) arr1 = df1.values arr2 = df2.values self.assertEqual(arr1.shape, arr2.shape) for x, y in np.ndindex(arr1.shape): v1 = arr1[x, y] v2 = arr2[x, y] # print("{}: ({}, {}): {} vs {}".format(sheet, x, y, v1, v2)) if isinstance(v1, six.string_types) or isinstance(v2, six.string_types): self.assertEqual(v1, v2) else: npt.assert_allclose(v1, v2, rtol=rtol, atol=atol)
Example #8
Source File: parseundp.py From Semantic-Search-for-Sustainable-Development with Apache License 2.0 | 6 votes |
def extract_template_data(path = '.', exclude = []): data = [] found = False for file in os.listdir(path): if file[-4:] == 'xlsx' and '~' not in file and file not in exclude: file_path = os.path.join(path, file) xls = pd.ExcelFile(file_path) for sheet in range(len(xls.sheet_names)): template = pd.read_excel(file_path, header = None, sheetname = sheet) template.fillna('', inplace = True) for col in range(len(template.columns)): try: if 'Identify closest' in str(template.iloc[0][col]) or 'Identify closest' in str(template.iloc[1][col]): keep = col found = True elif 'National Development Plan' in str(template.iloc[1][col]) and not found: keep = col found = True except: continue if found: data.append(template[[template.keys()[1], template.keys()[keep]]]) found = False return data
Example #9
Source File: test_excel.py From Computable with MIT License | 6 votes |
def test_excel_table(self): _skip_if_no_xlrd() pth = os.path.join(self.dirpath, 'test.xls') xls = ExcelFile(pth) df = xls.parse('Sheet1', index_col=0, parse_dates=True) df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True) df3 = xls.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True) tm.assert_frame_equal(df, df2, check_names=False) tm.assert_frame_equal(df3, df2, check_names=False) df4 = xls.parse('Sheet1', index_col=0, parse_dates=True, skipfooter=1) df5 = xls.parse('Sheet1', index_col=0, parse_dates=True, skip_footer=1) tm.assert_frame_equal(df4, df.ix[:-1]) tm.assert_frame_equal(df4, df5)
Example #10
Source File: test_excel.py From Computable with MIT License | 6 votes |
def test_xlsx_table(self): _skip_if_no_xlrd() _skip_if_no_openpyxl() pth = os.path.join(self.dirpath, 'test.xlsx') xlsx = ExcelFile(pth) df = xlsx.parse('Sheet1', index_col=0, parse_dates=True) df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True) df3 = xlsx.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True) # TODO add index to xlsx file tm.assert_frame_equal(df, df2, check_names=False) tm.assert_frame_equal(df3, df2, check_names=False) df4 = xlsx.parse('Sheet1', index_col=0, parse_dates=True, skipfooter=1) df5 = xlsx.parse('Sheet1', index_col=0, parse_dates=True, skip_footer=1) tm.assert_frame_equal(df4, df.ix[:-1]) tm.assert_frame_equal(df4, df5)
Example #11
Source File: eia861.py From pudl with MIT License | 6 votes |
def get_xlsx_dict(self, years, file_name): """Read in Excel files to create Excel objects. Rather than reading in the same Excel files several times, we can just read them each in once (one per year) and use the ExcelFile object to refer back to the data in memory. Args: years (list): The years that we're trying to read data for. file_name (str): Name of the excel file. """ for yr in years: try: self.xlsx_dict[yr] logger.info(f"we already have an xlsx file for {yr}") except KeyError: logger.info( f"Extracting data from {self.dataset_name} {file_name} spreadsheet for {yr}.") self.xlsx_dict[yr] = pd.ExcelFile( self.get_file(yr, file_name) )
Example #12
Source File: test_excel.py From Computable with MIT License | 6 votes |
def test_sheets(self): _skip_if_no_xlrd() with ensure_clean(self.ext) as path: self.frame['A'][:5] = nan self.frame.to_excel(path, 'test1') self.frame.to_excel(path, 'test1', cols=['A', 'B']) self.frame.to_excel(path, 'test1', header=False) self.frame.to_excel(path, 'test1', index=False) # Test writing to separate sheets writer = ExcelWriter(path) self.frame.to_excel(writer, 'test1') self.tsframe.to_excel(writer, 'test2') writer.save() reader = ExcelFile(path) recons = reader.parse('test1', index_col=0) tm.assert_frame_equal(self.frame, recons) recons = reader.parse('test2', index_col=0) tm.assert_frame_equal(self.tsframe, recons) np.testing.assert_equal(2, len(reader.sheet_names)) np.testing.assert_equal('test1', reader.sheet_names[0]) np.testing.assert_equal('test2', reader.sheet_names[1])
Example #13
Source File: test_excel.py From Computable with MIT License | 6 votes |
def test_colaliases(self): _skip_if_no_xlrd() with ensure_clean(self.ext) as path: self.frame['A'][:5] = nan self.frame.to_excel(path, 'test1') self.frame.to_excel(path, 'test1', cols=['A', 'B']) self.frame.to_excel(path, 'test1', header=False) self.frame.to_excel(path, 'test1', index=False) # column aliases col_aliases = Index(['AA', 'X', 'Y', 'Z']) self.frame2.to_excel(path, 'test1', header=col_aliases) reader = ExcelFile(path) rs = reader.parse('test1', index_col=0) xp = self.frame2.copy() xp.columns = col_aliases tm.assert_frame_equal(xp, rs)
Example #14
Source File: test_excel.py From Computable with MIT License | 6 votes |
def test_excel_roundtrip_indexname(self): _skip_if_no_xlrd() df = DataFrame(np.random.randn(10, 4)) df.index.name = 'foo' with ensure_clean(self.ext) as path: df.to_excel(path, merge_cells=self.merge_cells) xf = ExcelFile(path) result = xf.parse(xf.sheet_names[0], index_col=0, has_index_names=self.merge_cells) tm.assert_frame_equal(result, df) self.assertEqual(result.index.name, 'foo')
Example #15
Source File: test_excel.py From Computable with MIT License | 6 votes |
def test_to_excel_multiindex(self): _skip_if_no_xlrd() frame = self.frame arrays = np.arange(len(frame.index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=['first', 'second']) frame.index = new_index with ensure_clean(self.ext) as path: frame.to_excel(path, 'test1', header=False) frame.to_excel(path, 'test1', cols=['A', 'B']) # round trip frame.to_excel(path, 'test1', merge_cells=self.merge_cells) reader = ExcelFile(path) df = reader.parse('test1', index_col=[0, 1], parse_dates=False, has_index_names=self.merge_cells) tm.assert_frame_equal(frame, df) self.assertEqual(frame.index.names, df.index.names)
Example #16
Source File: test_excel.py From Computable with MIT License | 6 votes |
def test_int_types(self): _skip_if_no_xlrd() for np_type in (np.int8, np.int16, np.int32, np.int64): with ensure_clean(self.ext) as path: # Test np.int values read come back as int (rather than float # which is Excel's format). frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)), dtype=np_type) frame.to_excel(path, 'test1') reader = ExcelFile(path) recons = reader.parse('test1') int_frame = frame.astype(np.int64) tm.assert_frame_equal(int_frame, recons) recons2 = read_excel(path, 'test1') tm.assert_frame_equal(int_frame, recons2) # test with convert_float=False comes back as float float_frame = frame.astype(float) recons = read_excel(path, 'test1', convert_float=False) tm.assert_frame_equal(recons, float_frame)
Example #17
Source File: test_excel.py From Computable with MIT License | 6 votes |
def test_to_excel_multiindex_dates(self): _skip_if_no_xlrd() # try multiindex with dates tsframe = self.tsframe.copy() new_index = [tsframe.index, np.arange(len(tsframe.index))] tsframe.index = MultiIndex.from_arrays(new_index) with ensure_clean(self.ext) as path: tsframe.index.names = ['time', 'foo'] tsframe.to_excel(path, 'test1', merge_cells=self.merge_cells) reader = ExcelFile(path) recons = reader.parse('test1', index_col=[0, 1], has_index_names=self.merge_cells) tm.assert_frame_equal(tsframe, recons) self.assertEquals(recons.index.names, ('time', 'foo'))
Example #18
Source File: test_excel.py From Computable with MIT License | 6 votes |
def test_to_excel_multiindex_no_write_index(self): _skip_if_no_xlrd() # Test writing and re-reading a MI witout the index. GH 5616. # Initial non-MI frame. frame1 = pd.DataFrame({'a': [10, 20], 'b': [30, 40], 'c': [50, 60]}) # Add a MI. frame2 = frame1.copy() multi_index = pd.MultiIndex.from_tuples([(70, 80), (90, 100)]) frame2.index = multi_index with ensure_clean(self.ext) as path: # Write out to Excel without the index. frame2.to_excel(path, 'test1', index=False) # Read it back in. reader = ExcelFile(path) frame3 = reader.parse('test1') # Test that it is the same as the initial frame. tm.assert_frame_equal(frame1, frame3)
Example #19
Source File: test_excel.py From Computable with MIT License | 6 votes |
def check_excel_table_sheet_by_index(self, filename, csvfile): import xlrd pth = os.path.join(self.dirpath, filename) xls = ExcelFile(pth) df = xls.parse(0, index_col=0, parse_dates=True) df2 = self.read_csv(csvfile, index_col=0, parse_dates=True) df3 = xls.parse(1, skiprows=[1], index_col=0, parse_dates=True) tm.assert_frame_equal(df, df2, check_names=False) tm.assert_frame_equal(df3, df2, check_names=False) df4 = xls.parse(0, index_col=0, parse_dates=True, skipfooter=1) df5 = xls.parse(0, index_col=0, parse_dates=True, skip_footer=1) tm.assert_frame_equal(df4, df.ix[:-1]) tm.assert_frame_equal(df4, df5) self.assertRaises(xlrd.XLRDError, xls.parse, 'asdf')
Example #20
Source File: test_excel.py From Computable with MIT License | 6 votes |
def test_to_excel_float_format(self): _skip_if_no_xlrd() df = DataFrame([[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], index=['A', 'B'], columns=['X', 'Y', 'Z']) with ensure_clean(self.ext) as filename: df.to_excel(filename, 'test1', float_format='%.2f') reader = ExcelFile(filename) rs = reader.parse('test1', index_col=None) xp = DataFrame([[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]], index=['A', 'B'], columns=['X', 'Y', 'Z']) tm.assert_frame_equal(rs, xp)
Example #21
Source File: test_check_object.py From pythonwhat with GNU Affero General Public License v3.0 | 5 votes |
def test_manual_converter(): res = helper.run( { "DC_CODE": "xl = pd.ExcelFile('battledeath2.xlsx')", "DC_SOLUTION": "xl = pd.ExcelFile('battledeath.xlsx')", "DC_PEC": "import pandas as pd; from urllib.request import urlretrieve; urlretrieve('https://s3.amazonaws.com/assets.datacamp.com/production/course_998/datasets/battledeath.xlsx', 'battledeath.xlsx'); from urllib.request import urlretrieve; urlretrieve('https://s3.amazonaws.com/assets.datacamp.com/production/course_998/datasets/battledeath.xlsx', 'battledeath2.xlsx')", "DC_SCT": """ def my_converter(x): return(x.sheet_names) set_converter(key = "pandas.io.excel.ExcelFile", fundef = my_converter) Ex().check_object('xl').has_equal_value() """, } ) assert res["correct"]
Example #22
Source File: excel.py From pudl with MIT License | 5 votes |
def _load_excel_file(self, year, page): """Returns ExcelFile object corresponding to given (year, page). Additionally, loaded files are stored under self._file_cache for reuse. """ full_path = self._get_file_path(year, page) if full_path not in self._file_cache: logger.info( f'{self._dataset_name}: Loading excel file {full_path}') self._file_cache[full_path] = pd.ExcelFile(full_path) return self._file_cache[full_path]
Example #23
Source File: utils.py From pyam with Apache License 2.0 | 5 votes |
def read_pandas(path, default_sheet='data', *args, **kwargs): """Read a file and return a pandas.DataFrame""" if path.endswith('csv'): df = pd.read_csv(path, *args, **kwargs) else: xl = pd.ExcelFile(path) if len(xl.sheet_names) > 1 and 'sheet_name' not in kwargs: kwargs['sheet_name'] = default_sheet df = pd.read_excel(path, *args, **kwargs) return df
Example #24
Source File: core.py From pyam with Apache License 2.0 | 5 votes |
def _init(self, data, **kwargs): """Process data and set attributes for new instance""" # import data from pd.DataFrame or read from source if isinstance(data, pd.DataFrame) or isinstance(data, pd.Series): _data = format_data(data.copy(), **kwargs) elif has_ix and isinstance(data, ixmp.TimeSeries): _data = read_ix(data, **kwargs) else: logger.info('Reading file `{}`'.format(data)) _data = read_file(data, **kwargs) self.data, self.time_col, self.extra_cols = _data # cast time_col to desired format if self.time_col == 'year': self._format_year_col() elif self.time_col == 'time': self._format_datetime_col() self._LONG_IDX = IAMC_IDX + [self.time_col] + self.extra_cols # define `meta` dataframe for categorization & quantitative indicators self.meta = self.data[META_IDX].drop_duplicates().set_index(META_IDX) self.reset_exclude() # if initializing from xlsx, try to load `meta` table from file meta_sheet = kwargs.get('meta_sheet_name', 'meta') if isstr(data) and data.endswith('.xlsx') and meta_sheet is not False\ and meta_sheet in pd.ExcelFile(data).sheet_names: self.load_meta(data, sheet_name=meta_sheet) # execute user-defined code if 'exec' in run_control(): self._execute_run_control()
Example #25
Source File: base.py From openscm with GNU Affero General Public License v3.0 | 5 votes |
def _read_pandas(fname: str, *args: Any, **kwargs: Any) -> pd.DataFrame: """ Read a file and return a :class:`pd.DataFrame`. Parameters ---------- fname Path from which to read data *args Passed to :func:`pd.read_csv` if :obj:`fname` ends with '.csv', otherwise passed to :func:`pd.read_excel`. **kwargs Passed to :func:`pd.read_csv` if :obj:`fname` ends with '.csv', otherwise passed to :func:`pd.read_excel`. Returns ------- :obj:`pd.DataFrame` Read data Raises ------ OSError Path specified by :obj:`fname` does not exist """ if not os.path.exists(fname): raise OSError("no data file `{}` found!".format(fname)) if fname.endswith("csv"): df = pd.read_csv(fname, *args, **kwargs) else: xl = pd.ExcelFile(fname) if len(xl.sheet_names) > 1 and "sheet_name" not in kwargs: kwargs["sheet_name"] = "data" df = pd.read_excel(fname, *args, **kwargs) return df # pylint doesn't recognise return statements if they include ','
Example #26
Source File: databases.py From CityEnergyAnalyst with MIT License | 5 votes |
def database_to_dict(db_path): out = OrderedDict() xls = pd.ExcelFile(db_path) for sheet in xls.sheet_names: df = xls.parse(sheet, keep_default_na=False) out[sheet] = df.to_dict(orient='records', into=OrderedDict) return out
Example #27
Source File: DyStockDataGateway.py From DevilYuan with MIT License | 5 votes |
def _getTickDataFrom163(code=None, date=None, retry_count=3, pause=0.001): """ 从网易获取分笔数据 网易的分笔数据只有最近5日的 接口和返回的DF,保持跟tushare一致 Parameters ------ code:string 股票代码 e.g. 600848 date:string 日期 format:YYYY-MM-DD retry_count : int, 默认 3 如遇网络等问题重复执行的次数 pause : int, 默认 0 重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题 return ------- DataFrame 当日所有股票交易数据(DataFrame) 属性:成交时间、成交价格、价格变动,成交手、成交金额(元),买卖类型 """ if code is None or len(code)!=6 or date is None: return None symbol = DyStockDataTicksGateway._codeTo163Symbol(code) yyyy, mm, dd = date.split('-') for _ in range(retry_count): sleep(pause) try: url = 'http://quotes.money.163.com/cjmx/{0}/{1}/{2}.xls'.format(yyyy, yyyy+mm+dd, symbol) socket = urlopen(url) xd = pd.ExcelFile(socket) df = xd.parse(xd.sheet_names[0], names=['time', 'price', 'change', 'volume', 'amount', 'type']) df['amount'] = df['amount'].astype('int64') # keep same as tushare except Exception as e: print(e) ex = e else: return df raise ex
Example #28
Source File: Options Straddle backtest.py From quant-trading with Apache License 2.0 | 5 votes |
def main(): data=pd.ExcelFile('stoxx50.xlsx') aug=data.parse('aug') aug.set_index('Dates',inplace=True) aug.index=pd.to_datetime(aug.index) spot=data.parse('spot') spot.set_index('Dates',inplace=True) spot.index=pd.to_datetime(spot.index) target=find_strike_price(aug) #we iterate through all the available option pairs #to find the optimal strike price to maximize our profit for strikeprice in target: df=straddle(aug,spot,contractsize,strikeprice) signal=signal_generation(df,threshold) plot(signal,strikeprice,contractsize) # In[8]:
Example #29
Source File: generate_xhale.py From sharpy with BSD 3-Clause "New" or "Revised" License | 5 votes |
def read_aero_data(filename='inputs/aero_properties.xlsx'): import pandas as pd xl = pd.ExcelFile(filename) sheets = {sheet_name: xl.parse(sheet_name, header=0, index_col=0) for sheet_name in xl.sheet_names} aero_data = dict() for sheet, val in sheets.items(): aero_data[sheet] = dict() for item in val['value'].items(): aero_data[sheet][item[0]] = item[1] return aero_data
Example #30
Source File: eia861.py From pudl with MIT License | 5 votes |
def get_path_name(self, yr, file_name): """Get the ExcelFile file path name.""" return self.get_meta('file_name_map', None).loc[yr, file_name]