Python pandas.read_excel() Examples
The following are 30
code examples of pandas.read_excel().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
pandas
, or try the search function
.
Example #1
Source File: test_excel.py From recruit with Apache License 2.0 | 10 votes |
def test_to_excel_multiindex(self, merge_cells, engine, ext): frame = self.frame arrays = np.arange(len(frame.index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=['first', 'second']) frame.index = new_index frame.to_excel(self.path, 'test1', header=False) frame.to_excel(self.path, 'test1', columns=['A', 'B']) # round trip frame.to_excel(self.path, 'test1', merge_cells=merge_cells) reader = ExcelFile(self.path) df = read_excel(reader, 'test1', index_col=[0, 1]) tm.assert_frame_equal(frame, df) # GH13511
Example #2
Source File: test_excel.py From recruit with Apache License 2.0 | 6 votes |
def test_int_types(self, merge_cells, engine, ext, np_type): # Test np.int values read come back as int # (rather than float which is Excel's format). frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)), dtype=np_type) frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = read_excel(reader, "test1", index_col=0) int_frame = frame.astype(np.int64) tm.assert_frame_equal(int_frame, recons) recons2 = read_excel(self.path, "test1", index_col=0) tm.assert_frame_equal(int_frame, recons2) # Test with convert_float=False comes back as float. float_frame = frame.astype(float) recons = read_excel(self.path, "test1", convert_float=False, index_col=0) tm.assert_frame_equal(recons, float_frame, check_index_type=False, check_column_type=False)
Example #3
Source File: test_excel.py From recruit with Apache License 2.0 | 6 votes |
def get_exceldf(self, basename, ext, *args, **kwds): """ Return test data DataFrame. Parameters ---------- basename : str File base name, excluding file extension. Returns ------- df : DataFrame """ pth = os.path.join(self.dirpath, basename + ext) return read_excel(pth, *args, **kwds)
Example #4
Source File: china_index_list_spider.py From zvt with MIT License | 6 votes |
def fetch_szse_index(self) -> None: """ 抓取深证指数列表 """ url = 'http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx&CATALOGID=1812_zs&TABKEY=tab1' response = requests.get(url) df = pd.read_excel(io.BytesIO(response.content), dtype='str') df.columns = ['code', 'name', 'timestamp', 'base_point', 'list_date'] df['category'] = 'szse' df = df.loc[df['code'].str.contains(r'^\d{6}$')] self.persist_index(df) self.logger.info('深证指数列表抓取完成...') # 抓取深证指数成分股 self.fetch_szse_index_component(df) self.logger.info('深证指数成分股抓取完成...')
Example #5
Source File: test_excel.py From recruit with Apache License 2.0 | 6 votes |
def test_datetimes(self, merge_cells, engine, ext): # Test writing and reading datetimes. For issue #9139. (xref #9185) datetimes = [datetime(2013, 1, 13, 1, 2, 3), datetime(2013, 1, 13, 2, 45, 56), datetime(2013, 1, 13, 4, 29, 49), datetime(2013, 1, 13, 6, 13, 42), datetime(2013, 1, 13, 7, 57, 35), datetime(2013, 1, 13, 9, 41, 28), datetime(2013, 1, 13, 11, 25, 21), datetime(2013, 1, 13, 13, 9, 14), datetime(2013, 1, 13, 14, 53, 7), datetime(2013, 1, 13, 16, 37, 0), datetime(2013, 1, 13, 18, 20, 52)] write_frame = DataFrame({'A': datetimes}) write_frame.to_excel(self.path, 'Sheet1') read_frame = read_excel(self.path, 'Sheet1', header=0) tm.assert_series_equal(write_frame['A'], read_frame['A'])
Example #6
Source File: test_excel.py From recruit with Apache License 2.0 | 6 votes |
def test_to_excel_multiindex_no_write_index(self, merge_cells, engine, ext): # Test writing and re-reading a MI witout the index. GH 5616. # Initial non-MI frame. frame1 = DataFrame({'a': [10, 20], 'b': [30, 40], 'c': [50, 60]}) # Add a MI. frame2 = frame1.copy() multi_index = MultiIndex.from_tuples([(70, 80), (90, 100)]) frame2.index = multi_index # Write out to Excel without the index. frame2.to_excel(self.path, 'test1', index=False) # Read it back in. reader = ExcelFile(self.path) frame3 = read_excel(reader, 'test1') # Test that it is the same as the initial frame. tm.assert_frame_equal(frame1, frame3)
Example #7
Source File: test_excel.py From recruit with Apache License 2.0 | 6 votes |
def test_set_column_names_in_parameter(self, ext): # GH 12870 : pass down column names associated with # keyword argument names refdf = pd.DataFrame([[1, 'foo'], [2, 'bar'], [3, 'baz']], columns=['a', 'b']) with ensure_clean(ext) as pth: with ExcelWriter(pth) as writer: refdf.to_excel(writer, 'Data_no_head', header=False, index=False) refdf.to_excel(writer, 'Data_with_head', index=False) refdf.columns = ['A', 'B'] with ExcelFile(pth) as reader: xlsdf_no_head = read_excel(reader, 'Data_no_head', header=None, names=['A', 'B']) xlsdf_with_head = read_excel(reader, 'Data_with_head', index_col=None, names=['A', 'B']) tm.assert_frame_equal(xlsdf_no_head, refdf) tm.assert_frame_equal(xlsdf_with_head, refdf)
Example #8
Source File: test_excel.py From recruit with Apache License 2.0 | 6 votes |
def test_to_excel_multiindex_cols(self, merge_cells, engine, ext): frame = self.frame arrays = np.arange(len(frame.index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=['first', 'second']) frame.index = new_index new_cols_index = MultiIndex.from_tuples([(40, 1), (40, 2), (50, 1), (50, 2)]) frame.columns = new_cols_index header = [0, 1] if not merge_cells: header = 0 # round trip frame.to_excel(self.path, 'test1', merge_cells=merge_cells) reader = ExcelFile(self.path) df = read_excel(reader, 'test1', header=header, index_col=[0, 1]) if not merge_cells: fm = frame.columns.format(sparsify=False, adjoin=False, names=False) frame.columns = [".".join(map(str, q)) for q in zip(*fm)] tm.assert_frame_equal(frame, df)
Example #9
Source File: test_excel.py From recruit with Apache License 2.0 | 6 votes |
def test_creating_and_reading_multiple_sheets(self, ext): # see gh-9450 # # Test reading multiple sheets, from a runtime # created Excel file with multiple sheets. def tdf(col_sheet_name): d, i = [11, 22, 33], [1, 2, 3] return DataFrame(d, i, columns=[col_sheet_name]) sheets = ["AAA", "BBB", "CCC"] dfs = [tdf(s) for s in sheets] dfs = dict(zip(sheets, dfs)) with ensure_clean(ext) as pth: with ExcelWriter(pth) as ew: for sheetname, df in iteritems(dfs): df.to_excel(ew, sheetname) dfs_returned = read_excel(pth, sheet_name=sheets, index_col=0) for s in sheets: tm.assert_frame_equal(dfs[s], dfs_returned[s])
Example #10
Source File: test_excel.py From recruit with Apache License 2.0 | 6 votes |
def test_to_excel_timedelta(self, *_): # see gh-19242, gh-9155 # # Test writing timedelta to xls. frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), columns=["A"], dtype=np.int64) expected = frame.copy() frame["new"] = frame["A"].apply(lambda x: timedelta(seconds=x)) expected["new"] = expected["A"].apply( lambda x: timedelta(seconds=x).total_seconds() / float(86400)) frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(expected, recons)
Example #11
Source File: test_excel.py From recruit with Apache License 2.0 | 6 votes |
def test_to_excel_interval_labels(self, *_): # see gh-19242 # # Test writing Interval with labels. frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64) expected = frame.copy() intervals = pd.cut(frame[0], 10, labels=["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"]) frame["new"] = intervals expected["new"] = pd.Series(list(intervals)) frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(expected, recons)
Example #12
Source File: test_excel.py From recruit with Apache License 2.0 | 6 votes |
def test_to_excel_interval_no_labels(self, *_): # see gh-19242 # # Test writing Interval without labels. frame = DataFrame(np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64) expected = frame.copy() frame["new"] = pd.cut(frame[0], 10) expected["new"] = pd.cut(expected[0], 10).astype(str) frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(expected, recons)
Example #13
Source File: test_excel.py From recruit with Apache License 2.0 | 6 votes |
def test_read_excel_parse_dates(self, ext): # see gh-11544, gh-12051 df = DataFrame( {"col": [1, 2, 3], "date_strings": pd.date_range("2012-01-01", periods=3)}) df2 = df.copy() df2["date_strings"] = df2["date_strings"].dt.strftime("%m/%d/%Y") with ensure_clean(ext) as pth: df2.to_excel(pth) res = read_excel(pth, index_col=0) tm.assert_frame_equal(df2, res) res = read_excel(pth, parse_dates=["date_strings"], index_col=0) tm.assert_frame_equal(df, res) date_parser = lambda x: pd.datetime.strptime(x, "%m/%d/%Y") res = read_excel(pth, parse_dates=["date_strings"], date_parser=date_parser, index_col=0) tm.assert_frame_equal(df, res)
Example #14
Source File: test_excel.py From recruit with Apache License 2.0 | 6 votes |
def test_colaliases(self, merge_cells, engine, ext): self.frame['A'][:5] = nan self.frame.to_excel(self.path, 'test1') self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) self.frame.to_excel(self.path, 'test1', header=False) self.frame.to_excel(self.path, 'test1', index=False) # column aliases col_aliases = Index(['AA', 'X', 'Y', 'Z']) self.frame2.to_excel(self.path, 'test1', header=col_aliases) reader = ExcelFile(self.path) rs = read_excel(reader, 'test1', index_col=0) xp = self.frame2.copy() xp.columns = col_aliases tm.assert_frame_equal(xp, rs)
Example #15
Source File: test_excel.py From recruit with Apache License 2.0 | 6 votes |
def test_read_excel_squeeze(self, ext): # GH 12157 f = os.path.join(self.dirpath, 'test_squeeze' + ext) actual = pd.read_excel(f, 'two_columns', index_col=0, squeeze=True) expected = pd.Series([2, 3, 4], [4, 5, 6], name='b') expected.index.name = 'a' tm.assert_series_equal(actual, expected) actual = pd.read_excel(f, 'two_columns', squeeze=True) expected = pd.DataFrame({'a': [4, 5, 6], 'b': [2, 3, 4]}) tm.assert_frame_equal(actual, expected) actual = pd.read_excel(f, 'one_column', squeeze=True) expected = pd.Series([1, 2, 3], name='a') tm.assert_series_equal(actual, expected)
Example #16
Source File: test_excel.py From recruit with Apache License 2.0 | 6 votes |
def test_sheets(self, merge_cells, engine, ext): self.frame['A'][:5] = nan self.frame.to_excel(self.path, 'test1') self.frame.to_excel(self.path, 'test1', columns=['A', 'B']) self.frame.to_excel(self.path, 'test1', header=False) self.frame.to_excel(self.path, 'test1', index=False) # Test writing to separate sheets writer = ExcelWriter(self.path) self.frame.to_excel(writer, 'test1') self.tsframe.to_excel(writer, 'test2') writer.save() reader = ExcelFile(self.path) recons = read_excel(reader, 'test1', index_col=0) tm.assert_frame_equal(self.frame, recons) recons = read_excel(reader, 'test2', index_col=0) tm.assert_frame_equal(self.tsframe, recons) assert 2 == len(reader.sheet_names) assert 'test1' == reader.sheet_names[0] assert 'test2' == reader.sheet_names[1]
Example #17
Source File: test_excel.py From recruit with Apache License 2.0 | 5 votes |
def test_to_excel_output_encoding(self, merge_cells, engine, ext): # Avoid mixed inferred_type. df = DataFrame([[u"\u0192", u"\u0193", u"\u0194"], [u"\u0195", u"\u0196", u"\u0197"]], index=[u"A\u0192", u"B"], columns=[u"X\u0193", u"Y", u"Z"]) with ensure_clean("__tmp_to_excel_float_format__." + ext) as filename: df.to_excel(filename, sheet_name="TestSheet", encoding="utf8") result = read_excel(filename, "TestSheet", encoding="utf8", index_col=0) tm.assert_frame_equal(result, df)
Example #18
Source File: test_excel.py From recruit with Apache License 2.0 | 5 votes |
def test_duplicated_columns(self, *_): # see gh-5235 df = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]], columns=["A", "B", "B"]) df.to_excel(self.path, "test1") expected = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]], columns=["A", "B", "B.1"]) # By default, we mangle. result = read_excel(self.path, "test1", index_col=0) tm.assert_frame_equal(result, expected) # Explicitly, we pass in the parameter. result = read_excel(self.path, "test1", index_col=0, mangle_dupe_cols=True) tm.assert_frame_equal(result, expected) # see gh-11007, gh-10970 df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "A", "B"]) df.to_excel(self.path, "test1") result = read_excel(self.path, "test1", index_col=0) expected = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "A.1", "B.1"]) tm.assert_frame_equal(result, expected) # see gh-10982 df.to_excel(self.path, "test1", index=False, header=False) result = read_excel(self.path, "test1", header=None) expected = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]]) tm.assert_frame_equal(result, expected) msg = "Setting mangle_dupe_cols=False is not supported yet" with pytest.raises(ValueError, match=msg): read_excel(self.path, "test1", header=None, mangle_dupe_cols=False)
Example #19
Source File: test_excel.py From recruit with Apache License 2.0 | 5 votes |
def test_true_and_false_value_options(self, *_): # see gh-13347 df = pd.DataFrame([["foo", "bar"]], columns=["col1", "col2"]) expected = df.replace({"foo": True, "bar": False}) df.to_excel(self.path) read_frame = read_excel(self.path, true_values=["foo"], false_values=["bar"], index_col=0) tm.assert_frame_equal(read_frame, expected)
Example #20
Source File: test_excel.py From recruit with Apache License 2.0 | 5 votes |
def test_excel_date_datetime_format(self, merge_cells, engine, ext): # see gh-4133 # # Excel output format strings df = DataFrame([[date(2014, 1, 31), date(1999, 9, 24)], [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)]], index=["DATE", "DATETIME"], columns=["X", "Y"]) df_expected = DataFrame([[datetime(2014, 1, 31), datetime(1999, 9, 24)], [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)]], index=["DATE", "DATETIME"], columns=["X", "Y"]) with ensure_clean(ext) as filename2: writer1 = ExcelWriter(self.path) writer2 = ExcelWriter(filename2, date_format="DD.MM.YYYY", datetime_format="DD.MM.YYYY HH-MM-SS") df.to_excel(writer1, "test1") df.to_excel(writer2, "test1") writer1.close() writer2.close() reader1 = ExcelFile(self.path) reader2 = ExcelFile(filename2) rs1 = read_excel(reader1, "test1", index_col=0) rs2 = read_excel(reader2, "test1", index_col=0) tm.assert_frame_equal(rs1, rs2) # Since the reader returns a datetime object for dates, # we need to use df_expected to check the result. tm.assert_frame_equal(rs2, df_expected)
Example #21
Source File: hot_words_generator.py From LagouJob with Apache License 2.0 | 5 votes |
def cal_and_show_jd_hot_words(self, jd_dir='../spider/jd'): """ calculate and show hot words of Job Description (JD) :param jd_dir: :return: """ if not os.path.exists(jd_dir) or len(os.listdir(jd_dir)) == 0: print('Error! No valid content in {0}'.format(jd_dir)) sys.exit(0) else: jd_and_dir = {_.split('.')[0]: os.path.join(jd_dir, _) for _ in os.listdir(jd_dir)} for k, v in jd_and_dir.items(): text = "".join(pd.read_excel(v)['详情描述']) jieba.analyse.set_stop_words(STOPWORDS_PATH) jieba.load_userdict(USER_CORPUS) hot_words_with_weights = jieba.analyse.extract_tags(text, topK=30, withWeight=True, allowPOS=()) frequencies = {_[0]: _[1] for _ in hot_words_with_weights} print(frequencies) x, y = np.ogrid[:300, :300] mask = (x - 150) ** 2 + (y - 150) ** 2 > 130 ** 2 mask = 255 * mask.astype(int) wordcloud = WordCloud(font_path='./msyh.ttf', width=600, height=300, background_color="white", repeat=False, mask=mask) wordcloud.generate_from_frequencies(frequencies) import matplotlib.pyplot as plt plt.imshow(wordcloud, interpolation='bilinear') plt.axis("off") plt.show()
Example #22
Source File: test_excel.py From recruit with Apache License 2.0 | 5 votes |
def test_freeze_panes(self, *_): # see gh-15160 expected = DataFrame([[1, 2], [3, 4]], columns=["col1", "col2"]) expected.to_excel(self.path, "Sheet1", freeze_panes=(1, 1)) result = read_excel(self.path, index_col=0) tm.assert_frame_equal(result, expected)
Example #23
Source File: test_excel.py From recruit with Apache License 2.0 | 5 votes |
def test_excel_roundtrip_datetime(self, merge_cells, *_): # datetime.date, not sure what to test here exactly tsf = self.tsframe.copy() tsf.index = [x.date() for x in self.tsframe.index] tsf.to_excel(self.path, "test1", merge_cells=merge_cells) reader = ExcelFile(self.path) recons = read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(self.tsframe, recons)
Example #24
Source File: test_io.py From skan with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_write_excel_tables(): num_sheets = np.random.randint(1, 4) num_cols = np.random.randint(1, 5, size=num_sheets) num_rows = np.random.randint(20, 40, size=num_sheets) tables = [] for m, n in zip(num_rows, num_cols): columns = [f'column{i}' for i in range(n)] data = np.random.random((m, n)) tables.append(pd.DataFrame(data=data, columns=columns)) sheet_names = [f'sheet {i}' for i in range(num_sheets)] kwargs = dict(zip(sheet_names, tables)) kwargs['config'] = {'image files': ['image1.tif', 'image2.tif'], 'image format': 'fei', 'threshold radius': 5e-8} with temporary_file(suffix='.xlsx') as file: io.write_excel(file, **kwargs) tables_in = [pd.read_excel(file, sheet_name=name, index_col=0) for name in sheet_names] config_in_df = pd.read_excel(file, sheet_name='config') config_in = dict(zip(config_in_df['parameters'], config_in_df['values'])) for table, table_in in zip(tables, tables_in): assert list(table.columns) == list(table_in.columns) np.testing.assert_allclose(table_in.values, table.values) for key, val in kwargs['config'].items(): str(val) == str(config_in[key])
Example #25
Source File: test_excel.py From recruit with Apache License 2.0 | 5 votes |
def test_path_path_lib(self, merge_cells, engine, ext): df = tm.makeDataFrame() writer = partial(df.to_excel, engine=engine) reader = partial(pd.read_excel, index_col=0) result = tm.round_trip_pathlib(writer, reader, path="foo.{ext}".format(ext=ext)) tm.assert_frame_equal(result, df)
Example #26
Source File: test_excel.py From recruit with Apache License 2.0 | 5 votes |
def test_path_local_path(self, merge_cells, engine, ext): df = tm.makeDataFrame() writer = partial(df.to_excel, engine=engine) reader = partial(pd.read_excel, index_col=0) result = tm.round_trip_pathlib(writer, reader, path="foo.{ext}".format(ext=ext)) tm.assert_frame_equal(result, df)
Example #27
Source File: test_excel.py From recruit with Apache License 2.0 | 5 votes |
def test_inf_roundtrip(self, *_): frame = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)]) frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = read_excel(reader, "test1", index_col=0) tm.assert_frame_equal(frame, recons)
Example #28
Source File: test_excel.py From recruit with Apache License 2.0 | 5 votes |
def test_bool_types(self, merge_cells, engine, ext, np_type): # Test np.bool values read come back as float. frame = (DataFrame([1, 0, True, False], dtype=np_type)) frame.to_excel(self.path, "test1") reader = ExcelFile(self.path) recons = read_excel(reader, "test1", index_col=0).astype(np_type) tm.assert_frame_equal(frame, recons)
Example #29
Source File: test_excel.py From recruit with Apache License 2.0 | 5 votes |
def test_invalid_columns(self, *_): # see gh-10982 write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2]}) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): write_frame.to_excel(self.path, "test1", columns=["B", "C"]) expected = write_frame.reindex(columns=["B", "C"]) read_frame = read_excel(self.path, "test1", index_col=0) tm.assert_frame_equal(expected, read_frame) with pytest.raises(KeyError): write_frame.to_excel(self.path, "test1", columns=["C", "D"])
Example #30
Source File: plot.py From Eins with MIT License | 5 votes |
def plot(path, oplane, roadid, savepath): fig = plt.figure() ax = fig.add_subplot(1,1,1) data = pd.read_excel(path+'.xlsx', 'SpaceTimeData') data = data[data['LANE_ID'] == oplane] data = data[data['ROAD_HASH_ID'] == roadid] y, x = __get_space_time_data(data) layer = ax.scatter(x, y, c='k', alpha=0.2, s=0.1, marker='x') plt.savefig(savepath, dpi=1000) return layer