Python pandas.io.parsers.read_csv() Examples
The following are 30 code examples of pandas.io.parsers.read_csv(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.io.parsers, or try the search function.
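As a quick orientation before the project examples, here is a minimal sketch of calling read_csv() through pandas.io.parsers. The file name data.csv and the 'date' column below are hypothetical placeholders, and the same callable is also exposed as pandas.read_csv().

import pandas as pd
from pandas.io.parsers import read_csv  # same callable as pd.read_csv

# Hypothetical input: a CSV file with a 'date' column used as the index.
df = read_csv('data.csv', index_col='date', parse_dates=True)

# Large files can also be streamed in fixed-size chunks rather than
# loaded all at once; each chunk is an ordinary DataFrame.
for chunk in read_csv('data.csv', chunksize=1000):
    print(chunk.shape)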
Example #1
Source File: test_excel.py From Computable with MIT License | 7 votes |
def test_parse_cols_int(self):
    _skip_if_no_openpyxl()
    _skip_if_no_xlrd()

    suffix = ['xls', 'xlsx', 'xlsm']
    for s in suffix:
        pth = os.path.join(self.dirpath, 'test.%s' % s)
        xls = ExcelFile(pth)
        df = xls.parse('Sheet1', index_col=0, parse_dates=True,
                       parse_cols=3)
        df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
        df2 = df2.reindex(columns=['A', 'B', 'C'])
        df3 = xls.parse('Sheet2', skiprows=[1], index_col=0,
                        parse_dates=True, parse_cols=3)
        # TODO add index to xls file
        tm.assert_frame_equal(df, df2, check_names=False)
        tm.assert_frame_equal(df3, df2, check_names=False)
Example #2
Source File: test_stata.py From vnpy_crypto with MIT License | 6 votes |
def test_read_write_reread_dta14(self, file, parsed_114, version):
    file = getattr(self, file)
    parsed = self.read_dta(file)
    parsed.index.name = 'index'

    expected = self.read_csv(self.csv14)
    cols = ['byte_', 'int_', 'long_', 'float_', 'double_']
    for col in cols:
        expected[col] = expected[col]._convert(datetime=True, numeric=True)
    expected['float_'] = expected['float_'].astype(np.float32)
    expected['date_td'] = pd.to_datetime(
        expected['date_td'], errors='coerce')

    tm.assert_frame_equal(parsed_114, parsed)

    with tm.ensure_clean() as path:
        parsed_114.to_stata(path, {'date_td': 'td'}, version=version)
        written_and_read_again = self.read_dta(path)
        tm.assert_frame_equal(
            written_and_read_again.set_index('index'), parsed_114)
Example #3
Source File: test_stata.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_read_write_reread_dta14(self, file, parsed_114, version):
    file = getattr(self, file)
    parsed = self.read_dta(file)
    parsed.index.name = 'index'

    expected = self.read_csv(self.csv14)
    cols = ['byte_', 'int_', 'long_', 'float_', 'double_']
    for col in cols:
        expected[col] = expected[col]._convert(datetime=True, numeric=True)
    expected['float_'] = expected['float_'].astype(np.float32)
    expected['date_td'] = pd.to_datetime(
        expected['date_td'], errors='coerce')

    tm.assert_frame_equal(parsed_114, parsed)

    with tm.ensure_clean() as path:
        parsed_114.to_stata(path, {'date_td': 'td'}, version=version)
        written_and_read_again = self.read_dta(path)
        tm.assert_frame_equal(
            written_and_read_again.set_index('index'), parsed_114)
Example #4
Source File: test_network.py From recruit with Apache License 2.0 | 6 votes |
def test_read_csv_chunked_download(self, s3_resource, caplog):
    # 8 MB, S3FS uses 5MB chunks
    df = DataFrame(np.random.randn(100000, 4), columns=list('abcd'))
    buf = BytesIO()
    str_buf = StringIO()

    df.to_csv(str_buf)

    buf = BytesIO(str_buf.getvalue().encode('utf-8'))

    s3_resource.Bucket("pandas-test").put_object(
        Key="large-file.csv", Body=buf)

    with caplog.at_level(logging.DEBUG, logger='s3fs.core'):
        read_csv("s3://pandas-test/large-file.csv", nrows=5)
        # log of fetch_range (start, stop)
        assert ((0, 5505024) in {x.args[-2:] for x in caplog.records})
Example #5
Source File: test_read_fwf.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_read_csv_compat():
    csv_data = """\
A,B,C,D,E
2011,58,360.242940,149.910199,11950.7
2011,59,444.953632,166.985655,11788.4
2011,60,364.136849,183.628767,11806.2
2011,61,413.836124,184.375703,11916.8
2011,62,502.953953,173.237159,12468.3
"""
    expected = read_csv(StringIO(csv_data), engine="python")

    fwf_data = """\
A   B     C            D            E
201158    360.242940   149.910199   11950.7
201159    444.953632   166.985655   11788.4
201160    364.136849   183.628767   11806.2
201161    413.836124   184.375703   11916.8
201162    502.953953   173.237159   12468.3
"""
    colspecs = [(0, 4), (4, 8), (8, 20), (21, 33), (34, 43)]
    result = read_fwf(StringIO(fwf_data), colspecs=colspecs)
    tm.assert_frame_equal(result, expected)
Example #6
Source File: test_excel.py From Computable with MIT License | 6 votes |
def test_parse_cols_list(self):
    _skip_if_no_openpyxl()
    _skip_if_no_xlrd()

    suffix = ['xls', 'xlsx', 'xlsm']
    for s in suffix:
        pth = os.path.join(self.dirpath, 'test.%s' % s)
        xls = ExcelFile(pth)
        df = xls.parse('Sheet1', index_col=0, parse_dates=True,
                       parse_cols=[0, 2, 3])
        df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
        df2 = df2.reindex(columns=['B', 'C'])
        df3 = xls.parse('Sheet2', skiprows=[1], index_col=0,
                        parse_dates=True, parse_cols=[0, 2, 3])
        # TODO add index to xls file
        tm.assert_frame_equal(df, df2, check_names=False)
        tm.assert_frame_equal(df3, df2, check_names=False)
Example #7
Source File: test_excel.py From Computable with MIT License | 6 votes |
def check_excel_table_sheet_by_index(self, filename, csvfile):
    import xlrd

    pth = os.path.join(self.dirpath, filename)
    xls = ExcelFile(pth)
    df = xls.parse(0, index_col=0, parse_dates=True)
    df2 = self.read_csv(csvfile, index_col=0, parse_dates=True)
    df3 = xls.parse(1, skiprows=[1], index_col=0, parse_dates=True)
    tm.assert_frame_equal(df, df2, check_names=False)
    tm.assert_frame_equal(df3, df2, check_names=False)

    df4 = xls.parse(0, index_col=0, parse_dates=True, skipfooter=1)
    df5 = xls.parse(0, index_col=0, parse_dates=True, skip_footer=1)
    tm.assert_frame_equal(df4, df.ix[:-1])
    tm.assert_frame_equal(df4, df5)

    self.assertRaises(xlrd.XLRDError, xls.parse, 'asdf')
Example #8
Source File: test_network.py From recruit with Apache License 2.0 | 6 votes |
def test_parse_public_s3_bucket_chunked_python(self, tips_df):
    # Read with a chunksize using the Python parser
    chunksize = 5
    for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
        df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
                             chunksize=chunksize, compression=comp,
                             engine='python')
        assert df_reader.chunksize == chunksize
        for i_chunk in [0, 1, 2]:
            # Read a couple of chunks and make sure we see them properly.
            df = df_reader.get_chunk()
            assert isinstance(df, DataFrame)
            assert not df.empty
            true_df = tips_df.iloc[
                chunksize * i_chunk: chunksize * (i_chunk + 1)]
            tm.assert_frame_equal(true_df, df)
Example #9
Source File: test_network.py From recruit with Apache License 2.0 | 6 votes |
def test_parse_public_s3_bucket_chunked(self, tips_df):
    # Read with a chunksize
    chunksize = 5
    for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
        df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
                             chunksize=chunksize, compression=comp)
        assert df_reader.chunksize == chunksize
        for i_chunk in [0, 1, 2]:
            # Read a couple of chunks and make sure we see them
            # properly.
            df = df_reader.get_chunk()
            assert isinstance(df, DataFrame)
            assert not df.empty
            true_df = tips_df.iloc[
                chunksize * i_chunk: chunksize * (i_chunk + 1)]
            tm.assert_frame_equal(true_df, df)
Example #10
Source File: test_network.py From vnpy_crypto with MIT License | 6 votes |
def test_parse_public_s3_bucket(self, tips_df):
    pytest.importorskip('s3fs')

    # more of an integration test due to the not-public contents portion
    # can probably mock this though.
    for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
        df = read_csv('s3://pandas-test/tips.csv' + ext, compression=comp)
        assert isinstance(df, DataFrame)
        assert not df.empty
        tm.assert_frame_equal(df, tips_df)

    # Read public file from bucket with not-public contents
    df = read_csv('s3://cant_get_it/tips.csv')
    assert isinstance(df, DataFrame)
    assert not df.empty
    tm.assert_frame_equal(df, tips_df)
Example #11
Source File: test_excel.py From Computable with MIT License | 6 votes |
def test_xlsx_table(self):
    _skip_if_no_xlrd()
    _skip_if_no_openpyxl()

    pth = os.path.join(self.dirpath, 'test.xlsx')
    xlsx = ExcelFile(pth)
    df = xlsx.parse('Sheet1', index_col=0, parse_dates=True)
    df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
    df3 = xlsx.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True)

    # TODO add index to xlsx file
    tm.assert_frame_equal(df, df2, check_names=False)
    tm.assert_frame_equal(df3, df2, check_names=False)

    df4 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
                     skipfooter=1)
    df5 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
                     skip_footer=1)

    tm.assert_frame_equal(df4, df.ix[:-1])
    tm.assert_frame_equal(df4, df5)
Example #12
Source File: test_network.py From recruit with Apache License 2.0 | 6 votes |
def test_parse_public_s3_bucket(self, tips_df):
    pytest.importorskip('s3fs')

    # more of an integration test due to the not-public contents portion
    # can probably mock this though.
    for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
        df = read_csv('s3://pandas-test/tips.csv' + ext, compression=comp)
        assert isinstance(df, DataFrame)
        assert not df.empty
        tm.assert_frame_equal(df, tips_df)

    # Read public file from bucket with not-public contents
    df = read_csv('s3://cant_get_it/tips.csv')
    assert isinstance(df, DataFrame)
    assert not df.empty
    tm.assert_frame_equal(df, tips_df)
Example #13
Source File: test_network.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_parse_public_s3_bucket(self, tips_df):
    pytest.importorskip('s3fs')

    # more of an integration test due to the not-public contents portion
    # can probably mock this though.
    for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
        df = read_csv('s3://pandas-test/tips.csv' + ext, compression=comp)
        assert isinstance(df, DataFrame)
        assert not df.empty
        tm.assert_frame_equal(df, tips_df)

    # Read public file from bucket with not-public contents
    df = read_csv('s3://cant_get_it/tips.csv')
    assert isinstance(df, DataFrame)
    assert not df.empty
    tm.assert_frame_equal(df, tips_df)
Example #14
Source File: test_network.py From vnpy_crypto with MIT License | 6 votes |
def test_parse_public_s3_bucket_chunked(self, tips_df):
    # Read with a chunksize
    chunksize = 5
    for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
        df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
                             chunksize=chunksize, compression=comp)
        assert df_reader.chunksize == chunksize
        for i_chunk in [0, 1, 2]:
            # Read a couple of chunks and make sure we see them
            # properly.
            df = df_reader.get_chunk()
            assert isinstance(df, DataFrame)
            assert not df.empty
            true_df = tips_df.iloc[
                chunksize * i_chunk: chunksize * (i_chunk + 1)]
            tm.assert_frame_equal(true_df, df)
Example #15
Source File: test_network.py From vnpy_crypto with MIT License | 6 votes |
def test_parse_public_s3_bucket_chunked_python(self, tips_df):
    # Read with a chunksize using the Python parser
    chunksize = 5
    for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
        df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
                             chunksize=chunksize, compression=comp,
                             engine='python')
        assert df_reader.chunksize == chunksize
        for i_chunk in [0, 1, 2]:
            # Read a couple of chunks and make sure we see them properly.
            df = df_reader.get_chunk()
            assert isinstance(df, DataFrame)
            assert not df.empty
            true_df = tips_df.iloc[
                chunksize * i_chunk: chunksize * (i_chunk + 1)]
            tm.assert_frame_equal(true_df, df)
Example #16
Source File: test_network.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_parse_public_s3_bucket_chunked(self, tips_df):
    # Read with a chunksize
    chunksize = 5
    for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
        df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
                             chunksize=chunksize, compression=comp)
        assert df_reader.chunksize == chunksize
        for i_chunk in [0, 1, 2]:
            # Read a couple of chunks and make sure we see them
            # properly.
            df = df_reader.get_chunk()
            assert isinstance(df, DataFrame)
            assert not df.empty
            true_df = tips_df.iloc[
                chunksize * i_chunk: chunksize * (i_chunk + 1)]
            tm.assert_frame_equal(true_df, df)
Example #17
Source File: test_read_fwf.py From recruit with Apache License 2.0 | 6 votes |
def test_read_csv_compat():
    csv_data = """\
A,B,C,D,E
2011,58,360.242940,149.910199,11950.7
2011,59,444.953632,166.985655,11788.4
2011,60,364.136849,183.628767,11806.2
2011,61,413.836124,184.375703,11916.8
2011,62,502.953953,173.237159,12468.3
"""
    expected = read_csv(StringIO(csv_data), engine="python")

    fwf_data = """\
A   B     C            D            E
201158    360.242940   149.910199   11950.7
201159    444.953632   166.985655   11788.4
201160    364.136849   183.628767   11806.2
201161    413.836124   184.375703   11916.8
201162    502.953953   173.237159   12468.3
"""
    colspecs = [(0, 4), (4, 8), (8, 20), (21, 33), (34, 43)]
    result = read_fwf(StringIO(fwf_data), colspecs=colspecs)
    tm.assert_frame_equal(result, expected)
Example #18
Source File: test_network.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_parse_public_s3_bucket_chunked_python(self, tips_df):
    # Read with a chunksize using the Python parser
    chunksize = 5
    for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
        df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
                             chunksize=chunksize, compression=comp,
                             engine='python')
        assert df_reader.chunksize == chunksize
        for i_chunk in [0, 1, 2]:
            # Read a couple of chunks and make sure we see them properly.
            df = df_reader.get_chunk()
            assert isinstance(df, DataFrame)
            assert not df.empty
            true_df = tips_df.iloc[
                chunksize * i_chunk: chunksize * (i_chunk + 1)]
            tm.assert_frame_equal(true_df, df)
Example #19
Source File: test_excel.py From vnpy_crypto with MIT License | 6 votes |
def get_csv_refdf(self, basename):
    """
    Obtain the reference data from read_csv with the Python engine.

    Parameters
    ----------
    basename : str
        File base name, excluding file extension.

    Returns
    -------
    dfref : DataFrame
    """
    pref = os.path.join(self.dirpath, basename + '.csv')
    dfref = read_csv(pref, index_col=0, parse_dates=True, engine='python')
    return dfref
Example #20
Source File: test_stata.py From recruit with Apache License 2.0 | 6 votes |
def test_read_write_reread_dta14(self, file, parsed_114, version):
    file = getattr(self, file)
    parsed = self.read_dta(file)
    parsed.index.name = 'index'

    expected = self.read_csv(self.csv14)
    cols = ['byte_', 'int_', 'long_', 'float_', 'double_']
    for col in cols:
        expected[col] = expected[col]._convert(datetime=True, numeric=True)
    expected['float_'] = expected['float_'].astype(np.float32)
    expected['date_td'] = pd.to_datetime(
        expected['date_td'], errors='coerce')

    tm.assert_frame_equal(parsed_114, parsed)

    with tm.ensure_clean() as path:
        parsed_114.to_stata(path, {'date_td': 'td'}, version=version)
        written_and_read_again = self.read_dta(path)
        tm.assert_frame_equal(
            written_and_read_again.set_index('index'), parsed_114)
Example #21
Source File: test_network.py From vnpy_crypto with MIT License | 6 votes |
def test_read_csv_chunked_download(self, s3_resource, caplog):
    # 8 MB, S3FS uses 5MB chunks
    df = DataFrame(np.random.randn(100000, 4), columns=list('abcd'))
    buf = BytesIO()
    str_buf = StringIO()

    df.to_csv(str_buf)

    buf = BytesIO(str_buf.getvalue().encode('utf-8'))

    s3_resource.Bucket("pandas-test").put_object(
        Key="large-file.csv", Body=buf)

    with caplog.at_level(logging.DEBUG, logger='s3fs.core'):
        read_csv("s3://pandas-test/large-file.csv", nrows=5)
        # log of fetch_range (start, stop)
        assert ((0, 5505024) in set(x.args[-2:] for x in caplog.records))
Example #22
Source File: test_network.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def tips_df(datapath):
    """DataFrame with the tips dataset."""
    return read_csv(datapath('io', 'parser', 'data', 'tips.csv'))
Example #23
Source File: test_stata.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_read_write_reread_dta15(self, file):
    expected = self.read_csv(self.csv15)
    expected['byte_'] = expected['byte_'].astype(np.int8)
    expected['int_'] = expected['int_'].astype(np.int16)
    expected['long_'] = expected['long_'].astype(np.int32)
    expected['float_'] = expected['float_'].astype(np.float32)
    expected['double_'] = expected['double_'].astype(np.float64)
    expected['date_td'] = expected['date_td'].apply(
        datetime.strptime, args=('%Y-%m-%d',))

    file = getattr(self, file)
    parsed = self.read_dta(file)

    tm.assert_frame_equal(expected, parsed)
Example #24
Source File: test_excel.py From Computable with MIT License | 5 votes |
def read_csv(self, *args, **kwds):
    kwds = kwds.copy()
    kwds['engine'] = 'python'
    return read_csv(*args, **kwds)
Example #25
Source File: test_stata.py From Computable with MIT License | 5 votes |
def test_read_dta8(self):
    expected = read_csv(self.csv8, parse_dates=True, sep='\t')
    parsed = self.read_dta(self.dta8)
    tm.assert_frame_equal(parsed, expected)
Example #26
Source File: test_stata.py From Computable with MIT License | 5 votes |
def test_read_dta7(self):
    expected = read_csv(self.csv7, parse_dates=True, sep='\t')
    parsed = self.read_dta(self.dta7)
    tm.assert_frame_equal(parsed, expected)
Example #27
Source File: test_stata.py From Computable with MIT License | 5 votes |
def test_write_dta6(self):
    skip_if_not_little_endian()

    original = self.read_csv(self.csv3)
    original.index.name = 'index'

    with tm.ensure_clean() as path:
        original.to_stata(path, None, False)
        written_and_read_again = self.read_dta(path)
        tm.assert_frame_equal(written_and_read_again.set_index('index'),
                              original)
Example #28
Source File: test_stata.py From Computable with MIT License | 5 votes |
def test_read_dta3(self):
    parsed = self.read_dta(self.dta3)
    parsed_13 = self.read_dta(self.dta3_13)

    # match stata here
    expected = self.read_csv(self.csv3)
    expected = expected.astype(np.float32)
    expected['year'] = expected['year'].astype(np.int32)
    expected['quarter'] = expected['quarter'].astype(np.int16)

    tm.assert_frame_equal(parsed, expected)
    tm.assert_frame_equal(parsed_13, expected)
Example #29
Source File: test_stata.py From Computable with MIT License | 5 votes |
def read_csv(self, file):
    return read_csv(file, parse_dates=True)
Example #30
Source File: test_network.py From vnpy_crypto with MIT License | 5 votes |
def test_read_csv_handles_boto_s3_object(self, s3_resource, tips_file):
    # see gh-16135

    s3_object = s3_resource.meta.client.get_object(
        Bucket='pandas-test', Key='tips.csv')

    result = read_csv(BytesIO(s3_object["Body"].read()), encoding='utf8')
    assert isinstance(result, DataFrame)
    assert not result.empty

    expected = read_csv(tips_file)
    tm.assert_frame_equal(result, expected)