Python pandas.read_fwf() Examples
The following are 30 code examples of pandas.read_fwf().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: test_read_fwf.py From vnpy_crypto with MIT License | 9 votes |
def test_fwf_colspecs_None(self):
    """Check ``read_fwf`` with ``None`` as a colspec start or end (GH 7079).

    The expected frames show that a ``None`` start reads from the beginning
    of the line and a ``None`` end reads through to the end of the line.
    """
    data = """\
123456
456789
"""
    # Each case pairs a colspecs list with the rows it must produce.
    cases = [
        ([(0, 3), (3, None)], [[123, 456], [456, 789]]),
        ([(None, 3), (3, 6)], [[123, 456], [456, 789]]),
        ([(0, None), (3, None)], [[123456, 456], [456789, 789]]),
        ([(None, None), (3, 6)], [[123456, 456], [456789, 789]]),
    ]
    for colspecs, rows in cases:
        result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
        expected = DataFrame(rows)
        tm.assert_frame_equal(result, expected)
Example #2
Source File: helios.py From heliopy with GNU General Public License v3.0 | 7 votes |
def load_local_file(self, interval):
    """Load the fixed-width file for *interval* into a ``Time``-indexed frame.

    The file at ``self.local_path(interval)`` is parsed with explicit column
    positions. The year field is offset by 1900, and the year, day-of-year,
    hour, minute and second fields are combined into a ``Time`` column, which
    is kept as a column and also used as the index. The individual date/time
    fields are dropped from the result.
    """
    column_names = ['probe', 'year', 'doy', 'hour', 'minute', 'second',
                    'naverage', 'Bx', 'By', 'Bz', '|B|',
                    'sigma_Bx', 'sigma_By', 'sigma_Bz']
    column_bounds = [(1, 2), (2, 4), (4, 7), (7, 9), (9, 11), (11, 13),
                     (13, 15), (15, 22), (22, 29), (29, 36), (36, 42),
                     (42, 48), (48, 54), (54, 60)]
    frame = pd.read_fwf(self.local_path(interval), names=column_names,
                        header=None, colspecs=column_bounds)

    # The file stores the year relative to 1900.
    frame['year'] += 1900

    # Build the timestamp; ``doy - 1`` so that day 1 maps onto 1 January.
    frame['Time'] = (
        pd.to_datetime(frame['year'], format='%Y')
        + pd.to_timedelta(frame['doy'] - 1, unit='d')
        + pd.to_timedelta(frame['hour'], unit='h')
        + pd.to_timedelta(frame['minute'], unit='m')
        + pd.to_timedelta(frame['second'], unit='s')
    )

    date_parts = ['year', 'doy', 'hour', 'minute', 'second']
    frame = frame.drop(date_parts, axis=1)
    return frame.set_index('Time', drop=False)
Example #3
Source File: marriage.py From DataExploration with MIT License | 6 votes |
def ReadFemResp1995():
    """Reads respondent data from NSFG Cycle 5.

    Reads five fixed-width fields from the gzip-compressed file
    '1995FemRespData.dat.gz', replaces the codes 98 and 99 in `timesmar`
    with NaN, adds a boolean `evrmarry` column (timesmar > 0), and passes
    the frame to CleanData before returning it.

    returns: DataFrame
    """
    dat_file = '1995FemRespData.dat.gz'
    names = ['cmintvw', 'timesmar', 'cmmarrhx', 'cmbirth', 'finalwgt']

    # Each pair is written as (start - 1, end); presumably the numbers come
    # from a 1-based codebook and the "- 1" converts the start to the 0-based
    # offset used by read_fwf -- TODO confirm against the codebook.
    colspecs = [(12360-1, 12363),
                (4637-1, 4638),
                (11759-1, 11762),
                (14-1, 16),
                (12350-1, 12359)]

    df = pd.read_fwf(dat_file,
                     compression='gzip',
                     colspecs=colspecs,
                     names=names)

    # Assign the result back rather than calling replace(inplace=True) on the
    # attribute-accessed column: that chained in-place form does not update
    # `df` when pandas Copy-on-Write is active.
    df['timesmar'] = df['timesmar'].replace([98, 99], np.nan)
    df['evrmarry'] = (df.timesmar > 0)

    CleanData(df)
    return df
Example #4
Source File: test_read_fwf.py From vnpy_crypto with MIT License | 6 votes |
# Regression test (tagged "GH 3594" in its inline comment): reads fixed-width rows with
# read_fwf using widths [16] + [8] * 7, takes column 0 as a date-parsed index (parser
# format '%Y%j%H%M%S'), and asserts that selecting each column gives a non-empty result.
# NOTE(review): this listing is flattened onto one line, so the line breaks and the column
# padding inside the triple-quoted `data` literal are not preserved; as shown the fields
# presumably no longer line up with the declared widths -- restore from the upstream file.
def test_fwf_regression(self): # GH 3594 # turns out 'T060' is parsable as a datetime slice! tzlist = [1, 10, 20, 30, 60, 80, 100] ntz = len(tzlist) tcolspecs = [16] + [8] * ntz tcolnames = ['SST'] + ["T%03d" % z for z in tzlist[1:]] data = """ 2009164202000 9.5403 9.4105 8.6571 7.8372 6.0612 5.8843 5.5192 2009164203000 9.5435 9.2010 8.6167 7.8176 6.0804 5.8728 5.4869 2009164204000 9.5873 9.1326 8.4694 7.5889 6.0422 5.8526 5.4657 2009164205000 9.5810 9.0896 8.4009 7.4652 6.0322 5.8189 5.4379 2009164210000 9.6034 9.0897 8.3822 7.4905 6.0908 5.7904 5.4039 """ df = read_fwf(StringIO(data), index_col=0, header=None, names=tcolnames, widths=tcolspecs, parse_dates=True, date_parser=lambda s: datetime.strptime(s, '%Y%j%H%M%S')) for c in df.columns: res = df.loc[:, c] assert len(res)
Example #5
Source File: test_read_fwf.py From vnpy_crypto with MIT License | 6 votes |
def test_fwf_compression(self):
    """Check that ``read_fwf`` reads gzip- and bz2-compressed files.

    The same text is parsed once from memory and once from each compressed
    file; every compressed read must equal the in-memory result.
    """
    try:
        import gzip
        import bz2
    except ImportError:
        pytest.skip("Need gzip and bz2 to run this test")

    # NOTE(review): rows are placed one per line here; the listing this was
    # recovered from had its line breaks flattened. The expected frame is
    # derived from the same text, so the comparison does not depend on it.
    data = """1111111111
2222222222
3333333333""".strip()
    widths = [5, 5]
    names = ['one', 'two']
    expected = read_fwf(StringIO(data), widths=widths, names=names)

    # The compressed writers below are opened in binary mode.
    if compat.PY3:
        data = bytes(data, encoding='utf-8')

    comps = [('gzip', gzip.GzipFile), ('bz2', bz2.BZ2File)]
    for comp_name, compresser in comps:
        with tm.ensure_clean() as path:
            # Context manager guarantees the handle is closed (and the
            # compressed stream flushed) even if write() raises.
            with compresser(path, mode='wb') as tmp:
                tmp.write(data)

            result = read_fwf(path, widths=widths, names=names,
                              compression=comp_name)
            tm.assert_frame_equal(result, expected)
Example #6
Source File: test_io.py From modin with Apache License 2.0 | 6 votes |
def test_fwf_file_parse_dates():
    """Compare ``pd.read_fwf`` with ``pandas.read_fwf`` when ``parse_dates``
    combines two columns, given first as a nested list and then as a dict.

    ``pd`` is presumably Modin's pandas API (the results are named
    ``modin_df``) -- confirm against the module imports.
    """
    dates = pandas.date_range("2000", freq="h", periods=10)

    # Header line followed by one line per timestamp; col1/col3 count 10..19.
    pieces = ["col1 col2 col3 col4"]
    for offset, stamp in enumerate(dates):
        number = str(offset + 10)
        pieces.append(
            "\n{col1} {col2} {col3} {col4}".format(
                col1=number,
                col2=str(stamp.date()),
                col3=number,
                col4=str(stamp.time()),
            )
        )
    setup_fwf_file(overwrite=True, fwf_data="".join(pieces))

    for parse_dates in ([["col2", "col4"]], {"time": ["col2", "col4"]}):
        pandas_df = pandas.read_fwf(TEST_FWF_FILENAME, parse_dates=parse_dates)
        modin_df = pd.read_fwf(TEST_FWF_FILENAME, parse_dates=parse_dates)
        df_equals(modin_df, pandas_df)

    teardown_fwf_file()
Example #7
Source File: io.py From modin with Apache License 2.0 | 6 votes |
def read_fwf(
    cls, filepath_or_buffer, colspecs="infer", widths=None, infer_nrows=100, **kwds
):
    """Read a fixed-width file by delegating to ``pandas.read_fwf``.

    A ``pandas.DataFrame`` result is converted with ``cls.from_pandas``. A
    ``TextFileReader`` result is returned with its ``read`` method wrapped so
    that what it reads is converted the same way. Any other result is
    returned unchanged.
    """
    ErrorMessage.default_to_pandas("`read_fwf`")
    result = pandas.read_fwf(
        filepath_or_buffer,
        colspecs=colspecs,
        widths=widths,
        infer_nrows=infer_nrows,
        **kwds,
    )

    if isinstance(result, pandas.DataFrame):
        return cls.from_pandas(result)

    if isinstance(result, pandas.io.parsers.TextFileReader):
        # Overwriting the read method should return a Modin DataFrame for
        # calls to __next__ and get_chunk
        original_read = result.read

        def read_and_convert(*args, **kwargs):
            return cls.from_pandas(original_read(*args, **kwargs))

        result.read = read_and_convert

    return result
Example #8
Source File: test_read_fwf.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def test_fwf_colspecs_None(self):
    """Exercise ``None`` bounds inside ``colspecs`` (GH 7079).

    Four colspec combinations are parsed from the same two-row input and
    compared with the frames they are expected to produce.
    """
    data = """\
123456
456789
"""

    def parse(colspecs):
        # Fresh buffer per call so every parse starts at the first row.
        return read_fwf(StringIO(data), colspecs=colspecs, header=None)

    halves = [[123, 456], [456, 789]]
    whole_then_tail = [[123456, 456], [456789, 789]]

    tm.assert_frame_equal(parse([(0, 3), (3, None)]), DataFrame(halves))
    tm.assert_frame_equal(parse([(None, 3), (3, 6)]), DataFrame(halves))
    tm.assert_frame_equal(parse([(0, None), (3, None)]),
                          DataFrame(whole_then_tail))
    tm.assert_frame_equal(parse([(None, None), (3, 6)]),
                          DataFrame(whole_then_tail))
Example #9
Source File: test_io.py From modin with Apache License 2.0 | 6 votes |
# Writes a small table (header "a b c d" plus five rows) through setup_fwf_file, reads it
# with pandas.read_fwf and pd.read_fwf using the supplied `usecols`, checks the two results
# with df_equals, then calls teardown_fwf_file. `pd` is presumably Modin's pandas API
# (the result is named modin_df) -- confirm against the module imports.
# NOTE(review): this listing is flattened onto one line; the line breaks and column padding
# of the triple-quoted `fwf_data` literal are not preserved -- restore from the upstream file.
def test_fwf_file_usecols(usecols): fwf_data = """a b c d id8141 360.242940 149.910199 11950.7 id1594 444.953632 166.985655 11788.4 id1849 364.136849 183.628767 11806.2 id1230 413.836124 184.375703 11916.8 id1948 502.953953 173.237159 12468.3""" setup_fwf_file(overwrite=True, fwf_data=fwf_data) pandas_df = pandas.read_fwf(TEST_FWF_FILENAME, usecols=usecols) modin_df = pd.read_fwf(TEST_FWF_FILENAME, usecols=usecols) df_equals(modin_df, pandas_df) teardown_fwf_file()
Example #10
Source File: test_read_fwf.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def test_fwf_compression(self):
    """Verify ``read_fwf`` on gzip and bz2 files against an in-memory parse.

    For each codec the text is written to a temporary compressed file and
    read back with the matching ``compression`` argument; the result must
    equal the frame parsed directly from the uncompressed text.
    """
    try:
        import gzip
        import bz2
    except ImportError:
        pytest.skip("Need gzip and bz2 to run this test")

    # NOTE(review): one row per line is a reconstruction; the listing this
    # came from had its line breaks flattened. `expected` is computed from
    # the same text, so the check itself is unaffected.
    data = """1111111111
2222222222
3333333333""".strip()
    widths = [5, 5]
    names = ['one', 'two']
    expected = read_fwf(StringIO(data), widths=widths, names=names)

    # Binary-mode writers need bytes.
    if compat.PY3:
        data = bytes(data, encoding='utf-8')

    comps = [('gzip', gzip.GzipFile), ('bz2', bz2.BZ2File)]
    for comp_name, compresser in comps:
        with tm.ensure_clean() as path:
            # `with` closes the compressed file even when write() fails, so
            # no handle is leaked and the stream is always finalised.
            with compresser(path, mode='wb') as tmp:
                tmp.write(data)

            result = read_fwf(path, widths=widths, names=names,
                              compression=comp_name)
            tm.assert_frame_equal(result, expected)
Example #11
Source File: test_read_fwf.py From elasticintel with GNU General Public License v3.0 | 5 votes |
# Parses the same account table twice with read_fwf -- once with explicit colspecs and once
# with no colspecs argument -- and asserts that the two frames are equal. The table has
# names containing spaces and rows with missing fields.
# NOTE(review): this listing is flattened onto one line; the line breaks and column padding
# of the triple-quoted `test` literal are not preserved, so as shown the text presumably no
# longer matches the colspecs -- restore from the upstream file.
def test_full_file_with_spaces_and_missing(self): # File with spaces and missing values in columns test = """ Account Name Balance CreditLimit AccountCreated 101 10000.00 1/17/1998 312 Gerard Butler 90.00 1000.00 8/6/2003 868 5/25/1985 761 Jada Pinkett-Smith 49654.87 100000.00 12/5/2006 317 Bill Murray 789.65 """.strip('\r\n') colspecs = ((0, 7), (8, 28), (30, 38), (42, 53), (56, 70)) expected = read_fwf(StringIO(test), colspecs=colspecs) tm.assert_frame_equal(expected, read_fwf(StringIO(test)))
Example #12
Source File: test_read_fwf.py From elasticintel with GNU General Public License v3.0 | 5 votes |
# Reads two fixed-width rows with read_fwf(thousands=',') and explicit colspecs, then
# compares df.values with the expected array, in which the text "2,334.0" appears as 2334.
# NOTE(review): this listing is flattened onto one line; the line breaks and column padding
# of the triple-quoted `data` literal are not preserved, so as shown the text presumably no
# longer matches the colspecs -- restore from the upstream file.
def test_1000_fwf(self): data = """ 1 2,334.0 5 10 13 10. """ expected = np.array([[1, 2334., 5], [10, 13, 10]]) df = read_fwf(StringIO(data), colspecs=[(0, 3), (3, 11), (12, 16)], thousands=',') tm.assert_almost_equal(df.values, expected)
Example #13
Source File: test_read_fwf.py From elasticintel with GNU General Public License v3.0 | 5 votes |
# Parses an irregular account table twice with read_fwf -- once with explicit colspecs and
# once with no colspecs argument -- and asserts that the two frames are equal.
# NOTE(review): this listing is flattened onto one line; the line breaks and column padding
# of the triple-quoted `test` literal are not preserved, so as shown the text presumably no
# longer matches the colspecs -- restore from the upstream file.
def test_messed_up_data(self): # Completely messed up file test = """ Account Name Balance Credit Limit Account Created 101 10000.00 1/17/1998 312 Gerard Butler 90.00 1000.00 761 Jada Pinkett-Smith 49654.87 100000.00 12/5/2006 317 Bill Murray 789.65 """.strip('\r\n') colspecs = ((2, 10), (15, 33), (37, 45), (49, 61), (64, 79)) expected = read_fwf(StringIO(test), colspecs=colspecs) tm.assert_frame_equal(expected, read_fwf(StringIO(test)))
Example #14
Source File: test_read_fwf.py From elasticintel with GNU General Public License v3.0 | 5 votes |
# Parses a table whose fields are padded with a mix of filler characters, passing the
# delimiter string ' +~.\\' to read_fwf, once with explicit colspecs and once without a
# colspecs argument, and asserts that the two frames are equal.
# NOTE(review): this listing is flattened onto one line; the line breaks and any runs of
# spaces in the raw triple-quoted `test` literal are not preserved, so as shown the text
# presumably no longer matches the colspecs -- restore from the upstream file.
def test_multiple_delimiters(self): test = r""" col1~~~~~col2 col3++++++++++++++++++col4 ~~22.....11.0+++foo~~~~~~~~~~Keanu Reeves 33+++122.33\\\bar.........Gerard Butler ++44~~~~12.01 baz~~Jennifer Love Hewitt ~~55 11+++foo++++Jada Pinkett-Smith ..66++++++.03~~~bar Bill Murray """.strip('\r\n') colspecs = ((0, 4), (7, 13), (15, 19), (21, 41)) expected = read_fwf(StringIO(test), colspecs=colspecs, delimiter=' +~.\\') tm.assert_frame_equal(expected, read_fwf(StringIO(test), delimiter=' +~.\\'))
Example #15
Source File: pfile.py From Ocean-Data-Map-Project with GNU General Public License v3.0 | 5 votes |
def _read_file(self, skip, colspecs, names):
    """Parse ``self.filename`` as fixed-width text into ``self.dataframe``.

    ``skip`` is forwarded as ``skiprows``; ``colspecs`` and ``names`` are
    forwarded unchanged. The file is read with no header row and without an
    index column. When the resulting frame has a ``pres`` column, a ``depth``
    column is added from ``seawater.eos80.dpth`` applied to that column and
    ``self.meta['latitude']``.
    """
    self.dataframe = pd.read_fwf(
        self.filename,
        skiprows=skip,
        colspecs=colspecs,
        header=None,
        names=names,
        index_col=False,
    )

    # Nothing more to do unless the file carries a 'pres' column.
    if 'pres' not in self.dataframe.columns.values:
        return

    pressure = self.dataframe['pres']
    latitude = self.meta['latitude']
    self.dataframe['depth'] = seawater.eos80.dpth(pressure, latitude)
Example #16
Source File: test_io.py From modin with Apache License 2.0 | 5 votes |
def test_fwf_file_skiprows():
    """Compare ``pd.read_fwf`` with ``pandas.read_fwf`` for ``skiprows``.

    Two option sets are checked: an integer ``skiprows``, and a list
    ``skiprows`` combined with ``usecols``.
    """
    setup_fwf_file(overwrite=True)

    option_sets = (
        {"skiprows": 2},
        {"usecols": [0, 4, 7], "skiprows": [2, 5]},
    )
    for options in option_sets:
        pandas_df = pandas.read_fwf(TEST_FWF_FILENAME, **options)
        modin_df = pd.read_fwf(TEST_FWF_FILENAME, **options)
        df_equals(modin_df, pandas_df)
Example #17
Source File: test_io.py From modin with Apache License 2.0 | 5 votes |
# Writes a small table (header "a b c d" plus five rows) through setup_fwf_file, reads it
# with pandas.read_fwf and pd.read_fwf using index_col="c", checks the two results with
# df_equals, then calls teardown_fwf_file. `pd` is presumably Modin's pandas API (the
# result is named modin_df) -- confirm against the module imports.
# NOTE(review): this listing is flattened onto one line; the line breaks and column padding
# of the triple-quoted `fwf_data` literal are not preserved -- restore from the upstream file.
def test_fwf_file_index_col(): fwf_data = """a b c d id8141 360.242940 149.910199 11950.7 id1594 444.953632 166.985655 11788.4 id1849 364.136849 183.628767 11806.2 id1230 413.836124 184.375703 11916.8 id1948 502.953953 173.237159 12468.3""" setup_fwf_file(overwrite=True, fwf_data=fwf_data) pandas_df = pandas.read_fwf(TEST_FWF_FILENAME, index_col="c") modin_df = pd.read_fwf(TEST_FWF_FILENAME, index_col="c") df_equals(modin_df, pandas_df) teardown_fwf_file()
Example #18
Source File: test_io.py From modin with Apache License 2.0 | 5 votes |
def test_fwf_file_skipfooter():
    """Compare ``pd.read_fwf`` with ``pandas.read_fwf`` for ``skipfooter=2``."""
    setup_fwf_file(overwrite=True)

    read_options = {"skipfooter": 2}
    pandas_df = pandas.read_fwf(TEST_FWF_FILENAME, **read_options)
    modin_df = pd.read_fwf(TEST_FWF_FILENAME, **read_options)

    df_equals(modin_df, pandas_df)
Example #19
Source File: datasets.py From Doubly-Stochastic-DGP with Apache License 2.0 | 5 votes |
def download_data(self):
    """Download the UCI CBM archive and store its data table as CSV.

    The zip archive at ``uci_base + '00316/UCI%20CBM%20Dataset.zip'`` is
    fetched and extracted under ``/tmp/``. The extracted ``data.txt`` is
    parsed as fixed-width text with no header, its last column is dropped,
    and the remaining rows are written to
    ``self.csv_file_path(self.name)``.
    """
    url = '{}{}'.format(uci_base, '00316/UCI%20CBM%20Dataset.zip')

    # The whole archive is read into memory before being unpacked.
    with urlopen(url) as zipresp:
        with ZipFile(BytesIO(zipresp.read())) as zfile:
            zfile.extractall('/tmp/')

    data = pandas.read_fwf('/tmp/UCI CBM Dataset/data.txt', header=None).values
    data = data[:, :-1]  # drop the last column

    # newline='' is required by the csv module for files handed to a writer;
    # without it, platforms that translate '\n' emit an extra '\r' per row.
    with open(self.csv_file_path(self.name), 'w', newline='') as f:
        csv.writer(f).writerows(data)
Example #20
Source File: thinkstats2.py From DataExploration with MIT License | 5 votes |
def ReadFixedWidth(self, filename, **options):
    """Reads a fixed width ASCII file.

    filename: string filename
    options: keyword arguments passed through to pandas.read_fwf

    returns: DataFrame
    """
    return pandas.read_fwf(
        filename,
        colspecs=self.colspecs,
        names=self.names,
        **options
    )
Example #21
Source File: test_read_fwf.py From elasticintel with GNU General Public License v3.0 | 5 votes |
# Parses an account table whose name column contains spaces twice with read_fwf -- once
# with explicit colspecs and once with no colspecs argument -- and asserts that the two
# frames are equal.
# NOTE(review): this listing is flattened onto one line; the line breaks and column padding
# of the triple-quoted `test` literal are not preserved, so as shown the text presumably no
# longer matches the colspecs -- restore from the upstream file.
def test_full_file_with_spaces(self): # File with spaces in columns test = """ Account Name Balance CreditLimit AccountCreated 101 Keanu Reeves 9315.45 10000.00 1/17/1998 312 Gerard Butler 90.00 1000.00 8/6/2003 868 Jennifer Love Hewitt 0 17000.00 5/25/1985 761 Jada Pinkett-Smith 49654.87 100000.00 12/5/2006 317 Bill Murray 789.65 5000.00 2/5/2007 """.strip('\r\n') colspecs = ((0, 7), (8, 28), (30, 38), (42, 53), (56, 70)) expected = read_fwf(StringIO(test), colspecs=colspecs) tm.assert_frame_equal(expected, read_fwf(StringIO(test)))
Example #22
Source File: test_read_fwf.py From elasticintel with GNU General Public License v3.0 | 5 votes |
# Parses a table with no missing values (header "index A B C", seven rows) twice with
# read_fwf -- once with explicit colspecs and once with no colspecs argument -- and asserts
# that the two frames are equal.
# NOTE(review): this listing is flattened onto one line; the line breaks and column padding
# of the triple-quoted `test` literal are not preserved, so as shown the text presumably no
# longer matches the colspecs -- restore from the upstream file.
def test_full_file(self): # File with all values test = """index A B C 2000-01-03T00:00:00 0.980268513777 3 foo 2000-01-04T00:00:00 1.04791624281 -4 bar 2000-01-05T00:00:00 0.498580885705 73 baz 2000-01-06T00:00:00 1.12020151869 1 foo 2000-01-07T00:00:00 0.487094399463 0 bar 2000-01-10T00:00:00 0.836648671666 2 baz 2000-01-11T00:00:00 0.157160753327 34 foo""" colspecs = ((0, 19), (21, 35), (38, 40), (42, 45)) expected = read_fwf(StringIO(test), colspecs=colspecs) tm.assert_frame_equal(expected, read_fwf(StringIO(test)))
Example #23
Source File: test_read_fwf.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_bool_header_arg(self):
    """Passing a bool as ``header`` must raise ``TypeError`` (see gh-6114)."""
    # NOTE(review): one value per line is a reconstruction; the listing this
    # came from had its line breaks and indentation flattened.
    data = """\
MyColumn
a
b
a
b"""
    for header in (True, False):
        with pytest.raises(TypeError):
            read_fwf(StringIO(data), header=header)
Example #24
Source File: test_read_fwf.py From elasticintel with GNU General Public License v3.0 | 5 votes |
# Parses the same text with read_csv(delim_whitespace=True) and with read_fwf, both given
# skiprows=[0, 2], and asserts that the two frames are equal.
# NOTE(review): this listing is flattened onto one line; the line breaks and column padding
# of the triple-quoted `test` literal are not preserved, so which text forms rows 0 and 2
# cannot be read off this listing -- restore from the upstream file.
def test_skiprows_by_index_inference(self): test = """ To be skipped Not To Be Skipped Once more to be skipped 123 34 8 123 456 78 9 456 """.strip() expected = read_csv(StringIO(test), skiprows=[0, 2], delim_whitespace=True) tm.assert_frame_equal(expected, read_fwf( StringIO(test), skiprows=[0, 2]))
Example #25
Source File: test_read_fwf.py From elasticintel with GNU General Public License v3.0 | 5 votes |
# Reads fixed-width rows with read_fwf(comment='#') and explicit colspecs, then compares
# df.values with the expected array; the "#hello world" text does not appear in it.
# NOTE(review): this listing is flattened onto one line; the line breaks and column padding
# of the triple-quoted `data` literal are not preserved, so as shown the text presumably no
# longer matches the colspecs -- restore from the upstream file.
def test_comment_fwf(self): data = """ 1 2. 4 #hello world 5 NaN 10.0 """ expected = np.array([[1, 2., 4], [5, np.nan, 10.]]) df = read_fwf(StringIO(data), colspecs=[(0, 3), (4, 9), (9, 25)], comment='#') tm.assert_almost_equal(df.values, expected)
Example #26
Source File: test_read_fwf.py From elasticintel with GNU General Public License v3.0 | 5 votes |
# Regression test (tagged "GH 3594" in its inline comment): reads fixed-width rows with
# read_fwf using widths [16] + [8] * 7, takes column 0 as a date-parsed index (parser
# format '%Y%j%H%M%S'), and asserts that selecting each column gives a non-empty result.
# NOTE(review): this listing is flattened onto one line, so the line breaks and the column
# padding inside the triple-quoted `data` literal are not preserved; as shown the fields
# presumably no longer line up with the declared widths -- restore from the upstream file.
def test_fwf_regression(self): # GH 3594 # turns out 'T060' is parsable as a datetime slice! tzlist = [1, 10, 20, 30, 60, 80, 100] ntz = len(tzlist) tcolspecs = [16] + [8] * ntz tcolnames = ['SST'] + ["T%03d" % z for z in tzlist[1:]] data = """ 2009164202000 9.5403 9.4105 8.6571 7.8372 6.0612 5.8843 5.5192 2009164203000 9.5435 9.2010 8.6167 7.8176 6.0804 5.8728 5.4869 2009164204000 9.5873 9.1326 8.4694 7.5889 6.0422 5.8526 5.4657 2009164205000 9.5810 9.0896 8.4009 7.4652 6.0322 5.8189 5.4379 2009164210000 9.6034 9.0897 8.3822 7.4905 6.0908 5.7904 5.4039 """ df = read_fwf(StringIO(data), index_col=0, header=None, names=tcolnames, widths=tcolspecs, parse_dates=True, date_parser=lambda s: datetime.strptime(s, '%Y%j%H%M%S')) for c in df.columns: res = df.loc[:, c] assert len(res)
Example #27
Source File: test_read_fwf.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_fwf_colspecs_is_list_or_tuple_of_two_element_tuples(self):
    """A colspec whose start is a string must be rejected with ``TypeError``.

    The error message has to match 'Each column specification must be.+'.
    """
    data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
    bad_colspecs = [('a', 1)]
    message_pattern = 'Each column specification must be.+'

    with tm.assert_raises_regex(TypeError, message_pattern):
        read_fwf(StringIO(data), bad_colspecs)
Example #28
Source File: test_read_fwf.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_BytesIO_input(self):
    """Read UTF-8 encoded bytes from a ``BytesIO`` with ``widths=[2, 2]``.

    The first line supplies the column labels and the second the single data
    row; each four-character line is split into two two-character fields.
    Skipped when ``compat.PY3`` is false.
    """
    if not compat.PY3:
        pytest.skip("Bytes-related test - only needs to work on Python 3")

    payload = "שלום\nשלום".encode('utf8')
    result = read_fwf(BytesIO(payload), widths=[2, 2], encoding='utf8')

    expected = DataFrame([["של", "ום"]], columns=["של", "ום"])
    tm.assert_frame_equal(result, expected)
Example #29
Source File: datasets.py From Conditional_Density_Estimation with MIT License | 5 votes |
def get_df(self):
    """Return the dataset as a DataFrame with incomplete rows removed.

    Calls ``self.download_dataset()`` first when ``self.needs_download`` is
    truthy, then parses ``self.data_file_path`` as fixed-width text without a
    header row and drops every row containing a missing value.
    """
    if self.needs_download:
        self.download_dataset()

    raw = pd.read_fwf(self.data_file_path, header=None)
    return raw.dropna()
Example #30
Source File: datasets.py From Conditional_Density_Estimation with MIT License | 5 votes |
def get_df(self):
    """Return the dataset parsed from ``self.data_file_path``.

    Calls ``self.download_dataset()`` first when ``self.needs_download`` is
    truthy; the file is then read as fixed-width text without a header row.
    """
    if self.needs_download:
        self.download_dataset()

    return pd.read_fwf(self.data_file_path, header=None)