Python pandas.read_sas() Examples
The following are 30 code examples of pandas.read_sas().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: io.py From modin with Apache License 2.0 | 8 votes |
def read_sas(
    cls,
    filepath_or_buffer,
    format=None,
    index=None,
    encoding=None,
    chunksize=None,
    iterator=False,
):  # pragma: no cover
    """Read a SAS file by delegating to ``pandas.read_sas``.

    The fallback is reported through ``ErrorMessage.default_to_pandas`` and
    the pandas result is wrapped with ``cls.from_pandas``.

    All arguments are forwarded to ``pandas.read_sas`` unchanged:
    ``filepath_or_buffer`` positionally, the rest by keyword.

    NOTE(review): with ``iterator=True`` or a ``chunksize``, pandas returns a
    reader rather than a DataFrame; whether ``cls.from_pandas`` accepts that
    is not visible here -- confirm.
    """
    ErrorMessage.default_to_pandas("`read_sas`")
    forwarded = dict(
        format=format,
        index=index,
        encoding=encoding,
        chunksize=chunksize,
        iterator=iterator,
    )
    pandas_result = pandas.read_sas(filepath_or_buffer, **forwarded)
    return cls.from_pandas(pandas_result)
Example #2
Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 8 votes |
def test_encoding_options(datapath):
    """Check that ``encoding='utf-8'`` matches decoding the columns by hand.

    The file is read once without an encoding and once with
    ``encoding='utf-8'``; after manually decoding every column that supports
    ``.str.decode``, both frames must be equal. A second pass reads the file
    with ``convert_header_text=False`` and checks that the decoded column
    names agree with the first frame's.

    :param datapath: fixture that resolves a path inside the test data tree.
    """
    fname = datapath("io", "sas", "data", "test1.sas7bdat")
    df1 = pd.read_sas(fname)
    df2 = pd.read_sas(fname, encoding='utf-8')
    for col in df1.columns:
        try:
            df1[col] = df1[col].str.decode('utf-8')
        except AttributeError:
            # Columns that do not expose ``.str`` are left exactly as read.
            pass
    tm.assert_frame_equal(df1, df2)

    from pandas.io.sas.sas7bdat import SAS7BDATReader
    rdr = SAS7BDATReader(fname, convert_header_text=False)
    try:
        df3 = rdr.read()
    finally:
        # Release the reader even if read() raises.
        rdr.close()
    for x, y in zip(df1.columns, df3.columns):
        assert x == y.decode()
Example #3
Source File: format.py From dataiku-contrib with Apache License 2.0 | 6 votes |
def get_format_extractor(self, stream, schema=None):
    """Build the SAS extractor for ``stream``.

    :param stream: input stream, handed straight to ``SASFormatExtractor``.
    :param schema: optional schema, forwarded unchanged (default ``None``).
    :return: a ``SASFormatExtractor`` created with this object's ``config``.
    """
    extractor = SASFormatExtractor(stream, schema, self.config)
    return extractor


# Fix for the stream class provided by DSS
# Seek could be disabled by a one-liner like the following one but read_sas may seek forward
# self.stream.seek = types.MethodType(lambda self, _: False, self.stream)
Example #4
Source File: test_sas7bdat.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def test_encoding_options():
    """Check that ``encoding='utf-8'`` matches decoding the columns by hand.

    ``test1.sas7bdat`` (located via ``tm.get_data_path()``) is read with and
    without an encoding; after manually decoding every column that supports
    ``.str.decode`` the two frames must be equal. The header names read with
    ``convert_header_text=False`` must decode to the same column names.
    """
    dirpath = tm.get_data_path()
    fname = os.path.join(dirpath, "test1.sas7bdat")
    df1 = pd.read_sas(fname)
    df2 = pd.read_sas(fname, encoding='utf-8')
    for col in df1.columns:
        try:
            df1[col] = df1[col].str.decode('utf-8')
        except AttributeError:
            # Columns that do not expose ``.str`` are left exactly as read.
            pass
    tm.assert_frame_equal(df1, df2)

    from pandas.io.sas.sas7bdat import SAS7BDATReader
    rdr = SAS7BDATReader(fname, convert_header_text=False)
    try:
        df3 = rdr.read()
    finally:
        # Release the reader even if read() raises.
        rdr.close()
    for x, y in zip(df1.columns, df3.columns):
        assert x == y.decode()
Example #5
Source File: test_sas7bdat.py From recruit with Apache License 2.0 | 6 votes |
def test_encoding_options(datapath):
    """Verify that reading with ``encoding='utf-8'`` equals manual decoding.

    Steps:
      1. Read ``test1.sas7bdat`` without and with ``encoding='utf-8'``.
      2. Decode each column of the first frame where ``.str.decode`` works.
      3. Require both frames to be equal.
      4. Re-read with ``convert_header_text=False`` and require the decoded
         header names to match the first frame's column names.

    :param datapath: fixture that resolves a path inside the test data tree.
    """
    fname = datapath("io", "sas", "data", "test1.sas7bdat")
    df1 = pd.read_sas(fname)
    df2 = pd.read_sas(fname, encoding='utf-8')
    for col in df1.columns:
        try:
            df1[col] = df1[col].str.decode('utf-8')
        except AttributeError:
            # Columns that do not expose ``.str`` are left exactly as read.
            pass
    tm.assert_frame_equal(df1, df2)

    from pandas.io.sas.sas7bdat import SAS7BDATReader
    rdr = SAS7BDATReader(fname, convert_header_text=False)
    try:
        df3 = rdr.read()
    finally:
        # Make sure the reader is closed even when read() fails.
        rdr.close()
    for x, y in zip(df1.columns, df3.columns):
        assert x == y.decode()
Example #6
Source File: test_sas7bdat.py From vnpy_crypto with MIT License | 6 votes |
def test_encoding_options(datapath):
    """Compare ``read_sas(..., encoding='utf-8')`` against manual decoding.

    The un-encoded frame has each decodable column converted with
    ``.str.decode('utf-8')`` and must then equal the frame read with the
    encoding argument. Header names read with ``convert_header_text=False``
    are decoded and compared with the frame's column names.

    :param datapath: fixture that resolves a path inside the test data tree.
    """
    fname = datapath("io", "sas", "data", "test1.sas7bdat")
    df1 = pd.read_sas(fname)
    df2 = pd.read_sas(fname, encoding='utf-8')
    for col in df1.columns:
        try:
            df1[col] = df1[col].str.decode('utf-8')
        except AttributeError:
            # Columns that do not expose ``.str`` are left exactly as read.
            pass
    tm.assert_frame_equal(df1, df2)

    from pandas.io.sas.sas7bdat import SAS7BDATReader
    rdr = SAS7BDATReader(fname, convert_header_text=False)
    try:
        df3 = rdr.read()
    finally:
        # Close the reader on both the success and the failure path.
        rdr.close()
    for x, y in zip(df1.columns, df3.columns):
        assert x == y.decode()
Example #7
Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_zero_variables(datapath):
    """Reading ``zero_variables.sas7bdat`` must raise ``EmptyDataError``.

    :param datapath: fixture that resolves a path inside the test data tree.
    """
    # Check if the SAS file has zero variables (PR #18184)
    empty_sas = datapath("io", "sas", "data", "zero_variables.sas7bdat")
    with pytest.raises(EmptyDataError):
        pd.read_sas(empty_sas)
Example #8
Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_from_iterator(self):
    """Read each test file incrementally and compare slices with the reference.

    For both groups (0 and 1), every file listed in ``self.test_ix[j]`` is
    opened as an iterator; the first 2 rows and the following 3 rows must
    equal rows ``0:2`` and ``2:5`` of ``self.data[j]``.
    """
    for j in 0, 1:
        df0 = self.data[j]
        for k in self.test_ix[j]:
            fname = os.path.join(
                self.dirpath, "test{k}.sas7bdat".format(k=k))
            rdr = pd.read_sas(fname, iterator=True, encoding='utf-8')
            try:
                df = rdr.read(2)
                tm.assert_frame_equal(df, df0.iloc[0:2, :])
                df = rdr.read(3)
                tm.assert_frame_equal(df, df0.iloc[2:5, :])
            finally:
                # Close the reader even when a read or comparison fails.
                rdr.close()
Example #9
Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_path_pathlib(self):
    """``read_sas`` must accept a ``pathlib.Path`` and return the reference frame.

    Every file listed in ``self.test_ix[group]`` is read through a ``Path``
    object and compared with ``self.data[group]`` for groups 0 and 1.
    """
    from pathlib import Path

    for group in (0, 1):
        expected = self.data[group]
        for file_no in self.test_ix[group]:
            basename = "test{k}.sas7bdat".format(k=file_no)
            sas_path = Path(os.path.join(self.dirpath, basename))
            actual = pd.read_sas(sas_path, encoding='utf-8')
            tm.assert_frame_equal(actual, expected)
Example #10
Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_path_localpath(self):
    """``read_sas`` must accept a ``py.path.local`` and return the reference frame.

    Every file listed in ``self.test_ix[group]`` is read through a
    ``LocalPath`` object and compared with ``self.data[group]`` for groups
    0 and 1.
    """
    from py.path import local as LocalPath

    for group in (0, 1):
        expected = self.data[group]
        for file_no in self.test_ix[group]:
            basename = "test{k}.sas7bdat".format(k=file_no)
            sas_path = LocalPath(os.path.join(self.dirpath, basename))
            actual = pd.read_sas(sas_path, encoding='utf-8')
            tm.assert_frame_equal(actual, expected)
Example #11
Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_iterator_read_too_much(self):
    """Requesting more rows than the file holds must give consistent results.

    The first file of the first group is read twice through an iterator,
    once with an explicit ``format="sas7bdat"`` and once without, each time
    asking for ``row_count + 20`` rows. Both frames must be equal.
    """
    # github #14734
    k = self.test_ix[0][0]
    fname = os.path.join(self.dirpath, "test{k}.sas7bdat".format(k=k))

    rdr = pd.read_sas(fname, format="sas7bdat",
                      iterator=True, encoding='utf-8')
    try:
        d1 = rdr.read(rdr.row_count + 20)
    finally:
        # Close the first reader even if the oversized read raises.
        rdr.close()

    rdr = pd.read_sas(fname, iterator=True, encoding="utf-8")
    try:
        d2 = rdr.read(rdr.row_count + 20)
        tm.assert_frame_equal(d1, d2)
    finally:
        # Close the second reader even if the read or comparison fails.
        rdr.close()
Example #12
Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_productsales(datapath):
    """``productsales.sas7bdat`` must match its CSV reference.

    The CSV is parsed with ``MONTH`` as a date column and the four numeric
    columns are cast to ``float64`` before the comparison.

    :param datapath: fixture that resolves a path inside the test data tree.
    """
    sas_path = datapath("io", "sas", "data", "productsales.sas7bdat")
    actual = pd.read_sas(sas_path, encoding='utf-8')

    csv_path = datapath("io", "sas", "data", "productsales.csv")
    expected = pd.read_csv(csv_path, parse_dates=['MONTH'])
    float_cols = ["ACTUAL", "PREDICT", "QUARTER", "YEAR"]
    expected[float_cols] = expected[float_cols].astype(np.float64)

    tm.assert_frame_equal(actual, expected)
Example #13
Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_12659(datapath):
    """``test_12659.sas7bdat`` must match its CSV reference cast to ``float64``.

    :param datapath: fixture that resolves a path inside the test data tree.
    """
    sas_path = datapath("io", "sas", "data", "test_12659.sas7bdat")
    actual = pd.read_sas(sas_path)

    csv_path = datapath("io", "sas", "data", "test_12659.csv")
    expected = pd.read_csv(csv_path).astype(np.float64)

    tm.assert_frame_equal(actual, expected)
Example #14
Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_airline(datapath):
    """``airline.sas7bdat`` must match its CSV reference (inexact comparison).

    The CSV frame is cast to ``float64`` and compared with
    ``check_exact=False``.

    :param datapath: fixture that resolves a path inside the test data tree.
    """
    sas_path = datapath("io", "sas", "data", "airline.sas7bdat")
    actual = pd.read_sas(sas_path)

    csv_path = datapath("io", "sas", "data", "airline.csv")
    expected = pd.read_csv(csv_path).astype(np.float64)

    tm.assert_frame_equal(actual, expected, check_exact=False)
Example #15
Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_compact_numerical_values(datapath):
    """Columns ``WGT`` and ``CYL`` of ``cars.sas7bdat`` must hold whole numbers.

    Each column is compared exactly against its own rounded version.

    :param datapath: fixture that resolves a path inside the test data tree.
    """
    # Regression test for #21616
    sas_path = datapath("io", "sas", "data", "cars.sas7bdat")
    cars = pd.read_sas(sas_path, encoding='latin-1')
    # The two columns CYL and WGT in cars.sas7bdat have column
    # width < 8 and only contain integral values.
    # Test that pandas doesn't corrupt the numbers by adding
    # decimals.
    for column in ('WGT', 'CYL'):
        result = cars[column]
        expected = cars[column].round()
        tm.assert_series_equal(result, expected, check_exact=True)
Example #16
Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_many_columns(datapath):
    """``many_columns.sas7bdat`` must match its CSV reference.

    Both files are read with ``encoding='latin-1'``.

    :param datapath: fixture that resolves a path inside the test data tree.
    """
    # Test for looking for column information in more places (PR #22628)
    sas_path = datapath("io", "sas", "data", "many_columns.sas7bdat")
    actual = pd.read_sas(sas_path, encoding='latin-1')

    csv_path = datapath("io", "sas", "data", "many_columns.csv")
    expected = pd.read_csv(csv_path, encoding='latin-1')

    tm.assert_frame_equal(actual, expected)
Example #17
Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_inconsistent_number_of_rows(datapath):
    """``load_log.sas7bdat`` must be read as exactly 2097 rows.

    :param datapath: fixture that resolves a path inside the test data tree.
    """
    # Regression test for issue #16615. (PR #22628)
    sas_path = datapath("io", "sas", "data", "load_log.sas7bdat")
    frame = pd.read_sas(sas_path, encoding='latin-1')
    row_total = len(frame)
    assert row_total == 2097
Example #18
Source File: test_sas7bdat.py From vnpy_crypto with MIT License | 5 votes |
def test_zero_variables(datapath):
    """A SAS file without variables must make ``read_sas`` raise ``EmptyDataError``.

    :param datapath: fixture that resolves a path inside the test data tree.
    """
    # Check if the SAS file has zero variables (PR #18184)
    sas_path = datapath("io", "sas", "data", "zero_variables.sas7bdat")
    with pytest.raises(EmptyDataError):
        pd.read_sas(sas_path)
Example #19
Source File: test_sas7bdat.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_from_file(self):
    """Each test file read by path must equal the group's reference frame.

    For groups 0 and 1, every file listed in ``self.test_ix[group]`` is
    read with ``encoding='utf-8'`` and compared with ``self.data[group]``.
    """
    for group in (0, 1):
        expected = self.data[group]
        for file_no in self.test_ix[group]:
            basename = "test%d.sas7bdat" % file_no
            sas_path = os.path.join(self.dirpath, basename)
            actual = pd.read_sas(sas_path, encoding='utf-8')
            tm.assert_frame_equal(actual, expected)
Example #20
Source File: test_sas7bdat.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_from_buffer(self):
    """Each test file read from an in-memory buffer must match the reference.

    For groups 0 and 1, every file listed in ``self.test_ix[j]`` is loaded
    into a ``BytesIO`` object, read through an iterator with an explicit
    ``format="sas7bdat"``, and compared (inexactly) with ``self.data[j]``.
    """
    for j in 0, 1:
        df0 = self.data[j]
        for k in self.test_ix[j]:
            fname = os.path.join(self.dirpath, "test%d.sas7bdat" % k)
            with open(fname, 'rb') as f:
                byts = f.read()
            buf = io.BytesIO(byts)
            rdr = pd.read_sas(buf, format="sas7bdat",
                              iterator=True, encoding='utf-8')
            try:
                df = rdr.read()
                tm.assert_frame_equal(df, df0, check_exact=False)
            finally:
                # Close the reader even when the read or comparison fails.
                rdr.close()
Example #21
Source File: test_sas7bdat.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_from_iterator(self):
    """Read each test file in two steps and compare with the reference rows.

    For groups 0 and 1, every file listed in ``self.test_ix[j]`` is opened
    as an iterator; the first 2 rows and the next 3 rows must equal rows
    ``0:2`` and ``2:5`` of ``self.data[j]``.
    """
    for j in 0, 1:
        df0 = self.data[j]
        for k in self.test_ix[j]:
            fname = os.path.join(self.dirpath, "test%d.sas7bdat" % k)
            rdr = pd.read_sas(fname, iterator=True, encoding='utf-8')
            try:
                df = rdr.read(2)
                tm.assert_frame_equal(df, df0.iloc[0:2, :])
                df = rdr.read(3)
                tm.assert_frame_equal(df, df0.iloc[2:5, :])
            finally:
                # Close the reader even when a read or comparison fails.
                rdr.close()
Example #22
Source File: test_sas7bdat.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_path_pathlib(self):
    """``read_sas`` must accept a ``pathlib.Path`` and return the reference frame.

    ``tm._skip_if_no_pathlib()`` is called first. Then every file listed in
    ``self.test_ix[group]`` is read through a ``Path`` and compared with
    ``self.data[group]`` for groups 0 and 1.
    """
    tm._skip_if_no_pathlib()
    from pathlib import Path

    for group in (0, 1):
        expected = self.data[group]
        for file_no in self.test_ix[group]:
            basename = "test%d.sas7bdat" % file_no
            sas_path = Path(os.path.join(self.dirpath, basename))
            actual = pd.read_sas(sas_path, encoding='utf-8')
            tm.assert_frame_equal(actual, expected)
Example #23
Source File: test_sas7bdat.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_path_localpath(self):
    """``read_sas`` must accept a ``py.path.local`` and return the reference frame.

    ``tm._skip_if_no_localpath()`` is called first. Then every file listed
    in ``self.test_ix[group]`` is read through a ``LocalPath`` and compared
    with ``self.data[group]`` for groups 0 and 1.
    """
    tm._skip_if_no_localpath()
    from py.path import local as LocalPath

    for group in (0, 1):
        expected = self.data[group]
        for file_no in self.test_ix[group]:
            basename = "test%d.sas7bdat" % file_no
            sas_path = LocalPath(os.path.join(self.dirpath, basename))
            actual = pd.read_sas(sas_path, encoding='utf-8')
            tm.assert_frame_equal(actual, expected)
Example #24
Source File: test_sas7bdat.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_iterator_read_too_much(self):
    """Asking an iterator for more rows than exist must give consistent frames.

    The first file of the first group is opened twice, with and without an
    explicit ``format="sas7bdat"``; each reader is asked for
    ``row_count + 20`` rows and the two results must be equal.
    """
    # github #14734
    k = self.test_ix[0][0]
    fname = os.path.join(self.dirpath, "test%d.sas7bdat" % k)

    rdr = pd.read_sas(fname, format="sas7bdat",
                      iterator=True, encoding='utf-8')
    try:
        d1 = rdr.read(rdr.row_count + 20)
    finally:
        # Close the first reader even if the oversized read raises.
        rdr.close()

    rdr = pd.read_sas(fname, iterator=True, encoding="utf-8")
    try:
        d2 = rdr.read(rdr.row_count + 20)
        tm.assert_frame_equal(d1, d2)
    finally:
        # Close the second reader even if the read or comparison fails.
        rdr.close()
Example #25
Source File: test_sas7bdat.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_productsales():
    """``productsales.sas7bdat`` must match its CSV reference.

    Files are located via ``tm.get_data_path()``. The CSV is parsed with
    ``MONTH`` as a date column and the four numeric columns are cast to
    ``float64`` before the comparison.
    """
    data_dir = tm.get_data_path()

    sas_path = os.path.join(data_dir, "productsales.sas7bdat")
    actual = pd.read_sas(sas_path, encoding='utf-8')

    csv_path = os.path.join(data_dir, "productsales.csv")
    expected = pd.read_csv(csv_path, parse_dates=['MONTH'])
    float_cols = ["ACTUAL", "PREDICT", "QUARTER", "YEAR"]
    expected[float_cols] = expected[float_cols].astype(np.float64)

    tm.assert_frame_equal(actual, expected)
Example #26
Source File: test_sas7bdat.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_12659():
    """``test_12659.sas7bdat`` must match its CSV reference cast to ``float64``.

    Files are located via ``tm.get_data_path()``.
    """
    data_dir = tm.get_data_path()

    sas_path = os.path.join(data_dir, "test_12659.sas7bdat")
    actual = pd.read_sas(sas_path)

    csv_path = os.path.join(data_dir, "test_12659.csv")
    expected = pd.read_csv(csv_path).astype(np.float64)

    tm.assert_frame_equal(actual, expected)
Example #27
Source File: test_sas7bdat.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_airline():
    """``airline.sas7bdat`` must match its CSV reference (inexact comparison).

    Files are located via ``tm.get_data_path()``. The CSV frame is cast to
    ``float64`` and compared with ``check_exact=False``.
    """
    data_dir = tm.get_data_path()

    sas_path = os.path.join(data_dir, "airline.sas7bdat")
    actual = pd.read_sas(sas_path)

    csv_path = os.path.join(data_dir, "airline.csv")
    expected = pd.read_csv(csv_path).astype(np.float64)

    tm.assert_frame_equal(actual, expected, check_exact=False)
Example #28
Source File: test_io.py From modin with Apache License 2.0 | 5 votes |
def test_from_sas():
    """The frame from ``pd.read_sas`` must equal the one from ``pandas.read_sas``.

    Both readers load ``TEST_SAS_FILENAME`` and the results are compared
    with ``df_equals``.

    NOTE(review): ``pd`` is presumably the modin namespace here (the original
    names its result ``modin_df``) -- confirm against the file's imports.
    """
    expected = pandas.read_sas(TEST_SAS_FILENAME)
    actual = pd.read_sas(TEST_SAS_FILENAME)
    df_equals(actual, expected)
Example #29
Source File: test_sas7bdat.py From recruit with Apache License 2.0 | 5 votes |
def test_inconsistent_number_of_rows(datapath):
    """Reading ``load_log.sas7bdat`` must yield a frame of length 2097.

    :param datapath: fixture that resolves a path inside the test data tree.
    """
    # Regression test for issue #16615. (PR #22628)
    expected_rows = 2097
    sas_path = datapath("io", "sas", "data", "load_log.sas7bdat")
    frame = pd.read_sas(sas_path, encoding='latin-1')
    assert len(frame) == expected_rows
Example #30
Source File: format.py From dataiku-contrib with Apache License 2.0 | 5 votes |
def __init__(self, stream, schema, config):
    """Set up chunked SAS reading from ``stream``.

    :param stream: input stream; passed to ``FormatExtractor.__init__`` and
        wrapped in ``ForwardSeekStream`` for ``pd.read_sas``.
    :param schema: optional schema; only its presence is recorded, in
        ``self.hasSchema``.
    :param config: mapping read with ``.get`` for these keys:
        ``chunksize`` (default ``"10000"``, converted with ``int``),
        ``sas_format`` (default ``"sas7bdat"``),
        ``encoding`` (default ``"latin_1"``),
        ``dump_to_file`` (default ``False``).

    When ``dump_to_file`` is truthy the whole stream is first copied to a
    timestamped file next to this module and ``pd.read_sas`` reads from
    that file instead of the stream wrapper.
    """
    FormatExtractor.__init__(self, stream)

    chunksize = int(config.get("chunksize", "10000"))
    sas_format = config.get("sas_format", "sas7bdat")
    encoding = config.get("encoding", "latin_1")
    dump_to_file = config.get("dump_to_file", False)

    # Identity test: a schema object's own ``__ne__`` must not decide this.
    self.hasSchema = schema is not None

    read_from = ForwardSeekStream(stream)
    if dump_to_file:
        dirname, _ = os.path.split(os.path.abspath(__file__))
        fullpath = os.path.join(dirname, 'dumped-%s.sas7bdat' % (time.time()))
        # Binary mode: the loop below stops on ``b''``, so the stream yields
        # bytes. Text mode would reject bytes on Python 3 and could
        # translate newlines inside the SAS payload on some platforms.
        with open(fullpath, 'wb') as of:
            # Reading 500kb data everytime
            for data in iter((lambda: stream.read(500000)), b''):
                of.write(data)
        read_from = fullpath

    self.iterator = pd.read_sas(
        read_from,
        format=sas_format,
        iterator=True,
        encoding=encoding,
        chunksize=chunksize,
    )
    self.get_chunk()