Python Examples of pandas.read_sas

Source File: io.py From modin with Apache License 2.0

8 votes

def read_sas(
        cls,
        filepath_or_buffer,
        format=None,
        index=None,
        encoding=None,
        chunksize=None,
        iterator=False,
    ):  # pragma: no cover
        """Read a SAS file via pandas and wrap the result with ``from_pandas``.

        Reports the fallback through ``ErrorMessage.default_to_pandas``, then
        forwards every argument unchanged to ``pandas.read_sas``.
        """
        ErrorMessage.default_to_pandas("`read_sas`")
        # Collect the keyword options once so the pandas call stays compact.
        reader_options = dict(
            format=format,
            index=index,
            encoding=encoding,
            chunksize=chunksize,
            iterator=iterator,
        )
        return cls.from_pandas(
            pandas.read_sas(filepath_or_buffer, **reader_options)
        )

Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0

8 votes

def test_encoding_options(datapath):
    """Check that ``encoding='utf-8'`` matches decoding the raw columns by hand.

    The file is read once without an encoding and once with
    ``encoding='utf-8'``; the undecoded frame is then decoded column by
    column and both frames must be equal.  Finally the column names read
    with ``convert_header_text=False`` are decoded and compared with the
    names of the first frame.
    """
    fname = datapath("io", "sas", "data", "test1.sas7bdat")
    df1 = pd.read_sas(fname)
    df2 = pd.read_sas(fname, encoding='utf-8')
    for col in df1.columns:
        try:
            df1[col] = df1[col].str.decode('utf-8')
        except AttributeError:
            # Deliberate best-effort: columns where ``.str`` is unavailable
            # are left untouched.
            pass
    tm.assert_frame_equal(df1, df2)

    from pandas.io.sas.sas7bdat import SAS7BDATReader
    rdr = SAS7BDATReader(fname, convert_header_text=False)
    try:
        df3 = rdr.read()
    finally:
        # Close the reader even when read() raises so it is not leaked.
        rdr.close()
    # zip() would silently stop at the shorter sequence, so check lengths.
    assert len(df1.columns) == len(df3.columns)
    for x, y in zip(df1.columns, df3.columns):
        assert x == y.decode()

Source File: format.py From dataiku-contrib with Apache License 2.0

6 votes

def get_format_extractor(self, stream, schema=None):
        """Build a ``SASFormatExtractor`` from the stream, schema and ``self.config``."""
        extractor = SASFormatExtractor(stream, schema, self.config)
        return extractor


# Fix for the stream class provided by DSS
# Seeking could be disabled with a one-liner like the one below, but read_sas may need to seek forward:
# self.stream.seek = types.MethodType(lambda self, _: False, self.stream)

Source File: test_sas7bdat.py From elasticintel with GNU General Public License v3.0

6 votes

def test_encoding_options():
    """Check that ``encoding='utf-8'`` matches decoding the raw columns by hand.

    The file is read once without an encoding and once with
    ``encoding='utf-8'``; the undecoded frame is then decoded column by
    column and both frames must be equal.  Finally the column names read
    with ``convert_header_text=False`` are decoded and compared with the
    names of the first frame.
    """
    dirpath = tm.get_data_path()
    fname = os.path.join(dirpath, "test1.sas7bdat")
    df1 = pd.read_sas(fname)
    df2 = pd.read_sas(fname, encoding='utf-8')
    for col in df1.columns:
        try:
            df1[col] = df1[col].str.decode('utf-8')
        except AttributeError:
            # Deliberate best-effort: columns where ``.str`` is unavailable
            # are left untouched.
            pass
    tm.assert_frame_equal(df1, df2)

    from pandas.io.sas.sas7bdat import SAS7BDATReader
    rdr = SAS7BDATReader(fname, convert_header_text=False)
    try:
        df3 = rdr.read()
    finally:
        # Close the reader even when read() raises so it is not leaked.
        rdr.close()
    # zip() would silently stop at the shorter sequence, so check lengths.
    assert len(df1.columns) == len(df3.columns)
    for x, y in zip(df1.columns, df3.columns):
        assert x == y.decode()

Source File: test_sas7bdat.py From recruit with Apache License 2.0

6 votes

def test_encoding_options(datapath):
    """Check that ``encoding='utf-8'`` matches decoding the raw columns by hand.

    The file is read once without an encoding and once with
    ``encoding='utf-8'``; the undecoded frame is then decoded column by
    column and both frames must be equal.  Finally the column names read
    with ``convert_header_text=False`` are decoded and compared with the
    names of the first frame.
    """
    fname = datapath("io", "sas", "data", "test1.sas7bdat")
    df1 = pd.read_sas(fname)
    df2 = pd.read_sas(fname, encoding='utf-8')
    for col in df1.columns:
        try:
            df1[col] = df1[col].str.decode('utf-8')
        except AttributeError:
            # Deliberate best-effort: columns where ``.str`` is unavailable
            # are left untouched.
            pass
    tm.assert_frame_equal(df1, df2)

    from pandas.io.sas.sas7bdat import SAS7BDATReader
    rdr = SAS7BDATReader(fname, convert_header_text=False)
    try:
        df3 = rdr.read()
    finally:
        # Close the reader even when read() raises so it is not leaked.
        rdr.close()
    # zip() would silently stop at the shorter sequence, so check lengths.
    assert len(df1.columns) == len(df3.columns)
    for x, y in zip(df1.columns, df3.columns):
        assert x == y.decode()

Source File: test_sas7bdat.py From vnpy_crypto with MIT License

6 votes

def test_encoding_options(datapath):
    """Check that ``encoding='utf-8'`` matches decoding the raw columns by hand.

    The file is read once without an encoding and once with
    ``encoding='utf-8'``; the undecoded frame is then decoded column by
    column and both frames must be equal.  Finally the column names read
    with ``convert_header_text=False`` are decoded and compared with the
    names of the first frame.
    """
    fname = datapath("io", "sas", "data", "test1.sas7bdat")
    df1 = pd.read_sas(fname)
    df2 = pd.read_sas(fname, encoding='utf-8')
    for col in df1.columns:
        try:
            df1[col] = df1[col].str.decode('utf-8')
        except AttributeError:
            # Deliberate best-effort: columns where ``.str`` is unavailable
            # are left untouched.
            pass
    tm.assert_frame_equal(df1, df2)

    from pandas.io.sas.sas7bdat import SAS7BDATReader
    rdr = SAS7BDATReader(fname, convert_header_text=False)
    try:
        df3 = rdr.read()
    finally:
        # Close the reader even when read() raises so it is not leaked.
        rdr.close()
    # zip() would silently stop at the shorter sequence, so check lengths.
    assert len(df1.columns) == len(df3.columns)
    for x, y in zip(df1.columns, df3.columns):
        assert x == y.decode()

Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_zero_variables(datapath):
    """Reading a SAS file with zero variables raises EmptyDataError (PR #18184)."""
    sas_path = datapath("io", "sas", "data", "zero_variables.sas7bdat")
    with pytest.raises(EmptyDataError):
        pd.read_sas(sas_path)

Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_from_iterator(self):
        """Read each test file in two steps through an iterator reader.

        For every file index in ``self.test_ix[j]`` the reader first yields
        2 rows and then 3 rows; they must equal rows ``0:2`` and ``2:5`` of
        the expected frame ``self.data[j]``.
        """
        for j in 0, 1:
            df0 = self.data[j]
            for k in self.test_ix[j]:
                fname = os.path.join(
                    self.dirpath, "test{k}.sas7bdat".format(k=k))
                rdr = pd.read_sas(fname, iterator=True, encoding='utf-8')
                try:
                    df = rdr.read(2)
                    tm.assert_frame_equal(df, df0.iloc[0:2, :])
                    df = rdr.read(3)
                    tm.assert_frame_equal(df, df0.iloc[2:5, :])
                finally:
                    # Close the reader even if a read or comparison fails.
                    rdr.close()

Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_path_pathlib(self):
        """Read every test file through a ``pathlib.Path`` and compare frames."""
        from pathlib import Path
        for group in (0, 1):
            expected = self.data[group]
            for file_no in self.test_ix[group]:
                joined = os.path.join(
                    self.dirpath, "test{k}.sas7bdat".format(k=file_no))
                sas_path = Path(joined)
                actual = pd.read_sas(sas_path, encoding='utf-8')
                tm.assert_frame_equal(actual, expected)

Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_path_localpath(self):
        """Read every test file through a ``py.path.local`` and compare frames."""
        from py.path import local as LocalPath
        for group in (0, 1):
            expected = self.data[group]
            for file_no in self.test_ix[group]:
                joined = os.path.join(
                    self.dirpath, "test{k}.sas7bdat".format(k=file_no))
                sas_path = LocalPath(joined)
                actual = pd.read_sas(sas_path, encoding='utf-8')
                tm.assert_frame_equal(actual, expected)

Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_iterator_read_too_much(self):
        """Request more rows than the file holds (github #14734).

        The first test file is read twice with ``row_count + 20`` rows
        requested, once with ``format="sas7bdat"`` given explicitly and once
        without a format; both reads must return equal frames.
        """
        # github #14734
        k = self.test_ix[0][0]
        fname = os.path.join(self.dirpath, "test{k}.sas7bdat".format(k=k))
        rdr = pd.read_sas(fname, format="sas7bdat",
                          iterator=True, encoding='utf-8')
        try:
            d1 = rdr.read(rdr.row_count + 20)
        finally:
            # Close the reader even if the oversized read raises.
            rdr.close()

        rdr = pd.read_sas(fname, iterator=True, encoding="utf-8")
        try:
            d2 = rdr.read(rdr.row_count + 20)
            tm.assert_frame_equal(d1, d2)
        finally:
            # Close the reader even if the read or comparison fails.
            rdr.close()

Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_productsales(datapath):
    """Compare ``productsales.sas7bdat`` with its CSV counterpart.

    The CSV is parsed with ``MONTH`` as a date column and four numeric
    columns are cast to float64 before the comparison.
    """
    sas_path = datapath("io", "sas", "data", "productsales.sas7bdat")
    actual = pd.read_sas(sas_path, encoding='utf-8')
    csv_path = datapath("io", "sas", "data", "productsales.csv")
    expected = pd.read_csv(csv_path, parse_dates=['MONTH'])
    float_columns = ["ACTUAL", "PREDICT", "QUARTER", "YEAR"]
    expected[float_columns] = expected[float_columns].astype(np.float64)
    tm.assert_frame_equal(actual, expected)

Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_12659(datapath):
    """Compare ``test_12659.sas7bdat`` with its CSV counterpart cast to float64."""
    sas_path = datapath("io", "sas", "data", "test_12659.sas7bdat")
    actual = pd.read_sas(sas_path)
    csv_path = datapath("io", "sas", "data", "test_12659.csv")
    expected = pd.read_csv(csv_path).astype(np.float64)
    tm.assert_frame_equal(actual, expected)

Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_airline(datapath):
    """Compare ``airline.sas7bdat`` with its CSV counterpart (inexact match)."""
    sas_path = datapath("io", "sas", "data", "airline.sas7bdat")
    actual = pd.read_sas(sas_path)
    csv_path = datapath("io", "sas", "data", "airline.csv")
    expected = pd.read_csv(csv_path).astype(np.float64)
    tm.assert_frame_equal(actual, expected, check_exact=False)

Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_compact_numerical_values(datapath):
    """Regression test for #21616: compact numeric columns stay integral."""
    sas_path = datapath("io", "sas", "data", "cars.sas7bdat")
    cars = pd.read_sas(sas_path, encoding='latin-1')
    # WGT and CYL in cars.sas7bdat have column width < 8 and hold only
    # integral values; reading must not corrupt them by adding decimals,
    # so each column has to equal its own rounded copy exactly.
    for column in ('WGT', 'CYL'):
        observed = cars[column]
        rounded = cars[column].round()
        tm.assert_series_equal(observed, rounded, check_exact=True)

Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_many_columns(datapath):
    """Compare ``many_columns.sas7bdat`` with its CSV counterpart.

    Covers looking for column information in more places (PR #22628).
    """
    sas_path = datapath("io", "sas", "data", "many_columns.sas7bdat")
    actual = pd.read_sas(sas_path, encoding='latin-1')
    csv_path = datapath("io", "sas", "data", "many_columns.csv")
    expected = pd.read_csv(csv_path, encoding='latin-1')
    tm.assert_frame_equal(actual, expected)

Source File: test_sas7bdat.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def test_inconsistent_number_of_rows(datapath):
    """Regression test for issue #16615 (PR #22628): expect 2097 rows."""
    sas_path = datapath("io", "sas", "data", "load_log.sas7bdat")
    frame = pd.read_sas(sas_path, encoding='latin-1')
    row_total = len(frame)
    assert row_total == 2097

Source File: test_sas7bdat.py From vnpy_crypto with MIT License

5 votes

def test_zero_variables(datapath):
    """Reading a SAS file with zero variables raises EmptyDataError (PR #18184)."""
    sas_path = datapath("io", "sas", "data", "zero_variables.sas7bdat")
    with pytest.raises(EmptyDataError):
        pd.read_sas(sas_path)

Source File: test_sas7bdat.py From elasticintel with GNU General Public License v3.0

5 votes

def test_from_file(self):
        """Read every test file by path and compare with the expected frame."""
        for group in (0, 1):
            expected = self.data[group]
            for file_no in self.test_ix[group]:
                base_name = "test%d.sas7bdat" % file_no
                sas_path = os.path.join(self.dirpath, base_name)
                actual = pd.read_sas(sas_path, encoding='utf-8')
                tm.assert_frame_equal(actual, expected)

Source File: test_sas7bdat.py From elasticintel with GNU General Public License v3.0

5 votes

def test_from_buffer(self):
        """Read every test file from an in-memory ``BytesIO`` buffer.

        Each file is loaded into bytes, wrapped in ``io.BytesIO`` and read
        through an iterator reader with ``format="sas7bdat"``; the result is
        compared (inexactly) with the expected frame ``self.data[j]``.
        """
        for j in 0, 1:
            df0 = self.data[j]
            for k in self.test_ix[j]:
                fname = os.path.join(self.dirpath, "test%d.sas7bdat" % k)
                with open(fname, 'rb') as f:
                    byts = f.read()
                buf = io.BytesIO(byts)
                rdr = pd.read_sas(buf, format="sas7bdat",
                                  iterator=True, encoding='utf-8')
                try:
                    df = rdr.read()
                    tm.assert_frame_equal(df, df0, check_exact=False)
                finally:
                    # Close the reader even if the read or comparison fails.
                    rdr.close()

Source File: test_sas7bdat.py From elasticintel with GNU General Public License v3.0

5 votes

def test_from_iterator(self):
        """Read each test file in two steps through an iterator reader.

        For every file index in ``self.test_ix[j]`` the reader first yields
        2 rows and then 3 rows; they must equal rows ``0:2`` and ``2:5`` of
        the expected frame ``self.data[j]``.
        """
        for j in 0, 1:
            df0 = self.data[j]
            for k in self.test_ix[j]:
                fname = os.path.join(self.dirpath, "test%d.sas7bdat" % k)
                rdr = pd.read_sas(fname, iterator=True, encoding='utf-8')
                try:
                    df = rdr.read(2)
                    tm.assert_frame_equal(df, df0.iloc[0:2, :])
                    df = rdr.read(3)
                    tm.assert_frame_equal(df, df0.iloc[2:5, :])
                finally:
                    # Close the reader even if a read or comparison fails.
                    rdr.close()

Source File: test_sas7bdat.py From elasticintel with GNU General Public License v3.0

5 votes

def test_path_pathlib(self):
        """Read every test file through a ``pathlib.Path`` and compare frames."""
        tm._skip_if_no_pathlib()
        from pathlib import Path
        for group in (0, 1):
            expected = self.data[group]
            for file_no in self.test_ix[group]:
                joined = os.path.join(self.dirpath, "test%d.sas7bdat" % file_no)
                sas_path = Path(joined)
                actual = pd.read_sas(sas_path, encoding='utf-8')
                tm.assert_frame_equal(actual, expected)

Source File: test_sas7bdat.py From elasticintel with GNU General Public License v3.0

5 votes

def test_path_localpath(self):
        """Read every test file through a ``py.path.local`` and compare frames."""
        tm._skip_if_no_localpath()
        from py.path import local as LocalPath
        for group in (0, 1):
            expected = self.data[group]
            for file_no in self.test_ix[group]:
                joined = os.path.join(self.dirpath, "test%d.sas7bdat" % file_no)
                sas_path = LocalPath(joined)
                actual = pd.read_sas(sas_path, encoding='utf-8')
                tm.assert_frame_equal(actual, expected)

Source File: test_sas7bdat.py From elasticintel with GNU General Public License v3.0

5 votes

def test_iterator_read_too_much(self):
        """Request more rows than the file holds (github #14734).

        The first test file is read twice with ``row_count + 20`` rows
        requested, once with ``format="sas7bdat"`` given explicitly and once
        without a format; both reads must return equal frames.
        """
        # github #14734
        k = self.test_ix[0][0]
        fname = os.path.join(self.dirpath, "test%d.sas7bdat" % k)
        rdr = pd.read_sas(fname, format="sas7bdat",
                          iterator=True, encoding='utf-8')
        try:
            d1 = rdr.read(rdr.row_count + 20)
        finally:
            # Close the reader even if the oversized read raises.
            rdr.close()

        rdr = pd.read_sas(fname, iterator=True, encoding="utf-8")
        try:
            d2 = rdr.read(rdr.row_count + 20)
            tm.assert_frame_equal(d1, d2)
        finally:
            # Close the reader even if the read or comparison fails.
            rdr.close()

Source File: test_sas7bdat.py From elasticintel with GNU General Public License v3.0

5 votes

def test_productsales():
    """Compare ``productsales.sas7bdat`` with its CSV counterpart.

    The CSV is parsed with ``MONTH`` as a date column and four numeric
    columns are cast to float64 before the comparison.
    """
    data_dir = tm.get_data_path()
    sas_path = os.path.join(data_dir, "productsales.sas7bdat")
    actual = pd.read_sas(sas_path, encoding='utf-8')
    csv_path = os.path.join(data_dir, "productsales.csv")
    expected = pd.read_csv(csv_path, parse_dates=['MONTH'])
    float_columns = ["ACTUAL", "PREDICT", "QUARTER", "YEAR"]
    expected[float_columns] = expected[float_columns].astype(np.float64)
    tm.assert_frame_equal(actual, expected)

Source File: test_sas7bdat.py From elasticintel with GNU General Public License v3.0

5 votes

def test_12659():
    """Compare ``test_12659.sas7bdat`` with its CSV counterpart cast to float64."""
    data_dir = tm.get_data_path()
    sas_path = os.path.join(data_dir, "test_12659.sas7bdat")
    actual = pd.read_sas(sas_path)
    csv_path = os.path.join(data_dir, "test_12659.csv")
    expected = pd.read_csv(csv_path).astype(np.float64)
    tm.assert_frame_equal(actual, expected)

Source File: test_sas7bdat.py From elasticintel with GNU General Public License v3.0

5 votes

def test_airline():
    """Compare ``airline.sas7bdat`` with its CSV counterpart (inexact match)."""
    data_dir = tm.get_data_path()
    sas_path = os.path.join(data_dir, "airline.sas7bdat")
    actual = pd.read_sas(sas_path)
    csv_path = os.path.join(data_dir, "airline.csv")
    expected = pd.read_csv(csv_path).astype(np.float64)
    tm.assert_frame_equal(actual, expected, check_exact=False)

Source File: test_io.py From modin with Apache License 2.0

5 votes

def test_from_sas():
    """Read the same SAS file via ``pandas`` and via ``pd`` and compare both."""
    reference_frame = pandas.read_sas(TEST_SAS_FILENAME)
    candidate_frame = pd.read_sas(TEST_SAS_FILENAME)

    df_equals(candidate_frame, reference_frame)

Source File: test_sas7bdat.py From recruit with Apache License 2.0

5 votes

def test_inconsistent_number_of_rows(datapath):
    """Regression test for issue #16615 (PR #22628): expect 2097 rows."""
    sas_path = datapath("io", "sas", "data", "load_log.sas7bdat")
    frame = pd.read_sas(sas_path, encoding='latin-1')
    row_total = len(frame)
    assert row_total == 2097

Source File: format.py From dataiku-contrib with Apache License 2.0

5 votes

def __init__(self, stream, schema, config):
        """Open ``stream`` as a chunked SAS reader and fetch the first chunk.

        Args:
            stream: Readable byte stream holding the SAS file.
            schema: Schema object or ``None``; only its presence is recorded
                in ``self.hasSchema``.
            config: Mapping read for the keys ``chunksize`` (default
                ``"10000"``), ``sas_format`` (default ``"sas7bdat"``),
                ``encoding`` (default ``"latin_1"``) and ``dump_to_file``
                (default ``False``).
        """
        FormatExtractor.__init__(self, stream)

        chunksize = int(config.get("chunksize", "10000"))
        sas_format = config.get("sas_format", "sas7bdat")
        encoding = config.get("encoding", "latin_1")
        dump_to_file = config.get("dump_to_file", False)

        # Identity comparison: a schema object with a custom __ne__ must not
        # be able to influence the None check.
        self.hasSchema = schema is not None

        read_from = ForwardSeekStream(stream)

        if dump_to_file:
            # Dump the whole stream next to this module and read that file.
            dirname, _ = os.path.split(os.path.abspath(__file__))
            fullpath = os.path.join(dirname, 'dumped-%s.sas7bdat' % (time.time()))
            # Binary mode is required: the stream yields bytes (the loop
            # sentinel is b''), and text mode would reject or mangle them.
            with open(fullpath, 'wb') as of:
                # Copy in 500 kB pieces until the stream is exhausted.
                for data in iter((lambda: stream.read(500000)), b''):
                    of.write(data)

            read_from = fullpath

        self.iterator = pd.read_sas(read_from,
                                    format=sas_format,
                                    iterator=True,
                                    encoding=encoding,
                                    chunksize=chunksize)

        # get_chunk() is defined outside this block; presumably it loads the
        # first chunk from self.iterator -- confirm against its definition.
        self.get_chunk()

Python pandas.read_sas() Examples