Python pandas.compat.BytesIO() Examples

The following are 30 code examples of pandas.compat.BytesIO(), drawn from open-source projects. The source file and originating project are noted above each example. You may also want to check out all available functions and classes of the module pandas.compat, or try the search function.
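Before diving into the examples, it helps to know what pandas.compat.BytesIO actually is: in the pandas 0.x series, pandas.compat re-exported an in-memory binary buffer equivalent to io.BytesIO on Python 3 (the compat shim papered over Python 2/3 differences and was removed in pandas 1.0). A minimal sketch, assuming a pandas 0.x install; the CSV bytes are made up for illustration:

import pandas as pd
from pandas import compat

# Wrap raw bytes in an in-memory, file-like binary buffer.
buf = compat.BytesIO(b"a,b\n1,2\n3,4\n")

# Any pandas reader that accepts a file-like object can consume it.
df = pd.read_csv(buf)
print(df)

On pandas 1.0 and later, replace compat.BytesIO with io.BytesIO; the examples below all follow this same pattern of wrapping raw bytes so that file-oriented readers can consume in-memory data.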
Example #1
Source File: test_read_fwf.py    From recruit with Apache License 2.0
def test_variable_width_unicode():
    if not compat.PY3:
        pytest.skip("Bytes-related test - only needs to work on Python 3")

    data = """
שלום שלום
ום   שלל
של   ום
""".strip("\r\n")
    encoding = "utf8"
    kwargs = dict(header=None, encoding=encoding)

    expected = read_fwf(BytesIO(data.encode(encoding)),
                        colspecs=[(0, 4), (5, 9)], **kwargs)
    result = read_fwf(BytesIO(data.encode(encoding)), **kwargs)
    tm.assert_frame_equal(result, expected) 
Example #2
Source File: sas_xport.py    From recruit with Apache License 2.0
def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1',
                 chunksize=None):

        self._encoding = encoding
        self._lines_read = 0
        self._index = index
        self._chunksize = chunksize

        if isinstance(filepath_or_buffer, str):
            (filepath_or_buffer, encoding,
             compression, should_close) = get_filepath_or_buffer(
                filepath_or_buffer, encoding=encoding)

        if isinstance(filepath_or_buffer, (str, compat.text_type, bytes)):
            self.filepath_or_buffer = open(filepath_or_buffer, 'rb')
        else:
            # Copy to BytesIO, and ensure no encoding
            contents = filepath_or_buffer.read()
            try:
                contents = contents.encode(self._encoding)
            except UnicodeEncodeError:
                pass
            self.filepath_or_buffer = compat.BytesIO(contents)

        self._read_header() 
Example #3
Source File: test_network.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_read_csv_chunked_download(self, s3_resource, caplog):
        # 8 MB, S3FS uses 5 MB chunks
        df = DataFrame(np.random.randn(100000, 4), columns=list('abcd'))
        buf = BytesIO()
        str_buf = StringIO()

        df.to_csv(str_buf)

        buf = BytesIO(str_buf.getvalue().encode('utf-8'))

        s3_resource.Bucket("pandas-test").put_object(
            Key="large-file.csv",
            Body=buf)

        with caplog.at_level(logging.DEBUG, logger='s3fs.core'):
            read_csv("s3://pandas-test/large-file.csv", nrows=5)
            # log of fetch_range (start, stop)
            assert ((0, 5505024) in {x.args[-2:] for x in caplog.records}) 
Example #4
Source File: test_read_fwf.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_variable_width_unicode():
    if not compat.PY3:
        pytest.skip("Bytes-related test - only needs to work on Python 3")

    data = """
שלום שלום
ום   שלל
של   ום
""".strip("\r\n")
    encoding = "utf8"
    kwargs = dict(header=None, encoding=encoding)

    expected = read_fwf(BytesIO(data.encode(encoding)),
                        colspecs=[(0, 4), (5, 9)], **kwargs)
    result = read_fwf(BytesIO(data.encode(encoding)), **kwargs)
    tm.assert_frame_equal(result, expected) 
Example #5
Source File: test_network.py    From vnpy_crypto with MIT License
def test_read_csv_chunked_download(self, s3_resource, caplog):
        # 8 MB, S3FS uses 5 MB chunks
        df = DataFrame(np.random.randn(100000, 4), columns=list('abcd'))
        buf = BytesIO()
        str_buf = StringIO()

        df.to_csv(str_buf)

        buf = BytesIO(str_buf.getvalue().encode('utf-8'))

        s3_resource.Bucket("pandas-test").put_object(
            Key="large-file.csv",
            Body=buf)

        with caplog.at_level(logging.DEBUG, logger='s3fs.core'):
            read_csv("s3://pandas-test/large-file.csv", nrows=5)
            # log of fetch_range (start, stop)
            assert ((0, 5505024) in set(x.args[-2:] for x in caplog.records)) 
Example #6
Source File: multithread.py    From vnpy_crypto with MIT License
def test_multithread_stringio_read_csv(self):
        # see gh-11786
        max_row_range = 10000
        num_files = 100

        bytes_to_df = [
            '\n'.join(
                ['%d,%d,%d' % (i, i, i) for i in range(max_row_range)]
            ).encode() for j in range(num_files)]
        files = [BytesIO(b) for b in bytes_to_df]

        # read all files in many threads
        pool = ThreadPool(8)
        results = pool.map(self.read_csv, files)
        first_result = results[0]

        for result in results:
            tm.assert_frame_equal(first_result, result) 
Example #7
Source File: sas_xport.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1',
                 chunksize=None):

        self._encoding = encoding
        self._lines_read = 0
        self._index = index
        self._chunksize = chunksize

        if isinstance(filepath_or_buffer, str):
            (filepath_or_buffer, encoding,
             compression, should_close) = get_filepath_or_buffer(
                filepath_or_buffer, encoding=encoding)

        if isinstance(filepath_or_buffer, (str, compat.text_type, bytes)):
            self.filepath_or_buffer = open(filepath_or_buffer, 'rb')
        else:
            # Copy to BytesIO, and ensure no encoding
            contents = filepath_or_buffer.read()
            try:
                contents = contents.encode(self._encoding)
            except UnicodeEncodeError:
                pass
            self.filepath_or_buffer = compat.BytesIO(contents)

        self._read_header() 
Example #8
Source File: sas_xport.py    From vnpy_crypto with MIT License
def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1',
                 chunksize=None):

        self._encoding = encoding
        self._lines_read = 0
        self._index = index
        self._chunksize = chunksize

        if isinstance(filepath_or_buffer, str):
            (filepath_or_buffer, encoding,
             compression, should_close) = get_filepath_or_buffer(
                filepath_or_buffer, encoding=encoding)

        if isinstance(filepath_or_buffer, (str, compat.text_type, bytes)):
            self.filepath_or_buffer = open(filepath_or_buffer, 'rb')
        else:
            # Copy to BytesIO, and ensure no encoding
            contents = filepath_or_buffer.read()
            try:
                contents = contents.encode(self._encoding)
            except UnicodeEncodeError:
                pass
            self.filepath_or_buffer = compat.BytesIO(contents)

        self._read_header() 
Example #9
Source File: test_network.py    From recruit with Apache License 2.0
def test_read_csv_chunked_download(self, s3_resource, caplog):
        # 8 MB, S3FS uses 5 MB chunks
        df = DataFrame(np.random.randn(100000, 4), columns=list('abcd'))
        buf = BytesIO()
        str_buf = StringIO()

        df.to_csv(str_buf)

        buf = BytesIO(str_buf.getvalue().encode('utf-8'))

        s3_resource.Bucket("pandas-test").put_object(
            Key="large-file.csv",
            Body=buf)

        with caplog.at_level(logging.DEBUG, logger='s3fs.core'):
            read_csv("s3://pandas-test/large-file.csv", nrows=5)
            # log of fetch_range (start, stop)
            assert ((0, 5505024) in {x.args[-2:] for x in caplog.records}) 
Example #10
Source File: sas_xport.py    From Splunking-Crime with GNU Affero General Public License v3.0
def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1',
                 chunksize=None):

        self._encoding = encoding
        self._lines_read = 0
        self._index = index
        self._chunksize = chunksize

        if isinstance(filepath_or_buffer, str):
            filepath_or_buffer, encoding, compression = get_filepath_or_buffer(
                filepath_or_buffer, encoding=encoding)

        if isinstance(filepath_or_buffer, (str, compat.text_type, bytes)):
            self.filepath_or_buffer = open(filepath_or_buffer, 'rb')
        else:
            # Copy to BytesIO, and ensure no encoding
            contents = filepath_or_buffer.read()
            try:
                contents = contents.encode(self._encoding)
            except UnicodeEncodeError:
                pass
            self.filepath_or_buffer = compat.BytesIO(contents)

        self._read_header() 
Example #11
Source File: python_parser_only.py    From elasticintel with GNU General Public License v3.0
def test_sniff_delimiter(self):
        text = """index|A|B|C
foo|1|2|3
bar|4|5|6
baz|7|8|9
"""
        data = self.read_csv(StringIO(text), index_col=0, sep=None)
        tm.assert_index_equal(data.index,
                              Index(['foo', 'bar', 'baz'], name='index'))

        data2 = self.read_csv(StringIO(text), index_col=0, delimiter='|')
        tm.assert_frame_equal(data, data2)

        text = """ignore this
ignore this too
index|A|B|C
foo|1|2|3
bar|4|5|6
baz|7|8|9
"""
        data3 = self.read_csv(StringIO(text), index_col=0,
                              sep=None, skiprows=2)
        tm.assert_frame_equal(data, data3)

        text = u("""ignore this
ignore this too
index|A|B|C
foo|1|2|3
bar|4|5|6
baz|7|8|9
""").encode('utf-8')

        s = BytesIO(text)
        if compat.PY3:
            # somewhat False since the code never sees bytes
            from io import TextIOWrapper
            s = TextIOWrapper(s, encoding='utf-8')

        data4 = self.read_csv(s, index_col=0, sep=None, skiprows=2,
                              encoding='utf-8')
        tm.assert_frame_equal(data, data4) 
Example #12
Source File: pickle.py    From Splunking-Crime with GNU Affero General Public License v3.0
def _pickle_array(arr):
    arr = arr.view(np.ndarray)

    buf = BytesIO()
    write_array(buf, arr)

    return buf.getvalue() 
Example #13
Source File: test_pack.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def testArraySize(self, sizes=[0, 5, 50, 1000]):
        bio = compat.BytesIO()
        packer = Packer()
        for size in sizes:
            bio.write(packer.pack_array_header(size))
            for i in range(size):
                bio.write(packer.pack(i))

        bio.seek(0)
        unpacker = Unpacker(bio, use_list=1)
        for size in sizes:
            assert unpacker.unpack() == list(range(size)) 
Example #14
Source File: test_read_fwf.py    From recruit with Apache License 2.0
def test_bytes_io_input():
    if not compat.PY3:
        pytest.skip("Bytes-related test - only needs to work on Python 3")

    result = read_fwf(BytesIO("שלום\nשלום".encode('utf8')),
                      widths=[2, 2], encoding="utf8")
    expected = DataFrame([["של", "ום"]], columns=["של", "ום"])
    tm.assert_frame_equal(result, expected) 
Example #15
Source File: pickle.py    From Splunking-Crime with GNU Affero General Public License v3.0
def _unpickle_array(bytes):
    arr = read_array(BytesIO(bytes))

    # All datetimes should be stored as M8[ns].  When unpickling with
    # numpy 1.6, it will read these as M8[us].  So this ensures all
    # datetime64 types are read as M8[ns]
    if is_datetime64_dtype(arr):
        arr = arr.view(_NS_DTYPE)

    return arr 
Example #16
Source File: pickle.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def _pickle_array(arr):
    arr = arr.view(np.ndarray)

    buf = BytesIO()
    write_array(buf, arr)

    return buf.getvalue() 
Example #17
Source File: test_excel.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_bytes_io(self, merge_cells, engine, ext):
        # see gh-7074
        bio = BytesIO()
        df = DataFrame(np.random.randn(10, 2))

        # Pass engine explicitly, as there is no file path to infer from.
        writer = ExcelWriter(bio, engine=engine)
        df.to_excel(writer)
        writer.save()

        bio.seek(0)
        reread_df = read_excel(bio, index_col=0)
        tm.assert_frame_equal(df, reread_df) 
Example #18
Source File: test_sequnpack.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_readbytes(self):
        unpacker = Unpacker(read_size=3)
        unpacker.feed(b'foobar')
        assert unpacker.unpack() == ord(b'f')
        assert unpacker.read_bytes(3) == b'oob'
        assert unpacker.unpack() == ord(b'a')
        assert unpacker.unpack() == ord(b'r')

        # Test buffer refill
        unpacker = Unpacker(compat.BytesIO(b'foobar'), read_size=3)
        assert unpacker.unpack() == ord(b'f')
        assert unpacker.read_bytes(3) == b'oob'
        assert unpacker.unpack() == ord(b'a')
        assert unpacker.unpack() == ord(b'r') 
Example #19
Source File: test_pack.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def testMapSize(self, sizes=[0, 5, 50, 1000]):
        bio = compat.BytesIO()
        packer = Packer()
        for size in sizes:
            bio.write(packer.pack_map_header(size))
            for i in range(size):
                bio.write(packer.pack(i))  # key
                bio.write(packer.pack(i * 2))  # value

        bio.seek(0)
        unpacker = Unpacker(bio)
        for size in sizes:
            assert unpacker.unpack() == {i: i * 2 for i in range(size)} 
Example #20
Source File: packers.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def __iter__(self):

        needs_closing = True
        try:

            # see if we have an actual file
            if isinstance(self.path, compat.string_types):

                try:
                    path_exists = os.path.exists(self.path)
                except TypeError:
                    path_exists = False

                if path_exists:
                    fh = open(self.path, 'rb')
                else:
                    fh = compat.BytesIO(self.path)

            else:

                if not hasattr(self.path, 'read'):
                    fh = compat.BytesIO(self.path)

                else:

                    # a file-like
                    needs_closing = False
                    fh = self.path

            unpacker = unpack(fh)
            for o in unpacker:
                yield o
        finally:
            if needs_closing:
                fh.close() 
Example #21
Source File: test_common.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_bytes_io_input(all_parsers):
    if compat.PY2:
        pytest.skip("Bytes-related test does not need to work on Python 2.x")

    encoding = "cp1255"
    parser = all_parsers

    data = BytesIO("שלום:1234\n562:123".encode(encoding))
    result = parser.read_csv(data, sep=":", encoding=encoding)

    expected = DataFrame([[562, 123]], columns=["שלום", "1234"])
    tm.assert_frame_equal(result, expected) 
Example #22
Source File: packers.py    From vnpy_crypto with MIT License
def __iter__(self):

        needs_closing = True
        try:

            # see if we have an actual file
            if isinstance(self.path, compat.string_types):

                try:
                    path_exists = os.path.exists(self.path)
                except TypeError:
                    path_exists = False

                if path_exists:
                    fh = open(self.path, 'rb')
                else:
                    fh = compat.BytesIO(self.path)

            else:

                if not hasattr(self.path, 'read'):
                    fh = compat.BytesIO(self.path)

                else:

                    # a file-like
                    needs_closing = False
                    fh = self.path

            unpacker = unpack(fh)
            for o in unpacker:
                yield o
        finally:
            if needs_closing:
                fh.close() 
Example #23
Source File: common.py    From Computable with MIT License
def _pickle_array(arr):
    arr = arr.view(np.ndarray)

    buf = BytesIO()
    write_array(buf, arr)

    return buf.getvalue() 
Example #24
Source File: test_network.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_read_csv_handles_boto_s3_object(self,
                                             s3_resource,
                                             tips_file):
        # see gh-16135

        s3_object = s3_resource.meta.client.get_object(
            Bucket='pandas-test',
            Key='tips.csv')

        result = read_csv(BytesIO(s3_object["Body"].read()), encoding='utf8')
        assert isinstance(result, DataFrame)
        assert not result.empty

        expected = read_csv(tips_file)
        tm.assert_frame_equal(result, expected) 
Example #25
Source File: test_c_parser_only.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_buffer_rd_bytes_bad_unicode(c_parser_only):
    # see gh-22748
    parser = c_parser_only
    t = BytesIO(b"\xB0")

    if PY3:
        msg = "'utf-8' codec can't encode character"
        t = TextIOWrapper(t, encoding="ascii", errors="surrogateescape")
    else:
        msg = "'utf8' codec can't decode byte"

    with pytest.raises(UnicodeError, match=msg):
        parser.read_csv(t, encoding="UTF-8") 
Example #26
Source File: test_read_fwf.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_bytes_io_input():
    if not compat.PY3:
        pytest.skip("Bytes-related test - only needs to work on Python 3")

    result = read_fwf(BytesIO("שלום\nשלום".encode('utf8')),
                      widths=[2, 2], encoding="utf8")
    expected = DataFrame([["של", "ום"]], columns=["של", "ום"])
    tm.assert_frame_equal(result, expected) 
Example #27
Source File: test_packers.py    From Computable with MIT License
def test_string_io(self):

        df = DataFrame(np.random.randn(10, 2))
        s = df.to_msgpack(None)
        result = read_msgpack(s)
        tm.assert_frame_equal(result, df)

        s = df.to_msgpack()
        result = read_msgpack(s)
        tm.assert_frame_equal(result, df)

        s = df.to_msgpack()
        result = read_msgpack(compat.BytesIO(s))
        tm.assert_frame_equal(result, df)

        s = to_msgpack(None, df)
        result = read_msgpack(s)
        tm.assert_frame_equal(result, df)

        with ensure_clean(self.path) as p:

            s = df.to_msgpack()
            fh = open(p, 'wb')
            fh.write(s)
            fh.close()
            result = read_msgpack(p)
            tm.assert_frame_equal(result, df) 
Example #28
Source File: packers.py    From Computable with MIT License
def __iter__(self):

        needs_closing = True
        try:

            # see if we have an actual file
            if isinstance(self.path, compat.string_types):

                try:
                    path_exists = os.path.exists(self.path)
                except TypeError:
                    path_exists = False

                if path_exists:
                    fh = open(self.path, 'rb')
                else:
                    fh = compat.BytesIO(self.path)

            else:

                if not hasattr(self.path, 'read'):
                    fh = compat.BytesIO(self.path)

                else:

                    # a file-like
                    needs_closing = False
                    fh = self.path

            unpacker = unpack(fh)
            for o in unpacker:
                yield o
        finally:
            if needs_closing:
                fh.close() 
Example #29
Source File: packers.py    From Computable with MIT License
def to_msgpack(path_or_buf, *args, **kwargs):
    """
    msgpack (serialize) object to input file path

    THIS IS AN EXPERIMENTAL LIBRARY and the storage format
    may not be stable until a future release.

    Parameters
    ----------
    path_or_buf : string File path, buffer-like, or None
                  if None, return generated string
    args : an object or objects to serialize
    append : boolean whether to append to an existing msgpack
             (default is False)
    compress : type of compressor (zlib or blosc), default to None (no
               compression)
    """
    global compressor
    compressor = kwargs.pop('compress', None)
    append = kwargs.pop('append', None)
    if append:
        mode = 'a+b'
    else:
        mode = 'wb'

    def writer(fh):
        for a in args:
            fh.write(pack(a, **kwargs))

    if isinstance(path_or_buf, compat.string_types):
        with open(path_or_buf, mode) as fh:
            writer(fh)
    elif path_or_buf is None:
        buf = compat.BytesIO()
        writer(buf)
        return buf.getvalue()
    else:
        writer(path_or_buf) 
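A quick usage note on the function above: passing path_or_buf=None makes it return the packed bytes, which compat.BytesIO can then wrap to read the data back. A hedged sketch, assuming the deprecated pandas 0.x msgpack API and a DataFrame df as in the other examples:

packed = to_msgpack(None, df)                  # None -> return the packed bytes
result = read_msgpack(compat.BytesIO(packed))  # wrap bytes in a buffer to unpack
tm.assert_frame_equal(result, df)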
Example #30
Source File: common.py    From Computable with MIT License
def _unpickle_array(bytes):
    arr = read_array(BytesIO(bytes))

    # All datetimes should be stored as M8[ns].  When unpickling with
    # numpy 1.6, it will read these as M8[us].  So this ensures all
    # datetime64 types are read as M8[ns]
    if is_datetime64_dtype(arr):
        arr = arr.view(_NS_DTYPE)

    return arr