Python pandas.compat.BytesIO() Examples

The following are 30 code examples of pandas.compat.BytesIO(), drawn from open-source projects. The source file and originating project are noted above each example. You may also want to check out all available functions and classes of the module pandas.compat, or try the search function.
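Before diving into the examples, it helps to know what pandas.compat.BytesIO actually is: in the pandas 0.x series, pandas.compat re-exported an in-memory binary buffer equivalent to io.BytesIO on Python 3 (the compat shim papered over Python 2/3 differences and was removed in pandas 1.0). A minimal sketch, assuming a pandas 0.x install; the CSV bytes are made up for illustration:

import pandas as pd
from pandas import compat

# Wrap raw bytes in an in-memory, file-like binary buffer.
buf = compat.BytesIO(b"a,b\n1,2\n3,4\n")

# Any pandas reader that accepts a file-like object can consume it.
df = pd.read_csv(buf)
print(df)

On pandas 1.0 and later, replace compat.BytesIO with io.BytesIO; the examples below all follow this same pattern of wrapping raw bytes so that file-oriented readers can consume in-memory data.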
Example #1
Source File: test_read_fwf.py    From recruit with Apache License 2.0
def test_variable_width_unicode():
    if not compat.PY3:
        pytest.skip("Bytes-related test - only needs to work on Python 3")

    data = """
שלום שלום
ום   שלל
של   ום
""".strip("\r\n")
    encoding = "utf8"
    kwargs = dict(header=None, encoding=encoding)

    expected = read_fwf(BytesIO(data.encode(encoding)),
                        colspecs=[(0, 4), (5, 9)], **kwargs)
    result = read_fwf(BytesIO(data.encode(encoding)), **kwargs)
    tm.assert_frame_equal(result, expected) 
Example #2
Source File: sas_xport.py    From recruit with Apache License 2.0
def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1',
                 chunksize=None):

        self._encoding = encoding
        self._lines_read = 0
        self._index = index
        self._chunksize = chunksize

        if isinstance(filepath_or_buffer, str):
            (filepath_or_buffer, encoding,
             compression, should_close) = get_filepath_or_buffer(
                filepath_or_buffer, encoding=encoding)

        if isinstance(filepath_or_buffer, (str, compat.text_type, bytes)):
            self.filepath_or_buffer = open(filepath_or_buffer, 'rb')
        else:
            # Copy to BytesIO, and ensure no encoding
            contents = filepath_or_buffer.read()
            try:
                contents = contents.encode(self._encoding)
            except UnicodeEncodeError:
                pass
            self.filepath_or_buffer = compat.BytesIO(contents)

        self._read_header() 
Example #3
Source File: test_network.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_read_csv_chunked_download(self, s3_resource, caplog):
        # 8 MB, S3FS uses 5 MB chunks
        df = DataFrame(np.random.randn(100000, 4), columns=list('abcd'))
        buf = BytesIO()
        str_buf = StringIO()

        df.to_csv(str_buf)

        buf = BytesIO(str_buf.getvalue().encode('utf-8'))

        s3_resource.Bucket("pandas-test").put_object(
            Key="large-file.csv",
            Body=buf)

        with caplog.at_level(logging.DEBUG, logger='s3fs.core'):
            read_csv("s3://pandas-test/large-file.csv", nrows=5)
            # log of fetch_range (start, stop)
            assert ((0, 5505024) in {x.args[-2:] for x in caplog.records}) 
Example #4
Source File: test_read_fwf.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_variable_width_unicode():
    if not compat.PY3:
        pytest.skip("Bytes-related test - only needs to work on Python 3")

    data = """
שלום שלום
ום   שלל
של   ום
""".strip("\r\n")
    encoding = "utf8"
    kwargs = dict(header=None, encoding=encoding)

    expected = read_fwf(BytesIO(data.encode(encoding)),
                        colspecs=[(0, 4), (5, 9)], **kwargs)
    result = read_fwf(BytesIO(data.encode(encoding)), **kwargs)
    tm.assert_frame_equal(result, expected) 
Example #5
Source File: test_network.py    From vnpy_crypto with MIT License
def test_read_csv_chunked_download(self, s3_resource, caplog):
        # 8 MB, S3FS uses 5 MB chunks
        df = DataFrame(np.random.randn(100000, 4), columns=list('abcd'))
        buf = BytesIO()
        str_buf = StringIO()

        df.to_csv(str_buf)

        buf = BytesIO(str_buf.getvalue().encode('utf-8'))

        s3_resource.Bucket("pandas-test").put_object(
            Key="large-file.csv",
            Body=buf)

        with caplog.at_level(logging.DEBUG, logger='s3fs.core'):
            read_csv("s3://pandas-test/large-file.csv", nrows=5)
            # log of fetch_range (start, stop)
            assert ((0, 5505024) in set(x.args[-2:] for x in caplog.records)) 
Example #6
Source File: multithread.py    From vnpy_crypto with MIT License
def test_multithread_stringio_read_csv(self):
        # see gh-11786
        max_row_range = 10000
        num_files = 100

        bytes_to_df = [
            '\n'.join(
                ['%d,%d,%d' % (i, i, i) for i in range(max_row_range)]
            ).encode() for j in range(num_files)]
        files = [BytesIO(b) for b in bytes_to_df]

        # read all files in many threads
        pool = ThreadPool(8)
        results = pool.map(self.read_csv, files)
        first_result = results[0]

        for result in results:
            tm.assert_frame_equal(first_result, result) 
Example #7
Source File: sas_xport.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1',
                 chunksize=None):

        self._encoding = encoding
        self._lines_read = 0
        self._index = index
        self._chunksize = chunksize

        if isinstance(filepath_or_buffer, str):
            (filepath_or_buffer, encoding,
             compression, should_close) = get_filepath_or_buffer(
                filepath_or_buffer, encoding=encoding)

        if isinstance(filepath_or_buffer, (str, compat.text_type, bytes)):
            self.filepath_or_buffer = open(filepath_or_buffer, 'rb')
        else:
            # Copy to BytesIO, and ensure no encoding
            contents = filepath_or_buffer.read()
            try:
                contents = contents.encode(self._encoding)
            except UnicodeEncodeError:
                pass
            self.filepath_or_buffer = compat.BytesIO(contents)

        self._read_header() 
Example #8
Source File: sas_xport.py    From vnpy_crypto with MIT License
def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1',
                 chunksize=None):

        self._encoding = encoding
        self._lines_read = 0
        self._index = index
        self._chunksize = chunksize

        if isinstance(filepath_or_buffer, str):
            (filepath_or_buffer, encoding,
             compression, should_close) = get_filepath_or_buffer(
                filepath_or_buffer, encoding=encoding)

        if isinstance(filepath_or_buffer, (str, compat.text_type, bytes)):
            self.filepath_or_buffer = open(filepath_or_buffer, 'rb')
        else:
            # Copy to BytesIO, and ensure no encoding
            contents = filepath_or_buffer.read()
            try:
                contents = contents.encode(self._encoding)
            except UnicodeEncodeError:
                pass
            self.filepath_or_buffer = compat.BytesIO(contents)

        self._read_header() 
Example #9
Source File: test_network.py    From recruit with Apache License 2.0
def test_read_csv_chunked_download(self, s3_resource, caplog):
        # 8 MB, S3FS uses 5 MB chunks
        df = DataFrame(np.random.randn(100000, 4), columns=list('abcd'))
        buf = BytesIO()
        str_buf = StringIO()

        df.to_csv(str_buf)

        buf = BytesIO(str_buf.getvalue().encode('utf-8'))

        s3_resource.Bucket("pandas-test").put_object(
            Key="large-file.csv",
            Body=buf)

        with caplog.at_level(logging.DEBUG, logger='s3fs.core'):
            read_csv("s3://pandas-test/large-file.csv", nrows=5)
            # log of fetch_range (start, stop)
            assert ((0, 5505024) in {x.args[-2:] for x in caplog.records}) 
Example #10
Source File: sas_xport.py    From Splunking-Crime with GNU Affero General Public License v3.0
def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1',
                 chunksize=None):

        self._encoding = encoding
        self._lines_read = 0
        self._index = index
        self._chunksize = chunksize

        if isinstance(filepath_or_buffer, str):
            filepath_or_buffer, encoding, compression = get_filepath_or_buffer(
                filepath_or_buffer, encoding=encoding)

        if isinstance(filepath_or_buffer, (str, compat.text_type, bytes)):
            self.filepath_or_buffer = open(filepath_or_buffer, 'rb')
        else:
            # Copy to BytesIO, and ensure no encoding
            contents = filepath_or_buffer.read()
            try:
                contents = contents.encode(self._encoding)
            except UnicodeEncodeError:
                pass
            self.filepath_or_buffer = compat.BytesIO(contents)

        self._read_header() 
Example #11
Source File: python_parser_only.py    From elasticintel with GNU General Public License v3.0
def test_sniff_delimiter(self):
        text = """index|A|B|C
foo|1|2|3
bar|4|5|6
baz|7|8|9
"""
        data = self.read_csv(StringIO(text), index_col=0, sep=None)
        tm.assert_index_equal(data.index,
                              Index(['foo', 'bar', 'baz'], name='index'))

        data2 = self.read_csv(StringIO(text), index_col=0, delimiter='|')
        tm.assert_frame_equal(data, data2)

        text = """ignore this
ignore this too
index|A|B|C
foo|1|2|3
bar|4|5|6
baz|7|8|9
"""
        data3 = self.read_csv(StringIO(text), index_col=0,
                              sep=None, skiprows=2)
        tm.assert_frame_equal(data, data3)

        text = u("""ignore this
ignore this too
index|A|B|C
foo|1|2|3
bar|4|5|6
baz|7|8|9
""").encode('utf-8')

        s = BytesIO(text)
        if compat.PY3:
            # somewhat False since the code never sees bytes
            from io import TextIOWrapper
            s = TextIOWrapper(s, encoding='utf-8')

        data4 = self.read_csv(s, index_col=0, sep=None, skiprows=2,
                              encoding='utf-8')
        tm.assert_frame_equal(data, data4) 
Example #12
Source File: pickle.py    From Splunking-Crime with GNU Affero General Public License v3.0
def _pickle_array(arr):
    arr = arr.view(np.ndarray)

    buf = BytesIO()
    write_array(buf, arr)

    return buf.getvalue() 
Example #13
Source File: test_pack.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def testArraySize(self, sizes=[0, 5, 50, 1000]):
        bio = compat.BytesIO()
        packer = Packer()
        for size in sizes:
            bio.write(packer.pack_array_header(size))
            for i in range(size):
                bio.write(packer.pack(i))

        bio.seek(0)
        unpacker = Unpacker(bio, use_list=1)
        for size in sizes:
            assert unpacker.unpack() == list(range(size)) 
Example #14
Source File: test_read_fwf.py    From recruit with Apache License 2.0
def test_bytes_io_input():
    if not compat.PY3:
        pytest.skip("Bytes-related test - only needs to work on Python 3")

    result = read_fwf(BytesIO("שלום\nשלום".encode('utf8')),
                      widths=[2, 2], encoding="utf8")
    expected = DataFrame([["של", "ום"]], columns=["של", "ום"])
    tm.assert_frame_equal(result, expected) 
Example #15
Source File: pickle.py    From Splunking-Crime with GNU Affero General Public License v3.0
def _unpickle_array(bytes):
    arr = read_array(BytesIO(bytes))

    # All datetimes should be stored as M8[ns].  When unpickling with
    # numpy 1.6, it will read these as M8[us].  So this ensures all
    # datetime64 types are read as M8[ns]
    if is_datetime64_dtype(arr):
        arr = arr.view(_NS_DTYPE)

    return arr 
Example #16
Source File: pickle.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def _pickle_array(arr):
    arr = arr.view(np.ndarray)

    buf = BytesIO()
    write_array(buf, arr)

    return buf.getvalue() 
Example #17
Source File: test_excel.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_bytes_io(self, merge_cells, engine, ext):
        # see gh-7074
        bio = BytesIO()
        df = DataFrame(np.random.randn(10, 2))

        # Pass engine explicitly, as there is no file path to infer from.
        writer = ExcelWriter(bio, engine=engine)
        df.to_excel(writer)
        writer.save()

        bio.seek(0)
        reread_df = read_excel(bio, index_col=0)
        tm.assert_frame_equal(df, reread_df) 
Example #18
Source File: test_sequnpack.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_readbytes(self):
        unpacker = Unpacker(read_size=3)
        unpacker.feed(b'foobar')
        assert unpacker.unpack() == ord(b'f')
        assert unpacker.read_bytes(3) == b'oob'
        assert unpacker.unpack() == ord(b'a')
        assert unpacker.unpack() == ord(b'r')

        # Test buffer refill
        unpacker = Unpacker(compat.BytesIO(b'foobar'), read_size=3)
        assert unpacker.unpack() == ord(b'f')
        assert unpacker.read_bytes(3) == b'oob'
        assert unpacker.unpack() == ord(b'a')
        assert unpacker.unpack() == ord(b'r') 
Example #19
Source File: test_pack.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def testMapSize(self, sizes=[0, 5, 50, 1000]):
        bio = compat.BytesIO()
        packer = Packer()
        for size in sizes:
            bio.write(packer.pack_map_header(size))
            for i in range(size):
                bio.write(packer.pack(i))  # key
                bio.write(packer.pack(i * 2))  # value

        bio.seek(0)
        unpacker = Unpacker(bio)
        for size in sizes:
            assert unpacker.unpack() == {i: i * 2 for i in range(size)} 
Example #20
Source File: packers.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def __iter__(self):

        needs_closing = True
        try:

            # see if we have an actual file
            if isinstance(self.path, compat.string_types):

                try:
                    path_exists = os.path.exists(self.path)
                except TypeError:
                    path_exists = False

                if path_exists:
                    fh = open(self.path, 'rb')
                else:
                    fh = compat.BytesIO(self.path)

            else:

                if not hasattr(self.path, 'read'):
                    fh = compat.BytesIO(self.path)

                else:

                    # a file-like
                    needs_closing = False
                    fh = self.path

            unpacker = unpack(fh)
            for o in unpacker:
                yield o
        finally:
            if needs_closing:
                fh.close() 
Example #21
Source File: test_common.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_bytes_io_input(all_parsers):
    if compat.PY2:
        pytest.skip("Bytes-related test does not need to work on Python 2.x")

    encoding = "cp1255"
    parser = all_parsers

    data = BytesIO("שלום:1234\n562:123".encode(encoding))
    result = parser.read_csv(data, sep=":", encoding=encoding)

    expected = DataFrame([[562, 123]], columns=["שלום", "1234"])
    tm.assert_frame_equal(result, expected) 
Example #22
Source File: packers.py    From vnpy_crypto with MIT License
def __iter__(self):

        needs_closing = True
        try:

            # see if we have an actual file
            if isinstance(self.path, compat.string_types):

                try:
                    path_exists = os.path.exists(self.path)
                except TypeError:
                    path_exists = False

                if path_exists:
                    fh = open(self.path, 'rb')
                else:
                    fh = compat.BytesIO(self.path)

            else:

                if not hasattr(self.path, 'read'):
                    fh = compat.BytesIO(self.path)

                else:

                    # a file-like
                    needs_closing = False
                    fh = self.path

            unpacker = unpack(fh)
            for o in unpacker:
                yield o
        finally:
            if needs_closing:
                fh.close() 
Example #23
Source File: common.py    From Computable with MIT License
def _pickle_array(arr):
    arr = arr.view(np.ndarray)

    buf = BytesIO()
    write_array(buf, arr)

    return buf.getvalue() 
Example #24
Source File: test_network.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_read_csv_handles_boto_s3_object(self,
                                             s3_resource,
                                             tips_file):
        # see gh-16135

        s3_object = s3_resource.meta.client.get_object(
            Bucket='pandas-test',
            Key='tips.csv')

        result = read_csv(BytesIO(s3_object["Body"].read()), encoding='utf8')
        assert isinstance(result, DataFrame)
        assert not result.empty

        expected = read_csv(tips_file)
        tm.assert_frame_equal(result, expected) 
Example #25
Source File: test_c_parser_only.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_buffer_rd_bytes_bad_unicode(c_parser_only):
    # see gh-22748
    parser = c_parser_only
    t = BytesIO(b"\xB0")

    if PY3:
        msg = "'utf-8' codec can't encode character"
        t = TextIOWrapper(t, encoding="ascii", errors="surrogateescape")
    else:
        msg = "'utf8' codec can't decode byte"

    with pytest.raises(UnicodeError, match=msg):
        parser.read_csv(t, encoding="UTF-8") 
Example #26
Source File: test_read_fwf.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_bytes_io_input():
    if not compat.PY3:
        pytest.skip("Bytes-related test - only needs to work on Python 3")

    result = read_fwf(BytesIO("שלום\nשלום".encode('utf8')),
                      widths=[2, 2], encoding="utf8")
    expected = DataFrame([["של", "ום"]], columns=["של", "ום"])
    tm.assert_frame_equal(result, expected) 
Example #27
Source File: test_packers.py    From Computable with MIT License
def test_string_io(self):

        df = DataFrame(np.random.randn(10, 2))
        s = df.to_msgpack(None)
        result = read_msgpack(s)
        tm.assert_frame_equal(result, df)

        s = df.to_msgpack()
        result = read_msgpack(s)
        tm.assert_frame_equal(result, df)

        s = df.to_msgpack()
        result = read_msgpack(compat.BytesIO(s))
        tm.assert_frame_equal(result, df)

        s = to_msgpack(None, df)
        result = read_msgpack(s)
        tm.assert_frame_equal(result, df)

        with ensure_clean(self.path) as p:

            s = df.to_msgpack()
            fh = open(p, 'wb')
            fh.write(s)
            fh.close()
            result = read_msgpack(p)
            tm.assert_frame_equal(result, df) 
Example #28
Source File: packers.py    From Computable with MIT License
def __iter__(self):

        needs_closing = True
        try:

            # see if we have an actual file
            if isinstance(self.path, compat.string_types):

                try:
                    path_exists = os.path.exists(self.path)
                except TypeError:
                    path_exists = False

                if path_exists:
                    fh = open(self.path, 'rb')
                else:
                    fh = compat.BytesIO(self.path)

            else:

                if not hasattr(self.path, 'read'):
                    fh = compat.BytesIO(self.path)

                else:

                    # a file-like
                    needs_closing = False
                    fh = self.path

            unpacker = unpack(fh)
            for o in unpacker:
                yield o
        finally:
            if needs_closing:
                fh.close() 
Example #29
Source File: packers.py    From Computable with MIT License
def to_msgpack(path_or_buf, *args, **kwargs):
    """
    msgpack (serialize) object to input file path

    THIS IS AN EXPERIMENTAL LIBRARY and the storage format
    may not be stable until a future release.

    Parameters
    ----------
    path_or_buf : string File path, buffer-like, or None
                  if None, return generated string
    args : an object or objects to serialize
    append : boolean whether to append to an existing msgpack
             (default is False)
    compress : type of compressor (zlib or blosc), default to None (no
               compression)
    """
    global compressor
    compressor = kwargs.pop('compress', None)
    append = kwargs.pop('append', None)
    if append:
        mode = 'a+b'
    else:
        mode = 'wb'

    def writer(fh):
        for a in args:
            fh.write(pack(a, **kwargs))

    if isinstance(path_or_buf, compat.string_types):
        with open(path_or_buf, mode) as fh:
            writer(fh)
    elif path_or_buf is None:
        buf = compat.BytesIO()
        writer(buf)
        return buf.getvalue()
    else:
        writer(path_or_buf) 
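A quick usage note on the function above: passing path_or_buf=None makes it return the packed bytes, which compat.BytesIO can then wrap to read the data back. A hedged sketch, assuming the deprecated pandas 0.x msgpack API and a DataFrame df as in the other examples:

packed = to_msgpack(None, df)                  # None -> return the packed bytes
result = read_msgpack(compat.BytesIO(packed))  # wrap bytes in a buffer to unpack
tm.assert_frame_equal(result, df)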
Example #30
Source File: common.py    From Computable with MIT License
def _unpickle_array(bytes):
    arr = read_array(BytesIO(bytes))

    # All datetimes should be stored as M8[ns].  When unpickling with
    # numpy 1.6, it will read these as M8[us].  So this ensures all
    # datetime64 types are read as M8[ns]
    if is_datetime64_dtype(arr):
        arr = arr.view(_NS_DTYPE)

    return arr