Python pandas.compat.BytesIO() Examples
The following are 30 code examples of pandas.compat.BytesIO(), collected from open-source projects. The source file, project, and license for each example are listed above it.
You may also want to check out the other available functions and classes of the pandas.compat module.
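As background, the pre-1.0 pandas releases these examples target re-export an in-memory binary buffer as pandas.compat.BytesIO (io.BytesIO on Python 3), so code can treat a bytes object like a binary file. Below is a minimal usage sketch, assuming such a pandas version is installed; later releases dropped BytesIO from pandas.compat.

import pandas as pd
from pandas import compat  # pre-1.0 pandas only; later releases removed BytesIO from compat

# Wrap raw bytes in an in-memory, file-like binary buffer.
buf = compat.BytesIO(b"a,b\n1,2\n3,4\n")

# Any pandas reader that accepts a file handle also accepts the buffer.
df = pd.read_csv(buf)
print(df)
#    a  b
# 0  1  2
# 1  3  4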
Example #1
Source File: test_read_fwf.py From recruit with Apache License 2.0 | 6 votes

def test_variable_width_unicode():
    if not compat.PY3:
        pytest.skip("Bytes-related test - only needs to work on Python 3")

    data = """
שלום שלום
ום   שלל
של   ום
""".strip("\r\n")
    encoding = "utf8"
    kwargs = dict(header=None, encoding=encoding)

    expected = read_fwf(BytesIO(data.encode(encoding)),
                        colspecs=[(0, 4), (5, 9)], **kwargs)
    result = read_fwf(BytesIO(data.encode(encoding)), **kwargs)
    tm.assert_frame_equal(result, expected)
Example #2
Source File: sas_xport.py From recruit with Apache License 2.0 | 6 votes

def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1',
             chunksize=None):

    self._encoding = encoding
    self._lines_read = 0
    self._index = index
    self._chunksize = chunksize

    if isinstance(filepath_or_buffer, str):
        (filepath_or_buffer, encoding,
         compression, should_close) = get_filepath_or_buffer(
            filepath_or_buffer, encoding=encoding)

    if isinstance(filepath_or_buffer, (str, compat.text_type, bytes)):
        self.filepath_or_buffer = open(filepath_or_buffer, 'rb')
    else:
        # Copy to BytesIO, and ensure no encoding
        contents = filepath_or_buffer.read()
        try:
            contents = contents.encode(self._encoding)
        except UnicodeEncodeError:
            pass
        self.filepath_or_buffer = compat.BytesIO(contents)

    self._read_header()
Example #3
Source File: test_network.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes

def test_read_csv_chunked_download(self, s3_resource, caplog):
    # 8 MB; S3FS uses 5 MB chunks
    df = DataFrame(np.random.randn(100000, 4), columns=list('abcd'))
    buf = BytesIO()
    str_buf = StringIO()

    df.to_csv(str_buf)

    buf = BytesIO(str_buf.getvalue().encode('utf-8'))

    s3_resource.Bucket("pandas-test").put_object(
        Key="large-file.csv", Body=buf)

    with caplog.at_level(logging.DEBUG, logger='s3fs.core'):
        read_csv("s3://pandas-test/large-file.csv", nrows=5)
        # log of fetch_range (start, stop)
        assert ((0, 5505024) in {x.args[-2:] for x in caplog.records})
Example #4
Source File: test_read_fwf.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes

def test_variable_width_unicode():
    if not compat.PY3:
        pytest.skip("Bytes-related test - only needs to work on Python 3")

    data = """
שלום שלום
ום   שלל
של   ום
""".strip("\r\n")
    encoding = "utf8"
    kwargs = dict(header=None, encoding=encoding)

    expected = read_fwf(BytesIO(data.encode(encoding)),
                        colspecs=[(0, 4), (5, 9)], **kwargs)
    result = read_fwf(BytesIO(data.encode(encoding)), **kwargs)
    tm.assert_frame_equal(result, expected)
Example #5
Source File: test_network.py From vnpy_crypto with MIT License | 6 votes

def test_read_csv_chunked_download(self, s3_resource, caplog):
    # 8 MB; S3FS uses 5 MB chunks
    df = DataFrame(np.random.randn(100000, 4), columns=list('abcd'))
    buf = BytesIO()
    str_buf = StringIO()

    df.to_csv(str_buf)

    buf = BytesIO(str_buf.getvalue().encode('utf-8'))

    s3_resource.Bucket("pandas-test").put_object(
        Key="large-file.csv", Body=buf)

    with caplog.at_level(logging.DEBUG, logger='s3fs.core'):
        read_csv("s3://pandas-test/large-file.csv", nrows=5)
        # log of fetch_range (start, stop)
        assert ((0, 5505024) in set(x.args[-2:] for x in caplog.records))
Example #6
Source File: multithread.py From vnpy_crypto with MIT License | 6 votes

def test_multithread_stringio_read_csv(self):
    # see gh-11786
    max_row_range = 10000
    num_files = 100

    bytes_to_df = [
        '\n'.join(
            ['%d,%d,%d' % (i, i, i) for i in range(max_row_range)]
        ).encode() for j in range(num_files)]
    files = [BytesIO(b) for b in bytes_to_df]

    # read all files in many threads
    pool = ThreadPool(8)

    results = pool.map(self.read_csv, files)
    first_result = results[0]

    for result in results:
        tm.assert_frame_equal(first_result, result)
Example #7
Source File: sas_xport.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes

def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1',
             chunksize=None):

    self._encoding = encoding
    self._lines_read = 0
    self._index = index
    self._chunksize = chunksize

    if isinstance(filepath_or_buffer, str):
        (filepath_or_buffer, encoding,
         compression, should_close) = get_filepath_or_buffer(
            filepath_or_buffer, encoding=encoding)

    if isinstance(filepath_or_buffer, (str, compat.text_type, bytes)):
        self.filepath_or_buffer = open(filepath_or_buffer, 'rb')
    else:
        # Copy to BytesIO, and ensure no encoding
        contents = filepath_or_buffer.read()
        try:
            contents = contents.encode(self._encoding)
        except UnicodeEncodeError:
            pass
        self.filepath_or_buffer = compat.BytesIO(contents)

    self._read_header()
Example #8
Source File: sas_xport.py From vnpy_crypto with MIT License | 6 votes

def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1',
             chunksize=None):

    self._encoding = encoding
    self._lines_read = 0
    self._index = index
    self._chunksize = chunksize

    if isinstance(filepath_or_buffer, str):
        (filepath_or_buffer, encoding,
         compression, should_close) = get_filepath_or_buffer(
            filepath_or_buffer, encoding=encoding)

    if isinstance(filepath_or_buffer, (str, compat.text_type, bytes)):
        self.filepath_or_buffer = open(filepath_or_buffer, 'rb')
    else:
        # Copy to BytesIO, and ensure no encoding
        contents = filepath_or_buffer.read()
        try:
            contents = contents.encode(self._encoding)
        except UnicodeEncodeError:
            pass
        self.filepath_or_buffer = compat.BytesIO(contents)

    self._read_header()
Example #9
Source File: test_network.py From recruit with Apache License 2.0 | 6 votes

def test_read_csv_chunked_download(self, s3_resource, caplog):
    # 8 MB; S3FS uses 5 MB chunks
    df = DataFrame(np.random.randn(100000, 4), columns=list('abcd'))
    buf = BytesIO()
    str_buf = StringIO()

    df.to_csv(str_buf)

    buf = BytesIO(str_buf.getvalue().encode('utf-8'))

    s3_resource.Bucket("pandas-test").put_object(
        Key="large-file.csv", Body=buf)

    with caplog.at_level(logging.DEBUG, logger='s3fs.core'):
        read_csv("s3://pandas-test/large-file.csv", nrows=5)
        # log of fetch_range (start, stop)
        assert ((0, 5505024) in {x.args[-2:] for x in caplog.records})
Example #10
Source File: sas_xport.py From Splunking-Crime with GNU Affero General Public License v3.0 | 6 votes

def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1',
             chunksize=None):

    self._encoding = encoding
    self._lines_read = 0
    self._index = index
    self._chunksize = chunksize

    if isinstance(filepath_or_buffer, str):
        filepath_or_buffer, encoding, compression = get_filepath_or_buffer(
            filepath_or_buffer, encoding=encoding)

    if isinstance(filepath_or_buffer, (str, compat.text_type, bytes)):
        self.filepath_or_buffer = open(filepath_or_buffer, 'rb')
    else:
        # Copy to BytesIO, and ensure no encoding
        contents = filepath_or_buffer.read()
        try:
            contents = contents.encode(self._encoding)
        except UnicodeEncodeError:
            pass
        self.filepath_or_buffer = compat.BytesIO(contents)

    self._read_header()
Example #11
Source File: python_parser_only.py From elasticintel with GNU General Public License v3.0 | 5 votes

def test_sniff_delimiter(self):
    text = """index|A|B|C
foo|1|2|3
bar|4|5|6
baz|7|8|9
"""
    data = self.read_csv(StringIO(text), index_col=0, sep=None)
    tm.assert_index_equal(data.index,
                          Index(['foo', 'bar', 'baz'], name='index'))

    data2 = self.read_csv(StringIO(text), index_col=0, delimiter='|')
    tm.assert_frame_equal(data, data2)

    text = """ignore this
ignore this too
index|A|B|C
foo|1|2|3
bar|4|5|6
baz|7|8|9
"""
    data3 = self.read_csv(StringIO(text), index_col=0,
                          sep=None, skiprows=2)
    tm.assert_frame_equal(data, data3)

    text = u("""ignore this
ignore this too
index|A|B|C
foo|1|2|3
bar|4|5|6
baz|7|8|9
""").encode('utf-8')

    s = BytesIO(text)
    if compat.PY3:
        # somewhat False since the code never sees bytes
        from io import TextIOWrapper
        s = TextIOWrapper(s, encoding='utf-8')

    data4 = self.read_csv(s, index_col=0, sep=None,
                          skiprows=2, encoding='utf-8')
    tm.assert_frame_equal(data, data4)
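A side note on the TextIOWrapper trick in Example #11: io.TextIOWrapper layers on-the-fly decoding over any binary stream, so a BytesIO full of encoded bytes can be handed to code that expects text. A minimal stdlib-only sketch:

import io

raw = io.BytesIO("index|A|B|C\nfoo|1|2|3\n".encode("utf-8"))

# TextIOWrapper decodes the underlying binary stream lazily,
# yielding str lines instead of bytes.
text = io.TextIOWrapper(raw, encoding="utf-8")
print(text.readline())  # 'index|A|B|C\n' (as str, not bytes)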
Example #12
Source File: pickle.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes

def _pickle_array(arr):
    arr = arr.view(np.ndarray)

    buf = BytesIO()
    write_array(buf, arr)

    return buf.getvalue()
Example #13
Source File: test_pack.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes

def testArraySize(self, sizes=[0, 5, 50, 1000]):
    bio = compat.BytesIO()
    packer = Packer()
    for size in sizes:
        bio.write(packer.pack_array_header(size))
        for i in range(size):
            bio.write(packer.pack(i))

    bio.seek(0)
    unpacker = Unpacker(bio, use_list=1)
    for size in sizes:
        assert unpacker.unpack() == list(range(size))
Example #14
Source File: test_read_fwf.py From recruit with Apache License 2.0 | 5 votes

def test_bytes_io_input():
    if not compat.PY3:
        pytest.skip("Bytes-related test - only needs to work on Python 3")

    result = read_fwf(BytesIO("שלום\nשלום".encode('utf8')),
                      widths=[2, 2], encoding="utf8")
    expected = DataFrame([["של", "ום"]], columns=["של", "ום"])
    tm.assert_frame_equal(result, expected)
Example #15
Source File: pickle.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes

def _unpickle_array(bytes):
    arr = read_array(BytesIO(bytes))

    # All datetimes should be stored as M8[ns].  When unpickling
    # with numpy 1.6, it will read these as M8[us], so this ensures
    # all datetime64 types are read as M8[ns].
    if is_datetime64_dtype(arr):
        arr = arr.view(_NS_DTYPE)

    return arr
Example #16
Source File: pickle.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes

def _pickle_array(arr):
    arr = arr.view(np.ndarray)

    buf = BytesIO()
    write_array(buf, arr)

    return buf.getvalue()
Example #17
Source File: test_excel.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes

def test_bytes_io(self, merge_cells, engine, ext):
    # see gh-7074
    bio = BytesIO()
    df = DataFrame(np.random.randn(10, 2))

    # Pass engine explicitly, as there is no file path to infer from.
    writer = ExcelWriter(bio, engine=engine)
    df.to_excel(writer)
    writer.save()

    bio.seek(0)
    reread_df = read_excel(bio, index_col=0)
    tm.assert_frame_equal(df, reread_df)
Example #18
Source File: test_sequnpack.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes

def test_readbytes(self):
    unpacker = Unpacker(read_size=3)
    unpacker.feed(b'foobar')
    assert unpacker.unpack() == ord(b'f')
    assert unpacker.read_bytes(3) == b'oob'
    assert unpacker.unpack() == ord(b'a')
    assert unpacker.unpack() == ord(b'r')

    # Test buffer refill
    unpacker = Unpacker(compat.BytesIO(b'foobar'), read_size=3)
    assert unpacker.unpack() == ord(b'f')
    assert unpacker.read_bytes(3) == b'oob'
    assert unpacker.unpack() == ord(b'a')
    assert unpacker.unpack() == ord(b'r')
Example #19
Source File: test_pack.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes

def testMapSize(self, sizes=[0, 5, 50, 1000]):
    bio = compat.BytesIO()
    packer = Packer()
    for size in sizes:
        bio.write(packer.pack_map_header(size))
        for i in range(size):
            bio.write(packer.pack(i))      # key
            bio.write(packer.pack(i * 2))  # value

    bio.seek(0)
    unpacker = Unpacker(bio)
    for size in sizes:
        assert unpacker.unpack() == {i: i * 2 for i in range(size)}
Example #20
Source File: packers.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes

def __iter__(self):
    needs_closing = True
    try:
        # see if we have an actual file
        if isinstance(self.path, compat.string_types):
            try:
                path_exists = os.path.exists(self.path)
            except TypeError:
                path_exists = False

            if path_exists:
                fh = open(self.path, 'rb')
            else:
                fh = compat.BytesIO(self.path)
        else:
            if not hasattr(self.path, 'read'):
                fh = compat.BytesIO(self.path)
            else:
                # a file-like
                needs_closing = False
                fh = self.path

        unpacker = unpack(fh)
        for o in unpacker:
            yield o
    finally:
        if needs_closing:
            fh.close()
Example #21
Source File: test_common.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes

def test_bytes_io_input(all_parsers):
    if compat.PY2:
        pytest.skip("Bytes-related test does not need to work on Python 2.x")

    encoding = "cp1255"
    parser = all_parsers

    data = BytesIO("שלום:1234\n562:123".encode(encoding))
    result = parser.read_csv(data, sep=":", encoding=encoding)

    expected = DataFrame([[562, 123]], columns=["שלום", "1234"])
    tm.assert_frame_equal(result, expected)
Example #22
Source File: packers.py From vnpy_crypto with MIT License | 5 votes

def __iter__(self):
    needs_closing = True
    try:
        # see if we have an actual file
        if isinstance(self.path, compat.string_types):
            try:
                path_exists = os.path.exists(self.path)
            except TypeError:
                path_exists = False

            if path_exists:
                fh = open(self.path, 'rb')
            else:
                fh = compat.BytesIO(self.path)
        else:
            if not hasattr(self.path, 'read'):
                fh = compat.BytesIO(self.path)
            else:
                # a file-like
                needs_closing = False
                fh = self.path

        unpacker = unpack(fh)
        for o in unpacker:
            yield o
    finally:
        if needs_closing:
            fh.close()
Example #23
Source File: common.py From Computable with MIT License | 5 votes

def _pickle_array(arr):
    arr = arr.view(np.ndarray)

    buf = BytesIO()
    write_array(buf, arr)

    return buf.getvalue()
Example #24
Source File: test_network.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes

def test_read_csv_handles_boto_s3_object(self, s3_resource, tips_file):
    # see gh-16135

    s3_object = s3_resource.meta.client.get_object(
        Bucket='pandas-test', Key='tips.csv')

    result = read_csv(BytesIO(s3_object["Body"].read()),
                      encoding='utf8')
    assert isinstance(result, DataFrame)
    assert not result.empty

    expected = read_csv(tips_file)
    tm.assert_frame_equal(result, expected)
Example #25
Source File: test_c_parser_only.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes

def test_buffer_rd_bytes_bad_unicode(c_parser_only):
    # see gh-22748
    parser = c_parser_only
    t = BytesIO(b"\xB0")

    if PY3:
        msg = "'utf-8' codec can't encode character"
        t = TextIOWrapper(t, encoding="ascii", errors="surrogateescape")
    else:
        msg = "'utf8' codec can't decode byte"

    with pytest.raises(UnicodeError, match=msg):
        parser.read_csv(t, encoding="UTF-8")
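For context on Example #25: the surrogateescape error handler lets undecodable bytes pass through a text stream as lone surrogate code points, and strictly re-encoding those surrogates raises the UnicodeError the test matches. A small stdlib-only sketch of that round trip:

raw = b"\xB0"  # not valid ASCII, nor a valid UTF-8 start byte
s = raw.decode("ascii", errors="surrogateescape")
assert s == "\udcb0"  # the byte survives as a lone surrogate
assert s.encode("ascii", errors="surrogateescape") == raw  # lossless round trip

try:
    s.encode("utf-8")  # strict encoding of a surrogate fails
except UnicodeEncodeError as exc:
    print(exc)  # "'utf-8' codec can't encode character '\udcb0' ..."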
Example #26
Source File: test_read_fwf.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes

def test_bytes_io_input():
    if not compat.PY3:
        pytest.skip("Bytes-related test - only needs to work on Python 3")

    result = read_fwf(BytesIO("שלום\nשלום".encode('utf8')),
                      widths=[2, 2], encoding="utf8")
    expected = DataFrame([["של", "ום"]], columns=["של", "ום"])
    tm.assert_frame_equal(result, expected)
Example #27
Source File: test_packers.py From Computable with MIT License | 5 votes

def test_string_io(self):
    df = DataFrame(np.random.randn(10, 2))
    s = df.to_msgpack(None)
    result = read_msgpack(s)
    tm.assert_frame_equal(result, df)

    s = df.to_msgpack()
    result = read_msgpack(s)
    tm.assert_frame_equal(result, df)

    s = df.to_msgpack()
    result = read_msgpack(compat.BytesIO(s))
    tm.assert_frame_equal(result, df)

    s = to_msgpack(None, df)
    result = read_msgpack(s)
    tm.assert_frame_equal(result, df)

    with ensure_clean(self.path) as p:
        s = df.to_msgpack()
        fh = open(p, 'wb')
        fh.write(s)
        fh.close()
        result = read_msgpack(p)
        tm.assert_frame_equal(result, df)
Example #28
Source File: packers.py From Computable with MIT License | 5 votes

def __iter__(self):
    needs_closing = True
    try:
        # see if we have an actual file
        if isinstance(self.path, compat.string_types):
            try:
                path_exists = os.path.exists(self.path)
            except TypeError:
                path_exists = False

            if path_exists:
                fh = open(self.path, 'rb')
            else:
                fh = compat.BytesIO(self.path)
        else:
            if not hasattr(self.path, 'read'):
                fh = compat.BytesIO(self.path)
            else:
                # a file-like
                needs_closing = False
                fh = self.path

        unpacker = unpack(fh)
        for o in unpacker:
            yield o
    finally:
        if needs_closing:
            fh.close()
Example #29
Source File: packers.py From Computable with MIT License | 5 votes

def to_msgpack(path_or_buf, *args, **kwargs):
    """
    msgpack (serialize) object to input file path

    THIS IS AN EXPERIMENTAL LIBRARY and the storage format
    may not be stable until a future release.

    Parameters
    ----------
    path_or_buf : string File path, buffer-like, or None
        if None, return generated string
    args : an object or objects to serialize
    append : boolean, whether to append to an existing msgpack
        (default is False)
    compress : type of compressor (zlib or blosc), default None
        (no compression)
    """
    global compressor
    compressor = kwargs.pop('compress', None)
    append = kwargs.pop('append', None)
    if append:
        mode = 'a+b'
    else:
        mode = 'wb'

    def writer(fh):
        for a in args:
            fh.write(pack(a, **kwargs))

    if isinstance(path_or_buf, compat.string_types):
        with open(path_or_buf, mode) as fh:
            writer(fh)
    elif path_or_buf is None:
        buf = compat.BytesIO()
        writer(buf)
        return buf.getvalue()
    else:
        writer(path_or_buf)
Example #30
Source File: common.py From Computable with MIT License | 5 votes

def _unpickle_array(bytes):
    arr = read_array(BytesIO(bytes))

    # All datetimes should be stored as M8[ns].  When unpickling
    # with numpy 1.6, it will read these as M8[us], so this ensures
    # all datetime64 types are read as M8[ns].
    if is_datetime64_dtype(arr):
        arr = arr.view(_NS_DTYPE)

    return arr