Python pandas.io.parsers.TextParser() Examples
The following are 20 code examples of pandas.io.parsers.TextParser().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas.io.parsers, or try the search function.
Example #1
Source File: test_common.py From recruit with Apache License 2.0 | 6 votes |
def test_reader_list(all_parsers):
    """Check that chunked ``TextParser`` output matches ``read_csv`` slices.

    The CSV text is pre-split into rows with ``csv.reader`` and fed to
    ``TextParser`` with ``chunksize=2``; each resulting chunk is compared
    with the corresponding slice of the frame produced by
    ``all_parsers.read_csv`` on the same text.

    :param all_parsers: object exposing ``read_csv`` (presumably a pytest
        fixture supplied by the test suite -- confirm against conftest).
    """
    # One CSV record per line: the row separators must be real newlines,
    # otherwise csv.reader sees a single row.
    data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
    parser = all_parsers
    kwargs = dict(index_col=0)

    lines = list(csv.reader(StringIO(data)))
    reader = TextParser(lines, chunksize=2, **kwargs)

    expected = parser.read_csv(StringIO(data), **kwargs)
    chunks = list(reader)

    # Six data rows with chunksize=2 -> three chunks of two rows each.
    tm.assert_frame_equal(chunks[0], expected[:2])
    tm.assert_frame_equal(chunks[1], expected[2:4])
    tm.assert_frame_equal(chunks[2], expected[4:])
Example #2
Source File: html.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def _data_to_frame(**kwargs):
    """Turn parsed table sections into a DataFrame using ``TextParser``.

    ``kwargs`` must contain ``data`` (a ``(head, body, foot)`` triple),
    ``header`` and ``skiprows``; ``data`` and ``header`` are removed,
    ``skiprows`` is normalised through ``_get_skiprows``, and everything
    left over is forwarded to ``TextParser``.

    :returns: the result of ``TextParser(...).read()``.
    """
    head, body, foot = kwargs.pop('data')
    header = kwargs.pop('header')
    kwargs['skiprows'] = _get_skiprows(kwargs['skiprows'])

    if head:
        head_positions = lrange(len(head))
        body = head + body
        if header is None:
            # special case when a table has <th> elements: a single head
            # row becomes header=0, several rows become a list of positions
            if head_positions == [0]:
                header = 0
            else:
                header = head_positions

    if foot:
        body += [foot]

    # fill out elements of body that are "ragged"
    _expand_elements(body)

    return TextParser(body, header=header, **kwargs).read()
Example #3
Source File: html.py From Splunking-Crime with GNU Affero General Public License v3.0 | 6 votes |
def _data_to_frame(**kwargs):
    """Turn parsed table sections into a DataFrame using ``TextParser``.

    ``kwargs`` must contain ``data`` (a ``(head, body, foot)`` triple),
    ``header`` and ``skiprows``; ``data`` and ``header`` are removed,
    ``skiprows`` is normalised through ``_get_skiprows``, and everything
    left over is forwarded to ``TextParser``.

    :returns: the result of ``TextParser(...).read()``.
    """
    head, body, foot = kwargs.pop('data')
    header = kwargs.pop('header')
    kwargs['skiprows'] = _get_skiprows(kwargs['skiprows'])

    if head:
        head_positions = lrange(len(head))
        body = head + body
        if header is None:
            # special case when a table has <th> elements: a single head
            # row becomes header=0, several rows become a list of positions
            if head_positions == [0]:
                header = 0
            else:
                header = head_positions

    if foot:
        body += [foot]

    # fill out elements of body that are "ragged"
    _expand_elements(body)

    return TextParser(body, header=header, **kwargs).read()
Example #4
Source File: gspread_dataframe.py From gspread-dataframe with MIT License | 6 votes |
def get_as_dataframe(worksheet, evaluate_formulas=False, **options):
    r"""
    Read a worksheet into a pandas DataFrame.

    :param worksheet: the worksheet to read.
    :param evaluate_formulas: forwarded to ``_get_all_values``; when True
            the evaluated cell value is requested, otherwise the formula
            itself if one is present. Defaults to False.
    :param \*\*options: keyword options handed unchanged to
            pandas.io.parsers.TextParser (which options are accepted depends
            on the installed pandas version). If ``nrows`` is present it is
            also passed to ``read``.
    :returns: pandas.DataFrame
    """
    cell_rows = _get_all_values(worksheet, evaluate_formulas)
    text_parser = TextParser(cell_rows, **options)
    # A missing 'nrows' yields None, i.e. the same as the explicit default.
    row_limit = options.get('nrows')
    return text_parser.read(row_limit)
Example #5
Source File: html.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def _data_to_frame(**kwargs):
    """Turn parsed table sections into a DataFrame using ``TextParser``.

    ``kwargs`` must contain ``data`` (a ``(head, body, foot)`` triple),
    ``header`` and ``skiprows``; ``data`` and ``header`` are removed,
    ``skiprows`` is normalised through ``_get_skiprows``, and everything
    left over is forwarded to ``TextParser``.

    :returns: the result of ``TextParser(...).read()``.
    """
    head, body, foot = kwargs.pop('data')
    header = kwargs.pop('header')
    kwargs['skiprows'] = _get_skiprows(kwargs['skiprows'])

    if head:
        body = head + body

        # Infer header when there is a <thead> or top <th>-only rows
        if header is None and len(head) == 1:
            header = 0
        elif header is None:
            # ignore all-empty-text rows
            header = [
                position
                for position, cells in enumerate(head)
                if any(cell for cell in cells)
            ]

    if foot:
        body += foot

    # fill out elements of body that are "ragged"
    _expand_elements(body)

    return TextParser(body, header=header, **kwargs).read()
Example #6
Source File: test_common.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_reader_list_skiprows(all_parsers):
    """Check ``TextParser`` chunking over pre-split rows with ``skiprows``.

    The CSV text is split into rows with ``csv.reader`` and parsed with
    ``chunksize=2`` and ``skiprows=[1]``; the first chunk must equal rows
    ``1:3`` of the frame that ``all_parsers.read_csv`` builds from the
    same text without skipping.

    :param all_parsers: object exposing ``read_csv`` (presumably a pytest
        fixture supplied by the test suite -- confirm against conftest).
    """
    # One CSV record per line: the row separators must be real newlines,
    # otherwise csv.reader sees a single row.
    data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
    parser = all_parsers
    kwargs = dict(index_col=0)

    lines = list(csv.reader(StringIO(data)))
    reader = TextParser(lines, chunksize=2, skiprows=[1], **kwargs)

    expected = parser.read_csv(StringIO(data), **kwargs)
    chunks = list(reader)

    # Row 1 of `lines` (the first record after the header row) is skipped,
    # so the first chunk lines up with expected rows 1 and 2.
    tm.assert_frame_equal(chunks[0], expected[1:3])
Example #7
Source File: test_common.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_reader_list(all_parsers):
    """Check that chunked ``TextParser`` output matches ``read_csv`` slices.

    The CSV text is pre-split into rows with ``csv.reader`` and fed to
    ``TextParser`` with ``chunksize=2``; each resulting chunk is compared
    with the corresponding slice of the frame produced by
    ``all_parsers.read_csv`` on the same text.

    :param all_parsers: object exposing ``read_csv`` (presumably a pytest
        fixture supplied by the test suite -- confirm against conftest).
    """
    # One CSV record per line: the row separators must be real newlines,
    # otherwise csv.reader sees a single row.
    data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
    parser = all_parsers
    kwargs = dict(index_col=0)

    lines = list(csv.reader(StringIO(data)))
    reader = TextParser(lines, chunksize=2, **kwargs)

    expected = parser.read_csv(StringIO(data), **kwargs)
    chunks = list(reader)

    # Six data rows with chunksize=2 -> three chunks of two rows each.
    tm.assert_frame_equal(chunks[0], expected[:2])
    tm.assert_frame_equal(chunks[1], expected[2:4])
    tm.assert_frame_equal(chunks[2], expected[4:])
Example #8
Source File: html.py From Computable with MIT License | 6 votes |
def _data_to_frame(data, header, index_col, skiprows, infer_types,
                   parse_dates, tupleize_cols, thousands):
    """Build a DataFrame from a ``(head, body, foot)`` triple via ``TextParser``.

    :param data: three-item sequence; the third item (the footer) is ignored.
    :param header: header argument for ``TextParser``; when it is None and
        ``head`` is non-empty it is replaced by 0.
    :param skiprows: normalised through ``_get_skiprows`` before use.
    :param infer_types: when truthy the frame is passed through
        ``convert_objects(convert_dates='coerce')``; otherwise every cell
        is mapped through ``text_type``.
    :param index_col, parse_dates, tupleize_cols, thousands: forwarded
        unchanged to ``TextParser``.
    :returns: the resulting DataFrame.
    """
    # third item is the footer, which is rarely used: ignore for now
    head, body, _ = data

    if head:
        body = [head] + body
        # special case when a table has <th> elements
        # NOTE(review): nesting under `if head` is reconstructed from the
        # flattened source -- confirm against the upstream file.
        if header is None:
            header = 0

    # fill out elements of body that are "ragged"
    _expand_elements(body)

    parser_options = dict(
        header=header,
        index_col=index_col,
        skiprows=_get_skiprows(skiprows),
        parse_dates=parse_dates,
        tupleize_cols=tupleize_cols,
        thousands=thousands,
    )
    frame = TextParser(body, **parser_options).read()

    # TODO: rm this code so infer_types has no effect in 0.14
    if infer_types:
        return frame.convert_objects(convert_dates='coerce')
    return frame.applymap(text_type)
Example #9
Source File: html.py From vnpy_crypto with MIT License | 6 votes |
def _data_to_frame(**kwargs):
    """Turn parsed table sections into a DataFrame using ``TextParser``.

    ``kwargs`` must contain ``data`` (a ``(head, body, foot)`` triple),
    ``header`` and ``skiprows``; ``data`` and ``header`` are removed,
    ``skiprows`` is normalised through ``_get_skiprows``, and everything
    left over is forwarded to ``TextParser``.

    :returns: the result of ``TextParser(...).read()``.
    """
    head, body, foot = kwargs.pop('data')
    header = kwargs.pop('header')
    kwargs['skiprows'] = _get_skiprows(kwargs['skiprows'])

    if head:
        head_positions = lrange(len(head))
        body = head + body
        if header is None:
            # special case when a table has <th> elements: a single head
            # row becomes header=0, several rows become a list of positions
            if head_positions == [0]:
                header = 0
            else:
                header = head_positions

    if foot:
        body += [foot]

    # fill out elements of body that are "ragged"
    _expand_elements(body)

    return TextParser(body, header=header, **kwargs).read()
Example #10
Source File: html.py From recruit with Apache License 2.0 | 6 votes |
def _data_to_frame(**kwargs):
    """Turn parsed table sections into a DataFrame using ``TextParser``.

    ``kwargs`` must contain ``data`` (a ``(head, body, foot)`` triple),
    ``header`` and ``skiprows``; ``data`` and ``header`` are removed,
    ``skiprows`` is normalised through ``_get_skiprows``, and everything
    left over is forwarded to ``TextParser``.

    :returns: the result of ``TextParser(...).read()``.
    """
    head, body, foot = kwargs.pop('data')
    header = kwargs.pop('header')
    kwargs['skiprows'] = _get_skiprows(kwargs['skiprows'])

    if head:
        body = head + body

        # Infer header when there is a <thead> or top <th>-only rows
        if header is None and len(head) == 1:
            header = 0
        elif header is None:
            # ignore all-empty-text rows
            header = [
                position
                for position, cells in enumerate(head)
                if any(cell for cell in cells)
            ]

    if foot:
        body += foot

    # fill out elements of body that are "ragged"
    _expand_elements(body)

    return TextParser(body, header=header, **kwargs).read()
Example #11
Source File: test_common.py From recruit with Apache License 2.0 | 6 votes |
def test_reader_list_skiprows(all_parsers):
    """Check ``TextParser`` chunking over pre-split rows with ``skiprows``.

    The CSV text is split into rows with ``csv.reader`` and parsed with
    ``chunksize=2`` and ``skiprows=[1]``; the first chunk must equal rows
    ``1:3`` of the frame that ``all_parsers.read_csv`` builds from the
    same text without skipping.

    :param all_parsers: object exposing ``read_csv`` (presumably a pytest
        fixture supplied by the test suite -- confirm against conftest).
    """
    # One CSV record per line: the row separators must be real newlines,
    # otherwise csv.reader sees a single row.
    data = """index,A,B,C,D
foo,2,3,4,5
bar,7,8,9,10
baz,12,13,14,15
qux,12,13,14,15
foo2,12,13,14,15
bar2,12,13,14,15
"""
    parser = all_parsers
    kwargs = dict(index_col=0)

    lines = list(csv.reader(StringIO(data)))
    reader = TextParser(lines, chunksize=2, skiprows=[1], **kwargs)

    expected = parser.read_csv(StringIO(data), **kwargs)
    chunks = list(reader)

    # Row 1 of `lines` (the first record after the header row) is skipped,
    # so the first chunk lines up with expected rows 1 and 2.
    tm.assert_frame_equal(chunks[0], expected[1:3])
Example #12
Source File: data.py From Computable with MIT License | 5 votes |
def _parse_options_data(table):
    """Parse an HTML table element into a DataFrame.

    Every ``<tr>`` under ``table`` is collected; the first row is unpacked
    as ``th`` cells to provide column names and the remaining rows are
    unpacked as ``td`` cells to provide the records.

    :param table: element exposing ``xpath`` (presumably an lxml node --
        confirm against the caller).
    :returns: the frame produced by ``TextParser(...).get_chunk()``.
    """
    table_rows = table.xpath('.//tr')
    column_names = _unpack(table_rows[0], kind='th')

    records = []
    for table_row in table_rows[1:]:
        records.append(_unpack(table_row, kind='td'))

    # Use ',' as a thousands separator as we're pulling from the US site.
    text_parser = TextParser(records, names=column_names, na_values=['N/A'],
                             thousands=',')
    return text_parser.get_chunk()
Example #13
Source File: test_common.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_read_data_list(all_parsers):
    """Check that ``TextParser`` over a list of rows matches ``read_csv``.

    The same three records are supplied once as CSV text to
    ``all_parsers.read_csv`` and once as a list of string lists to
    ``TextParser``; a full ``read()`` on the latter must equal the former.

    :param all_parsers: object exposing ``read_csv`` (presumably a pytest
        fixture supplied by the test suite -- confirm against conftest).
    """
    options = {"index_col": 0}
    data = "A,B,C\nfoo,1,2,3\nbar,4,5,6"
    rows = [
        ["A", "B", "C"],
        ["foo", "1", "2", "3"],
        ["bar", "4", "5", "6"],
    ]

    expected = all_parsers.read_csv(StringIO(data), **options)

    text_parser = TextParser(rows, chunksize=2, **options)
    result = text_parser.read()

    tm.assert_frame_equal(result, expected)
Example #14
Source File: common.py From vnpy_crypto with MIT License | 5 votes |
def test_read_text_list(self):
    """Check that ``TextParser`` over a list of rows matches ``self.read_csv``.

    The same three records are parsed from CSV text with ``self.read_csv``
    and from a list of string lists with ``TextParser``; ``read(None)`` on
    the latter must produce an equal frame.
    """
    csv_text = "A,B,C\nfoo,1,2,3\nbar,4,5,6"
    rows = [
        ['A', 'B', 'C'],
        ['foo', '1', '2', '3'],
        ['bar', '4', '5', '6'],
    ]

    expected = self.read_csv(StringIO(csv_text), index_col=0)

    text_parser = TextParser(rows, index_col=0, chunksize=2)
    result = text_parser.read(None)

    tm.assert_frame_equal(result, expected)
Example #15
Source File: common.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_read_text_list(self):
    """Check that ``TextParser`` over a list of rows matches ``self.read_csv``.

    The same three records are parsed from CSV text with ``self.read_csv``
    and from a list of string lists with ``TextParser``; ``read(None)`` on
    the latter must produce an equal frame.
    """
    csv_text = "A,B,C\nfoo,1,2,3\nbar,4,5,6"
    rows = [
        ['A', 'B', 'C'],
        ['foo', '1', '2', '3'],
        ['bar', '4', '5', '6'],
    ]

    expected = self.read_csv(StringIO(csv_text), index_col=0)

    text_parser = TextParser(rows, index_col=0, chunksize=2)
    result = text_parser.read(None)

    tm.assert_frame_equal(result, expected)
Example #16
Source File: test_common.py From recruit with Apache License 2.0 | 5 votes |
def test_read_data_list(all_parsers):
    """Check that ``TextParser`` over a list of rows matches ``read_csv``.

    The same three records are supplied once as CSV text to
    ``all_parsers.read_csv`` and once as a list of string lists to
    ``TextParser``; a full ``read()`` on the latter must equal the former.

    :param all_parsers: object exposing ``read_csv`` (presumably a pytest
        fixture supplied by the test suite -- confirm against conftest).
    """
    options = {"index_col": 0}
    data = "A,B,C\nfoo,1,2,3\nbar,4,5,6"
    rows = [
        ["A", "B", "C"],
        ["foo", "1", "2", "3"],
        ["bar", "4", "5", "6"],
    ]

    expected = all_parsers.read_csv(StringIO(data), **options)

    text_parser = TextParser(rows, chunksize=2, **options)
    result = text_parser.read()

    tm.assert_frame_equal(result, expected)
Example #17
Source File: common.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_read_text_list(self):
    """Check that ``TextParser`` over a list of rows matches ``self.read_csv``.

    The same three records are parsed from CSV text with ``self.read_csv``
    and from a list of string lists with ``TextParser``; ``read(None)`` on
    the latter must produce an equal frame.
    """
    csv_text = "A,B,C\nfoo,1,2,3\nbar,4,5,6"
    rows = [
        ['A', 'B', 'C'],
        ['foo', '1', '2', '3'],
        ['bar', '4', '5', '6'],
    ]

    expected = self.read_csv(StringIO(csv_text), index_col=0)

    text_parser = TextParser(rows, index_col=0, chunksize=2)
    result = text_parser.read(None)

    tm.assert_frame_equal(result, expected)
Example #18
Source File: common.py From vnpy_crypto with MIT License | 4 votes |
def test_iterator(self):
    """Exercise iterator/chunked reading through ``read_csv`` and ``TextParser``.

    Uses ``self.read_csv``, ``self.read_table``, ``self.data1`` and
    ``self.engine`` from the enclosing test class. Covers, in order:
    partial ``read(n)`` calls on an iterator reader, ``TextParser`` over
    pre-split rows (with and without ``skiprows``), the reader type
    returned by ``read_table(iterator=True)``, iteration with and without
    ``chunksize``, and the ``skipfooter`` error for the python engine.
    """
    # See gh-6607
    reader = self.read_csv(StringIO(self.data1), index_col=0,
                           iterator=True)
    df = self.read_csv(StringIO(self.data1), index_col=0)

    chunk = reader.read(3)
    tm.assert_frame_equal(chunk, df[:3])

    last_chunk = reader.read(5)
    tm.assert_frame_equal(last_chunk, df[3:])

    # pass list
    lines = list(csv.reader(StringIO(self.data1)))
    parser = TextParser(lines, index_col=0, chunksize=2)

    df = self.read_csv(StringIO(self.data1), index_col=0)

    chunks = list(parser)
    tm.assert_frame_equal(chunks[0], df[:2])
    tm.assert_frame_equal(chunks[1], df[2:4])
    tm.assert_frame_equal(chunks[2], df[4:])

    # pass skiprows
    parser = TextParser(lines, index_col=0, chunksize=2, skiprows=[1])
    chunks = list(parser)
    tm.assert_frame_equal(chunks[0], df[1:3])

    treader = self.read_table(StringIO(self.data1), sep=',', index_col=0,
                              iterator=True)
    assert isinstance(treader, TextFileReader)

    # gh-3967: stopping iteration when chunksize is specified
    # One CSV record per line: the separators must be real newlines for
    # the three-row expectation below to hold.
    data = """A,B,C
foo,1,2,3
bar,4,5,6
baz,7,8,9
"""
    expected = DataFrame(dict(A=[1, 4, 7], B=[2, 5, 8], C=[3, 6, 9]),
                         index=['foo', 'bar', 'baz'])

    reader = self.read_csv(StringIO(data), iterator=True)
    result = list(reader)
    tm.assert_frame_equal(result[0], expected)

    # chunksize = 1
    reader = self.read_csv(StringIO(data), chunksize=1)
    result = list(reader)
    assert len(result) == 3
    tm.assert_frame_equal(pd.concat(result), expected)

    # skipfooter is not supported with the C parser yet
    if self.engine == 'python':
        # test bad parameter (skipfooter)
        reader = self.read_csv(StringIO(self.data1), index_col=0,
                               iterator=True, skipfooter=1)
        pytest.raises(ValueError, reader.read, 3)
Example #19
Source File: common.py From elasticintel with GNU General Public License v3.0 | 4 votes |
def test_iterator(self):
    """Exercise iterator/chunked reading through ``read_csv`` and ``TextParser``.

    Uses ``self.read_csv``, ``self.read_table``, ``self.data1`` and
    ``self.engine`` from the enclosing test class. Covers, in order:
    partial ``read(n)`` calls on an iterator reader, ``TextParser`` over
    pre-split rows (with and without ``skiprows``), the reader type
    returned by ``read_table(iterator=True)``, iteration with and without
    ``chunksize``, and the ``skipfooter`` error for the python engine.
    """
    # See gh-6607
    reader = self.read_csv(StringIO(self.data1), index_col=0,
                           iterator=True)
    df = self.read_csv(StringIO(self.data1), index_col=0)

    chunk = reader.read(3)
    tm.assert_frame_equal(chunk, df[:3])

    last_chunk = reader.read(5)
    tm.assert_frame_equal(last_chunk, df[3:])

    # pass list
    lines = list(csv.reader(StringIO(self.data1)))
    parser = TextParser(lines, index_col=0, chunksize=2)

    df = self.read_csv(StringIO(self.data1), index_col=0)

    chunks = list(parser)
    tm.assert_frame_equal(chunks[0], df[:2])
    tm.assert_frame_equal(chunks[1], df[2:4])
    tm.assert_frame_equal(chunks[2], df[4:])

    # pass skiprows
    parser = TextParser(lines, index_col=0, chunksize=2, skiprows=[1])
    chunks = list(parser)
    tm.assert_frame_equal(chunks[0], df[1:3])

    treader = self.read_table(StringIO(self.data1), sep=',', index_col=0,
                              iterator=True)
    assert isinstance(treader, TextFileReader)

    # gh-3967: stopping iteration when chunksize is specified
    # One CSV record per line: the separators must be real newlines for
    # the three-row expectation below to hold.
    data = """A,B,C
foo,1,2,3
bar,4,5,6
baz,7,8,9
"""
    expected = DataFrame(dict(A=[1, 4, 7], B=[2, 5, 8], C=[3, 6, 9]),
                         index=['foo', 'bar', 'baz'])

    reader = self.read_csv(StringIO(data), iterator=True)
    result = list(reader)
    tm.assert_frame_equal(result[0], expected)

    # chunksize = 1
    reader = self.read_csv(StringIO(data), chunksize=1)
    result = list(reader)
    assert len(result) == 3
    tm.assert_frame_equal(pd.concat(result), expected)

    # skipfooter is not supported with the C parser yet
    if self.engine == 'python':
        # test bad parameter (skipfooter)
        reader = self.read_csv(StringIO(self.data1), index_col=0,
                               iterator=True, skipfooter=1)
        pytest.raises(ValueError, reader.read, 3)
Example #20
Source File: common.py From twitter-stock-recommendation with MIT License | 4 votes |
def test_iterator(self):
    """Exercise iterator/chunked reading through ``read_csv`` and ``TextParser``.

    Uses ``self.read_csv``, ``self.read_table``, ``self.data1`` and
    ``self.engine`` from the enclosing test class. Covers, in order:
    partial ``read(n)`` calls on an iterator reader, ``TextParser`` over
    pre-split rows (with and without ``skiprows``), the reader type
    returned by ``read_table(iterator=True)``, iteration with and without
    ``chunksize``, and the ``skipfooter`` error for the python engine.
    """
    # See gh-6607
    reader = self.read_csv(StringIO(self.data1), index_col=0,
                           iterator=True)
    df = self.read_csv(StringIO(self.data1), index_col=0)

    chunk = reader.read(3)
    tm.assert_frame_equal(chunk, df[:3])

    last_chunk = reader.read(5)
    tm.assert_frame_equal(last_chunk, df[3:])

    # pass list
    lines = list(csv.reader(StringIO(self.data1)))
    parser = TextParser(lines, index_col=0, chunksize=2)

    df = self.read_csv(StringIO(self.data1), index_col=0)

    chunks = list(parser)
    tm.assert_frame_equal(chunks[0], df[:2])
    tm.assert_frame_equal(chunks[1], df[2:4])
    tm.assert_frame_equal(chunks[2], df[4:])

    # pass skiprows
    parser = TextParser(lines, index_col=0, chunksize=2, skiprows=[1])
    chunks = list(parser)
    tm.assert_frame_equal(chunks[0], df[1:3])

    treader = self.read_table(StringIO(self.data1), sep=',', index_col=0,
                              iterator=True)
    assert isinstance(treader, TextFileReader)

    # gh-3967: stopping iteration when chunksize is specified
    # One CSV record per line: the separators must be real newlines for
    # the three-row expectation below to hold.
    data = """A,B,C
foo,1,2,3
bar,4,5,6
baz,7,8,9
"""
    expected = DataFrame(dict(A=[1, 4, 7], B=[2, 5, 8], C=[3, 6, 9]),
                         index=['foo', 'bar', 'baz'])

    reader = self.read_csv(StringIO(data), iterator=True)
    result = list(reader)
    tm.assert_frame_equal(result[0], expected)

    # chunksize = 1
    reader = self.read_csv(StringIO(data), chunksize=1)
    result = list(reader)
    assert len(result) == 3
    tm.assert_frame_equal(pd.concat(result), expected)

    # skipfooter is not supported with the C parser yet
    if self.engine == 'python':
        # test bad parameter (skipfooter)
        reader = self.read_csv(StringIO(self.data1), index_col=0,
                               iterator=True, skipfooter=1)
        pytest.raises(ValueError, reader.read, 3)