Python pandas.io.parsers.TextParser() Examples

The following are 20 code examples of pandas.io.parsers.TextParser(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.io.parsers, or try the search function.
Example #1
Source File: test_common.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_reader_list(all_parsers):
    """Chunked ``TextParser`` output over pre-split rows matches ``read_csv``.

    ``all_parsers`` is the parser under test; only its ``read_csv`` method
    is used, to build the reference frame.
    """
    csv_text = (
        "index,A,B,C,D\n"
        "foo,2,3,4,5\n"
        "bar,7,8,9,10\n"
        "baz,12,13,14,15\n"
        "qux,12,13,14,15\n"
        "foo2,12,13,14,15\n"
        "bar2,12,13,14,15\n"
    )
    options = {"index_col": 0}

    # Hand TextParser rows that csv.reader has already split into fields.
    rows = list(csv.reader(StringIO(csv_text)))
    chunked = TextParser(rows, chunksize=2, **options)

    expected = all_parsers.read_csv(StringIO(csv_text), **options)
    chunks = list(chunked)

    # Each chunk must line up with the matching two-row window of the
    # full frame; the last window takes whatever remains.
    windows = (slice(None, 2), slice(2, 4), slice(4, None))
    for position, window in enumerate(windows):
        tm.assert_frame_equal(chunks[position], expected[window])
Example #2
Source File: html.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def _data_to_frame(**kwargs):
    """Build a DataFrame from table rows by running them through TextParser.

    ``kwargs`` must contain ``'data'`` (a three-item head/body/foot
    sequence), ``'header'`` and ``'skiprows'``. ``'skiprows'`` is passed
    through ``_get_skiprows``; every remaining keyword argument is
    forwarded to ``TextParser`` unchanged.
    """
    table_parts = kwargs.pop('data')
    head, body, foot = table_parts
    header = kwargs.pop('header')
    kwargs['skiprows'] = _get_skiprows(kwargs['skiprows'])

    if head:
        head_positions = lrange(len(head))
        body = head + body
        # special case when a table has <th> elements: with no explicit
        # header, use the head rows themselves as the header row(s)
        if header is None:
            if head_positions == [0]:
                header = 0
            else:
                header = head_positions

    if foot:
        body += [foot]

    # fill out elements of body that are "ragged"
    _expand_elements(body)
    return TextParser(body, header=header, **kwargs).read()
Example #3
Source File: html.py    From Splunking-Crime with GNU Affero General Public License v3.0 6 votes vote down vote up
def _data_to_frame(**kwargs):
    """Turn head/body/foot table rows into a DataFrame via TextParser.

    Expects ``'data'``, ``'header'`` and ``'skiprows'`` entries in
    ``kwargs``. ``'data'`` and ``'header'`` are removed, ``'skiprows'`` is
    replaced by the result of ``_get_skiprows``, and whatever is left is
    handed to ``TextParser`` as keyword arguments.
    """
    head, body, foot = kwargs.pop('data')
    header = kwargs.pop('header')
    kwargs['skiprows'] = _get_skiprows(kwargs['skiprows'])

    if head:
        row_numbers = lrange(len(head))
        body = head + body
        if header is None:
            # special case when a table has <th> elements
            single_head_row = row_numbers == [0]
            header = 0 if single_head_row else row_numbers

    if foot:
        body += [foot]

    # fill out elements of body that are "ragged"
    _expand_elements(body)

    text_parser = TextParser(body, header=header, **kwargs)
    return text_parser.read()
Example #4
Source File: gspread_dataframe.py    From gspread-dataframe with MIT License 6 votes vote down vote up
def get_as_dataframe(worksheet, evaluate_formulas=False, **options):
    r"""
    Read a worksheet's contents into a DataFrame.

    :param worksheet: the worksheet to read.
    :param evaluate_formulas: passed to ``_get_all_values``; if True, use
            each cell's value after formula evaluation, otherwise the
            formula itself if present. Defaults to False.
    :param \*\*options: keyword options forwarded to
            pandas.io.parsers.TextParser, as supported by the installed
            version of pandas. (Note: TextParser supports only the default
            'python' parser engine, not the C engine.) An ``nrows`` option,
            if given, is also passed to ``read``.
    :returns: pandas.DataFrame
    """
    cell_rows = _get_all_values(worksheet, evaluate_formulas)
    text_parser = TextParser(cell_rows, **options)
    row_limit = options.get('nrows')
    return text_parser.read(row_limit)
Example #5
Source File: html.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def _data_to_frame(**kwargs):
    """Build a DataFrame from table rows by running them through TextParser.

    ``kwargs`` must contain ``'data'`` (a three-item head/body/foot
    sequence), ``'header'`` and ``'skiprows'``. ``'skiprows'`` is passed
    through ``_get_skiprows``; every remaining keyword argument is
    forwarded to ``TextParser`` unchanged.
    """
    table_parts = kwargs.pop('data')
    head, body, foot = table_parts
    header = kwargs.pop('header')
    kwargs['skiprows'] = _get_skiprows(kwargs['skiprows'])

    if head:
        body = head + body

        # Infer header when there is a <thead> or top <th>-only rows
        if header is None:
            if len(head) != 1:
                # ignore all-empty-text rows
                header = [position for position, cells in enumerate(head)
                          if any(cells)]
            else:
                header = 0

    if foot:
        body += foot

    # fill out elements of body that are "ragged"
    _expand_elements(body)
    return TextParser(body, header=header, **kwargs).read()
Example #6
Source File: test_common.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_reader_list_skiprows(all_parsers):
    """``TextParser`` with ``skiprows=[1]`` drops the first data row.

    ``all_parsers`` is the parser under test; only its ``read_csv`` method
    is used, to build the reference frame.
    """
    csv_text = (
        "index,A,B,C,D\n"
        "foo,2,3,4,5\n"
        "bar,7,8,9,10\n"
        "baz,12,13,14,15\n"
        "qux,12,13,14,15\n"
        "foo2,12,13,14,15\n"
        "bar2,12,13,14,15\n"
    )
    options = {"index_col": 0}

    # Rows are split by csv.reader first; TextParser skips row index 1.
    rows = list(csv.reader(StringIO(csv_text)))
    chunked = TextParser(rows, chunksize=2, skiprows=[1], **options)

    expected = all_parsers.read_csv(StringIO(csv_text), **options)
    first_chunk = list(chunked)[0]

    # With that row skipped, the first chunk is rows 1-2 of the full frame.
    tm.assert_frame_equal(first_chunk, expected[1:3])
Example #7
Source File: test_common.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_reader_list(all_parsers):
    """Iterating ``TextParser`` in chunks of two reproduces ``read_csv``.

    The same CSV text is parsed twice: once by ``all_parsers.read_csv``
    for the reference frame, and once by ``TextParser`` from rows already
    split by ``csv.reader``.
    """
    csv_lines = [
        "index,A,B,C,D",
        "foo,2,3,4,5",
        "bar,7,8,9,10",
        "baz,12,13,14,15",
        "qux,12,13,14,15",
        "foo2,12,13,14,15",
        "bar2,12,13,14,15",
    ]
    # Every line, including the last, is newline-terminated.
    csv_text = "\n".join(csv_lines) + "\n"
    parse_kwargs = {"index_col": 0}

    split_rows = list(csv.reader(StringIO(csv_text)))
    chunk_reader = TextParser(split_rows, chunksize=2, **parse_kwargs)

    full_frame = all_parsers.read_csv(StringIO(csv_text), **parse_kwargs)
    pieces = list(chunk_reader)

    tm.assert_frame_equal(pieces[0], full_frame[:2])
    tm.assert_frame_equal(pieces[1], full_frame[2:4])
    tm.assert_frame_equal(pieces[2], full_frame[4:])
Example #8
Source File: html.py    From Computable with MIT License 6 votes vote down vote up
def _data_to_frame(data, header, index_col, skiprows, infer_types,
                   parse_dates, tupleize_cols, thousands):
    """Parse head/body table rows into a DataFrame with TextParser.

    ``data`` is unpacked as ``(head, body, footer)``; the footer is
    ignored. When ``head`` is truthy it is prepended to the body as a
    single row and, if ``header`` is None, row 0 becomes the header.
    ``skiprows`` is passed through ``_get_skiprows``. After parsing, the
    frame goes through ``convert_objects`` when ``infer_types`` is truthy,
    otherwise every cell is mapped through ``text_type``.
    """
    head, body, _footer = data  # footer is rarely used: ignore for now

    if head:
        body = [head] + body
        # special case when a table has <th> elements
        header = 0 if header is None else header

    # fill out elements of body that are "ragged"
    _expand_elements(body)

    parser_options = dict(
        header=header,
        index_col=index_col,
        skiprows=_get_skiprows(skiprows),
        parse_dates=parse_dates,
        tupleize_cols=tupleize_cols,
        thousands=thousands,
    )
    frame = TextParser(body, **parser_options).read()

    # TODO: rm this code so infer_types has no effect in 0.14
    if not infer_types:
        return frame.applymap(text_type)
    return frame.convert_objects(convert_dates='coerce')
Example #9
Source File: html.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def _data_to_frame(**kwargs):
    """Parse head/body/foot table rows into a DataFrame using TextParser.

    Reads ``'data'``, ``'header'`` and ``'skiprows'`` from ``kwargs``:
    the first two are popped, the third is rewritten via
    ``_get_skiprows``. All other entries are forwarded to ``TextParser``.
    """
    head, body, foot = kwargs.pop('data')
    header = kwargs.pop('header')
    kwargs['skiprows'] = _get_skiprows(kwargs['skiprows'])

    if head:
        head_rows = lrange(len(head))
        body = head + body
        # special case when a table has <th> elements
        if header is None and head_rows == [0]:
            header = 0
        elif header is None:
            header = head_rows

    if foot:
        body += [foot]

    # fill out elements of body that are "ragged"
    _expand_elements(body)

    frame = TextParser(body, header=header, **kwargs).read()
    return frame
Example #10
Source File: html.py    From recruit with Apache License 2.0 6 votes vote down vote up
def _data_to_frame(**kwargs):
    """Turn head/body/foot table rows into a DataFrame via TextParser.

    Expects ``'data'``, ``'header'`` and ``'skiprows'`` entries in
    ``kwargs``. ``'data'`` and ``'header'`` are removed, ``'skiprows'`` is
    replaced by the result of ``_get_skiprows``, and whatever is left is
    handed to ``TextParser`` as keyword arguments.
    """
    head, body, foot = kwargs.pop('data')
    header = kwargs.pop('header')
    kwargs['skiprows'] = _get_skiprows(kwargs['skiprows'])

    if head:
        body = head + body

        # Infer header when there is a <thead> or top <th>-only rows
        if header is None:
            if len(head) == 1:
                header = 0
            else:
                # ignore all-empty-text rows
                non_empty_rows = []
                for row_number, row in enumerate(head):
                    if any(row):
                        non_empty_rows.append(row_number)
                header = non_empty_rows

    if foot:
        body += foot

    # fill out elements of body that are "ragged"
    _expand_elements(body)

    text_parser = TextParser(body, header=header, **kwargs)
    return text_parser.read()
Example #11
Source File: test_common.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_reader_list_skiprows(all_parsers):
    """First chunk from ``TextParser(skiprows=[1])`` matches ``read_csv``.

    The reference frame comes from ``all_parsers.read_csv`` on the same
    text; ``TextParser`` receives the rows already split by ``csv.reader``.
    """
    csv_lines = [
        "index,A,B,C,D",
        "foo,2,3,4,5",
        "bar,7,8,9,10",
        "baz,12,13,14,15",
        "qux,12,13,14,15",
        "foo2,12,13,14,15",
        "bar2,12,13,14,15",
    ]
    # Every line, including the last, is newline-terminated.
    csv_text = "\n".join(csv_lines) + "\n"
    parse_kwargs = {"index_col": 0}

    split_rows = list(csv.reader(StringIO(csv_text)))
    chunk_reader = TextParser(split_rows, chunksize=2, skiprows=[1],
                              **parse_kwargs)

    full_frame = all_parsers.read_csv(StringIO(csv_text), **parse_kwargs)
    pieces = list(chunk_reader)

    # Row index 1 is skipped, so the first chunk starts one row later.
    tm.assert_frame_equal(pieces[0], full_frame[1:3])
Example #12
Source File: data.py    From Computable with MIT License 5 votes vote down vote up
def _parse_options_data(table):
    """Parse the ``<tr>`` rows of ``table`` into a frame via TextParser.

    The first row is unpacked as ``th`` cells and supplies the column
    names; each later row is unpacked as ``td`` cells and becomes one
    data row. ``'N/A'`` is treated as a missing value.
    """
    table_rows = table.xpath('.//tr')
    column_names = _unpack(table_rows[0], kind='th')

    records = []
    for table_row in table_rows[1:]:
        records.append(_unpack(table_row, kind='td'))

    # Use ',' as a thousands separator as we're pulling from the US site.
    text_parser = TextParser(records, names=column_names,
                             na_values=['N/A'], thousands=',')
    return text_parser.get_chunk()
Example #13
Source File: test_common.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_read_data_list(all_parsers):
    """``TextParser.read()`` on pre-split rows matches ``read_csv`` on text.

    ``all_parsers`` is the parser under test; only its ``read_csv`` method
    is used, to build the reference frame.
    """
    options = {"index_col": 0}
    csv_text = "A,B,C\nfoo,1,2,3\nbar,4,5,6"
    rows = [
        ["A", "B", "C"],
        ["foo", "1", "2", "3"],
        ["bar", "4", "5", "6"],
    ]

    expected = all_parsers.read_csv(StringIO(csv_text), **options)

    # read() with no argument is compared against the whole reference
    # frame, even though the parser was created with chunksize=2.
    actual = TextParser(rows, chunksize=2, **options).read()

    tm.assert_frame_equal(actual, expected)
Example #14
Source File: common.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_read_text_list(self):
    """``TextParser.read(None)`` on a list of rows matches ``self.read_csv``."""
    csv_text = "A,B,C\nfoo,1,2,3\nbar,4,5,6"
    rows = [
        ['A', 'B', 'C'],
        ['foo', '1', '2', '3'],
        ['bar', '4', '5', '6'],
    ]
    expected = self.read_csv(StringIO(csv_text), index_col=0)

    # read(None) is compared against the whole reference frame, even
    # though the parser was created with chunksize=2.
    text_parser = TextParser(rows, index_col=0, chunksize=2)
    actual = text_parser.read(None)

    tm.assert_frame_equal(actual, expected)
Example #15
Source File: common.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_read_text_list(self):
    """Compare ``TextParser`` fed pre-split rows with ``self.read_csv``."""
    header_row = ['A', 'B', 'C']
    data_rows = [['foo', '1', '2', '3'], ['bar', '4', '5', '6']]
    csv_text = "A,B,C\nfoo,1,2,3\nbar,4,5,6"

    reference = self.read_csv(StringIO(csv_text), index_col=0)

    # The same content, already split into fields, goes to TextParser.
    list_parser = TextParser([header_row] + data_rows, index_col=0,
                             chunksize=2)
    parsed = list_parser.read(None)

    tm.assert_frame_equal(parsed, reference)
Example #16
Source File: test_common.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_read_data_list(all_parsers):
    """Reading a list of rows with ``TextParser`` equals ``read_csv`` output.

    The reference frame is produced by ``all_parsers.read_csv`` from the
    equivalent CSV text.
    """
    parse_kwargs = {"index_col": 0}
    csv_text = "A,B,C\nfoo,1,2,3\nbar,4,5,6"

    reference = all_parsers.read_csv(StringIO(csv_text), **parse_kwargs)

    # Same content as csv_text, already split into fields.
    split_rows = [["A", "B", "C"]]
    split_rows.append(["foo", "1", "2", "3"])
    split_rows.append(["bar", "4", "5", "6"])

    list_parser = TextParser(split_rows, chunksize=2, **parse_kwargs)
    parsed = list_parser.read()

    tm.assert_frame_equal(parsed, reference)
Example #17
Source File: common.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_read_text_list(self):
    """A list of rows parsed by ``TextParser`` equals ``self.read_csv`` output."""
    csv_text = "A,B,C\nfoo,1,2,3\nbar,4,5,6"
    expected = self.read_csv(StringIO(csv_text), index_col=0)

    # The same three rows as csv_text, already split into fields.
    split_rows = [
        ['A', 'B', 'C'],
        ['foo', '1', '2', '3'],
        ['bar', '4', '5', '6'],
    ]
    result = TextParser(split_rows, index_col=0, chunksize=2).read(None)

    tm.assert_frame_equal(result, expected)
Example #18
Source File: common.py    From vnpy_crypto with MIT License 4 votes vote down vote up
def test_iterator(self):
        """Check incremental reads and chunked iteration of the parser.

        Covers, in order: explicit ``read(n)`` calls on a reader created
        with ``iterator=True``; ``TextParser`` over rows pre-split by
        ``csv.reader`` (with and without ``skiprows``); the type returned
        by ``read_table(..., iterator=True)``; full iteration with
        ``iterator=True`` and with ``chunksize=1``; and, for the python
        engine only, that reading with ``skipfooter=1`` raises
        ``ValueError``.
        """
        # See gh-6607
        reader = self.read_csv(StringIO(self.data1), index_col=0,
                               iterator=True)
        df = self.read_csv(StringIO(self.data1), index_col=0)

        # The first read(3) must return the first three rows ...
        chunk = reader.read(3)
        tm.assert_frame_equal(chunk, df[:3])

        # ... and the following read(5) everything after them.
        last_chunk = reader.read(5)
        tm.assert_frame_equal(last_chunk, df[3:])

        # pass list
        lines = list(csv.reader(StringIO(self.data1)))
        parser = TextParser(lines, index_col=0, chunksize=2)

        df = self.read_csv(StringIO(self.data1), index_col=0)

        # Iterating the parser yields successive two-row chunks.
        chunks = list(parser)
        tm.assert_frame_equal(chunks[0], df[:2])
        tm.assert_frame_equal(chunks[1], df[2:4])
        tm.assert_frame_equal(chunks[2], df[4:])

        # pass skiprows
        parser = TextParser(lines, index_col=0, chunksize=2, skiprows=[1])
        chunks = list(parser)
        tm.assert_frame_equal(chunks[0], df[1:3])

        # read_table with iterator=True must return a TextFileReader.
        treader = self.read_table(StringIO(self.data1), sep=',', index_col=0,
                                  iterator=True)
        assert isinstance(treader, TextFileReader)

        # gh-3967: stopping iteration when chunksize is specified
        data = """A,B,C
foo,1,2,3
bar,4,5,6
baz,7,8,9
"""
        # With iterator=True and no chunksize, the first yielded frame
        # already holds all three rows.
        reader = self.read_csv(StringIO(data), iterator=True)
        result = list(reader)
        expected = DataFrame(dict(A=[1, 4, 7], B=[2, 5, 8], C=[
            3, 6, 9]), index=['foo', 'bar', 'baz'])
        tm.assert_frame_equal(result[0], expected)

        # chunksize = 1
        # Three one-row chunks that concatenate back to the full frame.
        reader = self.read_csv(StringIO(data), chunksize=1)
        result = list(reader)
        expected = DataFrame(dict(A=[1, 4, 7], B=[2, 5, 8], C=[
            3, 6, 9]), index=['foo', 'bar', 'baz'])
        assert len(result) == 3
        tm.assert_frame_equal(pd.concat(result), expected)

        # skipfooter is not supported with the C parser yet
        if self.engine == 'python':
            # test bad parameter (skipfooter)
            reader = self.read_csv(StringIO(self.data1), index_col=0,
                                   iterator=True, skipfooter=1)
            pytest.raises(ValueError, reader.read, 3)
Example #19
Source File: common.py    From elasticintel with GNU General Public License v3.0 4 votes vote down vote up
def test_iterator(self):
        """Exercise iterator-style reading through several entry points.

        Steps: partial ``read(n)`` calls on an ``iterator=True`` reader,
        chunked iteration of ``TextParser`` built from a list of rows
        (plain and with ``skiprows=[1]``), a type check on the object
        returned by ``read_table(..., iterator=True)``, exhaustive
        iteration with ``iterator=True`` and ``chunksize=1``, and a
        python-engine-only check that ``skipfooter=1`` makes ``read``
        raise ``ValueError``.
        """
        # See gh-6607
        reader = self.read_csv(StringIO(self.data1), index_col=0,
                               iterator=True)
        df = self.read_csv(StringIO(self.data1), index_col=0)

        # Two consecutive reads: rows [0, 3) first, then the remainder.
        chunk = reader.read(3)
        tm.assert_frame_equal(chunk, df[:3])

        last_chunk = reader.read(5)
        tm.assert_frame_equal(last_chunk, df[3:])

        # pass list
        lines = list(csv.reader(StringIO(self.data1)))
        parser = TextParser(lines, index_col=0, chunksize=2)

        df = self.read_csv(StringIO(self.data1), index_col=0)

        # Chunks of two rows each must tile the reference frame.
        chunks = list(parser)
        tm.assert_frame_equal(chunks[0], df[:2])
        tm.assert_frame_equal(chunks[1], df[2:4])
        tm.assert_frame_equal(chunks[2], df[4:])

        # pass skiprows
        parser = TextParser(lines, index_col=0, chunksize=2, skiprows=[1])
        chunks = list(parser)
        tm.assert_frame_equal(chunks[0], df[1:3])

        # iterator=True on read_table is expected to give a TextFileReader.
        treader = self.read_table(StringIO(self.data1), sep=',', index_col=0,
                                  iterator=True)
        assert isinstance(treader, TextFileReader)

        # gh-3967: stopping iteration when chunksize is specified
        data = """A,B,C
foo,1,2,3
bar,4,5,6
baz,7,8,9
"""
        # No chunksize given: the first item of the iteration is compared
        # against the complete three-row frame.
        reader = self.read_csv(StringIO(data), iterator=True)
        result = list(reader)
        expected = DataFrame(dict(A=[1, 4, 7], B=[2, 5, 8], C=[
            3, 6, 9]), index=['foo', 'bar', 'baz'])
        tm.assert_frame_equal(result[0], expected)

        # chunksize = 1
        # Expect exactly three chunks whose concatenation is the full frame.
        reader = self.read_csv(StringIO(data), chunksize=1)
        result = list(reader)
        expected = DataFrame(dict(A=[1, 4, 7], B=[2, 5, 8], C=[
            3, 6, 9]), index=['foo', 'bar', 'baz'])
        assert len(result) == 3
        tm.assert_frame_equal(pd.concat(result), expected)

        # skipfooter is not supported with the C parser yet
        if self.engine == 'python':
            # test bad parameter (skipfooter)
            reader = self.read_csv(StringIO(self.data1), index_col=0,
                                   iterator=True, skipfooter=1)
            pytest.raises(ValueError, reader.read, 3)
Example #20
Source File: common.py    From twitter-stock-recommendation with MIT License 4 votes vote down vote up
def test_iterator(self):
        """Verify iterator and chunked reading against full-frame reads.

        The test compares slices of a fully parsed frame with: results of
        ``read(n)`` on an ``iterator=True`` reader, chunks produced by
        ``TextParser`` from rows split by ``csv.reader`` (also with
        ``skiprows=[1]``), and frames yielded when iterating readers
        created with ``iterator=True`` or ``chunksize=1``. It also asserts
        that ``read_table(..., iterator=True)`` returns a
        ``TextFileReader`` and, for the python engine, that
        ``skipfooter=1`` causes ``read`` to raise ``ValueError``.
        """
        # See gh-6607
        reader = self.read_csv(StringIO(self.data1), index_col=0,
                               iterator=True)
        df = self.read_csv(StringIO(self.data1), index_col=0)

        # read(3) consumes the first three rows of the reader.
        chunk = reader.read(3)
        tm.assert_frame_equal(chunk, df[:3])

        # The next read(5) is expected to return all rows that are left.
        last_chunk = reader.read(5)
        tm.assert_frame_equal(last_chunk, df[3:])

        # pass list
        lines = list(csv.reader(StringIO(self.data1)))
        parser = TextParser(lines, index_col=0, chunksize=2)

        df = self.read_csv(StringIO(self.data1), index_col=0)

        # list(parser) drains the parser two rows at a time.
        chunks = list(parser)
        tm.assert_frame_equal(chunks[0], df[:2])
        tm.assert_frame_equal(chunks[1], df[2:4])
        tm.assert_frame_equal(chunks[2], df[4:])

        # pass skiprows
        parser = TextParser(lines, index_col=0, chunksize=2, skiprows=[1])
        chunks = list(parser)
        tm.assert_frame_equal(chunks[0], df[1:3])

        # Only the returned type is checked here; treader is never read.
        treader = self.read_table(StringIO(self.data1), sep=',', index_col=0,
                                  iterator=True)
        assert isinstance(treader, TextFileReader)

        # gh-3967: stopping iteration when chunksize is specified
        data = """A,B,C
foo,1,2,3
bar,4,5,6
baz,7,8,9
"""
        # Without chunksize, result[0] must already equal the whole frame.
        reader = self.read_csv(StringIO(data), iterator=True)
        result = list(reader)
        expected = DataFrame(dict(A=[1, 4, 7], B=[2, 5, 8], C=[
            3, 6, 9]), index=['foo', 'bar', 'baz'])
        tm.assert_frame_equal(result[0], expected)

        # chunksize = 1
        # One row per chunk: three chunks, concatenating to the full frame.
        reader = self.read_csv(StringIO(data), chunksize=1)
        result = list(reader)
        expected = DataFrame(dict(A=[1, 4, 7], B=[2, 5, 8], C=[
            3, 6, 9]), index=['foo', 'bar', 'baz'])
        assert len(result) == 3
        tm.assert_frame_equal(pd.concat(result), expected)

        # skipfooter is not supported with the C parser yet
        if self.engine == 'python':
            # test bad parameter (skipfooter)
            reader = self.read_csv(StringIO(self.data1), index_col=0,
                                   iterator=True, skipfooter=1)
            pytest.raises(ValueError, reader.read, 3)