Python Examples of pandas.compat.StringIO

Source File: test_usecols.py From recruit with Apache License 2.0

6 votes

def test_usecols_with_multi_byte_characters(all_parsers, usecols):
    data = """あああ,いい,ううう,ええええ
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
    parser = all_parsers

    exp_data = {
        "あああ": {
            0: 0.056674972999999997,
            1: 2.6132309819999997,
            2: 3.5689350380000002
        },
        "いい": {0: 8, 1: 2, 2: 7}
    }
    expected = DataFrame(exp_data)

    result = parser.read_csv(StringIO(data), usecols=usecols)
    tm.assert_frame_equal(result, expected)

Source File: test_dtypes.py From recruit with Apache License 2.0

6 votes

def test_dtype_per_column(all_parsers):
    parser = all_parsers
    data = """\
one,two
1,2.5
2,3.5
3,4.5
4,5.5"""
    expected = DataFrame([[1, "2.5"], [2, "3.5"], [3, "4.5"], [4, "5.5"]],
                         columns=["one", "two"])
    expected["one"] = expected["one"].astype(np.float64)
    expected["two"] = expected["two"].astype(object)

    result = parser.read_csv(StringIO(data), dtype={"one": np.float64,
                                                    1: str})
    tm.assert_frame_equal(result, expected)

Source File: test_usecols.py From recruit with Apache License 2.0

6 votes

def test_usecols_with_parse_dates(all_parsers, usecols):
    # see gh-9755
    data = """a,b,c,d,e
0,1,20140101,0900,4
0,1,20140102,1000,4"""
    parser = all_parsers
    parse_dates = [[1, 2]]

    cols = {
        "a": [0, 0],
        "c_d": [
            Timestamp("2014-01-01 09:00:00"),
            Timestamp("2014-01-02 10:00:00")
        ]
    }
    expected = DataFrame(cols, columns=["c_d", "a"])
    result = parser.read_csv(StringIO(data), usecols=usecols,
                             parse_dates=parse_dates)
    tm.assert_frame_equal(result, expected)

Source File: test_usecols.py From recruit with Apache License 2.0

6 votes

def test_usecols_with_parse_dates2(all_parsers):
    # see gh-13604
    parser = all_parsers
    data = """2008-02-07 09:40,1032.43
2008-02-07 09:50,1042.54
2008-02-07 10:00,1051.65"""

    names = ["date", "values"]
    usecols = names[:]
    parse_dates = [0]

    index = Index([Timestamp("2008-02-07 09:40"),
                   Timestamp("2008-02-07 09:50"),
                   Timestamp("2008-02-07 10:00")],
                  name="date")
    cols = {"values": [1032.43, 1042.54, 1051.65]}
    expected = DataFrame(cols, index=index)

    result = parser.read_csv(StringIO(data), parse_dates=parse_dates,
                             index_col=0, usecols=usecols,
                             header=None, names=names)
    tm.assert_frame_equal(result, expected)

Source File: test_usecols.py From recruit with Apache License 2.0

6 votes

def test_usecols_with_parse_dates3(all_parsers):
    # see gh-14792
    parser = all_parsers
    data = """a,b,c,d,e,f,g,h,i,j
2016/09/21,1,1,2,3,4,5,6,7,8"""

    usecols = list("abcdefghij")
    parse_dates = [0]

    cols = {"a": Timestamp("2016-09-21"),
            "b": [1], "c": [1], "d": [2],
            "e": [3], "f": [4], "g": [5],
            "h": [6], "i": [7], "j": [8]}
    expected = DataFrame(cols, columns=usecols)

    result = parser.read_csv(StringIO(data), usecols=usecols,
                             parse_dates=parse_dates)
    tm.assert_frame_equal(result, expected)

Source File: test_dtypes.py From recruit with Apache License 2.0

6 votes

def test_categorical_dtype_chunksize_infer_categories(all_parsers):
    # see gh-10153
    parser = all_parsers
    data = """a,b
1,a
1,b
1,b
2,c"""
    expecteds = [DataFrame({"a": [1, 1],
                            "b": Categorical(["a", "b"])}),
                 DataFrame({"a": [1, 2],
                            "b": Categorical(["b", "c"])},
                           index=[2, 3])]
    actuals = parser.read_csv(StringIO(data), dtype={"b": "category"},
                              chunksize=2)

    for actual, expected in zip(actuals, expecteds):
        tm.assert_frame_equal(actual, expected)

Source File: test_usecols.py From recruit with Apache License 2.0

6 votes

def test_usecols_with_unicode_strings(all_parsers):
    # see gh-13219
    data = """AAA,BBB,CCC,DDD
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
    parser = all_parsers

    exp_data = {
        "AAA": {
            0: 0.056674972999999997,
            1: 2.6132309819999997,
            2: 3.5689350380000002
        },
        "BBB": {0: 8, 1: 2, 2: 7}
    }
    expected = DataFrame(exp_data)

    result = parser.read_csv(StringIO(data), usecols=[u"AAA", u"BBB"])
    tm.assert_frame_equal(result, expected)

Source File: test_usecols.py From recruit with Apache License 2.0

6 votes

def test_usecols_with_single_byte_unicode_strings(all_parsers):
    # see gh-13219
    data = """A,B,C,D
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
    parser = all_parsers

    exp_data = {
        "A": {
            0: 0.056674972999999997,
            1: 2.6132309819999997,
            2: 3.5689350380000002
        },
        "B": {0: 8, 1: 2, 2: 7}
    }
    expected = DataFrame(exp_data)

    result = parser.read_csv(StringIO(data), usecols=[u"A", u"B"])
    tm.assert_frame_equal(result, expected)

Source File: test_dtypes.py From recruit with Apache License 2.0

6 votes

def test_categorical_dtype_chunksize_explicit_categories(all_parsers):
    # see gh-10153
    parser = all_parsers
    data = """a,b
1,a
1,b
1,b
2,c"""
    cats = ["a", "b", "c"]
    expecteds = [DataFrame({"a": [1, 1],
                            "b": Categorical(["a", "b"],
                                             categories=cats)}),
                 DataFrame({"a": [1, 2],
                            "b": Categorical(["b", "c"],
                                             categories=cats)},
                           index=[2, 3])]
    dtype = CategoricalDtype(cats)
    actuals = parser.read_csv(StringIO(data), dtype={"b": dtype}, chunksize=2)

    for actual, expected in zip(actuals, expecteds):
        tm.assert_frame_equal(actual, expected)

Source File: test_dtypes.py From recruit with Apache License 2.0

6 votes

def test_categorical_category_dtype(all_parsers, categories, ordered):
    parser = all_parsers
    data = """a,b
1,a
1,b
1,b
2,c"""
    expected = DataFrame({
        "a": [1, 1, 1, 2],
        "b": Categorical(["a", "b", "b", "c"],
                         categories=categories,
                         ordered=ordered)
    })

    dtype = {"b": CategoricalDtype(categories=categories,
                                   ordered=ordered)}
    result = parser.read_csv(StringIO(data), dtype=dtype)
    tm.assert_frame_equal(result, expected)

Source File: test_repr_info.py From recruit with Apache License 2.0

6 votes

def test_repr(self):
        buf = StringIO()

        # small one
        foo = repr(self.frame)
        self.frame.info(verbose=False, buf=buf)

        # even smaller
        self.frame.reindex(columns=['A']).info(verbose=False, buf=buf)
        self.frame.reindex(columns=['A', 'B']).info(verbose=False, buf=buf)

        # exhausting cases in DataFrame.info

        # columns but no index
        no_index = DataFrame(columns=[0, 1, 3])
        foo = repr(no_index)  # noqa

        # no columns or index
        self.empty.info(buf=buf)

        df = DataFrame(["a\n\r\tb"], columns=["a\n\r\td"], index=["a\n\r\tf"])
        assert "\t" not in repr(df)
        assert "\r" not in repr(df)
        assert "a\n" not in repr(df)

Source File: test_transform.py From recruit with Apache License 2.0

6 votes

def test_transform_casting():
    """Check that a grouped ``diff`` on a datetime column gives timedeltas.

    The check is made both through a grouped Series and a grouped DataFrame.
    """
    # 13046
    data = """
    idx     A         ID3              DATETIME
    0   B-028  b76cd912ff "2014-10-08 13:43:27"
    1   B-054  4a57ed0b02 "2014-10-08 14:26:19"
    2   B-076  1a682034f8 "2014-10-08 14:29:01"
    3   B-023  b76cd912ff "2014-10-08 18:39:34"
    4   B-023  f88g8d7sds "2014-10-08 18:40:18"
    5   B-033  b76cd912ff "2014-10-08 18:44:30"
    6   B-032  b76cd912ff "2014-10-08 18:46:00"
    7   B-037  b76cd912ff "2014-10-08 18:52:15"
    8   B-046  db959faf02 "2014-10-08 18:59:59"
    9   B-053  b76cd912ff "2014-10-08 19:17:48"
    10  B-065  b76cd912ff "2014-10-08 19:21:38"
    """
    frame = pd.read_csv(
        StringIO(data),
        sep=r'\s+',
        index_col=[0],
        parse_dates=['DATETIME'],
    )

    def _diff(values):
        # Difference between consecutive entries within one group.
        return values.diff()

    from_series = frame.groupby('ID3')['DATETIME'].transform(_diff)
    assert is_timedelta64_dtype(from_series.dtype)

    grouped_frame = frame[['ID3', 'DATETIME']].groupby('ID3')
    from_frame = grouped_frame.transform(_diff)
    assert is_timedelta64_dtype(from_frame.DATETIME.dtype)

Source File: test_query_eval.py From recruit with Apache License 2.0

6 votes

def test_query_with_nested_strings(self, parser, engine):
    """Check ``DataFrame.query`` with a quoted string nested in the query.

    The query result is compared with the equivalent boolean-mask selection
    on the ``event`` column.
    """
    skip_if_no_pandas_parser(parser)
    raw = """id          event          timestamp
        1   "page 1 load"   1/1/2014 0:00:01
        1   "page 1 exit"   1/1/2014 0:00:31
        2   "page 2 load"   1/1/2014 0:01:01
        2   "page 2 exit"   1/1/2014 0:01:31
        3   "page 3 load"   1/1/2014 0:02:01
        3   "page 3 exit"   1/1/2014 0:02:31
        4   "page 1 load"   2/1/2014 1:00:01
        4   "page 1 exit"   2/1/2014 1:00:31
        5   "page 2 load"   2/1/2014 1:01:01
        5   "page 2 exit"   2/1/2014 1:01:31
        6   "page 3 load"   2/1/2014 1:02:01
        6   "page 3 exit"   2/1/2014 1:02:31
        """
    frame = pd.read_csv(
        StringIO(raw),
        sep=r'\s{2,}',
        engine='python',
        parse_dates=['timestamp'],
    )

    # Reference selection via a plain boolean mask.
    by_mask = frame[frame.event == '"page 1 load"']
    by_query = frame.query("""'"page 1 load"' in event""",
                           parser=parser, engine=engine)
    assert_frame_equal(by_mask, by_query)

Source File: test_loc.py From recruit with Apache License 2.0

6 votes

def test_loc_setitem_consistency_slice_column_len(self):
        """Check ``.loc[:, column]`` assignment on a frame with MultiIndex columns.

        The CSV is read with two header rows and three index columns. The
        StartDate and EndDate columns are overwritten with parsed datetimes,
        a new Duration column is added and then cast, and the final Duration
        column is compared with an expected Series.
        """
        # .loc[:,column] setting with slice == len of the column
        # GH10408
        data = """Level_0,,,Respondent,Respondent,Respondent,OtherCat,OtherCat
Level_1,,,Something,StartDate,EndDate,Yes/No,SomethingElse
Region,Site,RespondentID,,,,,
Region_1,Site_1,3987227376,A,5/25/2015 10:59,5/25/2015 11:22,Yes,
Region_1,Site_1,3980680971,A,5/21/2015 9:40,5/21/2015 9:52,Yes,Yes
Region_1,Site_2,3977723249,A,5/20/2015 8:27,5/20/2015 8:41,Yes,
Region_1,Site_2,3977723089,A,5/20/2015 8:33,5/20/2015 9:09,Yes,No"""

        df = pd.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1, 2])
        # Overwrite two existing columns in place with their parsed datetimes.
        df.loc[:, ('Respondent', 'StartDate')] = pd.to_datetime(df.loc[:, (
            'Respondent', 'StartDate')])
        df.loc[:, ('Respondent', 'EndDate')] = pd.to_datetime(df.loc[:, (
            'Respondent', 'EndDate')])
        # ('Respondent', 'Duration') is not in the CSV header, so this
        # assignment adds a new column.
        df.loc[:, ('Respondent', 'Duration')] = df.loc[:, (
            'Respondent', 'EndDate')] - df.loc[:, ('Respondent', 'StartDate')]

        # NOTE(review): the expected values below are plain numbers, which
        # presumably relies on astype('timedelta64[s]') producing numeric
        # seconds in the targeted pandas version - confirm when upgrading.
        df.loc[:, ('Respondent', 'Duration')] = df.loc[:, (
            'Respondent', 'Duration')].astype('timedelta64[s]')
        expected = Series([1380, 720, 840, 2160.], index=df.index,
                          name=('Respondent', 'Duration'))
        tm.assert_series_equal(df[('Respondent', 'Duration')], expected)

Source File: test_repr_info.py From recruit with Apache License 2.0

6 votes

def test_info_wide(self):
        from pandas import set_option, reset_option
        io = StringIO()
        df = DataFrame(np.random.randn(5, 101))
        df.info(buf=io)

        io = StringIO()
        df.info(buf=io, max_cols=101)
        rs = io.getvalue()
        assert len(rs.splitlines()) > 100
        xp = rs

        set_option('display.max_info_columns', 101)
        io = StringIO()
        df.info(buf=io)
        assert rs == xp
        reset_option('display.max_info_columns')

Source File: test_dtypes.py From recruit with Apache License 2.0

5 votes

def test_empty_dtype(all_parsers, dtype, expected):
    # see gh-14712
    parser = all_parsers
    data = "a,b"

    result = parser.read_csv(StringIO(data), header=0, dtype=dtype)
    tm.assert_frame_equal(result, expected)

Source File: test_dtypes.py From recruit with Apache License 2.0

5 votes

def test_numeric_dtype(all_parsers, dtype):
    data = "0\n1"
    parser = all_parsers
    expected = DataFrame([0, 1], dtype=dtype)

    result = parser.read_csv(StringIO(data), header=None, dtype=dtype)
    tm.assert_frame_equal(expected, result)

Source File: test_usecols.py From recruit with Apache License 2.0

5 votes

def test_callable_usecols(all_parsers, usecols, expected):
    # see gh-14154
    data = """AaA,bBb,CCC,ddd
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
    parser = all_parsers

    result = parser.read_csv(StringIO(data), usecols=usecols)
    tm.assert_frame_equal(result, expected)

Source File: test_usecols.py From recruit with Apache License 2.0

5 votes

def test_usecols(all_parsers, usecols):
    data = """\
a,b,c
1,2,3
4,5,6
7,8,9
10,11,12"""
    parser = all_parsers
    result = parser.read_csv(StringIO(data), usecols=usecols)

    expected = DataFrame([[2, 3], [5, 6], [8, 9],
                          [11, 12]], columns=["b", "c"])
    tm.assert_frame_equal(result, expected)

Source File: test_usecols.py From recruit with Apache License 2.0

5 votes

def test_usecols_with_names(all_parsers):
    data = """\
a,b,c
1,2,3
4,5,6
7,8,9
10,11,12"""
    parser = all_parsers
    names = ["foo", "bar"]
    result = parser.read_csv(StringIO(data), names=names,
                             usecols=[1, 2], header=0)

    expected = DataFrame([[2, 3], [5, 6], [8, 9],
                          [11, 12]], columns=names)
    tm.assert_frame_equal(result, expected)

Source File: test_usecols.py From recruit with Apache License 2.0

5 votes

def test_usecols_with_mixed_encoding_strings(all_parsers, usecols):
    """Check that the given ``usecols`` is rejected with a ``ValueError``.

    The error message must match ``_msg_validate_usecols_arg``.
    """
    csv_text = """AAA,BBB,CCC,DDD
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
    source = StringIO(csv_text)

    with pytest.raises(ValueError, match=_msg_validate_usecols_arg):
        all_parsers.read_csv(source, usecols=usecols)

Source File: test_usecols.py From recruit with Apache License 2.0

5 votes

def test_usecols_index_col_conflict2(all_parsers):
    # see gh-4201: test that index_col as integer reflects usecols
    parser = all_parsers
    data = "a,b,c,d\nA,a,1,one\nB,b,2,two"

    expected = DataFrame({"b": ["a", "b"], "c": [1, 2], "d": ("one", "two")})
    expected = expected.set_index(["b", "c"])

    result = parser.read_csv(StringIO(data), usecols=["b", "c", "d"],
                             index_col=["b", "c"])
    tm.assert_frame_equal(result, expected)

Source File: test_dtypes.py From recruit with Apache License 2.0

5 votes

def test_dtype_with_converters(all_parsers):
    parser = all_parsers
    data = """a,b
1.1,2.2
1.2,2.3"""

    # Dtype spec ignored if converted specified.
    with tm.assert_produces_warning(ParserWarning):
        result = parser.read_csv(StringIO(data), dtype={"a": "i8"},
                                 converters={"a": lambda x: str(x)})
    expected = DataFrame({"a": ["1.1", "1.2"], "b": [2.2, 2.3]})
    tm.assert_frame_equal(result, expected)

Source File: test_dtypes.py From recruit with Apache License 2.0

5 votes

def test_raise_on_passed_int_dtype_with_nas(all_parsers):
    """Check that an int64 ``dtype`` on a column with a missing value raises.

    The expected ``ValueError`` message depends on ``parser.engine``.
    See gh-2631.
    """
    parser = all_parsers
    csv_text = """YEAR, DOY, a
2001,106380451,10
2001,,11
2001,106380451,67"""

    if parser.engine == "c":
        msg = "Integer column has NA values"
    else:
        msg = "Unable to convert column DOY"

    with pytest.raises(ValueError, match=msg):
        parser.read_csv(StringIO(csv_text), skipinitialspace=True,
                        dtype={"DOY": np.int64})

Source File: test_dtypes.py From recruit with Apache License 2.0

5 votes

def test_empty_with_dup_column_pass_dtype_by_indexes_warn(all_parsers):
    """Check empty input with duplicate ``names`` and positional dtypes.

    A ``UserWarning`` is expected, and the result is compared with a frame
    whose columns are "one" and "one.1". See gh-9424.
    """
    empty_columns = [
        Series([], name="one", dtype="u1"),
        Series([], name="one.1", dtype="f"),
    ]
    expected = concat(empty_columns, axis=1)
    expected.index = expected.index.astype(object)

    with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
        result = all_parsers.read_csv(StringIO(""),
                                      names=["one", "one"],
                                      dtype={0: "u1", 1: "f"})
        tm.assert_frame_equal(result, expected)

Source File: test_dtypes.py From recruit with Apache License 2.0

5 votes

def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers):
    """Check positional dtypes on a header-only CSV with a duplicated header.

    The expected frame names the columns "one" and "one.1".
    """
    result = all_parsers.read_csv(StringIO("one,one"),
                                  dtype={0: "u1", 1: "f"})

    empty_columns = {
        "one": np.empty(0, dtype="u1"),
        "one.1": np.empty(0, dtype="f"),
    }
    expected = DataFrame(empty_columns, index=Index([], dtype=object))
    tm.assert_frame_equal(result, expected)

Source File: test_dtypes.py From recruit with Apache License 2.0

5 votes

def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers):
    """Check dtypes keyed by the names "one" and "one.1" on a header-only CSV.

    The CSV header repeats the name "one"; the expected frame names the
    columns "one" and "one.1".
    """
    dtype_by_name = {"one": "u1", "one.1": "f"}
    result = all_parsers.read_csv(StringIO("one,one"), dtype=dtype_by_name)

    empty_columns = {
        "one": np.empty(0, dtype="u1"),
        "one.1": np.empty(0, dtype="f"),
    }
    expected = DataFrame(empty_columns, index=Index([], dtype=object))
    tm.assert_frame_equal(result, expected)

Source File: test_dtypes.py From recruit with Apache License 2.0

5 votes

def test_empty_with_multi_index_pass_dtype(all_parsers):
    parser = all_parsers

    data = "one,two,three"
    result = parser.read_csv(StringIO(data), index_col=["one", "two"],
                             dtype={"one": "u1", 1: "f8"})

    exp_idx = MultiIndex.from_arrays([np.empty(0, dtype="u1"),
                                      np.empty(0, dtype=np.float64)],
                                     names=["one", "two"])
    expected = DataFrame({"three": np.empty(0, dtype=np.object)},
                         index=exp_idx)
    tm.assert_frame_equal(result, expected)

Source File: test_dtypes.py From recruit with Apache License 2.0

5 votes

def test_empty_with_index_pass_dtype(all_parsers):
    parser = all_parsers

    data = "one,two"
    result = parser.read_csv(StringIO(data), index_col=["one"],
                             dtype={"one": "u1", 1: "f"})

    expected = DataFrame({"two": np.empty(0, dtype="f")},
                         index=Index([], dtype="u1", name="one"))
    tm.assert_frame_equal(result, expected)

Source File: test_dtypes.py From recruit with Apache License 2.0

5 votes

def test_empty_pass_dtype(all_parsers):
    """Check a dtype given for only one column of a header-only CSV.

    Column "one" is requested as "u1"; column "two" has no dtype and is
    expected to be an empty object column.
    """
    parser = all_parsers

    data = "one,two"
    result = parser.read_csv(StringIO(data), dtype={"one": "u1"})

    # Use the builtin ``object``: the ``np.object`` alias is deprecated and
    # no longer exists in newer NumPy releases.
    expected = DataFrame({"one": np.empty(0, dtype="u1"),
                          "two": np.empty(0, dtype=object)},
                         index=Index([], dtype=object))
    tm.assert_frame_equal(result, expected)

Python pandas.compat.StringIO() Examples