Python pandas.compat.StringIO() Examples
The following are 30 code examples of pandas.compat.StringIO().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas.compat, or try the search function.
Example #1
Source File: test_usecols.py From recruit with Apache License 2.0 | 6 votes |
def test_usecols_with_multi_byte_characters(all_parsers, usecols):
    """Check ``usecols`` against a header whose names are multi-byte text.

    ``usecols`` comes from parametrization that is not visible here; the
    expected frame holds the first two columns of the data.
    """
    # One literal per CSV record so the row breaks are explicit.
    data = (
        "あああ,いい,ううう,ええええ\n"
        "0.056674973,8,True,a\n"
        "2.613230982,2,False,b\n"
        "3.568935038,7,False,a"
    )
    parser = all_parsers

    exp_data = {
        "あああ": {
            0: 0.056674972999999997,
            1: 2.6132309819999997,
            2: 3.5689350380000002
        },
        "いい": {0: 8, 1: 2, 2: 7}
    }
    expected = DataFrame(exp_data)

    result = parser.read_csv(StringIO(data), usecols=usecols)
    tm.assert_frame_equal(result, expected)
Example #2
Source File: test_dtypes.py From recruit with Apache License 2.0 | 6 votes |
def test_dtype_per_column(all_parsers):
    """Check that ``dtype`` may give a different type to each column.

    One entry of the ``dtype`` mapping is keyed by column name and the
    other by integer position.
    """
    parser = all_parsers
    # One literal per CSV record so the row breaks are explicit.
    data = (
        "one,two\n"
        "1,2.5\n"
        "2,3.5\n"
        "3,4.5\n"
        "4,5.5"
    )
    expected = DataFrame([[1, "2.5"], [2, "3.5"], [3, "4.5"], [4, "5.5"]],
                         columns=["one", "two"])
    expected["one"] = expected["one"].astype(np.float64)
    expected["two"] = expected["two"].astype(object)

    result = parser.read_csv(StringIO(data),
                             dtype={"one": np.float64, 1: str})
    tm.assert_frame_equal(result, expected)
Example #3
Source File: test_usecols.py From recruit with Apache License 2.0 | 6 votes |
def test_usecols_with_parse_dates(all_parsers, usecols):
    """Check ``usecols`` together with a nested-list ``parse_dates``.

    ``usecols`` comes from parametrization that is not visible here; the
    expected frame has a combined ``c_d`` timestamp column followed by ``a``.
    """
    # see gh-9755
    # One literal per CSV record so the row breaks are explicit.
    data = (
        "a,b,c,d,e\n"
        "0,1,20140101,0900,4\n"
        "0,1,20140102,1000,4"
    )
    parser = all_parsers
    parse_dates = [[1, 2]]

    cols = {
        "a": [0, 0],
        "c_d": [
            Timestamp("2014-01-01 09:00:00"),
            Timestamp("2014-01-02 10:00:00")
        ]
    }
    expected = DataFrame(cols, columns=["c_d", "a"])
    result = parser.read_csv(StringIO(data), usecols=usecols,
                             parse_dates=parse_dates)
    tm.assert_frame_equal(result, expected)
Example #4
Source File: test_usecols.py From recruit with Apache License 2.0 | 6 votes |
def test_usecols_with_parse_dates2(all_parsers):
    """Check ``usecols`` with ``parse_dates``, ``index_col`` and ``names``.

    The data has no header row; the first column is parsed as dates and
    used as the index, leaving a single ``values`` column.
    """
    # see gh-13604
    parser = all_parsers
    # One literal per CSV record; the space inside each timestamp is data.
    data = (
        "2008-02-07 09:40,1032.43\n"
        "2008-02-07 09:50,1042.54\n"
        "2008-02-07 10:00,1051.65"
    )

    names = ["date", "values"]
    usecols = names[:]
    parse_dates = [0]

    index = Index([Timestamp("2008-02-07 09:40"),
                   Timestamp("2008-02-07 09:50"),
                   Timestamp("2008-02-07 10:00")],
                  name="date")
    cols = {"values": [1032.43, 1042.54, 1051.65]}
    expected = DataFrame(cols, index=index)

    result = parser.read_csv(StringIO(data), parse_dates=parse_dates,
                             index_col=0, usecols=usecols,
                             header=None, names=names)
    tm.assert_frame_equal(result, expected)
Example #5
Source File: test_usecols.py From recruit with Apache License 2.0 | 6 votes |
def test_usecols_with_parse_dates3(all_parsers):
    """Check ``usecols`` naming every column while column 0 is date-parsed."""
    # see gh-14792
    parser = all_parsers
    # One literal per CSV record so the row breaks are explicit.
    data = (
        "a,b,c,d,e,f,g,h,i,j\n"
        "2016/09/21,1,1,2,3,4,5,6,7,8"
    )

    usecols = list("abcdefghij")
    parse_dates = [0]

    cols = {"a": Timestamp("2016-09-21"),
            "b": [1], "c": [1], "d": [2],
            "e": [3], "f": [4], "g": [5],
            "h": [6], "i": [7], "j": [8]}
    expected = DataFrame(cols, columns=usecols)

    result = parser.read_csv(StringIO(data), usecols=usecols,
                             parse_dates=parse_dates)
    tm.assert_frame_equal(result, expected)
Example #6
Source File: test_dtypes.py From recruit with Apache License 2.0 | 6 votes |
def test_categorical_dtype_chunksize_infer_categories(all_parsers):
    """Check chunked reads with ``dtype="category"`` on column ``b``.

    The data is read two rows at a time and each chunk is compared with
    an expected frame whose categories are taken from that chunk's values.
    """
    # see gh-10153
    parser = all_parsers
    # One literal per CSV record so the row breaks are explicit.
    data = (
        "a,b\n"
        "1,a\n"
        "1,b\n"
        "1,b\n"
        "2,c"
    )
    expecteds = [DataFrame({"a": [1, 1],
                            "b": Categorical(["a", "b"])}),
                 DataFrame({"a": [1, 2],
                            "b": Categorical(["b", "c"])},
                           index=[2, 3])]
    actuals = parser.read_csv(StringIO(data),
                              dtype={"b": "category"}, chunksize=2)

    for actual, expected in zip(actuals, expecteds):
        tm.assert_frame_equal(actual, expected)
Example #7
Source File: test_usecols.py From recruit with Apache License 2.0 | 6 votes |
def test_usecols_with_unicode_strings(all_parsers):
    """Check ``usecols`` given as ``u""``-prefixed column names."""
    # see gh-13219
    # One literal per CSV record so the row breaks are explicit.
    data = (
        "AAA,BBB,CCC,DDD\n"
        "0.056674973,8,True,a\n"
        "2.613230982,2,False,b\n"
        "3.568935038,7,False,a"
    )
    parser = all_parsers

    exp_data = {
        "AAA": {
            0: 0.056674972999999997,
            1: 2.6132309819999997,
            2: 3.5689350380000002
        },
        "BBB": {0: 8, 1: 2, 2: 7}
    }
    expected = DataFrame(exp_data)

    result = parser.read_csv(StringIO(data), usecols=[u"AAA", u"BBB"])
    tm.assert_frame_equal(result, expected)
Example #8
Source File: test_usecols.py From recruit with Apache License 2.0 | 6 votes |
def test_usecols_with_single_byte_unicode_strings(all_parsers):
    """Check ``usecols`` given as one-character ``u""``-prefixed names."""
    # see gh-13219
    # One literal per CSV record so the row breaks are explicit.
    data = (
        "A,B,C,D\n"
        "0.056674973,8,True,a\n"
        "2.613230982,2,False,b\n"
        "3.568935038,7,False,a"
    )
    parser = all_parsers

    exp_data = {
        "A": {
            0: 0.056674972999999997,
            1: 2.6132309819999997,
            2: 3.5689350380000002
        },
        "B": {0: 8, 1: 2, 2: 7}
    }
    expected = DataFrame(exp_data)

    result = parser.read_csv(StringIO(data), usecols=[u"A", u"B"])
    tm.assert_frame_equal(result, expected)
Example #9
Source File: test_dtypes.py From recruit with Apache License 2.0 | 6 votes |
def test_categorical_dtype_chunksize_explicit_categories(all_parsers):
    """Check chunked reads with an explicit ``CategoricalDtype`` on ``b``.

    The data is read two rows at a time; every expected chunk carries the
    full category list ``["a", "b", "c"]``.
    """
    # see gh-10153
    parser = all_parsers
    # One literal per CSV record so the row breaks are explicit.
    data = (
        "a,b\n"
        "1,a\n"
        "1,b\n"
        "1,b\n"
        "2,c"
    )
    cats = ["a", "b", "c"]
    expecteds = [DataFrame({"a": [1, 1],
                            "b": Categorical(["a", "b"],
                                             categories=cats)}),
                 DataFrame({"a": [1, 2],
                            "b": Categorical(["b", "c"],
                                             categories=cats)},
                           index=[2, 3])]
    dtype = CategoricalDtype(cats)
    actuals = parser.read_csv(StringIO(data), dtype={"b": dtype},
                              chunksize=2)

    for actual, expected in zip(actuals, expecteds):
        tm.assert_frame_equal(actual, expected)
Example #10
Source File: test_dtypes.py From recruit with Apache License 2.0 | 6 votes |
def test_categorical_category_dtype(all_parsers, categories, ordered):
    """Check reading column ``b`` with a ``CategoricalDtype``.

    ``categories`` and ``ordered`` are supplied by parametrization that is
    not visible here and are passed to both the dtype and the expectation.
    """
    parser = all_parsers
    # One literal per CSV record so the row breaks are explicit.
    data = (
        "a,b\n"
        "1,a\n"
        "1,b\n"
        "1,b\n"
        "2,c"
    )
    expected = DataFrame({
        "a": [1, 1, 1, 2],
        "b": Categorical(["a", "b", "b", "c"],
                         categories=categories,
                         ordered=ordered)
    })

    dtype = {"b": CategoricalDtype(categories=categories,
                                   ordered=ordered)}
    result = parser.read_csv(StringIO(data), dtype=dtype)
    tm.assert_frame_equal(result, expected)
Example #11
Source File: test_repr_info.py From recruit with Apache License 2.0 | 6 votes |
def test_repr(self):
    """Exercise ``repr`` and ``info`` on several frames, then check escaping.

    The first part only verifies that the calls complete; the final part
    asserts that tab, carriage return and ``"a\\n"`` do not appear verbatim
    in the repr of a frame whose data and labels contain them.
    """
    sink = StringIO()

    # small one
    _ = repr(self.frame)
    self.frame.info(verbose=False, buf=sink)

    # even smaller
    for subset in (['A'], ['A', 'B']):
        self.frame.reindex(columns=subset).info(verbose=False, buf=sink)

    # exhausting cases in DataFrame.info

    # columns but no index
    headless = DataFrame(columns=[0, 1, 3])
    _ = repr(headless)

    # no columns or index
    self.empty.info(buf=sink)

    frame = DataFrame(["a\n\r\tb"], columns=["a\n\r\td"], index=["a\n\r\tf"])
    for raw in ("\t", "\r", "a\n"):
        assert raw not in repr(frame)
Example #12
Source File: test_transform.py From recruit with Apache License 2.0 | 6 votes |
def test_transform_casting():
    """Check that a grouped ``transform`` of ``diff`` yields timedeltas.

    Covers both the Series path (``groupby(...)['DATETIME']``) and the
    DataFrame path (``df[[...]].groupby(...)``).
    """
    # 13046
    # One literal per table row; fields are whitespace-separated and the
    # quoted timestamp is a single field.
    data = (
        'idx A ID3 DATETIME\n'
        '0 B-028 b76cd912ff "2014-10-08 13:43:27"\n'
        '1 B-054 4a57ed0b02 "2014-10-08 14:26:19"\n'
        '2 B-076 1a682034f8 "2014-10-08 14:29:01"\n'
        '3 B-023 b76cd912ff "2014-10-08 18:39:34"\n'
        '4 B-023 f88g8d7sds "2014-10-08 18:40:18"\n'
        '5 B-033 b76cd912ff "2014-10-08 18:44:30"\n'
        '6 B-032 b76cd912ff "2014-10-08 18:46:00"\n'
        '7 B-037 b76cd912ff "2014-10-08 18:52:15"\n'
        '8 B-046 db959faf02 "2014-10-08 18:59:59"\n'
        '9 B-053 b76cd912ff "2014-10-08 19:17:48"\n'
        '10 B-065 b76cd912ff "2014-10-08 19:21:38"\n'
    )
    df = pd.read_csv(StringIO(data), sep=r'\s+',
                     index_col=[0], parse_dates=['DATETIME'])

    result = df.groupby('ID3')['DATETIME'].transform(lambda x: x.diff())
    assert is_timedelta64_dtype(result.dtype)

    result = df[['ID3', 'DATETIME']].groupby('ID3').transform(
        lambda x: x.diff())
    assert is_timedelta64_dtype(result.DATETIME.dtype)
Example #13
Source File: test_query_eval.py From recruit with Apache License 2.0 | 6 votes |
def test_query_with_nested_strings(self, parser, engine):
    """Check ``query`` with a string literal that itself contains quotes.

    The frame is filtered once by boolean indexing and once through
    ``DataFrame.query``; both results must match.
    """
    skip_if_no_pandas_parser(parser)
    # Columns are separated by two spaces because the reader below splits
    # on runs of two or more whitespace characters; the single spaces
    # inside the quoted event and inside the timestamp are data.
    # NOTE(review): column gaps were reconstructed; any original leading
    # indentation of the rows is not recoverable from here.
    raw = (
        'id  event  timestamp\n'
        '1  "page 1 load"  1/1/2014 0:00:01\n'
        '1  "page 1 exit"  1/1/2014 0:00:31\n'
        '2  "page 2 load"  1/1/2014 0:01:01\n'
        '2  "page 2 exit"  1/1/2014 0:01:31\n'
        '3  "page 3 load"  1/1/2014 0:02:01\n'
        '3  "page 3 exit"  1/1/2014 0:02:31\n'
        '4  "page 1 load"  2/1/2014 1:00:01\n'
        '4  "page 1 exit"  2/1/2014 1:00:31\n'
        '5  "page 2 load"  2/1/2014 1:01:01\n'
        '5  "page 2 exit"  2/1/2014 1:01:31\n'
        '6  "page 3 load"  2/1/2014 1:02:01\n'
        '6  "page 3 exit"  2/1/2014 1:02:31\n'
    )
    df = pd.read_csv(StringIO(raw), sep=r'\s{2,}', engine='python',
                     parse_dates=['timestamp'])
    expected = df[df.event == '"page 1 load"']
    res = df.query("""'"page 1 load"' in event""", parser=parser,
                   engine=engine)
    assert_frame_equal(expected, res)
Example #14
Source File: test_loc.py From recruit with Apache License 2.0 | 6 votes |
def test_loc_setitem_consistency_slice_column_len(self):
    """Check ``.loc[:, column]`` assignment on a two-level column frame.

    Two columns are converted to datetimes, their difference is stored in
    a new ``Duration`` column, and that column is compared with the
    expected values after the ``timedelta64[s]`` cast.
    """
    # .loc[:,column] setting with slice == len of the column
    # GH10408
    # One literal per CSV record; the space inside each timestamp is data.
    data = (
        "Level_0,,,Respondent,Respondent,Respondent,OtherCat,OtherCat\n"
        "Level_1,,,Something,StartDate,EndDate,Yes/No,SomethingElse\n"
        "Region,Site,RespondentID,,,,,\n"
        "Region_1,Site_1,3987227376,A,5/25/2015 10:59,5/25/2015 11:22,Yes,\n"
        "Region_1,Site_1,3980680971,A,5/21/2015 9:40,5/21/2015 9:52,Yes,Yes\n"
        "Region_1,Site_2,3977723249,A,5/20/2015 8:27,5/20/2015 8:41,Yes,\n"
        "Region_1,Site_2,3977723089,A,5/20/2015 8:33,5/20/2015 9:09,Yes,No"
    )

    df = pd.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1, 2])
    df.loc[:, ('Respondent', 'StartDate')] = pd.to_datetime(df.loc[:, (
        'Respondent', 'StartDate')])
    df.loc[:, ('Respondent', 'EndDate')] = pd.to_datetime(df.loc[:, (
        'Respondent', 'EndDate')])
    df.loc[:, ('Respondent', 'Duration')] = df.loc[:, (
        'Respondent', 'EndDate')] - df.loc[:, ('Respondent', 'StartDate')]

    df.loc[:, ('Respondent', 'Duration')] = df.loc[:, (
        'Respondent', 'Duration')].astype('timedelta64[s]')
    expected = Series([1380, 720, 840, 2160.], index=df.index,
                      name=('Respondent', 'Duration'))
    tm.assert_series_equal(df[('Respondent', 'Duration')], expected)
Example #15
Source File: test_repr_info.py From recruit with Apache License 2.0 | 6 votes |
def test_info_wide(self):
    """Check ``DataFrame.info`` output on a 101-column frame.

    The report produced with ``max_cols=101`` must have more than 100
    lines and must equal the report produced when the
    ``display.max_info_columns`` option is set to 101 instead.
    """
    from pandas import set_option, reset_option

    io = StringIO()
    df = DataFrame(np.random.randn(5, 101))
    df.info(buf=io)

    io = StringIO()
    df.info(buf=io, max_cols=101)
    rs = io.getvalue()
    assert len(rs.splitlines()) > 100
    xp = rs

    set_option('display.max_info_columns', 101)
    try:
        io = StringIO()
        df.info(buf=io)
        # Compare the freshly captured output. Previously ``rs`` was
        # compared with its own alias ``xp``, which could never fail.
        rs = io.getvalue()
        assert rs == xp
    finally:
        # Restore the option even when an assertion above fails.
        reset_option('display.max_info_columns')
Example #16
Source File: test_dtypes.py From recruit with Apache License 2.0 | 5 votes |
def test_empty_dtype(all_parsers, dtype, expected):
    """Read header-only data with ``dtype`` and compare with ``expected``.

    ``dtype`` and ``expected`` are supplied by parametrization that is not
    visible here.
    """
    # see gh-14712
    header_only = StringIO("a,b")

    result = all_parsers.read_csv(header_only, header=0, dtype=dtype)
    tm.assert_frame_equal(result, expected)
Example #17
Source File: test_dtypes.py From recruit with Apache License 2.0 | 5 votes |
def test_numeric_dtype(all_parsers, dtype):
    """Read a headerless two-row column of ``0`` and ``1`` as ``dtype``.

    ``dtype`` is supplied by parametrization that is not visible here.
    """
    source = StringIO("0\n1")
    expected = DataFrame([0, 1], dtype=dtype)

    result = all_parsers.read_csv(source, header=None, dtype=dtype)
    tm.assert_frame_equal(expected, result)
Example #18
Source File: test_usecols.py From recruit with Apache License 2.0 | 5 votes |
def test_callable_usecols(all_parsers, usecols, expected):
    """Read mixed-case headers with the parametrized ``usecols``.

    ``usecols`` and ``expected`` are supplied by parametrization that is
    not visible here.
    """
    # see gh-14154
    # One literal per CSV record so the row breaks are explicit.
    data = (
        "AaA,bBb,CCC,ddd\n"
        "0.056674973,8,True,a\n"
        "2.613230982,2,False,b\n"
        "3.568935038,7,False,a"
    )
    parser = all_parsers

    result = parser.read_csv(StringIO(data), usecols=usecols)
    tm.assert_frame_equal(result, expected)
Example #19
Source File: test_usecols.py From recruit with Apache License 2.0 | 5 votes |
def test_usecols(all_parsers, usecols):
    """Check that ``usecols`` keeps only columns ``b`` and ``c``.

    ``usecols`` is supplied by parametrization that is not visible here.
    """
    # One literal per CSV record so the row breaks are explicit.
    data = (
        "a,b,c\n"
        "1,2,3\n"
        "4,5,6\n"
        "7,8,9\n"
        "10,11,12"
    )
    parser = all_parsers
    result = parser.read_csv(StringIO(data), usecols=usecols)

    expected = DataFrame([[2, 3], [5, 6], [8, 9],
                          [11, 12]], columns=["b", "c"])
    tm.assert_frame_equal(result, expected)
Example #20
Source File: test_usecols.py From recruit with Apache License 2.0 | 5 votes |
def test_usecols_with_names(all_parsers):
    """Check positional ``usecols`` combined with replacement ``names``.

    Columns 1 and 2 are selected and the header row is replaced by the
    names ``foo`` and ``bar``.
    """
    # One literal per CSV record so the row breaks are explicit.
    data = (
        "a,b,c\n"
        "1,2,3\n"
        "4,5,6\n"
        "7,8,9\n"
        "10,11,12"
    )
    parser = all_parsers
    names = ["foo", "bar"]
    result = parser.read_csv(StringIO(data), names=names,
                             usecols=[1, 2], header=0)

    expected = DataFrame([[2, 3], [5, 6], [8, 9],
                          [11, 12]], columns=names)
    tm.assert_frame_equal(result, expected)
Example #21
Source File: test_usecols.py From recruit with Apache License 2.0 | 5 votes |
def test_usecols_with_mixed_encoding_strings(all_parsers, usecols):
    """Check that the parametrized ``usecols`` is rejected with ValueError.

    ``usecols`` is supplied by parametrization that is not visible here;
    the error message is matched against ``_msg_validate_usecols_arg``.
    """
    # One literal per CSV record so the row breaks are explicit.
    data = (
        "AAA,BBB,CCC,DDD\n"
        "0.056674973,8,True,a\n"
        "2.613230982,2,False,b\n"
        "3.568935038,7,False,a"
    )
    parser = all_parsers

    with pytest.raises(ValueError, match=_msg_validate_usecols_arg):
        parser.read_csv(StringIO(data), usecols=usecols)
Example #22
Source File: test_usecols.py From recruit with Apache License 2.0 | 5 votes |
def test_usecols_index_col_conflict2(all_parsers):
    """Read three of four columns and index the result by two of them."""
    # see gh-4201: test that index_col as integer reflects usecols
    csv_text = "a,b,c,d\nA,a,1,one\nB,b,2,two"

    unindexed = DataFrame({"b": ["a", "b"], "c": [1, 2],
                           "d": ("one", "two")})
    expected = unindexed.set_index(["b", "c"])

    result = all_parsers.read_csv(
        StringIO(csv_text),
        usecols=["b", "c", "d"],
        index_col=["b", "c"])
    tm.assert_frame_equal(result, expected)
Example #23
Source File: test_dtypes.py From recruit with Apache License 2.0 | 5 votes |
def test_dtype_with_converters(all_parsers):
    """Check the outcome when a column has both a dtype and a converter.

    The read must emit ``ParserWarning`` and column ``a`` must hold the
    converter's string output.
    """
    parser = all_parsers
    # One literal per CSV record so the row breaks are explicit.
    data = (
        "a,b\n"
        "1.1,2.2\n"
        "1.2,2.3"
    )

    # Dtype spec ignored if converters specified.
    with tm.assert_produces_warning(ParserWarning):
        result = parser.read_csv(StringIO(data), dtype={"a": "i8"},
                                 converters={"a": lambda x: str(x)})
    expected = DataFrame({"a": ["1.1", "1.2"], "b": [2.2, 2.3]})
    tm.assert_frame_equal(result, expected)
Example #24
Source File: test_dtypes.py From recruit with Apache License 2.0 | 5 votes |
def test_raise_on_passed_int_dtype_with_nas(all_parsers):
    """Check that an integer dtype on a column with a missing value raises.

    The expected ``ValueError`` message depends on ``parser.engine``.
    """
    # see gh-2631
    parser = all_parsers
    # One literal per CSV record; the spaces after the header commas are
    # data and are handled by ``skipinitialspace=True`` below.
    data = (
        "YEAR, DOY, a\n"
        "2001,106380451,10\n"
        "2001,,11\n"
        "2001,106380451,67"
    )

    msg = ("Integer column has NA values" if parser.engine == "c" else
           "Unable to convert column DOY")
    with pytest.raises(ValueError, match=msg):
        parser.read_csv(StringIO(data), dtype={"DOY": np.int64},
                        skipinitialspace=True)
Example #25
Source File: test_dtypes.py From recruit with Apache License 2.0 | 5 votes |
def test_empty_with_dup_column_pass_dtype_by_indexes_warn(all_parsers):
    """Read empty input with duplicate ``names`` and positional dtypes.

    The read is expected to emit ``UserWarning`` and to produce empty
    ``one`` / ``one.1`` columns with the requested dtypes.
    """
    # see gh-9424
    empty_columns = [Series([], name="one", dtype="u1"),
                     Series([], name="one.1", dtype="f")]
    expected = concat(empty_columns, axis=1)
    expected.index = expected.index.astype(object)

    # NOTE(review): the comparison is assumed to sit inside the warning
    # context; the flattened source does not show where the block ends.
    with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
        source = StringIO("")
        result = all_parsers.read_csv(source, names=["one", "one"],
                                      dtype={0: "u1", 1: "f"})
        tm.assert_frame_equal(result, expected)
Example #26
Source File: test_dtypes.py From recruit with Apache License 2.0 | 5 votes |
def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers):
    """Read a duplicate-name header with dtypes keyed by column position.

    The expected frame is empty with columns ``one`` and ``one.1``.
    """
    header_only = StringIO("one,one")
    result = all_parsers.read_csv(header_only, dtype={0: "u1", 1: "f"})

    empty_columns = {
        "one": np.empty(0, dtype="u1"),
        "one.1": np.empty(0, dtype="f"),
    }
    empty_index = Index([], dtype=object)
    expected = DataFrame(empty_columns, index=empty_index)
    tm.assert_frame_equal(result, expected)
Example #27
Source File: test_dtypes.py From recruit with Apache License 2.0 | 5 votes |
def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers):
    """Read a duplicate-name header with dtypes keyed by column name.

    The dtype mapping uses the names ``one`` and ``one.1``; the expected
    frame is empty with those two columns.
    """
    header_only = StringIO("one,one")
    by_name = {"one": "u1", "one.1": "f"}
    result = all_parsers.read_csv(header_only, dtype=by_name)

    empty_columns = {
        "one": np.empty(0, dtype="u1"),
        "one.1": np.empty(0, dtype="f"),
    }
    empty_index = Index([], dtype=object)
    expected = DataFrame(empty_columns, index=empty_index)
    tm.assert_frame_equal(result, expected)
Example #28
Source File: test_dtypes.py From recruit with Apache License 2.0 | 5 votes |
def test_empty_with_multi_index_pass_dtype(all_parsers):
    """Read header-only data with a two-column index and per-column dtypes.

    One dtype is keyed by name and one by position; the expected frame is
    empty with a ``MultiIndex`` named ``one`` / ``two``.
    """
    parser = all_parsers

    data = "one,two,three"
    result = parser.read_csv(StringIO(data), index_col=["one", "two"],
                             dtype={"one": "u1", 1: "f8"})

    exp_idx = MultiIndex.from_arrays([np.empty(0, dtype="u1"),
                                      np.empty(0, dtype=np.float64)],
                                     names=["one", "two"])
    # Builtin ``object`` is used because the ``np.object`` alias was
    # deprecated and later removed from NumPy; both mean the same dtype.
    expected = DataFrame({"three": np.empty(0, dtype=object)},
                         index=exp_idx)
    tm.assert_frame_equal(result, expected)
Example #29
Source File: test_dtypes.py From recruit with Apache License 2.0 | 5 votes |
def test_empty_with_index_pass_dtype(all_parsers):
    """Read header-only data with ``one`` as the index and explicit dtypes.

    The expected frame is empty, with a ``u1`` index named ``one`` and a
    single ``two`` column.
    """
    header_only = StringIO("one,two")
    result = all_parsers.read_csv(header_only, index_col=["one"],
                                  dtype={"one": "u1", 1: "f"})

    empty_index = Index([], dtype="u1", name="one")
    expected = DataFrame({"two": np.empty(0, dtype="f")},
                         index=empty_index)
    tm.assert_frame_equal(result, expected)
Example #30
Source File: test_dtypes.py From recruit with Apache License 2.0 | 5 votes |
def test_empty_pass_dtype(all_parsers):
    """Read header-only data with a dtype given for one column only.

    The expected frame is empty: ``one`` has dtype ``u1`` and ``two`` has
    object dtype.
    """
    parser = all_parsers

    data = "one,two"
    result = parser.read_csv(StringIO(data), dtype={"one": "u1"})

    # Builtin ``object`` is used because the ``np.object`` alias was
    # deprecated and later removed from NumPy; both mean the same dtype.
    expected = DataFrame({"one": np.empty(0, dtype="u1"),
                          "two": np.empty(0, dtype=object)},
                         index=Index([], dtype=object))
    tm.assert_frame_equal(result, expected)