Python pandas.wide_to_long() Examples
The following are 30 code examples of pandas.wide_to_long().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: test_melt.py From coffeegrindsize with MIT License | 6 votes |
def test_invalid_suffixtype(self): # If all stubs names end with a string, but a numeric suffix is # assumed, an empty data frame is returned df = pd.DataFrame({'Aone': [1.0, 2.0], 'Atwo': [3.0, 4.0], 'Bone': [5.0, 6.0], 'X': ['X1', 'X2']}) df['id'] = df.index exp_data = {'X': '', 'Aone': [], 'Atwo': [], 'Bone': [], 'id': [], 'year': [], 'A': [], 'B': []} expected = pd.DataFrame(exp_data).astype({'year': 'int'}) expected = expected.set_index(['id', 'year']) expected.index.set_levels([0, 1], level=0, inplace=True) result = wide_to_long(df, ['A', 'B'], i='id', j='year') tm.assert_frame_equal(result.sort_index(axis=1), expected.sort_index(axis=1))
Example #2
Source File: test_melt.py From coffeegrindsize with MIT License | 6 votes |
def test_float_suffix(self): df = pd.DataFrame({ 'treatment_1.1': [1.0, 2.0], 'treatment_2.1': [3.0, 4.0], 'result_1.2': [5.0, 6.0], 'result_1': [0, 9], 'A': ['X1', 'X2']}) expected = pd.DataFrame({ 'A': ['X1', 'X1', 'X1', 'X1', 'X2', 'X2', 'X2', 'X2'], 'colname': [1, 1.1, 1.2, 2.1, 1, 1.1, 1.2, 2.1], 'result': [0.0, np.nan, 5.0, np.nan, 9.0, np.nan, 6.0, np.nan], 'treatment': [np.nan, 1.0, np.nan, 3.0, np.nan, 2.0, np.nan, 4.0]}) expected = expected.set_index(['A', 'colname']) result = wide_to_long(df, ['result', 'treatment'], i='A', j='colname', suffix='[0-9.]+', sep='_') tm.assert_frame_equal(result, expected)
Example #3
Source File: test_melt.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_unbalanced(self): # test that we can have a varying amount of time variables df = pd.DataFrame({'A2010': [1.0, 2.0], 'A2011': [3.0, 4.0], 'B2010': [5.0, 6.0], 'X': ['X1', 'X2']}) df['id'] = df.index exp_data = {'X': ['X1', 'X1', 'X2', 'X2'], 'A': [1.0, 3.0, 2.0, 4.0], 'B': [5.0, np.nan, 6.0, np.nan], 'id': [0, 0, 1, 1], 'year': [2010, 2011, 2010, 2011]} expected = pd.DataFrame(exp_data) expected = expected.set_index(['id', 'year'])[["X", "A", "B"]] result = wide_to_long(df, ['A', 'B'], i='id', j='year') tm.assert_frame_equal(result, expected)
Example #4
Source File: test_melt.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_invalid_separator(self): # if an invalid separator is supplied a empty data frame is returned sep = 'nope!' df = pd.DataFrame({'A2010': [1.0, 2.0], 'A2011': [3.0, 4.0], 'B2010': [5.0, 6.0], 'X': ['X1', 'X2']}) df['id'] = df.index exp_data = {'X': '', 'A2010': [], 'A2011': [], 'B2010': [], 'id': [], 'year': [], 'A': [], 'B': []} expected = pd.DataFrame(exp_data).astype({'year': 'int'}) expected = expected.set_index(['id', 'year'])[[ 'X', 'A2010', 'A2011', 'B2010', 'A', 'B']] expected.index.set_levels([0, 1], level=0, inplace=True) result = wide_to_long(df, ['A', 'B'], i='id', j='year', sep=sep) tm.assert_frame_equal(result.sort_index(axis=1), expected.sort_index(axis=1))
Example #5
Source File: test_melt.py From recruit with Apache License 2.0 | 6 votes |
def test_unbalanced(self): # test that we can have a varying amount of time variables df = pd.DataFrame({'A2010': [1.0, 2.0], 'A2011': [3.0, 4.0], 'B2010': [5.0, 6.0], 'X': ['X1', 'X2']}) df['id'] = df.index exp_data = {'X': ['X1', 'X1', 'X2', 'X2'], 'A': [1.0, 3.0, 2.0, 4.0], 'B': [5.0, np.nan, 6.0, np.nan], 'id': [0, 0, 1, 1], 'year': [2010, 2011, 2010, 2011]} expected = pd.DataFrame(exp_data) expected = expected.set_index(['id', 'year'])[["X", "A", "B"]] result = wide_to_long(df, ['A', 'B'], i='id', j='year') tm.assert_frame_equal(result, expected)
Example #6
Source File: test_melt.py From recruit with Apache License 2.0 | 6 votes |
def test_invalid_separator(self): # if an invalid separator is supplied a empty data frame is returned sep = 'nope!' df = pd.DataFrame({'A2010': [1.0, 2.0], 'A2011': [3.0, 4.0], 'B2010': [5.0, 6.0], 'X': ['X1', 'X2']}) df['id'] = df.index exp_data = {'X': '', 'A2010': [], 'A2011': [], 'B2010': [], 'id': [], 'year': [], 'A': [], 'B': []} expected = pd.DataFrame(exp_data).astype({'year': 'int'}) expected = expected.set_index(['id', 'year'])[[ 'X', 'A2010', 'A2011', 'B2010', 'A', 'B']] expected.index.set_levels([0, 1], level=0, inplace=True) result = wide_to_long(df, ['A', 'B'], i='id', j='year', sep=sep) tm.assert_frame_equal(result.sort_index(axis=1), expected.sort_index(axis=1))
Example #7
Source File: test_melt.py From recruit with Apache License 2.0 | 6 votes |
def test_col_substring_of_stubname(self): # GH22468 # Don't raise ValueError when a column name is a substring # of a stubname that's been passed as a string wide_data = {'node_id': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4}, 'A': {0: 0.80, 1: 0.0, 2: 0.25, 3: 1.0, 4: 0.81}, 'PA0': {0: 0.74, 1: 0.56, 2: 0.56, 3: 0.98, 4: 0.6}, 'PA1': {0: 0.77, 1: 0.64, 2: 0.52, 3: 0.98, 4: 0.67}, 'PA3': {0: 0.34, 1: 0.70, 2: 0.52, 3: 0.98, 4: 0.67} } wide_df = pd.DataFrame.from_dict(wide_data) expected = pd.wide_to_long(wide_df, stubnames=['PA'], i=['node_id', 'A'], j='time') result = pd.wide_to_long(wide_df, stubnames='PA', i=['node_id', 'A'], j='time') tm.assert_frame_equal(result, expected)
Example #8
Source File: test_melt.py From coffeegrindsize with MIT License | 6 votes |
def test_col_substring_of_stubname(self): # GH22468 # Don't raise ValueError when a column name is a substring # of a stubname that's been passed as a string wide_data = {'node_id': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4}, 'A': {0: 0.80, 1: 0.0, 2: 0.25, 3: 1.0, 4: 0.81}, 'PA0': {0: 0.74, 1: 0.56, 2: 0.56, 3: 0.98, 4: 0.6}, 'PA1': {0: 0.77, 1: 0.64, 2: 0.52, 3: 0.98, 4: 0.67}, 'PA3': {0: 0.34, 1: 0.70, 2: 0.52, 3: 0.98, 4: 0.67} } wide_df = pd.DataFrame.from_dict(wide_data) expected = pd.wide_to_long(wide_df, stubnames=['PA'], i=['node_id', 'A'], j='time') result = pd.wide_to_long(wide_df, stubnames='PA', i=['node_id', 'A'], j='time') tm.assert_frame_equal(result, expected)
Example #9
Source File: test_melt.py From vnpy_crypto with MIT License | 6 votes |
def test_unbalanced(self): # test that we can have a varying amount of time variables df = pd.DataFrame({'A2010': [1.0, 2.0], 'A2011': [3.0, 4.0], 'B2010': [5.0, 6.0], 'X': ['X1', 'X2']}) df['id'] = df.index exp_data = {'X': ['X1', 'X1', 'X2', 'X2'], 'A': [1.0, 3.0, 2.0, 4.0], 'B': [5.0, np.nan, 6.0, np.nan], 'id': [0, 0, 1, 1], 'year': [2010, 2011, 2010, 2011]} expected = pd.DataFrame(exp_data) expected = expected.set_index(['id', 'year'])[["X", "A", "B"]] result = wide_to_long(df, ['A', 'B'], i='id', j='year') tm.assert_frame_equal(result, expected)
Example #10
Source File: test_melt.py From recruit with Apache License 2.0 | 6 votes |
def test_invalid_suffixtype(self): # If all stubs names end with a string, but a numeric suffix is # assumed, an empty data frame is returned df = pd.DataFrame({'Aone': [1.0, 2.0], 'Atwo': [3.0, 4.0], 'Bone': [5.0, 6.0], 'X': ['X1', 'X2']}) df['id'] = df.index exp_data = {'X': '', 'Aone': [], 'Atwo': [], 'Bone': [], 'id': [], 'year': [], 'A': [], 'B': []} expected = pd.DataFrame(exp_data).astype({'year': 'int'}) expected = expected.set_index(['id', 'year']) expected.index.set_levels([0, 1], level=0, inplace=True) result = wide_to_long(df, ['A', 'B'], i='id', j='year') tm.assert_frame_equal(result.sort_index(axis=1), expected.sort_index(axis=1))
Example #11
Source File: test_melt.py From recruit with Apache License 2.0 | 6 votes |
def test_multiple_id_columns(self): # Taken from http://www.ats.ucla.edu/stat/stata/modules/reshapel.htm df = pd.DataFrame({ 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3], 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3], 'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], 'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] }) expected = pd.DataFrame({ 'ht': [2.8, 3.4, 2.9, 3.8, 2.2, 2.9, 2.0, 3.2, 1.8, 2.8, 1.9, 2.4, 2.2, 3.3, 2.3, 3.4, 2.1, 2.9], 'famid': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3], 'birth': [1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3], 'age': [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2] }) expected = expected.set_index(['famid', 'birth', 'age'])[['ht']] result = wide_to_long(df, 'ht', i=['famid', 'birth'], j='age') tm.assert_frame_equal(result, expected)
Example #12
Source File: test_melt.py From recruit with Apache License 2.0 | 6 votes |
def test_float_suffix(self): df = pd.DataFrame({ 'treatment_1.1': [1.0, 2.0], 'treatment_2.1': [3.0, 4.0], 'result_1.2': [5.0, 6.0], 'result_1': [0, 9], 'A': ['X1', 'X2']}) expected = pd.DataFrame({ 'A': ['X1', 'X1', 'X1', 'X1', 'X2', 'X2', 'X2', 'X2'], 'colname': [1, 1.1, 1.2, 2.1, 1, 1.1, 1.2, 2.1], 'result': [0.0, np.nan, 5.0, np.nan, 9.0, np.nan, 6.0, np.nan], 'treatment': [np.nan, 1.0, np.nan, 3.0, np.nan, 2.0, np.nan, 4.0]}) expected = expected.set_index(['A', 'colname']) result = wide_to_long(df, ['result', 'treatment'], i='A', j='colname', suffix='[0-9.]+', sep='_') tm.assert_frame_equal(result, expected)
Example #13
Source File: test_melt.py From vnpy_crypto with MIT License | 6 votes |
def test_invalid_separator(self): # if an invalid separator is supplied a empty data frame is returned sep = 'nope!' df = pd.DataFrame({'A2010': [1.0, 2.0], 'A2011': [3.0, 4.0], 'B2010': [5.0, 6.0], 'X': ['X1', 'X2']}) df['id'] = df.index exp_data = {'X': '', 'A2010': [], 'A2011': [], 'B2010': [], 'id': [], 'year': [], 'A': [], 'B': []} expected = pd.DataFrame(exp_data).astype({'year': 'int'}) expected = expected.set_index(['id', 'year'])[[ 'X', 'A2010', 'A2011', 'B2010', 'A', 'B']] expected.index.set_levels([0, 1], level=0, inplace=True) result = wide_to_long(df, ['A', 'B'], i='id', j='year', sep=sep) tm.assert_frame_equal(result.sort_index(axis=1), expected.sort_index(axis=1))
Example #14
Source File: test_melt.py From vnpy_crypto with MIT License | 6 votes |
def test_invalid_suffixtype(self): # If all stubs names end with a string, but a numeric suffix is # assumed, an empty data frame is returned df = pd.DataFrame({'Aone': [1.0, 2.0], 'Atwo': [3.0, 4.0], 'Bone': [5.0, 6.0], 'X': ['X1', 'X2']}) df['id'] = df.index exp_data = {'X': '', 'Aone': [], 'Atwo': [], 'Bone': [], 'id': [], 'year': [], 'A': [], 'B': []} expected = pd.DataFrame(exp_data).astype({'year': 'int'}) expected = expected.set_index(['id', 'year']) expected.index.set_levels([0, 1], level=0, inplace=True) result = wide_to_long(df, ['A', 'B'], i='id', j='year') tm.assert_frame_equal(result.sort_index(axis=1), expected.sort_index(axis=1))
Example #15
Source File: test_melt.py From vnpy_crypto with MIT License | 6 votes |
def test_multiple_id_columns(self): # Taken from http://www.ats.ucla.edu/stat/stata/modules/reshapel.htm df = pd.DataFrame({ 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3], 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3], 'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], 'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] }) expected = pd.DataFrame({ 'ht': [2.8, 3.4, 2.9, 3.8, 2.2, 2.9, 2.0, 3.2, 1.8, 2.8, 1.9, 2.4, 2.2, 3.3, 2.3, 3.4, 2.1, 2.9], 'famid': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3], 'birth': [1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3], 'age': [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2] }) expected = expected.set_index(['famid', 'birth', 'age'])[['ht']] result = wide_to_long(df, 'ht', i=['famid', 'birth'], j='age') tm.assert_frame_equal(result, expected)
Example #16
Source File: test_melt.py From coffeegrindsize with MIT License | 6 votes |
def test_multiple_id_columns(self): # Taken from http://www.ats.ucla.edu/stat/stata/modules/reshapel.htm df = pd.DataFrame({ 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3], 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3], 'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], 'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] }) expected = pd.DataFrame({ 'ht': [2.8, 3.4, 2.9, 3.8, 2.2, 2.9, 2.0, 3.2, 1.8, 2.8, 1.9, 2.4, 2.2, 3.3, 2.3, 3.4, 2.1, 2.9], 'famid': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3], 'birth': [1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3], 'age': [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2] }) expected = expected.set_index(['famid', 'birth', 'age'])[['ht']] result = wide_to_long(df, 'ht', i=['famid', 'birth'], j='age') tm.assert_frame_equal(result, expected)
Example #17
Source File: test_melt.py From coffeegrindsize with MIT License | 6 votes |
def test_invalid_separator(self): # if an invalid separator is supplied a empty data frame is returned sep = 'nope!' df = pd.DataFrame({'A2010': [1.0, 2.0], 'A2011': [3.0, 4.0], 'B2010': [5.0, 6.0], 'X': ['X1', 'X2']}) df['id'] = df.index exp_data = {'X': '', 'A2010': [], 'A2011': [], 'B2010': [], 'id': [], 'year': [], 'A': [], 'B': []} expected = pd.DataFrame(exp_data).astype({'year': 'int'}) expected = expected.set_index(['id', 'year'])[[ 'X', 'A2010', 'A2011', 'B2010', 'A', 'B']] expected.index.set_levels([0, 1], level=0, inplace=True) result = wide_to_long(df, ['A', 'B'], i='id', j='year', sep=sep) tm.assert_frame_equal(result.sort_index(axis=1), expected.sort_index(axis=1))
Example #18
Source File: test_melt.py From vnpy_crypto with MIT License | 6 votes |
def test_float_suffix(self): df = pd.DataFrame({ 'treatment_1.1': [1.0, 2.0], 'treatment_2.1': [3.0, 4.0], 'result_1.2': [5.0, 6.0], 'result_1': [0, 9], 'A': ['X1', 'X2']}) expected = pd.DataFrame({ 'A': ['X1', 'X1', 'X1', 'X1', 'X2', 'X2', 'X2', 'X2'], 'colname': [1, 1.1, 1.2, 2.1, 1, 1.1, 1.2, 2.1], 'result': [0.0, np.nan, 5.0, np.nan, 9.0, np.nan, 6.0, np.nan], 'treatment': [np.nan, 1.0, np.nan, 3.0, np.nan, 2.0, np.nan, 4.0]}) expected = expected.set_index(['A', 'colname']) result = wide_to_long(df, ['result', 'treatment'], i='A', j='colname', suffix='[0-9.]+', sep='_') tm.assert_frame_equal(result, expected)
Example #19
Source File: test_subclass.py From vnpy_crypto with MIT License | 6 votes |
def test_subclassed_wide_to_long(self): # GH 9762 np.random.seed(123) x = np.random.randn(3) df = tm.SubclassedDataFrame({ "A1970": {0: "a", 1: "b", 2: "c"}, "A1980": {0: "d", 1: "e", 2: "f"}, "B1970": {0: 2.5, 1: 1.2, 2: .7}, "B1980": {0: 3.2, 1: 1.3, 2: .1}, "X": dict(zip(range(3), x))}) df["id"] = df.index exp_data = {"X": x.tolist() + x.tolist(), "A": ['a', 'b', 'c', 'd', 'e', 'f'], "B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1], "year": [1970, 1970, 1970, 1980, 1980, 1980], "id": [0, 1, 2, 0, 1, 2]} expected = tm.SubclassedDataFrame(exp_data) expected = expected.set_index(['id', 'year'])[["X", "A", "B"]] long_frame = pd.wide_to_long(df, ["A", "B"], i="id", j="year") tm.assert_frame_equal(long_frame, expected)
Example #20
Source File: test_melt.py From coffeegrindsize with MIT License | 6 votes |
def test_unbalanced(self): # test that we can have a varying amount of time variables df = pd.DataFrame({'A2010': [1.0, 2.0], 'A2011': [3.0, 4.0], 'B2010': [5.0, 6.0], 'X': ['X1', 'X2']}) df['id'] = df.index exp_data = {'X': ['X1', 'X1', 'X2', 'X2'], 'A': [1.0, 3.0, 2.0, 4.0], 'B': [5.0, np.nan, 6.0, np.nan], 'id': [0, 0, 1, 1], 'year': [2010, 2011, 2010, 2011]} expected = pd.DataFrame(exp_data) expected = expected.set_index(['id', 'year'])[["X", "A", "B"]] result = wide_to_long(df, ['A', 'B'], i='id', j='year') tm.assert_frame_equal(result, expected)
Example #21
Source File: test_melt.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_unbalanced(self): # test that we can have a varying amount of time variables df = pd.DataFrame({'A2010': [1.0, 2.0], 'A2011': [3.0, 4.0], 'B2010': [5.0, 6.0], 'X': ['X1', 'X2']}) df['id'] = df.index exp_data = {'X': ['X1', 'X1', 'X2', 'X2'], 'A': [1.0, 3.0, 2.0, 4.0], 'B': [5.0, np.nan, 6.0, np.nan], 'id': [0, 0, 1, 1], 'year': [2010, 2011, 2010, 2011]} expected = pd.DataFrame(exp_data) expected = expected.set_index(['id', 'year'])[["X", "A", "B"]] result = wide_to_long(df, ['A', 'B'], i='id', j='year') tm.assert_frame_equal(result, expected)
Example #22
Source File: test_melt.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_invalid_separator(self): # if an invalid separator is supplied a empty data frame is returned sep = 'nope!' df = pd.DataFrame({'A2010': [1.0, 2.0], 'A2011': [3.0, 4.0], 'B2010': [5.0, 6.0], 'X': ['X1', 'X2']}) df['id'] = df.index exp_data = {'X': '', 'A2010': [], 'A2011': [], 'B2010': [], 'id': [], 'year': [], 'A': [], 'B': []} expected = pd.DataFrame(exp_data).astype({'year': 'int'}) expected = expected.set_index(['id', 'year'])[[ 'X', 'A2010', 'A2011', 'B2010', 'A', 'B']] expected.index.set_levels([0, 1], level=0, inplace=True) result = wide_to_long(df, ['A', 'B'], i='id', j='year', sep=sep) tm.assert_frame_equal(result.sort_index(axis=1), expected.sort_index(axis=1))
Example #23
Source File: test_melt.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_invalid_suffixtype(self): # If all stubs names end with a string, but a numeric suffix is # assumed, an empty data frame is returned df = pd.DataFrame({'Aone': [1.0, 2.0], 'Atwo': [3.0, 4.0], 'Bone': [5.0, 6.0], 'X': ['X1', 'X2']}) df['id'] = df.index exp_data = {'X': '', 'Aone': [], 'Atwo': [], 'Bone': [], 'id': [], 'year': [], 'A': [], 'B': []} expected = pd.DataFrame(exp_data).astype({'year': 'int'}) expected = expected.set_index(['id', 'year']) expected.index.set_levels([0, 1], level=0, inplace=True) result = wide_to_long(df, ['A', 'B'], i='id', j='year') tm.assert_frame_equal(result.sort_index(axis=1), expected.sort_index(axis=1))
Example #24
Source File: test_melt.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_multiple_id_columns(self): # Taken from http://www.ats.ucla.edu/stat/stata/modules/reshapel.htm df = pd.DataFrame({ 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3], 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3], 'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], 'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] }) expected = pd.DataFrame({ 'ht': [2.8, 3.4, 2.9, 3.8, 2.2, 2.9, 2.0, 3.2, 1.8, 2.8, 1.9, 2.4, 2.2, 3.3, 2.3, 3.4, 2.1, 2.9], 'famid': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3], 'birth': [1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3], 'age': [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2] }) expected = expected.set_index(['famid', 'birth', 'age'])[['ht']] result = wide_to_long(df, 'ht', i=['famid', 'birth'], j='age') tm.assert_frame_equal(result, expected)
Example #25
Source File: test_reshape.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def test_multiple_id_columns(self): # Taken from http://www.ats.ucla.edu/stat/stata/modules/reshapel.htm df = pd.DataFrame({ 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3], 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3], 'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], 'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] }) exp_frame = pd.DataFrame({ 'ht': [2.8, 3.4, 2.9, 3.8, 2.2, 2.9, 2.0, 3.2, 1.8, 2.8, 1.9, 2.4, 2.2, 3.3, 2.3, 3.4, 2.1, 2.9], 'famid': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3], 'birth': [1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3], 'age': ['1', '2', '1', '2', '1', '2', '1', '2', '1', '2', '1', '2', '1', '2', '1', '2', '1', '2'] }) exp_frame = exp_frame.set_index(['famid', 'birth', 'age'])[['ht']] long_frame = wide_to_long(df, 'ht', i=['famid', 'birth'], j='age') tm.assert_frame_equal(long_frame, exp_frame)
Example #26
Source File: test_melt.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_float_suffix(self): df = pd.DataFrame({ 'treatment_1.1': [1.0, 2.0], 'treatment_2.1': [3.0, 4.0], 'result_1.2': [5.0, 6.0], 'result_1': [0, 9], 'A': ['X1', 'X2']}) expected = pd.DataFrame({ 'A': ['X1', 'X1', 'X1', 'X1', 'X2', 'X2', 'X2', 'X2'], 'colname': [1, 1.1, 1.2, 2.1, 1, 1.1, 1.2, 2.1], 'result': [0.0, np.nan, 5.0, np.nan, 9.0, np.nan, 6.0, np.nan], 'treatment': [np.nan, 1.0, np.nan, 3.0, np.nan, 2.0, np.nan, 4.0]}) expected = expected.set_index(['A', 'colname']) result = wide_to_long(df, ['result', 'treatment'], i='A', j='colname', suffix='[0-9.]+', sep='_') tm.assert_frame_equal(result, expected)
Example #27
Source File: test_melt.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_col_substring_of_stubname(self): # GH22468 # Don't raise ValueError when a column name is a substring # of a stubname that's been passed as a string wide_data = {'node_id': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4}, 'A': {0: 0.80, 1: 0.0, 2: 0.25, 3: 1.0, 4: 0.81}, 'PA0': {0: 0.74, 1: 0.56, 2: 0.56, 3: 0.98, 4: 0.6}, 'PA1': {0: 0.77, 1: 0.64, 2: 0.52, 3: 0.98, 4: 0.67}, 'PA3': {0: 0.34, 1: 0.70, 2: 0.52, 3: 0.98, 4: 0.67} } wide_df = pd.DataFrame.from_dict(wide_data) expected = pd.wide_to_long(wide_df, stubnames=['PA'], i=['node_id', 'A'], j='time') result = pd.wide_to_long(wide_df, stubnames='PA', i=['node_id', 'A'], j='time') tm.assert_frame_equal(result, expected)
Example #28
Source File: test_reshape.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def test_invalid_suffixtype(self): # If all stubs names end with a string, but a numeric suffix is # assumed, an empty data frame is returned df = pd.DataFrame({'Aone': [1.0, 2.0], 'Atwo': [3.0, 4.0], 'Bone': [5.0, 6.0], 'X': ['X1', 'X2']}) df['id'] = df.index exp_data = {'X': '', 'Aone': [], 'Atwo': [], 'Bone': [], 'id': [], 'year': [], 'A': [], 'B': []} exp_frame = pd.DataFrame(exp_data) exp_frame = exp_frame.set_index(['id', 'year'])[[ 'X', 'Aone', 'Atwo', 'Bone', 'A', 'B']] exp_frame.index.set_levels([[0, 1], []], inplace=True) long_frame = wide_to_long(df, ['A', 'B'], i='id', j='year') tm.assert_frame_equal(long_frame.sort_index(axis=1), exp_frame.sort_index(axis=1))
Example #29
Source File: test_reshape.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def test_invalid_separator(self): # if an invalid separator is supplied a empty data frame is returned sep = 'nope!' df = pd.DataFrame({'A2010': [1.0, 2.0], 'A2011': [3.0, 4.0], 'B2010': [5.0, 6.0], 'X': ['X1', 'X2']}) df['id'] = df.index exp_data = {'X': '', 'A2010': [], 'A2011': [], 'B2010': [], 'id': [], 'year': [], 'A': [], 'B': []} exp_frame = pd.DataFrame(exp_data) exp_frame = exp_frame.set_index(['id', 'year'])[[ 'X', 'A2010', 'A2011', 'B2010', 'A', 'B']] exp_frame.index.set_levels([[0, 1], []], inplace=True) long_frame = wide_to_long(df, ['A', 'B'], i='id', j='year', sep=sep) tm.assert_frame_equal(long_frame.sort_index(axis=1), exp_frame.sort_index(axis=1))
Example #30
Source File: test_reshape.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def test_unbalanced(self): # test that we can have a varying amount of time variables df = pd.DataFrame({'A2010': [1.0, 2.0], 'A2011': [3.0, 4.0], 'B2010': [5.0, 6.0], 'X': ['X1', 'X2']}) df['id'] = df.index exp_data = {'X': ['X1', 'X1', 'X2', 'X2'], 'A': [1.0, 3.0, 2.0, 4.0], 'B': [5.0, np.nan, 6.0, np.nan], 'id': [0, 0, 1, 1], 'year': ['2010', '2011', '2010', '2011']} exp_frame = pd.DataFrame(exp_data) exp_frame = exp_frame.set_index(['id', 'year'])[["X", "A", "B"]] long_frame = wide_to_long(df, ['A', 'B'], i='id', j='year') tm.assert_frame_equal(long_frame, exp_frame)