Python Examples of pandas.wide_to_long

Source File: test_melt.py From coffeegrindsize with MIT License

6 votes

def test_invalid_suffixtype(self):
        # If all stubs names end with a string, but a numeric suffix is
        # assumed,  an empty data frame is returned
        df = pd.DataFrame({'Aone': [1.0, 2.0],
                           'Atwo': [3.0, 4.0],
                           'Bone': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': '',
                    'Aone': [],
                    'Atwo': [],
                    'Bone': [],
                    'id': [],
                    'year': [],
                    'A': [],
                    'B': []}
        expected = pd.DataFrame(exp_data).astype({'year': 'int'})

        expected = expected.set_index(['id', 'year'])
        expected.index.set_levels([0, 1], level=0, inplace=True)
        result = wide_to_long(df, ['A', 'B'], i='id', j='year')
        tm.assert_frame_equal(result.sort_index(axis=1),
                              expected.sort_index(axis=1))

Source File: test_melt.py From coffeegrindsize with MIT License

6 votes

def test_float_suffix(self):
        df = pd.DataFrame({
            'treatment_1.1': [1.0, 2.0],
            'treatment_2.1': [3.0, 4.0],
            'result_1.2': [5.0, 6.0],
            'result_1': [0, 9],
            'A': ['X1', 'X2']})
        expected = pd.DataFrame({
            'A': ['X1', 'X1', 'X1', 'X1', 'X2', 'X2', 'X2', 'X2'],
            'colname': [1, 1.1, 1.2, 2.1, 1, 1.1, 1.2, 2.1],
            'result': [0.0, np.nan, 5.0, np.nan, 9.0, np.nan, 6.0, np.nan],
            'treatment': [np.nan, 1.0, np.nan, 3.0, np.nan, 2.0, np.nan, 4.0]})
        expected = expected.set_index(['A', 'colname'])
        result = wide_to_long(df, ['result', 'treatment'],
                              i='A', j='colname', suffix='[0-9.]+', sep='_')
        tm.assert_frame_equal(result, expected)

Source File: test_melt.py From twitter-stock-recommendation with MIT License

6 votes

def test_unbalanced(self):
        # test that we can have a varying amount of time variables
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': ['X1', 'X1', 'X2', 'X2'],
                    'A': [1.0, 3.0, 2.0, 4.0],
                    'B': [5.0, np.nan, 6.0, np.nan],
                    'id': [0, 0, 1, 1],
                    'year': [2010, 2011, 2010, 2011]}
        expected = pd.DataFrame(exp_data)
        expected = expected.set_index(['id', 'year'])[["X", "A", "B"]]
        result = wide_to_long(df, ['A', 'B'], i='id', j='year')
        tm.assert_frame_equal(result, expected)

Source File: test_melt.py From twitter-stock-recommendation with MIT License

6 votes

def test_invalid_separator(self):
        # if an invalid separator is supplied a empty data frame is returned
        sep = 'nope!'
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': '',
                    'A2010': [],
                    'A2011': [],
                    'B2010': [],
                    'id': [],
                    'year': [],
                    'A': [],
                    'B': []}
        expected = pd.DataFrame(exp_data).astype({'year': 'int'})
        expected = expected.set_index(['id', 'year'])[[
            'X', 'A2010', 'A2011', 'B2010', 'A', 'B']]
        expected.index.set_levels([0, 1], level=0, inplace=True)
        result = wide_to_long(df, ['A', 'B'], i='id', j='year', sep=sep)
        tm.assert_frame_equal(result.sort_index(axis=1),
                              expected.sort_index(axis=1))

Source File: test_melt.py From recruit with Apache License 2.0

6 votes

def test_unbalanced(self):
        # test that we can have a varying amount of time variables
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': ['X1', 'X1', 'X2', 'X2'],
                    'A': [1.0, 3.0, 2.0, 4.0],
                    'B': [5.0, np.nan, 6.0, np.nan],
                    'id': [0, 0, 1, 1],
                    'year': [2010, 2011, 2010, 2011]}
        expected = pd.DataFrame(exp_data)
        expected = expected.set_index(['id', 'year'])[["X", "A", "B"]]
        result = wide_to_long(df, ['A', 'B'], i='id', j='year')
        tm.assert_frame_equal(result, expected)

Source File: test_melt.py From recruit with Apache License 2.0

6 votes

def test_invalid_separator(self):
        # if an invalid separator is supplied a empty data frame is returned
        sep = 'nope!'
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': '',
                    'A2010': [],
                    'A2011': [],
                    'B2010': [],
                    'id': [],
                    'year': [],
                    'A': [],
                    'B': []}
        expected = pd.DataFrame(exp_data).astype({'year': 'int'})
        expected = expected.set_index(['id', 'year'])[[
            'X', 'A2010', 'A2011', 'B2010', 'A', 'B']]
        expected.index.set_levels([0, 1], level=0, inplace=True)
        result = wide_to_long(df, ['A', 'B'], i='id', j='year', sep=sep)
        tm.assert_frame_equal(result.sort_index(axis=1),
                              expected.sort_index(axis=1))

Source File: test_melt.py From recruit with Apache License 2.0

6 votes

def test_col_substring_of_stubname(self):
        # GH22468
        # Don't raise ValueError when a column name is a substring
        # of a stubname that's been passed as a string
        wide_data = {'node_id': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4},
                     'A': {0: 0.80, 1: 0.0, 2: 0.25, 3: 1.0, 4: 0.81},
                     'PA0': {0: 0.74, 1: 0.56, 2: 0.56, 3: 0.98, 4: 0.6},
                     'PA1': {0: 0.77, 1: 0.64, 2: 0.52, 3: 0.98, 4: 0.67},
                     'PA3': {0: 0.34, 1: 0.70, 2: 0.52, 3: 0.98, 4: 0.67}
                     }
        wide_df = pd.DataFrame.from_dict(wide_data)
        expected = pd.wide_to_long(wide_df,
                                   stubnames=['PA'],
                                   i=['node_id', 'A'],
                                   j='time')
        result = pd.wide_to_long(wide_df,
                                 stubnames='PA',
                                 i=['node_id', 'A'],
                                 j='time')
        tm.assert_frame_equal(result, expected)

Source File: test_melt.py From coffeegrindsize with MIT License

6 votes

def test_col_substring_of_stubname(self):
        # GH22468
        # Don't raise ValueError when a column name is a substring
        # of a stubname that's been passed as a string
        wide_data = {'node_id': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4},
                     'A': {0: 0.80, 1: 0.0, 2: 0.25, 3: 1.0, 4: 0.81},
                     'PA0': {0: 0.74, 1: 0.56, 2: 0.56, 3: 0.98, 4: 0.6},
                     'PA1': {0: 0.77, 1: 0.64, 2: 0.52, 3: 0.98, 4: 0.67},
                     'PA3': {0: 0.34, 1: 0.70, 2: 0.52, 3: 0.98, 4: 0.67}
                     }
        wide_df = pd.DataFrame.from_dict(wide_data)
        expected = pd.wide_to_long(wide_df,
                                   stubnames=['PA'],
                                   i=['node_id', 'A'],
                                   j='time')
        result = pd.wide_to_long(wide_df,
                                 stubnames='PA',
                                 i=['node_id', 'A'],
                                 j='time')
        tm.assert_frame_equal(result, expected)

Source File: test_melt.py From vnpy_crypto with MIT License

6 votes

def test_unbalanced(self):
        # test that we can have a varying amount of time variables
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': ['X1', 'X1', 'X2', 'X2'],
                    'A': [1.0, 3.0, 2.0, 4.0],
                    'B': [5.0, np.nan, 6.0, np.nan],
                    'id': [0, 0, 1, 1],
                    'year': [2010, 2011, 2010, 2011]}
        expected = pd.DataFrame(exp_data)
        expected = expected.set_index(['id', 'year'])[["X", "A", "B"]]
        result = wide_to_long(df, ['A', 'B'], i='id', j='year')
        tm.assert_frame_equal(result, expected)

Source File: test_melt.py From recruit with Apache License 2.0

6 votes

def test_invalid_suffixtype(self):
        # If all stubs names end with a string, but a numeric suffix is
        # assumed,  an empty data frame is returned
        df = pd.DataFrame({'Aone': [1.0, 2.0],
                           'Atwo': [3.0, 4.0],
                           'Bone': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': '',
                    'Aone': [],
                    'Atwo': [],
                    'Bone': [],
                    'id': [],
                    'year': [],
                    'A': [],
                    'B': []}
        expected = pd.DataFrame(exp_data).astype({'year': 'int'})

        expected = expected.set_index(['id', 'year'])
        expected.index.set_levels([0, 1], level=0, inplace=True)
        result = wide_to_long(df, ['A', 'B'], i='id', j='year')
        tm.assert_frame_equal(result.sort_index(axis=1),
                              expected.sort_index(axis=1))

Source File: test_melt.py From recruit with Apache License 2.0

6 votes

def test_multiple_id_columns(self):
        # Taken from http://www.ats.ucla.edu/stat/stata/modules/reshapel.htm
        df = pd.DataFrame({
            'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3],
            'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3],
            'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
            'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9]
        })
        expected = pd.DataFrame({
            'ht': [2.8, 3.4, 2.9, 3.8, 2.2, 2.9, 2.0, 3.2, 1.8,
                   2.8, 1.9, 2.4, 2.2, 3.3, 2.3, 3.4, 2.1, 2.9],
            'famid': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3],
            'birth': [1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3],
            'age': [1, 2, 1, 2, 1, 2, 1, 2, 1,
                    2, 1, 2, 1, 2, 1, 2, 1, 2]
        })
        expected = expected.set_index(['famid', 'birth', 'age'])[['ht']]
        result = wide_to_long(df, 'ht', i=['famid', 'birth'], j='age')
        tm.assert_frame_equal(result, expected)

Source File: test_melt.py From recruit with Apache License 2.0

6 votes

def test_float_suffix(self):
        df = pd.DataFrame({
            'treatment_1.1': [1.0, 2.0],
            'treatment_2.1': [3.0, 4.0],
            'result_1.2': [5.0, 6.0],
            'result_1': [0, 9],
            'A': ['X1', 'X2']})
        expected = pd.DataFrame({
            'A': ['X1', 'X1', 'X1', 'X1', 'X2', 'X2', 'X2', 'X2'],
            'colname': [1, 1.1, 1.2, 2.1, 1, 1.1, 1.2, 2.1],
            'result': [0.0, np.nan, 5.0, np.nan, 9.0, np.nan, 6.0, np.nan],
            'treatment': [np.nan, 1.0, np.nan, 3.0, np.nan, 2.0, np.nan, 4.0]})
        expected = expected.set_index(['A', 'colname'])
        result = wide_to_long(df, ['result', 'treatment'],
                              i='A', j='colname', suffix='[0-9.]+', sep='_')
        tm.assert_frame_equal(result, expected)

Source File: test_melt.py From vnpy_crypto with MIT License

6 votes

def test_invalid_separator(self):
        # if an invalid separator is supplied a empty data frame is returned
        sep = 'nope!'
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': '',
                    'A2010': [],
                    'A2011': [],
                    'B2010': [],
                    'id': [],
                    'year': [],
                    'A': [],
                    'B': []}
        expected = pd.DataFrame(exp_data).astype({'year': 'int'})
        expected = expected.set_index(['id', 'year'])[[
            'X', 'A2010', 'A2011', 'B2010', 'A', 'B']]
        expected.index.set_levels([0, 1], level=0, inplace=True)
        result = wide_to_long(df, ['A', 'B'], i='id', j='year', sep=sep)
        tm.assert_frame_equal(result.sort_index(axis=1),
                              expected.sort_index(axis=1))

Source File: test_melt.py From vnpy_crypto with MIT License

6 votes

def test_invalid_suffixtype(self):
        # If all stubs names end with a string, but a numeric suffix is
        # assumed,  an empty data frame is returned
        df = pd.DataFrame({'Aone': [1.0, 2.0],
                           'Atwo': [3.0, 4.0],
                           'Bone': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': '',
                    'Aone': [],
                    'Atwo': [],
                    'Bone': [],
                    'id': [],
                    'year': [],
                    'A': [],
                    'B': []}
        expected = pd.DataFrame(exp_data).astype({'year': 'int'})

        expected = expected.set_index(['id', 'year'])
        expected.index.set_levels([0, 1], level=0, inplace=True)
        result = wide_to_long(df, ['A', 'B'], i='id', j='year')
        tm.assert_frame_equal(result.sort_index(axis=1),
                              expected.sort_index(axis=1))

Source File: test_melt.py From vnpy_crypto with MIT License

6 votes

def test_multiple_id_columns(self):
        # Taken from http://www.ats.ucla.edu/stat/stata/modules/reshapel.htm
        df = pd.DataFrame({
            'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3],
            'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3],
            'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
            'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9]
        })
        expected = pd.DataFrame({
            'ht': [2.8, 3.4, 2.9, 3.8, 2.2, 2.9, 2.0, 3.2, 1.8,
                   2.8, 1.9, 2.4, 2.2, 3.3, 2.3, 3.4, 2.1, 2.9],
            'famid': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3],
            'birth': [1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3],
            'age': [1, 2, 1, 2, 1, 2, 1, 2, 1,
                    2, 1, 2, 1, 2, 1, 2, 1, 2]
        })
        expected = expected.set_index(['famid', 'birth', 'age'])[['ht']]
        result = wide_to_long(df, 'ht', i=['famid', 'birth'], j='age')
        tm.assert_frame_equal(result, expected)

Source File: test_melt.py From coffeegrindsize with MIT License

6 votes

def test_multiple_id_columns(self):
        # Taken from http://www.ats.ucla.edu/stat/stata/modules/reshapel.htm
        df = pd.DataFrame({
            'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3],
            'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3],
            'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
            'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9]
        })
        expected = pd.DataFrame({
            'ht': [2.8, 3.4, 2.9, 3.8, 2.2, 2.9, 2.0, 3.2, 1.8,
                   2.8, 1.9, 2.4, 2.2, 3.3, 2.3, 3.4, 2.1, 2.9],
            'famid': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3],
            'birth': [1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3],
            'age': [1, 2, 1, 2, 1, 2, 1, 2, 1,
                    2, 1, 2, 1, 2, 1, 2, 1, 2]
        })
        expected = expected.set_index(['famid', 'birth', 'age'])[['ht']]
        result = wide_to_long(df, 'ht', i=['famid', 'birth'], j='age')
        tm.assert_frame_equal(result, expected)

Source File: test_melt.py From coffeegrindsize with MIT License

6 votes

def test_invalid_separator(self):
        # if an invalid separator is supplied a empty data frame is returned
        sep = 'nope!'
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': '',
                    'A2010': [],
                    'A2011': [],
                    'B2010': [],
                    'id': [],
                    'year': [],
                    'A': [],
                    'B': []}
        expected = pd.DataFrame(exp_data).astype({'year': 'int'})
        expected = expected.set_index(['id', 'year'])[[
            'X', 'A2010', 'A2011', 'B2010', 'A', 'B']]
        expected.index.set_levels([0, 1], level=0, inplace=True)
        result = wide_to_long(df, ['A', 'B'], i='id', j='year', sep=sep)
        tm.assert_frame_equal(result.sort_index(axis=1),
                              expected.sort_index(axis=1))

Source File: test_melt.py From vnpy_crypto with MIT License

6 votes

def test_float_suffix(self):
        df = pd.DataFrame({
            'treatment_1.1': [1.0, 2.0],
            'treatment_2.1': [3.0, 4.0],
            'result_1.2': [5.0, 6.0],
            'result_1': [0, 9],
            'A': ['X1', 'X2']})
        expected = pd.DataFrame({
            'A': ['X1', 'X1', 'X1', 'X1', 'X2', 'X2', 'X2', 'X2'],
            'colname': [1, 1.1, 1.2, 2.1, 1, 1.1, 1.2, 2.1],
            'result': [0.0, np.nan, 5.0, np.nan, 9.0, np.nan, 6.0, np.nan],
            'treatment': [np.nan, 1.0, np.nan, 3.0, np.nan, 2.0, np.nan, 4.0]})
        expected = expected.set_index(['A', 'colname'])
        result = wide_to_long(df, ['result', 'treatment'],
                              i='A', j='colname', suffix='[0-9.]+', sep='_')
        tm.assert_frame_equal(result, expected)

Source File: test_subclass.py From vnpy_crypto with MIT License

6 votes

def test_subclassed_wide_to_long(self):
        # GH 9762

        np.random.seed(123)
        x = np.random.randn(3)
        df = tm.SubclassedDataFrame({
            "A1970": {0: "a", 1: "b", 2: "c"},
            "A1980": {0: "d", 1: "e", 2: "f"},
            "B1970": {0: 2.5, 1: 1.2, 2: .7},
            "B1980": {0: 3.2, 1: 1.3, 2: .1},
            "X": dict(zip(range(3), x))})

        df["id"] = df.index
        exp_data = {"X": x.tolist() + x.tolist(),
                    "A": ['a', 'b', 'c', 'd', 'e', 'f'],
                    "B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1],
                    "year": [1970, 1970, 1970, 1980, 1980, 1980],
                    "id": [0, 1, 2, 0, 1, 2]}
        expected = tm.SubclassedDataFrame(exp_data)
        expected = expected.set_index(['id', 'year'])[["X", "A", "B"]]
        long_frame = pd.wide_to_long(df, ["A", "B"], i="id", j="year")

        tm.assert_frame_equal(long_frame, expected)

Source File: test_melt.py From coffeegrindsize with MIT License

6 votes

def test_unbalanced(self):
        # test that we can have a varying amount of time variables
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': ['X1', 'X1', 'X2', 'X2'],
                    'A': [1.0, 3.0, 2.0, 4.0],
                    'B': [5.0, np.nan, 6.0, np.nan],
                    'id': [0, 0, 1, 1],
                    'year': [2010, 2011, 2010, 2011]}
        expected = pd.DataFrame(exp_data)
        expected = expected.set_index(['id', 'year'])[["X", "A", "B"]]
        result = wide_to_long(df, ['A', 'B'], i='id', j='year')
        tm.assert_frame_equal(result, expected)